PerfTestThreadSemaphore.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. ////////////////////////////////////////////////////////////////////////
  2. // PerfTestThreadSemaphore.cpp
  3. //
  4. // Copyright (c) 2014, Electronic Arts Inc. All rights reserved.
  5. ////////////////////////////////////////////////////////////////////////
  6. #include "benchmarkenvironment/results.h"
  7. #include "benchmarkenvironment/statistics.h"
  8. #include "benchmarkenvironment/timer.h"
  9. #include "eathread/eathread_atomic.h"
  10. #include "eathread/eathread_semaphore.h"
  11. #include "eathread/eathread_thread.h"
  12. #include "EATest/EATest.h"
  13. #include "PerfTestThread.h"
  14. using namespace EA::Thread;
  15. using namespace benchmarkenvironment;
  // Number of Wait/Post round trips each worker thread performs in the looped
  // thread functions (producer, consumer, and the contended scheduler test).
  static const int kNumTestIterations = 10000;

  // Offset added to GetThreadTime() to build the absolute timeouts passed to
  // Semaphore::Wait and Thread::WaitForEnd; the multi-thread tests multiply it
  // by their thread count.
  // NOTE(review): units follow GetThreadTime() (presumably milliseconds) -- confirm.
  #define THREAD_WAIT_TIMEOUT 15000
  19. /////////////////////////////////////////////////////////////////////////////////////////////////
  20. // Producer/Consumer Tests & Test Functions
  21. /////////////////////////////////////////////////////////////////////////////////////////////////
  22. static AtomicInt32 gThreadSyncer = 0;
  23. void DECREMENT_AND_SPINWAIT(AtomicInt32& atomic_var)
  24. {
  25. atomic_var--;
  26. while (atomic_var > 0)
  27. ;
  28. }
  29. struct ProducerConsumerTestData
  30. {
  31. static Semaphore* mpEmptySlots;
  32. static Semaphore* mpFullSlots;
  33. Timer mThreadLocalTimer;
  34. ProducerConsumerTestData()
  35. : mThreadLocalTimer() {}
  36. static void InitSemaphores(int bufferCapacity)
  37. {
  38. SemaphoreParameters temp(bufferCapacity, true, "Producer/Consumer Full");
  39. mpEmptySlots = new Semaphore(&temp);
  40. SemaphoreParameters temp2(0, false, "Producer/Consumer Empty");
  41. mpFullSlots = new Semaphore(&temp2);
  42. }
  43. static void ResetSemaphores()
  44. {
  45. if (mpEmptySlots)
  46. {
  47. delete mpEmptySlots;
  48. mpEmptySlots = NULL;
  49. }
  50. if (mpFullSlots)
  51. {
  52. delete mpFullSlots;
  53. mpFullSlots = NULL;
  54. }
  55. }
  56. };
  57. // Initialization of the static members...
  58. Semaphore* ProducerConsumerTestData::mpEmptySlots = NULL;
  59. Semaphore* ProducerConsumerTestData::mpFullSlots = NULL;
  60. static intptr_t ProducerThreadFunction(void* pTestData)
  61. {
  62. ProducerConsumerTestData& testData = *static_cast<ProducerConsumerTestData*>(pTestData);
  63. testData.mThreadLocalTimer.Start();
  64. for (int i = 0; i < kNumTestIterations; ++i)
  65. {
  66. testData.mpEmptySlots->Wait();
  67. testData.mpFullSlots->Post();
  68. }
  69. testData.mThreadLocalTimer.Stop();
  70. EAT_ASSERT(testData.mThreadLocalTimer.AsSeconds() >= 0.0);
  71. return 0;
  72. }
  73. static intptr_t ConsumerThreadFunction(void* pTestData)
  74. {
  75. ProducerConsumerTestData& testData = *static_cast<ProducerConsumerTestData*>(pTestData);
  76. testData.mThreadLocalTimer.Start();
  77. for (int i = 0; i < kNumTestIterations; ++i)
  78. {
  79. testData.mpFullSlots->Wait();
  80. testData.mpEmptySlots->Post();
  81. }
  82. testData.mThreadLocalTimer.Stop();
  83. EAT_ASSERT(testData.mThreadLocalTimer.AsSeconds() >= 0.0);
  84. return 0;
  85. }
  86. void ProducerConsumerTest(
  87. Sample& sample,
  88. ThreadEntryFunction pProducer,
  89. ThreadEntryFunction pConsumer,
  90. int bufferCapacity,
  91. bool isContended)
  92. {
  93. const int kThreadArraySize = 12;
  94. const int kMinThreads = 4;
  95. const int kNumCores = (isContended ? eastl::min(kThreadArraySize, eastl::max(GetProcessorCount(), kMinThreads)) : 2);
  96. const int kThreadGroupSize = (isContended ? kNumCores / 2 : 1);
  97. ProducerConsumerTestData::InitSemaphores(bufferCapacity);
  98. eastl::vector<ProducerConsumerTestData> producerThreadTimers(kThreadGroupSize);
  99. eastl::vector<ProducerConsumerTestData> consumerThreadTimers(kThreadGroupSize);
  100. eastl::vector<Thread> producers(kThreadGroupSize);
  101. eastl::vector<Thread> consumers(kThreadGroupSize);
  102. ThreadAffinityMask affinityMask = 1;
  103. ThreadId newThread;
  104. for (int i = 0; i < kThreadGroupSize; ++i)
  105. {
  106. newThread = producers[i].Begin(pProducer, &producerThreadTimers[i]);
  107. EA::Thread::SetThreadAffinityMask(newThread, affinityMask);
  108. affinityMask = affinityMask << 1;
  109. newThread = consumers[i].Begin(pConsumer, &consumerThreadTimers[i]);
  110. EA::Thread::SetThreadAffinityMask(newThread, affinityMask);
  111. affinityMask = affinityMask << 1;
  112. }
  113. for (int i = 0; i < kThreadGroupSize; ++i)
  114. {
  115. EA::Thread::Thread::Status producerThreadExitStatus = producers[i].WaitForEnd(GetThreadTime() + (THREAD_WAIT_TIMEOUT * kNumCores));
  116. EA::Thread::Thread::Status consumerThreadExitStatus = consumers[i].WaitForEnd(GetThreadTime() + (THREAD_WAIT_TIMEOUT * kNumCores));
  117. EA_UNUSED(producerThreadExitStatus);
  118. EA_UNUSED(consumerThreadExitStatus);
  119. EAT_ASSERT(producerThreadExitStatus != Thread::kStatusRunning);
  120. EAT_ASSERT(consumerThreadExitStatus != Thread::kStatusRunning);
  121. }
  122. double totalTime = 0.0;
  123. for (int i = 0; i < kThreadGroupSize; ++i)
  124. {
  125. totalTime += producerThreadTimers[i].mThreadLocalTimer.AsSeconds();
  126. totalTime += consumerThreadTimers[i].mThreadLocalTimer.AsSeconds();
  127. }
  128. sample.AddElement(totalTime);
  129. ProducerConsumerTestData::ResetSemaphores();
  130. }
  131. /////////////////////////////////////////////////////////////////////////////////////////////////
  132. // Scheduler Tests & Test Functions
  133. /////////////////////////////////////////////////////////////////////////////////////////////////
  134. struct SemaphoreTestData
  135. {
  136. static Semaphore* mpTestSemaphore;
  137. Timer& mThreadLocalTimer;
  138. AtomicInt32 mSignal;
  139. SemaphoreTestData(Timer &timer, int signal = 0)
  140. : mThreadLocalTimer(timer)
  141. , mSignal(signal)
  142. {
  143. }
  144. SemaphoreTestData operator=(const SemaphoreTestData& other)
  145. {
  146. mThreadLocalTimer = other.mThreadLocalTimer;
  147. mSignal = other.mSignal;
  148. return *this;
  149. }
  150. SemaphoreTestData(const SemaphoreTestData& other)
  151. : mThreadLocalTimer(other.mThreadLocalTimer)
  152. , mSignal(other.mSignal)
  153. {
  154. }
  155. static void setSemaphoreInitialCount(int count)
  156. {
  157. SemaphoreParameters params(count, true, "Test Semaphore");
  158. mpTestSemaphore = new Semaphore(&params);
  159. }
  160. static void resetSemaphore()
  161. {
  162. if (mpTestSemaphore)
  163. {
  164. delete mpTestSemaphore;
  165. mpTestSemaphore = NULL;
  166. }
  167. }
  168. };
  169. Semaphore* SemaphoreTestData::mpTestSemaphore = NULL;
  170. // ------------------------------------------------------------------------------
  171. //
  172. static intptr_t SemaphoreTestSchedulerContendedFunction(void* pTestData)
  173. {
  174. // In this case, each thread will have its own timer, and share the same semaphore.
  175. SemaphoreTestData& testData = *static_cast<SemaphoreTestData*>(pTestData);
  176. DECREMENT_AND_SPINWAIT(gThreadSyncer);
  177. testData.mThreadLocalTimer.Start();
  178. for (int i = 0; i < kNumTestIterations; ++i)
  179. {
  180. testData.mpTestSemaphore->Wait();
  181. testData.mpTestSemaphore->Post();
  182. }
  183. testData.mThreadLocalTimer.Stop();
  184. return 0;
  185. }
  186. // ------------------------------------------------------------------------------
  187. //
  188. static intptr_t SemaphoreTestSchedulerUncontendedWakeFunction(void* pTestData)
  189. {
  190. // Initiate the timer when the wakeup signal is sent.
  191. SemaphoreTestData& testData = *static_cast<SemaphoreTestData*>(pTestData);
  192. testData.mThreadLocalTimer.Start();
  193. testData.mpTestSemaphore->Post();
  194. return 0;
  195. }
  196. // ------------------------------------------------------------------------------
  197. //
  198. static intptr_t SemaphoreTestSchedulerUncontendedWaitFunction(void* pTestData)
  199. {
  200. // Immediately go to sleep waiting for the semaphore, then stop the timer
  201. // once we wake up.
  202. SemaphoreTestData& testData = *static_cast<SemaphoreTestData*>(pTestData);
  203. testData.mSignal++;
  204. int exitResult = testData.mpTestSemaphore->Wait(GetThreadTime() + THREAD_WAIT_TIMEOUT);
  205. EAT_ASSERT(exitResult != Semaphore::kResultTimeout);
  206. EA_UNUSED(exitResult); // Silences gcc and clang warnings
  207. testData.mThreadLocalTimer.Stop();
  208. return 0;
  209. }
  210. // ------------------------------------------------------------------------------
  211. //
  212. void SemaphoreUncontendedPerfTest(Sample &sample, ThreadEntryFunction pWaitingFunc, ThreadEntryFunction pWakingFunc, int semaphoreInitialCount)
  213. {
  214. SemaphoreParameters params(semaphoreInitialCount, true, "Uncontended");
  215. Timer timer;
  216. SemaphoreTestData sharedData(timer);
  217. SemaphoreTestData::setSemaphoreInitialCount(semaphoreInitialCount);
  218. Thread waker;
  219. Thread sleeper;
  220. sleeper.Begin(pWaitingFunc, &sharedData);
  221. // Spin until the sleeping thread runs and blocks on the semaphore.
  222. while (sharedData.mSignal == 0) {}
  223. waker.Begin(pWakingFunc, &sharedData);
  224. EA::Thread::Thread::Status waiterThreadExitStatus = sleeper.WaitForEnd(GetThreadTime() + THREAD_WAIT_TIMEOUT);
  225. EA::Thread::Thread::Status wakerThreadExitStatus = waker.WaitForEnd(GetThreadTime() + THREAD_WAIT_TIMEOUT);
  226. EA_UNUSED(waiterThreadExitStatus);
  227. EA_UNUSED(wakerThreadExitStatus);
  228. EAT_ASSERT(waiterThreadExitStatus != Thread::kStatusRunning && wakerThreadExitStatus != Thread::kStatusRunning);
  229. sample.AddElement(timer.AsSeconds());
  230. SemaphoreTestData::resetSemaphore();
  231. }
  232. // ------------------------------------------------------------------------------
  233. //
  234. void SemaphoreContendedPerfTest(benchmarkenvironment::Sample &sample, ThreadEntryFunction pTestFunc, int semaphoreInitialCount)
  235. {
  236. // The contended test will always use per-thread timers, since any blocks are
  237. // a circumstance that would arise in normal use.
  238. const int kThreadArraySize = 12;
  239. const int kMinThreads = 4;
  240. const int kNumCores = eastl::min(kThreadArraySize, eastl::max(GetProcessorCount(), kMinThreads));
  241. gThreadSyncer = kNumCores;
  242. SemaphoreTestData::setSemaphoreInitialCount(semaphoreInitialCount);
  243. eastl::vector<Thread> threads(kNumCores);
  244. eastl::vector<Timer> timers(kNumCores);
  245. eastl::vector<SemaphoreTestData> data;
  246. for (int i = 0; i < kNumCores; ++i)
  247. data.push_back(SemaphoreTestData(timers[i]));
  248. for (int i = 0; i < kNumCores; ++i)
  249. {
  250. ThreadId newThread = threads[i].Begin(pTestFunc, &data[i]);
  251. EA::Thread::SetThreadAffinityMask(newThread, ThreadAffinityMask(1 << i));
  252. }
  253. double totalTime = 0.0;
  254. for (int i = 0; i < kNumCores; ++i)
  255. {
  256. EA::Thread::Thread::Status threadExitStatus = threads[i].WaitForEnd(GetThreadTime() + (THREAD_WAIT_TIMEOUT * kNumCores));
  257. EAT_ASSERT(threadExitStatus != Thread::kStatusRunning);
  258. EA_UNUSED(threadExitStatus);
  259. totalTime += timers[i].AsSeconds();
  260. }
  261. sample.AddElement(totalTime);
  262. SemaphoreTestData::resetSemaphore();
  263. }
  264. // ------------------------------------------------------------------------------
  265. //
  266. void PerfTestThreadSemaphore(Results &results, EA::IO::FileStream* pPerformanceLog)
  267. {
  268. using namespace eastl;
  269. const int kNumSamples = 10;
  270. const int kNumTests = 7;
  271. vector<Sample> samples;
  272. for (int i = 0; i < kNumTests; ++i)
  273. samples.push_back(Sample(kNumSamples));
  274. for (int j = 0; j < kNumSamples; ++j)
  275. SemaphoreUncontendedPerfTest(samples[0], &SemaphoreTestSchedulerUncontendedWaitFunction, &SemaphoreTestSchedulerUncontendedWakeFunction, 0);
  276. AddRowToResults(results, samples[0], "Semaphore Wakeup Time");
  277. WriteToLogFile(pPerformanceLog, "Semaphore Wakeup Time,%g,%g\r\n", samples[0].GetMean(), samples[0].GetVariance()); // Execution is in a local context, so output the results to the log file.
  278. for (int j = 0; j < kNumSamples; ++j)
  279. SemaphoreContendedPerfTest(samples[1], &SemaphoreTestSchedulerContendedFunction, 1);
  280. AddRowToResults(results, samples[1], "Semaphore as Mutex");
  281. WriteToLogFile(pPerformanceLog, "Semaphore as Mutex,%g,%g\r\n", samples[1].GetMean(), samples[1].GetVariance());
  282. for (int j = 0; j < kNumSamples; ++j)
  283. SemaphoreContendedPerfTest(samples[2], &SemaphoreTestSchedulerContendedFunction, 5);
  284. AddRowToResults(results, samples[2], "Semaphore as 5-way Mutex");
  285. WriteToLogFile(pPerformanceLog, "Semaphore as 5-way Mutex,%g,%g\r\n", samples[2].GetMean(), samples[2].GetVariance());
  286. for (int j = 0; j < kNumSamples; ++j)
  287. ProducerConsumerTest(samples[3], &ProducerThreadFunction, &ConsumerThreadFunction, 1, false);
  288. AddRowToResults(results, samples[3], "1 P/1 C (1 Thread at once)");
  289. WriteToLogFile(pPerformanceLog, "1 P/1 C (1 Thread at once),%g,%g\r\n", samples[3].GetMean(), samples[3].GetVariance());
  290. for (int j = 0; j < kNumSamples; ++j)
  291. ProducerConsumerTest(samples[4], &ProducerThreadFunction, &ConsumerThreadFunction, 5, false);
  292. AddRowToResults(results, samples[4], "1 P/1 C (5 Threads at once)");
  293. WriteToLogFile(pPerformanceLog, "1 P/1 C (5 Threads at once),%g,%g\r\n", samples[4].GetMean(), samples[4].GetVariance());
  294. for (int j = 0; j < kNumSamples; ++j)
  295. ProducerConsumerTest(samples[5], &ProducerThreadFunction, &ConsumerThreadFunction, 1, true);
  296. AddRowToResults(results, samples[5], "1+ P/1+ C (1 Thread at once)");
  297. WriteToLogFile(pPerformanceLog, "1+ P/1+ C (1 Thread at once),%g,%g\r\n", samples[5].GetMean(), samples[5].GetVariance());
  298. for (int j = 0; j < kNumSamples; ++j)
  299. ProducerConsumerTest(samples[6], &ProducerThreadFunction, &ConsumerThreadFunction, 5, true);
  300. AddRowToResults(results, samples[6], "1+ P/1+ C (5 Threads at once)");
  301. WriteToLogFile(pPerformanceLog, "1+ P/1+ C (5 Threads at once),%g,%g\r\n", samples[6].GetMean(), samples[6].GetVariance());
  302. return;
  303. }