barrier.cpp
// ======================================================================== //
// Copyright 2009-2017 Intel Corporation                                    //
//                                                                          //
// Licensed under the Apache License, Version 2.0 (the "License");          //
// you may not use this file except in compliance with the License.        //
// You may obtain a copy of the License at                                  //
//                                                                          //
//     http://www.apache.org/licenses/LICENSE-2.0                           //
//                                                                          //
// Unless required by applicable law or agreed to in writing, software     //
// distributed under the License is distributed on an "AS IS" BASIS,       //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and     //
// limitations under the License.                                           //
// ======================================================================== //

#include "barrier.h"
#include "condition.h"
#include "regression.h"
#include "thread.h"
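
/* This file provides two barrier flavors: BarrierSys, an OS-assisted
   barrier (Win32 events on Windows, otherwise a mutex and condition
   variable), and LinearBarrierActive, an actively spinning
   sense-reversing barrier. A regression test for BarrierSys follows
   the implementations. */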

#if defined (__WIN32__)

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

namespace embree
{
  struct BarrierSysImplementation
  {
    __forceinline BarrierSysImplementation (size_t N)
      : i(0), enterCount(0), exitCount(0), barrierSize(0)
    {
      events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
      events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
      init(N);
    }

    __forceinline ~BarrierSysImplementation ()
    {
      CloseHandle(events[0]);
      CloseHandle(events[1]);
    }

    __forceinline void init(size_t N)
    {
      barrierSize = N;
      enterCount.store(N);
      exitCount.store(N);
    }
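
    /* The barrier toggles between events[0] and events[1]: the last
       thread to enter signals the current event and flips the index,
       and the last thread to leave resets that event. Using two events
       keeps a thread that immediately re-enters the barrier from
       consuming the still-signaled event of the previous phase. */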
    __forceinline void wait()
    {
      /* every thread entering the barrier decrements this count */
      size_t i0 = i;
      size_t cnt0 = enterCount--;

      /* all threads except the last one wait in the barrier */
      if (cnt0 > 1)
      {
        if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)
          THROW_RUNTIME_ERROR("WaitForSingleObject failed");
      }

      /* the last thread starts all threads waiting at the barrier */
      else
      {
        i = 1-i;
        enterCount.store(barrierSize);
        if (SetEvent(events[i0]) == 0)
          THROW_RUNTIME_ERROR("SetEvent failed");
      }

      /* every thread leaving the barrier decrements this count */
      size_t cnt1 = exitCount--;

      /* the last thread that left the barrier resets the event again */
      if (cnt1 == 1)
      {
        exitCount.store(barrierSize);
        if (ResetEvent(events[i0]) == 0)
          THROW_RUNTIME_ERROR("ResetEvent failed");
      }
    }

  public:
    HANDLE events[2];
    atomic<size_t> i;
    atomic<size_t> enterCount;
    atomic<size_t> exitCount;
    size_t barrierSize;
  };
}

#else

namespace embree
{
  struct BarrierSysImplementation
  {
    __forceinline BarrierSysImplementation (size_t N)
      : count(0), barrierSize(0)
    {
      init(N);
    }

    __forceinline void init(size_t N)
    {
      assert(count == 0);
      count = 0;
      barrierSize = N;
    }
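
    /* each thread increments the shared count under the mutex; the last
       arriving thread resets the count and wakes all waiters, every
       other thread blocks on the condition variable */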
    __forceinline void wait()
    {
      mutex.lock();
      count++;

      if (count == barrierSize) {
        count = 0;
        cond.notify_all();
        mutex.unlock();
        return;
      }

      cond.wait(mutex);
      mutex.unlock();
      return;
    }

  public:
    MutexSys mutex;
    ConditionSys cond;
    volatile size_t count;
    volatile size_t barrierSize;
  };
}

#endif

namespace embree
{
  BarrierSys::BarrierSys (size_t N) {
    opaque = new BarrierSysImplementation(N);
  }

  BarrierSys::~BarrierSys () {
    delete (BarrierSysImplementation*) opaque;
  }

  void BarrierSys::init(size_t count) {
    ((BarrierSysImplementation*) opaque)->init(count);
  }

  void BarrierSys::wait() {
    ((BarrierSysImplementation*) opaque)->wait();
  }
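
  /* Typical usage (sketch): construct or init() the barrier with the
     number of participating threads, then have every participant call
     wait() at the synchronization point, e.g.

       BarrierSys barrier(numThreads);
       // in each of the numThreads threads:
       barrier.wait();   // returns once all numThreads threads arrived
  */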

  LinearBarrierActive::LinearBarrierActive (size_t N)
    : count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)
  {
    if (N == 0) N = getNumberOfLogicalThreads();
    init(N);
  }

  LinearBarrierActive::~LinearBarrierActive()
  {
    delete[] count0;
    delete[] count1;
  }

  void LinearBarrierActive::init(size_t N)
  {
    if (threadCount != N) {
      threadCount = N;
      if (count0) delete[] count0; count0 = new unsigned char[N];
      if (count1) delete[] count1; count1 = new unsigned char[N];
    }
    mode  = 0;
    flag0 = 0;
    flag1 = 0;
    for (size_t i=0; i<N; i++) count0[i] = 0;
    for (size_t i=0; i<N; i++) count1[i] = 0;
  }
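
  /* Sense-reversing spin barrier: mode selects which of the two
     count/flag pairs the current phase uses. Thread 0 acts as the
     coordinator: it clears the counters of the next phase, spins until
     every other thread has checked in, switches the mode, and releases
     the waiters through the flag. All other threads mark their slot in
     the current counter array and spin on the flag. */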
  void LinearBarrierActive::wait (const size_t threadIndex)
  {
    if (mode == 0)
    {
      if (threadIndex == 0)
      {
        for (size_t i=0; i<threadCount; i++)
          count1[i] = 0;

        for (size_t i=1; i<threadCount; i++)
        {
          while (likely(count0[i] == 0))
            __pause_cpu();
        }
        mode  = 1;
        flag1 = 0;
        __memory_barrier();
        flag0 = 1;
      }
      else
      {
        count0[threadIndex] = 1;
        {
          while (likely(flag0 == 0))
            __pause_cpu();
        }
      }
    }
    else
    {
      if (threadIndex == 0)
      {
        for (size_t i=0; i<threadCount; i++)
          count0[i] = 0;

        for (size_t i=1; i<threadCount; i++)
        {
          while (likely(count1[i] == 0))
            __pause_cpu();
        }
        mode  = 0;
        flag0 = 0;
        __memory_barrier();
        flag1 = 1;
      }
      else
      {
        count1[threadIndex] = 1;
        {
          while (likely(flag1 == 0))
            __pause_cpu();
        }
      }
    }
  }
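
  /* Regression test for BarrierSys: one worker thread per logical core
     plus the main thread share a barrier. In every iteration the main
     thread clears threadResults, both barrier phases are crossed, each
     worker writes its id into its slot in between, and the main thread
     verifies the result afterwards. */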
  struct barrier_sys_regression_test : public RegressionTest
  {
    BarrierSys barrier;
    std::atomic<size_t> threadID;
    std::atomic<size_t> numFailed;
    std::vector<size_t> threadResults;

    barrier_sys_regression_test()
      : RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)
    {
      registerRegressionTest(this);
    }

    static void thread_alloc(barrier_sys_regression_test* This)
    {
      size_t tid = This->threadID++;
      for (size_t j=0; j<1000; j++)
      {
        This->barrier.wait();
        This->threadResults[tid] = tid;
        This->barrier.wait();
      }
    }

    bool run ()
    {
      threadID.store(0);
      numFailed.store(0);

      size_t numThreads = getNumberOfLogicalThreads();
      threadResults.resize(numThreads);
      barrier.init(numThreads+1);

      /* create threads */
      std::vector<thread_t> threads;
      for (size_t i=0; i<numThreads; i++)
        threads.push_back(createThread((thread_func)thread_alloc,this));

      /* run test */
      for (size_t i=0; i<1000; i++)
      {
        for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;
        barrier.wait();
        barrier.wait();
        for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;
      }

      /* destroy threads */
      for (size_t i=0; i<numThreads; i++)
        join(threads[i]);

      return numFailed == 0;
    }
  };

  barrier_sys_regression_test barrier_sys_regression_test;
}