cppspmd_flow.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. // Do not include this header directly.
  2. // Control flow functionality in common between all the headers.
  3. //
  4. // Copyright 2020-2021 Binomial LLC
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS,
  14. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. // See the License for the specific language governing permissions and
  16. // limitations under the License.
  #ifdef _DEBUG
  // Debug-only invariant check: asserts that no lane is set in m_exec
  // unless it is also set in m_kernel_exec.
  CPPSPMD_FORCE_INLINE void spmd_kernel::check_masks()
  {
      // Lanes that are on in m_exec but off in m_kernel_exec (andnot is used throughout
      // this file as "second operand with the first operand's lanes removed").
      exec_mask stray_lanes = andnot(m_kernel_exec, m_exec);
      assert(!any(stray_lanes));
  }
  #endif
  23. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_break()
  24. {
  25. #ifdef _DEBUG
  26. assert(m_in_loop);
  27. #endif
  28. m_exec = exec_mask::all_off();
  29. }
  30. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_continue()
  31. {
  32. #ifdef _DEBUG
  33. assert(m_in_loop);
  34. #endif
  35. // Kill any active lanes, and remember which lanes were active so we can re-enable them at the end of the loop body.
  36. m_continue_mask = m_continue_mask | m_exec;
  37. m_exec = exec_mask::all_off();
  38. }
  39. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return()
  40. {
  41. // Permenantly kill all active lanes
  42. m_kernel_exec = andnot(m_exec, m_kernel_exec);
  43. m_exec = exec_mask::all_off();
  44. }
  45. template<typename UnmaskedBody>
  46. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody)
  47. {
  48. exec_mask orig_exec = m_exec, orig_kernel_exec = m_kernel_exec;
  49. m_kernel_exec = exec_mask::all_on();
  50. m_exec = exec_mask::all_on();
  51. unmaskedBody();
  52. m_kernel_exec = m_kernel_exec & orig_kernel_exec;
  53. m_exec = m_exec & orig_exec;
  54. check_masks();
  55. }
  56. struct scoped_unmasked_restorer
  57. {
  58. spmd_kernel *m_pKernel;
  59. exec_mask m_orig_exec, m_orig_kernel_exec;
  60. CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) :
  61. m_pKernel(pKernel),
  62. m_orig_exec(pKernel->m_exec),
  63. m_orig_kernel_exec(pKernel->m_kernel_exec)
  64. {
  65. pKernel->m_kernel_exec = exec_mask::all_on();
  66. pKernel->m_exec = exec_mask::all_on();
  67. }
  68. CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer()
  69. {
  70. m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec;
  71. m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec;
  72. m_pKernel->check_masks();
  73. }
  74. };
  // Statement-style equivalent of spmd_unmasked(): opens a scope guarded by a
  // scoped_unmasked_restorer and closes it again. Must be used inside a spmd_kernel
  // member function because the expansion refers to `this`.
  #define SPMD_UNMASKED_BEGIN \
      { \
          scoped_unmasked_restorer _unmasked_restorer(this);
  #define SPMD_UNMASKED_END \
      }
  77. #if 0
  78. template<typename SPMDKernel, typename... Args>
  79. CPPSPMD_FORCE_INLINE decltype(auto) spmd_kernel::spmd_call(Args&&... args)
  80. {
  81. SPMDKernel kernel;
  82. kernel.init(m_exec);
  83. return kernel._call(std::forward<Args>(args)...);
  84. }
  85. #else
  86. template<typename SPMDKernel, typename... Args>
  87. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_call(Args&&... args)
  88. {
  89. SPMDKernel kernel;
  90. kernel.init(m_exec);
  91. kernel._call(std::forward<Args>(args)...);
  92. }
  93. #endif
  94. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond)
  95. {
  96. #ifdef _DEBUG
  97. assert(m_in_loop);
  98. #endif
  99. exec_mask cond_exec(cond);
  100. m_exec = andnot(m_exec & cond_exec, m_exec);
  101. check_masks();
  102. }
  103. // No SPMD breaks, continues, etc. allowed
  104. template<typename IfBody>
  105. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sif(const vbool& cond, const IfBody& ifBody)
  106. {
  107. exec_mask im = m_exec & exec_mask(cond);
  108. if (any(im))
  109. {
  110. const exec_mask orig_exec = m_exec;
  111. m_exec = im;
  112. ifBody();
  113. m_exec = orig_exec;
  114. }
  115. }
  116. // No SPMD breaks, continues, etc. allowed
  117. template<typename IfBody, typename ElseBody>
  118. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody)
  119. {
  120. const exec_mask orig_exec = m_exec;
  121. exec_mask im = m_exec & exec_mask(cond);
  122. if (any(im))
  123. {
  124. m_exec = im;
  125. ifBody();
  126. }
  127. exec_mask em = orig_exec & exec_mask(!cond);
  128. if (any(em))
  129. {
  130. m_exec = em;
  131. elseBody();
  132. }
  133. m_exec = orig_exec;
  134. }
  135. template<typename IfBody>
  136. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody)
  137. {
  138. exec_mask cond_exec(cond);
  139. exec_mask pre_if_exec = cond_exec & m_exec;
  140. if (any(pre_if_exec))
  141. {
  142. exec_mask unexecuted_lanes = andnot(cond_exec, m_exec);
  143. m_exec = pre_if_exec;
  144. ifBody();
  145. // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body.
  146. m_exec = m_exec | unexecuted_lanes;
  147. check_masks();
  148. }
  149. }
  150. template<typename IfBody, typename ElseBody>
  151. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody)
  152. {
  153. bool all_flag = false;
  154. exec_mask cond_exec(cond);
  155. {
  156. exec_mask pre_if_exec = cond_exec & m_exec;
  157. int mask = pre_if_exec.get_movemask();
  158. if (mask != 0)
  159. {
  160. all_flag = ((uint32_t)mask == m_exec.get_movemask());
  161. exec_mask unexecuted_lanes = andnot(cond_exec, m_exec);
  162. m_exec = pre_if_exec;
  163. ifBody();
  164. // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body.
  165. m_exec = m_exec | unexecuted_lanes;
  166. check_masks();
  167. }
  168. }
  169. if (!all_flag)
  170. {
  171. exec_mask pre_if_exec = andnot(cond_exec, m_exec);
  172. if (any(pre_if_exec))
  173. {
  174. exec_mask unexecuted_lanes = cond_exec & m_exec;
  175. m_exec = pre_if_exec;
  176. ifBody();
  177. // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body.
  178. m_exec = m_exec | unexecuted_lanes;
  179. check_masks();
  180. }
  181. }
  182. }
  183. struct scoped_exec_restorer
  184. {
  185. exec_mask *m_pMask;
  186. exec_mask m_prev_mask;
  187. CPPSPMD_FORCE_INLINE scoped_exec_restorer(exec_mask *pExec_mask) : m_pMask(pExec_mask), m_prev_mask(*pExec_mask) { }
  188. CPPSPMD_FORCE_INLINE ~scoped_exec_restorer() { *m_pMask = m_prev_mask; }
  189. };
  // "Simple" if/else as macros: SPMD_SIF(cond) ... [SPMD_SELSE(cond) ...] SPMD_SENDIF.
  // Each branch runs only when at least one active lane selects it, with m_exec narrowed
  // to those lanes; a scoped_exec_restorer puts m_exec back when the branch's scope closes.
  // Cannot use SPMD break, continue, or return inside "simple" if/else
  #define SPMD_SIF(cond) \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); \
      if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \
      { \
          CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SELSE(cond) \
      } \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); \
      if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \
      { \
          CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SENDIF \
      }
  // Same as SPMD_SIF, except doesn't use a scoped object: m_exec is saved in a local
  // named _orig_exec and restored explicitly by SPMD_SELSE2 / SPMD_SEND_IF2.
  #define SPMD_SIF2(cond) \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); \
      if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \
      { \
          exec_mask _orig_exec = m_exec; \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SELSE2(cond) \
          m_exec = _orig_exec; \
      } \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); \
      if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \
      { \
          exec_mask _orig_exec = m_exec; \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SEND_IF2 \
          m_exec = _orig_exec; \
      }
  // Same as SPMD_SIF(), except the if/else blocks are always executed
  // (there is no any() test, so a block runs even when its lane mask is empty).
  #define SPMD_SAIF(cond) \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); \
      { \
          CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SAELSE(cond) \
      } \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); \
      { \
          CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__);
  #define SPMD_SAENDIF \
      }
  // "Simple" per-lane select (switch-like):
  //   SPMD_SSELECT(var)
  //     SPMD_SCASE(value) ... SPMD_SCASE_END
  //     SPMD_SDEFAULT ... SPMD_SDEFAULT_END
  //   SPMD_SSELECT_END
  // Each case runs with m_exec narrowed to the entry-active lanes where var == value and is
  // skipped when no lane matches. _select_executed accumulates the lanes handled so far;
  // SPMD_SCASE_END leaves the select early once that equals the entry mask. The default block
  // gets the entry-active lanes no case has claimed. m_exec is restored when the select ends.
  // Cannot use SPMD break, continue, or return inside sselect
  #define SPMD_SSELECT(var) \
      do { \
          vint_t _select_var = var; \
          scoped_exec_restorer _orig_exec(&m_exec); \
          exec_mask _select_executed(exec_mask::all_off());
  #define SPMD_SCASE(value) \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); \
      if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \
      { \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); \
          _select_executed = _select_executed | m_exec;
  //#define SPMD_SCASE_END if (_select_executed.get_movemask() == _orig_exec.m_prev_mask.get_movemask()) break; }
  #define SPMD_SCASE_END \
          if (!any(_select_executed ^ _orig_exec.m_prev_mask)) break; \
      }
  #define SPMD_SDEFAULT \
      exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); \
      if (any(_all_other_lanes)) \
      { \
          m_exec = _all_other_lanes;
  #define SPMD_SDEFAULT_END \
      }
  #define SPMD_SSELECT_END \
      } while(0);
  // Same as SPMD_SSELECT, except all cases are executed: there is no any() test around a
  // case or the default block, and no early exit once every lane has been handled.
  // Cannot use SPMD break, continue, or return inside sselect
  #define SPMD_SASELECT(var) \
      do { \
          vint_t _select_var = var; \
          scoped_exec_restorer _orig_exec(&m_exec); \
          exec_mask _select_executed(exec_mask::all_off());
  #define SPMD_SACASE(value) \
      exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); \
      { \
          m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); \
          _select_executed = _select_executed | m_exec;
  #define SPMD_SACASE_END \
      }
  #define SPMD_SADEFAULT \
      exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); \
      { \
          m_exec = _all_other_lanes;
  #define SPMD_SADEFAULT_END \
      }
  #define SPMD_SASELECT_END \
      } while(0);
  226. struct scoped_exec_restorer2
  227. {
  228. spmd_kernel *m_pKernel;
  229. exec_mask m_unexecuted_lanes;
  230. CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) :
  231. m_pKernel(pKernel)
  232. {
  233. exec_mask cond_exec(cond);
  234. m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec);
  235. pKernel->m_exec = cond_exec & pKernel->m_exec;
  236. }
  237. CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2()
  238. {
  239. m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes;
  240. m_pKernel->check_masks();
  241. }
  242. };
  // Full SPMD if/else as macros: SPMD_IF(cond) ... [SPMD_ELSE(cond) ...] SPMD_END_IF.
  // Each branch installs a scoped_exec_restorer2 for its condition and runs its block only
  // when at least one lane remains active. Must be used inside a spmd_kernel member function.
  #define SPMD_IF(cond) \
      { \
          CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); \
          if (any(m_exec)) \
          {
  #define SPMD_ELSE(cond) \
          } \
      } \
      { \
          CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); \
          if (any(m_exec)) \
          {
  #define SPMD_END_IF \
          } \
      }
  // Same as SPMD_IF, except the conditional block is always executed.
  #define SPMD_AIF(cond) \
      { \
          CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); \
          {
  #define SPMD_AELSE(cond) \
          } \
      } \
      { \
          CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); \
          {
  #define SPMD_AEND_IF \
          } \
      }
  250. class scoped_exec_saver
  251. {
  252. exec_mask m_exec, m_kernel_exec, m_continue_mask;
  253. spmd_kernel *m_pKernel;
  254. #ifdef _DEBUG
  255. bool m_in_loop;
  256. #endif
  257. public:
  258. inline scoped_exec_saver(spmd_kernel *pKernel) :
  259. m_exec(pKernel->m_exec), m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask),
  260. m_pKernel(pKernel)
  261. {
  262. #ifdef _DEBUG
  263. m_in_loop = pKernel->m_in_loop;
  264. #endif
  265. }
  266. inline ~scoped_exec_saver()
  267. {
  268. m_pKernel->m_exec = m_exec;
  269. m_pKernel->m_continue_mask = m_continue_mask;
  270. m_pKernel->m_kernel_exec = m_kernel_exec;
  271. #ifdef _DEBUG
  272. m_pKernel->m_in_loop = m_in_loop;
  273. m_pKernel->check_masks();
  274. #endif
  275. }
  276. };
  // Place at the top of a callee: saves the kernel's masks for the rest of the enclosing
  // scope (restored by scoped_exec_saver's destructor) and starts with an empty continue mask.
  #define SPMD_BEGIN_CALL \
      scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); \
      m_continue_mask = exec_mask::all_off();
  // Same as SPMD_BEGIN_CALL, but additionally turns every lane on in m_exec.
  #define SPMD_BEGIN_CALL_ALL_LANES \
      scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); \
      m_exec = exec_mask::all_on(); \
      m_continue_mask = exec_mask::all_off();
  279. template<typename ForeachBody>
  280. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const ForeachBody& foreachBody)
  281. {
  282. if (begin == end)
  283. return;
  284. if (!any(m_exec))
  285. return;
  286. // We don't support iterating backwards.
  287. if (begin > end)
  288. std::swap(begin, end);
  289. exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec;
  290. int total_full = (end - begin) / PROGRAM_COUNT;
  291. int total_partial = (end - begin) % PROGRAM_COUNT;
  292. lint_t loop_index = begin + program_index;
  293. const int total_loops = total_full + (total_partial ? 1 : 0);
  294. m_continue_mask = exec_mask::all_off();
  295. for (int i = 0; i < total_loops; i++)
  296. {
  297. int n = PROGRAM_COUNT;
  298. if ((i == (total_loops - 1)) && (total_partial))
  299. {
  300. exec_mask partial_mask = exec_mask(vint_t(total_partial) > vint_t(program_index));
  301. m_exec = m_exec & partial_mask;
  302. n = total_partial;
  303. }
  304. foreachBody(loop_index, n);
  305. m_exec = m_exec | m_continue_mask;
  306. if (!any(m_exec))
  307. break;
  308. m_continue_mask = exec_mask::all_off();
  309. check_masks();
  310. store_all(loop_index, loop_index + PROGRAM_COUNT);
  311. }
  312. m_exec = prev_exec & m_kernel_exec;
  313. m_continue_mask = prev_continue_mask;
  314. check_masks();
  315. }
  316. template<typename WhileCondBody, typename WhileBody>
  317. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody)
  318. {
  319. exec_mask orig_exec = m_exec;
  320. exec_mask orig_continue_mask = m_continue_mask;
  321. m_continue_mask = exec_mask::all_off();
  322. #ifdef _DEBUG
  323. const bool prev_in_loop = m_in_loop;
  324. m_in_loop = true;
  325. #endif
  326. while(true)
  327. {
  328. exec_mask cond_exec = exec_mask(whileCondBody());
  329. m_exec = m_exec & cond_exec;
  330. if (!any(m_exec))
  331. break;
  332. whileBody();
  333. m_exec = m_exec | m_continue_mask;
  334. m_continue_mask = exec_mask::all_off();
  335. check_masks();
  336. }
  337. #ifdef _DEBUG
  338. m_in_loop = prev_in_loop;
  339. #endif
  340. m_exec = orig_exec & m_kernel_exec;
  341. m_continue_mask = orig_continue_mask;
  342. check_masks();
  343. }
  344. struct scoped_while_restorer
  345. {
  346. spmd_kernel *m_pKernel;
  347. exec_mask m_orig_exec, m_orig_continue_mask;
  348. #ifdef _DEBUG
  349. bool m_prev_in_loop;
  350. #endif
  351. CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) :
  352. m_pKernel(pKernel),
  353. m_orig_exec(pKernel->m_exec),
  354. m_orig_continue_mask(pKernel->m_continue_mask)
  355. {
  356. pKernel->m_continue_mask.all_off();
  357. #ifdef _DEBUG
  358. m_prev_in_loop = pKernel->m_in_loop;
  359. pKernel->m_in_loop = true;
  360. #endif
  361. }
  362. CPPSPMD_FORCE_INLINE ~scoped_while_restorer()
  363. {
  364. m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec;
  365. m_pKernel->m_continue_mask = m_orig_continue_mask;
  366. #ifdef _DEBUG
  367. m_pKernel->m_in_loop = m_prev_in_loop;
  368. m_pKernel->check_masks();
  369. #endif
  370. }
  371. };
  // Macro form of spmd_while(): SPMD_WHILE(cond) ... SPMD_WEND.
  // cond is re-evaluated before every iteration and ANDed into m_exec; the loop ends when
  // no lane is left. SPMD_WEND merges lanes parked by spmd_continue() back in. Mask
  // save/restore is handled by a scoped_while_restorer.
  #undef SPMD_WHILE
  #undef SPMD_WEND
  #define SPMD_WHILE(cond) \
      { \
          scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          while(true) \
          { \
              exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \
              m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); \
              if (!any(m_exec)) break;
  #define SPMD_WEND \
              m_exec = m_exec | m_continue_mask; \
              m_continue_mask = exec_mask::all_off(); \
              check_masks(); \
          } \
      }
  // Macro form of spmd_foreach(): SPMD_FOREACH(loop_var, bi, ei) ... SPMD_FOREACH_END(loop_var).
  // Declares `lint_t loop_var` starting at program_index + b and advances it by PROGRAM_COUNT
  // per iteration; the last iteration is masked down to the remaining lanes when the range
  // length is not a multiple of PROGRAM_COUNT.
  // Nesting is not supported (although it will compile, but the results won't make much sense).
  // NOTE(review): the bounds are cast to uint32_t before being compared/swapped, so a negative
  // bound is treated as a very large value - confirm callers only pass non-negative ranges.
  #define SPMD_FOREACH(loop_var, bi, ei) \
      if (((bi) != (ei)) && (any(m_exec))) { \
          scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          uint32_t b = (uint32_t)(bi), e = (uint32_t)(ei); \
          if ((b) > (e)) { std::swap(b, e); } \
          const uint32_t total_full = ((e) - (b)) >> PROGRAM_COUNT_SHIFT, total_partial = ((e) - (b)) & (PROGRAM_COUNT - 1); \
          lint_t loop_var = program_index + (int)b; \
          const uint32_t total_loops = total_full + (total_partial ? 1U : 0U); \
          for (uint32_t CPPSPMD_GLUER2(_foreach_counter, __LINE__) = 0; CPPSPMD_GLUER2(_foreach_counter, __LINE__) < total_loops; ++CPPSPMD_GLUER2(_foreach_counter, __LINE__)) { \
              if ((CPPSPMD_GLUER2(_foreach_counter, __LINE__) == (total_loops - 1)) && (total_partial)) { \
                  exec_mask partial_mask = exec_mask(vint_t((int)total_partial) > vint_t(program_index)); \
                  m_exec = m_exec & partial_mask; \
              }
  #define SPMD_FOREACH_END(loop_var) \
              m_exec = m_exec | m_continue_mask; \
              if (!any(m_exec)) break; \
              m_continue_mask = exec_mask::all_off(); \
              check_masks(); \
              store_all(loop_var, loop_var + PROGRAM_COUNT); \
          } \
      }
  // SPMD_FOREACH_ACTIVE(index_var) ... SPMD_FOREACH_ACTIVE_END
  // Visits, one at a time, each lane that was set in m_exec's movemask on entry. Declares
  // `int64_t index_var` in the enclosing scope and assigns it the lane number for each visit.
  // For every visit m_exec is set up with enable_lane(_i) (presumably leaving just that lane
  // on - confirm against exec_mask) and then ANDed with m_kernel_exec.
  // Okay to use spmd_continue or spmd_return, but not spmd_break
  //
  // Fix: the last line of SPMD_FOREACH_ACTIVE used to end in a line-continuation backslash,
  // which spliced the following `#define SPMD_FOREACH_ACTIVE_END` line into this macro's
  // replacement list (ill-formed, and SPMD_FOREACH_ACTIVE_END was never defined).
  #define SPMD_FOREACH_ACTIVE(index_var) int64_t index_var; { uint64_t _movemask = m_exec.get_movemask(); if (_movemask) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
      for (uint32_t _i = 0; _i < PROGRAM_COUNT; ++_i) { \
          if (_movemask & (1U << _i)) { \
              m_exec.enable_lane(_i); m_exec = m_exec & m_kernel_exec; \
              (index_var) = _i;
  #define SPMD_FOREACH_ACTIVE_END } } } }
  // SPMD_FOREACH_UNIQUE_INT(index_var, var) ... SPMD_FOREACH_UNIQUE_INT_END
  // Stores all lanes of var to a temporary array, sorts and de-duplicates it, then runs the
  // body once per distinct value with `int index_var` set to that value and m_exec set to the
  // lanes where var == index_var.
  // Okay to use spmd_continue.
  // NOTE(review): the original comment read "but not spmd_break/spmd_continue", which
  // contradicts itself; presumably spmd_break/spmd_return was meant, since m_exec is
  // overwritten from the comparison at the top of every iteration - confirm.
  #define SPMD_FOREACH_UNIQUE_INT(index_var, var) \
      { \
          scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          CPPSPMD_DECL(int_t, _vals[PROGRAM_COUNT]); \
          store_linear_all(_vals, var); \
          std::sort(_vals, _vals + PROGRAM_COUNT); \
          const int _n = (int)(std::unique(_vals, _vals + PROGRAM_COUNT) - _vals); \
          for (int _i = 0; _i < _n; ++_i) { \
              int index_var = _vals[_i]; \
              vbool cond = (vint_t(var) == vint_t(index_var)); \
              m_exec = exec_mask(cond);
  #define SPMD_FOREACH_UNIQUE_INT_END \
          } \
      }
  398. struct scoped_simple_while_restorer
  399. {
  400. spmd_kernel* m_pKernel;
  401. exec_mask m_orig_exec;
  402. #ifdef _DEBUG
  403. bool m_prev_in_loop;
  404. #endif
  405. CPPSPMD_FORCE_INLINE scoped_simple_while_restorer(spmd_kernel* pKernel) :
  406. m_pKernel(pKernel),
  407. m_orig_exec(pKernel->m_exec)
  408. {
  409. #ifdef _DEBUG
  410. m_prev_in_loop = pKernel->m_in_loop;
  411. pKernel->m_in_loop = true;
  412. #endif
  413. }
  414. CPPSPMD_FORCE_INLINE ~scoped_simple_while_restorer()
  415. {
  416. m_pKernel->m_exec = m_orig_exec;
  417. #ifdef _DEBUG
  418. m_pKernel->m_in_loop = m_prev_in_loop;
  419. m_pKernel->check_masks();
  420. #endif
  421. }
  422. };
  // "Simple" while loop: SPMD_SWHILE(cond) ... SPMD_SWEND.
  // cond is ANDed into m_exec before each iteration; the loop ends when no lane is left and
  // m_exec is then restored to its entry value.
  // Cannot use SPMD break, continue, or return inside simple while
  #define SPMD_SWHILE(cond) \
      { \
          scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          while(true) \
          { \
              exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \
              m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); \
              if (!any(m_exec)) break;
  #define SPMD_SWEND \
          } \
      }
  // "Simple" do/while loop: SPMD_SDO ... SPMD_SEND_DO(cond). Same as SPMD_SWHILE, except the
  // condition is tested after the body, so the body runs at least once.
  // Cannot use SPMD break, continue, or return inside simple do
  #define SPMD_SDO \
      { \
          scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          while(true) \
          {
  #define SPMD_SEND_DO(cond) \
              exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \
              m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); \
              if (!any(m_exec)) break; \
          } \
      }
  // Macro form of spmd_for(): SPMD_FOR(for_init, for_cond) ... SPMD_END_FOR(for_inc).
  // for_init runs once before the masks are saved; for_cond is ANDed into m_exec before each
  // iteration; for_inc runs after lanes parked by spmd_continue() have been merged back in.
  #undef SPMD_FOR
  #undef SPMD_END_FOR
  #define SPMD_FOR(for_init, for_cond) \
      { \
          for_init; \
          scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \
          while(true) \
          { \
              exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \
              m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); \
              if (!any(m_exec)) break;
  #define SPMD_END_FOR(for_inc) \
              m_exec = m_exec | m_continue_mask; \
              m_continue_mask = exec_mask::all_off(); \
              check_masks(); \
              for_inc; \
          } \
      }
  436. template<typename ForInitBody, typename ForCondBody, typename ForIncrBody, typename ForBody>
  437. CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody)
  438. {
  439. exec_mask orig_exec = m_exec;
  440. forInitBody();
  441. exec_mask orig_continue_mask = m_continue_mask;
  442. m_continue_mask = exec_mask::all_off();
  443. #ifdef _DEBUG
  444. const bool prev_in_loop = m_in_loop;
  445. m_in_loop = true;
  446. #endif
  447. while(true)
  448. {
  449. exec_mask cond_exec = exec_mask(forCondBody());
  450. m_exec = m_exec & cond_exec;
  451. if (!any(m_exec))
  452. break;
  453. forBody();
  454. m_exec = m_exec | m_continue_mask;
  455. m_continue_mask = exec_mask::all_off();
  456. check_masks();
  457. forIncrBody();
  458. }
  459. m_exec = orig_exec & m_kernel_exec;
  460. m_continue_mask = orig_continue_mask;
  461. #ifdef _DEBUG
  462. m_in_loop = prev_in_loop;
  463. check_masks();
  464. #endif
  465. }