parallel_invoke.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /*
  2. Copyright (c) 2005-2020 Intel Corporation
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. #ifndef __TBB_parallel_invoke_H
  14. #define __TBB_parallel_invoke_H
  15. #define __TBB_parallel_invoke_H_include_area
  16. #include "internal/_warning_suppress_enable_notice.h"
  17. #include "task.h"
  18. #include "tbb_profiling.h"
  19. #if __TBB_VARIADIC_PARALLEL_INVOKE
  20. #include <utility> // std::forward
  21. #endif
  22. namespace tbb {
  23. #if !__TBB_TASK_GROUP_CONTEXT
  24. /** Dummy to avoid cluttering the bulk of the header with enormous amount of ifdefs. **/
  25. struct task_group_context {
  26. task_group_context(tbb::internal::string_index){}
  27. };
  28. #endif /* __TBB_TASK_GROUP_CONTEXT */
  29. //! @cond INTERNAL
  30. namespace internal {
  31. // Simple task object, executing user method
  32. template<typename function>
  33. class function_invoker : public task{
  34. public:
  35. function_invoker(const function& _function) : my_function(_function) {}
  36. private:
  37. const function &my_function;
  38. task* execute() __TBB_override
  39. {
  40. my_function();
  41. return NULL;
  42. }
  43. };
  44. // The class spawns two or three child tasks
  45. template <size_t N, typename function1, typename function2, typename function3>
  46. class spawner : public task {
  47. private:
  48. const function1& my_func1;
  49. const function2& my_func2;
  50. const function3& my_func3;
  51. bool is_recycled;
  52. task* execute () __TBB_override {
  53. if(is_recycled){
  54. return NULL;
  55. }else{
  56. __TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
  57. set_ref_count(N);
  58. recycle_as_safe_continuation();
  59. internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
  60. __TBB_ASSERT(invoker2, "Child task allocation failed");
  61. spawn(*invoker2);
  62. size_t n = N; // To prevent compiler warnings
  63. if (n>2) {
  64. internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
  65. __TBB_ASSERT(invoker3, "Child task allocation failed");
  66. spawn(*invoker3);
  67. }
  68. my_func1();
  69. is_recycled = true;
  70. return NULL;
  71. }
  72. } // execute
  73. public:
  74. spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
  75. };
  76. // Creates and spawns child tasks
  77. class parallel_invoke_helper : public empty_task {
  78. public:
  79. // Dummy functor class
  80. class parallel_invoke_noop {
  81. public:
  82. void operator() () const {}
  83. };
  84. // Creates a helper object with user-defined number of children expected
  85. parallel_invoke_helper(int number_of_children)
  86. {
  87. set_ref_count(number_of_children + 1);
  88. }
  89. #if __TBB_VARIADIC_PARALLEL_INVOKE
  90. void add_children() {}
  91. void add_children(tbb::task_group_context&) {}
  92. template <typename function>
  93. void add_children(function&& _func)
  94. {
  95. internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(std::forward<function>(_func));
  96. __TBB_ASSERT(invoker, "Child task allocation failed");
  97. spawn(*invoker);
  98. }
  99. template<typename function>
  100. void add_children(function&& _func, tbb::task_group_context&)
  101. {
  102. add_children(std::forward<function>(_func));
  103. }
  104. // Adds child(ren) task(s) and spawns them
  105. template <typename function1, typename function2, typename... function>
  106. void add_children(function1&& _func1, function2&& _func2, function&&... _func)
  107. {
  108. // The third argument is dummy, it is ignored actually.
  109. parallel_invoke_noop noop;
  110. typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type;
  111. spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward<function1>(_func1), std::forward<function2>(_func2), noop);
  112. spawn(sub_root);
  113. add_children(std::forward<function>(_func)...);
  114. }
  115. #else
  116. // Adds child task and spawns it
  117. template <typename function>
  118. void add_children (const function &_func)
  119. {
  120. internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
  121. __TBB_ASSERT(invoker, "Child task allocation failed");
  122. spawn(*invoker);
  123. }
  124. // Adds a task with multiple child tasks and spawns it
  125. // two arguments
  126. template <typename function1, typename function2>
  127. void add_children (const function1& _func1, const function2& _func2)
  128. {
  129. // The third argument is dummy, it is ignored actually.
  130. parallel_invoke_noop noop;
  131. internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
  132. spawn(sub_root);
  133. }
  134. // three arguments
  135. template <typename function1, typename function2, typename function3>
  136. void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
  137. {
  138. internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
  139. spawn(sub_root);
  140. }
  141. #endif // __TBB_VARIADIC_PARALLEL_INVOKE
  142. // Waits for all child tasks
  143. template <typename F0>
  144. void run_and_finish(const F0& f0)
  145. {
  146. internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
  147. __TBB_ASSERT(invoker, "Child task allocation failed");
  148. spawn_and_wait_for_all(*invoker);
  149. }
  150. };
  151. // The class destroys root if exception occurred as well as in normal case
  152. class parallel_invoke_cleaner: internal::no_copy {
  153. public:
  154. #if __TBB_TASK_GROUP_CONTEXT
  155. parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
  156. : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
  157. #else
  158. parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
  159. : root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
  160. #endif /* !__TBB_TASK_GROUP_CONTEXT */
  161. {}
  162. ~parallel_invoke_cleaner(){
  163. root.destroy(root);
  164. }
  165. internal::parallel_invoke_helper& root;
  166. };
  167. #if __TBB_VARIADIC_PARALLEL_INVOKE
  168. // Determine whether the last parameter in a pack is task_group_context
  169. template<typename... T> struct impl_selector; // to workaround a GCC bug
  170. template<typename T1, typename... T> struct impl_selector<T1, T...> {
  171. typedef typename impl_selector<T...>::type type;
  172. };
  173. template<typename T> struct impl_selector<T> {
  174. typedef false_type type;
  175. };
  176. template<> struct impl_selector<task_group_context&> {
  177. typedef true_type type;
  178. };
  179. // Select task_group_context parameter from the back of a pack
  180. inline task_group_context& get_context( task_group_context& tgc ) { return tgc; }
  181. template<typename T1, typename... T>
  182. task_group_context& get_context( T1&& /*ignored*/, T&&... t )
  183. { return get_context( std::forward<T>(t)... ); }
  184. // task_group_context is known to be at the back of the parameter pack
  185. template<typename F0, typename F1, typename... F>
  186. void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) {
  187. __TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?");
  188. // # of child tasks: f0, f1, and a task for each two elements of the pack except the last
  189. const size_t number_of_children = 2 + sizeof...(F)/2;
  190. parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward<F>(f)...));
  191. parallel_invoke_helper& root = cleaner.root;
  192. root.add_children(std::forward<F>(f)...);
  193. root.add_children(std::forward<F1>(f1));
  194. root.run_and_finish(std::forward<F0>(f0));
  195. }
  196. // task_group_context is not in the pack, needs to be added
  197. template<typename F0, typename F1, typename... F>
  198. void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) {
  199. tbb::task_group_context context(PARALLEL_INVOKE);
  200. // Add context to the arguments, and redirect to the other overload
  201. parallel_invoke_impl(true_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)..., context);
  202. }
  203. #endif
  204. } // namespace internal
  205. //! @endcond
  206. /** \name parallel_invoke
  207. **/
  208. //@{
  209. //! Executes a list of tasks in parallel and waits for all tasks to complete.
  210. /** @ingroup algorithms */
  211. #if __TBB_VARIADIC_PARALLEL_INVOKE
  212. // parallel_invoke for two or more arguments via variadic templates
  213. // presence of task_group_context is defined automatically
  214. template<typename F0, typename F1, typename... F>
  215. void parallel_invoke(F0&& f0, F1&& f1, F&&... f) {
  216. typedef typename internal::impl_selector<internal::false_type, F...>::type selector_type;
  217. internal::parallel_invoke_impl(selector_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)...);
  218. }
  219. #else
  220. // parallel_invoke with user-defined context
  221. // two arguments
  222. template<typename F0, typename F1 >
  223. void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
  224. internal::parallel_invoke_cleaner cleaner(2, context);
  225. internal::parallel_invoke_helper& root = cleaner.root;
  226. root.add_children(f1);
  227. root.run_and_finish(f0);
  228. }
  229. // three arguments
  230. template<typename F0, typename F1, typename F2 >
  231. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
  232. internal::parallel_invoke_cleaner cleaner(3, context);
  233. internal::parallel_invoke_helper& root = cleaner.root;
  234. root.add_children(f2);
  235. root.add_children(f1);
  236. root.run_and_finish(f0);
  237. }
  238. // four arguments
  239. template<typename F0, typename F1, typename F2, typename F3>
  240. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
  241. tbb::task_group_context& context)
  242. {
  243. internal::parallel_invoke_cleaner cleaner(4, context);
  244. internal::parallel_invoke_helper& root = cleaner.root;
  245. root.add_children(f3);
  246. root.add_children(f2);
  247. root.add_children(f1);
  248. root.run_and_finish(f0);
  249. }
  250. // five arguments
  251. template<typename F0, typename F1, typename F2, typename F3, typename F4 >
  252. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  253. tbb::task_group_context& context)
  254. {
  255. internal::parallel_invoke_cleaner cleaner(3, context);
  256. internal::parallel_invoke_helper& root = cleaner.root;
  257. root.add_children(f4, f3);
  258. root.add_children(f2, f1);
  259. root.run_and_finish(f0);
  260. }
  261. // six arguments
  262. template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
  263. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
  264. tbb::task_group_context& context)
  265. {
  266. internal::parallel_invoke_cleaner cleaner(3, context);
  267. internal::parallel_invoke_helper& root = cleaner.root;
  268. root.add_children(f5, f4, f3);
  269. root.add_children(f2, f1);
  270. root.run_and_finish(f0);
  271. }
  272. // seven arguments
  273. template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
  274. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  275. const F5& f5, const F6& f6,
  276. tbb::task_group_context& context)
  277. {
  278. internal::parallel_invoke_cleaner cleaner(3, context);
  279. internal::parallel_invoke_helper& root = cleaner.root;
  280. root.add_children(f6, f5, f4);
  281. root.add_children(f3, f2, f1);
  282. root.run_and_finish(f0);
  283. }
  284. // eight arguments
  285. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  286. typename F5, typename F6, typename F7>
  287. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  288. const F5& f5, const F6& f6, const F7& f7,
  289. tbb::task_group_context& context)
  290. {
  291. internal::parallel_invoke_cleaner cleaner(4, context);
  292. internal::parallel_invoke_helper& root = cleaner.root;
  293. root.add_children(f7, f6, f5);
  294. root.add_children(f4, f3);
  295. root.add_children(f2, f1);
  296. root.run_and_finish(f0);
  297. }
  298. // nine arguments
  299. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  300. typename F5, typename F6, typename F7, typename F8>
  301. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  302. const F5& f5, const F6& f6, const F7& f7, const F8& f8,
  303. tbb::task_group_context& context)
  304. {
  305. internal::parallel_invoke_cleaner cleaner(4, context);
  306. internal::parallel_invoke_helper& root = cleaner.root;
  307. root.add_children(f8, f7, f6);
  308. root.add_children(f5, f4, f3);
  309. root.add_children(f2, f1);
  310. root.run_and_finish(f0);
  311. }
  312. // ten arguments
  313. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  314. typename F5, typename F6, typename F7, typename F8, typename F9>
  315. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  316. const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
  317. tbb::task_group_context& context)
  318. {
  319. internal::parallel_invoke_cleaner cleaner(4, context);
  320. internal::parallel_invoke_helper& root = cleaner.root;
  321. root.add_children(f9, f8, f7);
  322. root.add_children(f6, f5, f4);
  323. root.add_children(f3, f2, f1);
  324. root.run_and_finish(f0);
  325. }
  326. // two arguments
  327. template<typename F0, typename F1>
  328. void parallel_invoke(const F0& f0, const F1& f1) {
  329. task_group_context context(internal::PARALLEL_INVOKE);
  330. parallel_invoke<F0, F1>(f0, f1, context);
  331. }
  332. // three arguments
  333. template<typename F0, typename F1, typename F2>
  334. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
  335. task_group_context context(internal::PARALLEL_INVOKE);
  336. parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
  337. }
  338. // four arguments
  339. template<typename F0, typename F1, typename F2, typename F3 >
  340. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
  341. task_group_context context(internal::PARALLEL_INVOKE);
  342. parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
  343. }
  344. // five arguments
  345. template<typename F0, typename F1, typename F2, typename F3, typename F4>
  346. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
  347. task_group_context context(internal::PARALLEL_INVOKE);
  348. parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
  349. }
  350. // six arguments
  351. template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
  352. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
  353. task_group_context context(internal::PARALLEL_INVOKE);
  354. parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
  355. }
  356. // seven arguments
  357. template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
  358. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  359. const F5& f5, const F6& f6)
  360. {
  361. task_group_context context(internal::PARALLEL_INVOKE);
  362. parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
  363. }
  364. // eight arguments
  365. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  366. typename F5, typename F6, typename F7>
  367. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  368. const F5& f5, const F6& f6, const F7& f7)
  369. {
  370. task_group_context context(internal::PARALLEL_INVOKE);
  371. parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
  372. }
  373. // nine arguments
  374. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  375. typename F5, typename F6, typename F7, typename F8>
  376. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  377. const F5& f5, const F6& f6, const F7& f7, const F8& f8)
  378. {
  379. task_group_context context(internal::PARALLEL_INVOKE);
  380. parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
  381. }
  382. // ten arguments
  383. template<typename F0, typename F1, typename F2, typename F3, typename F4,
  384. typename F5, typename F6, typename F7, typename F8, typename F9>
  385. void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
  386. const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
  387. {
  388. task_group_context context(internal::PARALLEL_INVOKE);
  389. parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
  390. }
  391. #endif // __TBB_VARIADIC_PARALLEL_INVOKE
  392. //@}
  393. } // namespace
  394. #include "internal/_warning_suppress_disable_notice.h"
  395. #undef __TBB_parallel_invoke_H_include_area
  396. #endif /* __TBB_parallel_invoke_H */