parallel_for.h 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. // This file is part of libigl, a simple c++ geometry processing library.
  2. //
  3. // Copyright (C) 2016 Alec Jacobson <[email protected]>
  4. //
  5. // This Source Code Form is subject to the terms of the Mozilla Public License
  6. // v. 2.0. If a copy of the MPL was not distributed with this file, You can
  7. // obtain one at http://mozilla.org/MPL/2.0/.
  8. #ifndef IGL_PARALLEL_FOR_H
  9. #define IGL_PARALLEL_FOR_H
  10. #include "igl_inline.h"
  11. #include <functional>
  12. //#warning "Defining IGL_PARALLEL_FOR_FORCE_SERIAL"
  13. //#define IGL_PARALLEL_FOR_FORCE_SERIAL
  14. namespace igl
  15. {
  16. /// Functional implementation of a basic, open-mp style, parallel
  17. /// for loop. If the inner block of a for-loop can be rewritten/encapsulated in
  18. /// a single (anonymous/lambda) function call `func` so that the serial code
  19. /// looks like:
  20. ///
  21. /// \code{cpp}
  22. /// for(int i = 0;i<loop_size;i++)
  23. /// {
  24. /// func(i);
  25. /// }
  26. /// \endcode
  27. ///
  28. /// then `parallel_for(loop_size,func,min_parallel)` will use as many threads as
  29. /// available on the current hardware to parallelize this for loop so long as
  30. /// loop_size<min_parallel, otherwise it will just use a serial for loop.
  31. ///
  32. /// Often if your code looks like:
  33. ///
  34. /// \code{cpp}
  35. /// for(int i = 0;i<loop_size;i++)
  36. /// {
  37. /// …
  38. /// }
  39. /// \endcode
  40. ///
  41. /// Then you can make a minimal two-line change to parallelize it:
  42. ///
  43. /// \code{cpp}
  44. /// //for(int i = 0;i<loop_size;i++)
  45. /// parallel_for(loop_size,[&](int i)
  46. /// {
  47. /// …
  48. /// }
  49. /// ,1000);
  50. /// \endcode
  51. ///
  52. /// @param[in] loop_size number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
  53. /// @param[in] func function handle taking iteration index as only argument to compute
  54. /// inner block of for loop I.e. for(int i ...){ func(i); }
  55. /// @param[in] min_parallel min size of loop_size such that parallel (non-serial)
  56. /// thread pooling should be attempted {0}
  57. /// @return true iff thread pool was invoked
  58. template<typename Index, typename FunctionType >
  59. inline bool parallel_for(
  60. const Index loop_size,
  61. const FunctionType & func,
  62. const size_t min_parallel=0);
  63. /// Functional implementation of an open-mp style, parallel for loop with
  64. /// accumulation. For example, serial code separated into n chunks (each to be
  65. /// parallelized with a thread) might look like:
  66. ///
  67. /// \code{cpp}
  68. /// Eigen::VectorXd S;
  69. /// const auto & prep_func = [&S](int n){ S = Eigen:VectorXd::Zero(n); };
  70. /// const auto & func = [&X,&S](int i, int t){ S(t) += X(i); };
  71. /// const auto & accum_func = [&S,&sum](int t){ sum += S(t); };
  72. /// prep_func(n);
  73. /// for(int i = 0;i<loop_size;i++)
  74. /// {
  75. /// func(i,i%n);
  76. /// }
  77. /// double sum = 0;
  78. /// for(int t = 0;t<n;t++)
  79. /// {
  80. /// accum_func(t);
  81. /// }
  82. /// \endcode
  83. ///
  84. /// @param[in] loop_size number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
  85. /// @param[in] prep_func function handle taking n >= number of threads as only
  86. /// argument
  87. /// @param[in] func function handle taking iteration index i and thread id t as only
  88. /// arguments to compute inner block of for loop I.e.
  89. /// for(int i ...){ func(i,t); }
  90. /// @param[in] accum_func function handle taking thread index as only argument, to be
  91. /// called after all calls of func, e.g., for serial accumulation across
  92. /// all n (potential) threads, see n in description of prep_func.
  93. /// @param[in] min_parallel min size of loop_size such that parallel (non-serial)
  94. /// thread pooling should be attempted {0}
  95. /// @return true iff thread pool was invoked
  96. template<
  97. typename Index,
  98. typename PrepFunctionType,
  99. typename FunctionType,
  100. typename AccumFunctionType
  101. >
  102. inline bool parallel_for(
  103. const Index loop_size,
  104. const PrepFunctionType & prep_func,
  105. const FunctionType & func,
  106. const AccumFunctionType & accum_func,
  107. const size_t min_parallel=0);
  108. }
  109. // Implementation
  110. #include "default_num_threads.h"
  111. namespace igl {
  112. namespace internal {
  113. inline std::size_t & parallel_for_nesting_level()
  114. {
  115. // One counter *per thread*
  116. static thread_local std::size_t level = 0;
  117. return level;
  118. }
  119. }
  120. }
  121. #include <cmath>
  122. #include <cassert>
  123. #include <thread>
  124. #include <vector>
  125. #include <algorithm>
  126. template<typename Index, typename FunctionType >
  127. inline bool igl::parallel_for(
  128. const Index loop_size,
  129. const FunctionType & func,
  130. const size_t min_parallel)
  131. {
  132. // no op preparation/accumulation
  133. const auto & no_op = [](const size_t /*n/t*/){};
  134. // two-parameter wrapper ignoring thread id
  135. const auto & wrapper = [&func](Index i,size_t /*t*/){ func(i); };
  136. return parallel_for(loop_size,no_op,wrapper,no_op,min_parallel);
  137. }
  138. template<
  139. typename Index,
  140. typename PreFunctionType,
  141. typename FunctionType,
  142. typename AccumFunctionType>
  143. inline bool igl::parallel_for(
  144. const Index loop_size,
  145. const PreFunctionType & prep_func,
  146. const FunctionType & func,
  147. const AccumFunctionType & accum_func,
  148. const size_t min_parallel)
  149. {
  150. assert(loop_size>=0);
  151. if(loop_size==0) return false;
  152. #ifdef IGL_PARALLEL_FOR_FORCE_SERIAL
  153. const size_t nthreads = 1;
  154. #else
  155. const size_t nthreads = igl::default_num_threads();
  156. #endif
  157. // NEW: are we already inside a parallel_for worker?
  158. const bool nested = igl::internal::parallel_for_nesting_level() > 0;
  159. if(loop_size<min_parallel || nthreads<=1 || nested)
  160. {
  161. // serial
  162. prep_func(1);
  163. // (We do *not* change nesting_level here, so non-nested, purely serial
  164. // uses of parallel_for can still parallelize their own inner loops.)
  165. for(Index i = 0;i<loop_size;i++) func(i,0);
  166. accum_func(0);
  167. return false;
  168. }else
  169. {
  170. // Size of a slice for the range functions
  171. Index slice =
  172. std::max(
  173. (Index)std::round((loop_size+1)/static_cast<double>(nthreads)),(Index)1);
  174. // [Helper] Inner loop
  175. const auto & range = [&func](const Index k1, const Index k2, const size_t t)
  176. {
  177. // NEW: mark this thread as being in a parallel_for while running func
  178. auto & level = igl::internal::parallel_for_nesting_level();
  179. level++;
  180. for(Index k = k1; k < k2; k++) func(k,t);
  181. level--;
  182. };
  183. prep_func(nthreads);
  184. std::vector<std::thread> pool;
  185. pool.reserve(nthreads);
  186. Index i1 = 0;
  187. Index i2 = std::min(0 + slice, loop_size);
  188. {
  189. size_t t = 0;
  190. for (; t+1 < nthreads && i1 < loop_size; ++t)
  191. {
  192. pool.emplace_back(range, i1, i2, t);
  193. i1 = i2;
  194. i2 = std::min(i2 + slice, loop_size);
  195. }
  196. if (i1 < loop_size)
  197. {
  198. pool.emplace_back(range, i1, loop_size, t);
  199. }
  200. }
  201. for (std::thread &t : pool) if (t.joinable()) t.join();
  202. for(size_t t = 0;t<nthreads;t++)
  203. {
  204. accum_func(t);
  205. }
  206. return true;
  207. }
  208. }
  209. //#ifndef IGL_STATIC_LIBRARY
  210. //#include "parallel_for.cpp"
  211. //#endif
  212. #endif