// This file is part of libigl, a simple c++ geometry processing library. // // Copyright (C) 2016 Alec Jacobson // // This Source Code Form is subject to the terms of the Mozilla Public License // v. 2.0. If a copy of the MPL was not distributed with this file, You can // obtain one at http://mozilla.org/MPL/2.0/. #ifndef IGL_PARALLEL_FOR_H #define IGL_PARALLEL_FOR_H #include "igl_inline.h" #include //#warning "Defining IGL_PARALLEL_FOR_FORCE_SERIAL" //#define IGL_PARALLEL_FOR_FORCE_SERIAL namespace igl { /// Functional implementation of a basic, open-mp style, parallel /// for loop. If the inner block of a for-loop can be rewritten/encapsulated in /// a single (anonymous/lambda) function call `func` so that the serial code /// looks like: /// /// \code{cpp} /// for(int i = 0;i inline bool parallel_for( const Index loop_size, const FunctionType & func, const size_t min_parallel=0); /// Functional implementation of an open-mp style, parallel for loop with /// accumulation. For example, serial code separated into n chunks (each to be /// parallelized with a thread) might look like: /// /// \code{cpp} /// Eigen::VectorXd S; /// const auto & prep_func = [&S](int n){ S = Eigen::VectorXd::Zero(n); }; /// const auto & func = [&X,&S](int i, int t){ S(t) += X(i); }; /// const auto & accum_func = [&S,&sum](int t){ sum += S(t); }; /// prep_func(n); /// for(int i = 0;i= number of threads as only /// argument /// @param[in] func function handle taking iteration index i and thread id t as only /// arguments to compute inner block of for loop I.e. /// for(int i ...){ func(i,t); } /// @param[in] accum_func function handle taking thread index as only argument, to be /// called after all calls of func, e.g., for serial accumulation across /// all n (potential) threads, see n in description of prep_func. 
/// @param[in] min_parallel min size of loop_size such that parallel (non-serial) /// thread pooling should be attempted {0} /// @return true iff thread pool was invoked template< typename Index, typename PrepFunctionType, typename FunctionType, typename AccumFunctionType > inline bool parallel_for( const Index loop_size, const PrepFunctionType & prep_func, const FunctionType & func, const AccumFunctionType & accum_func, const size_t min_parallel=0); } // Implementation #include "default_num_threads.h" namespace igl { namespace internal { inline std::size_t & parallel_for_nesting_level() { // One counter *per thread* static thread_local std::size_t level = 0; return level; } } } #include #include #include #include #include template inline bool igl::parallel_for( const Index loop_size, const FunctionType & func, const size_t min_parallel) { // no op preparation/accumulation const auto & no_op = [](const size_t /*n/t*/){}; // two-parameter wrapper ignoring thread id const auto & wrapper = [&func](Index i,size_t /*t*/){ func(i); }; return parallel_for(loop_size,no_op,wrapper,no_op,min_parallel); } template< typename Index, typename PreFunctionType, typename FunctionType, typename AccumFunctionType> inline bool igl::parallel_for( const Index loop_size, const PreFunctionType & prep_func, const FunctionType & func, const AccumFunctionType & accum_func, const size_t min_parallel) { assert(loop_size>=0); if(loop_size==0) return false; #ifdef IGL_PARALLEL_FOR_FORCE_SERIAL const size_t nthreads = 1; #else const size_t nthreads = igl::default_num_threads(); #endif // NEW: are we already inside a parallel_for worker? 
const bool nested = igl::internal::parallel_for_nesting_level() > 0; if(loop_size(nthreads)),(Index)1); // [Helper] Inner loop const auto & range = [&func](const Index k1, const Index k2, const size_t t) { // NEW: mark this thread as being in a parallel_for while running func auto & level = igl::internal::parallel_for_nesting_level(); level++; for(Index k = k1; k < k2; k++) func(k,t); level--; }; prep_func(nthreads); std::vector pool; pool.reserve(nthreads); Index i1 = 0; Index i2 = std::min(0 + slice, loop_size); { size_t t = 0; for (; t+1 < nthreads && i1 < loop_size; ++t) { pool.emplace_back(range, i1, i2, t); i1 = i2; i2 = std::min(i2 + slice, loop_size); } if (i1 < loop_size) { pool.emplace_back(range, i1, loop_size, t); } } for (std::thread &t : pool) if (t.joinable()) t.join(); for(size_t t = 0;t