
passing tests

Alec Jacobson · 1 month ago · commit e6a7331381

2 changed files with 70 additions and 56 deletions:
  1. include/igl/parallel_for.h (+65 −53)
  2. include/igl/predicates/find_intersections.cpp (+5 −3)

include/igl/parallel_for.h (+65 −53)

@@ -250,6 +250,7 @@ inline bool igl::parallel_for(
   return parallel_for(loop_size, no_op, wrapper, no_op, min_parallel);
 }
 
+
 template<
   typename Index,
   typename PreFunctionType,
@@ -262,14 +263,15 @@ inline bool igl::parallel_for(
   const AccumFunctionType & accum_func,
   const size_t min_parallel)
 {
-  assert(loop_size>=0);
-  if(loop_size==0) return false;
+  assert(loop_size >= 0);
+  if (loop_size == 0) return false;
 
-  // If we're already inside a ThreadPool worker, run serial to avoid deadlock
-  if(igl::internal::is_worker_thread())
+  // If we're already inside a ThreadPool worker, run serial to avoid nested
+  // deadlock with the global pool.
+  if (igl::internal::is_worker_thread())
   {
     prep_func(1);
-    for(Index i = 0; i < loop_size; ++i)
+    for (Index i = 0; i < loop_size; ++i)
     {
       func(i, 0);
     }
@@ -283,76 +285,86 @@ inline bool igl::parallel_for(
   const size_t configured_threads = igl::default_num_threads();
 #endif
 
-  if(loop_size < static_cast<Index>(min_parallel) || configured_threads <= 1)
+  if (loop_size < static_cast<Index>(min_parallel) || configured_threads <= 1)
   {
-    // serial
+    // Serial fallback
     prep_func(1);
-    for(Index i = 0; i < loop_size; ++i)
+    for (Index i = 0; i < loop_size; ++i)
     {
       func(i, 0);
     }
     accum_func(0);
     return false;
   }
-  else
-  {
-    // Use shared thread pool
-    auto & pool = igl::internal::ThreadPool::instance(configured_threads);
-    const size_t pool_threads = std::max<size_t>(1, pool.size());
 
-    // Keep semantics: prep called with number of potential threads
-    prep_func(pool_threads);
+  // --- Parallel branch using shared thread pool ---
 
-    // Number of logical jobs (chunks)
-    const size_t jobs = static_cast<size_t>(
-      std::min<Index>(loop_size, static_cast<Index>(pool_threads)));
+  auto & pool = igl::internal::ThreadPool::instance(configured_threads);
+  const size_t pool_threads = std::max<size_t>(1, pool.size());
 
-    struct SharedCounter
-    {
-      std::atomic<size_t> remaining;
-    };
+  // Match old semantics: prep called with number of *potential* threads.
+  prep_func(pool_threads);
 
-    auto counter = std::make_shared<SharedCounter>();
-    counter->remaining.store(jobs, std::memory_order_relaxed);
+  // Number of "logical jobs" (chunks of the index range).
+  const size_t jobs = static_cast<size_t>(
+    std::min<Index>(loop_size, static_cast<Index>(pool_threads)));
 
-    const Index total = loop_size;
-    const Index base  = total / static_cast<Index>(jobs);
-    const Index rem   = total % static_cast<Index>(jobs);
+  struct Group
+  {
+    std::mutex mutex;
+    std::condition_variable cv;
+    std::atomic<size_t> remaining;
+  };
 
-    for(size_t t = 0; t < jobs; ++t)
-    {
-      const Index start =
-        static_cast<Index>(t) * base
-        + std::min<Index>(static_cast<Index>(t), rem);
-      const Index end = start + base + (t < static_cast<size_t>(rem) ? 1 : 0);
+  auto group = std::make_shared<Group>();
+  group->remaining.store(jobs, std::memory_order_relaxed);
 
-      pool.enqueue([counter, &func, start, end, t]()
-      {
-        for(Index k = start; k < end; ++k)
-        {
-          func(k, t);
-        }
-        counter->remaining.fetch_sub(1, std::memory_order_acq_rel);
-      });
-    }
+  const Index total = loop_size;
+  const Index base  = total / static_cast<Index>(jobs);
+  const Index rem   = total % static_cast<Index>(jobs);
+
+  for (size_t t = 0; t < jobs; ++t)
+  {
+    const Index start =
+      static_cast<Index>(t) * base
+      + std::min<Index>(static_cast<Index>(t), rem);
 
-    // Wait until all jobs for this parallel_for are finished.
-    // Busy-wait with yield to avoid hammering a core.
-    while(counter->remaining.load(std::memory_order_acquire) != 0)
+    const Index end = start + base + (t < static_cast<size_t>(rem) ? 1 : 0);
+
+    pool.enqueue([group, &func, start, end, t]()
     {
-      std::this_thread::yield();
-    }
+      // Each job processes its contiguous slice [start, end)
+      for (Index k = start; k < end; ++k)
+      {
+        func(k, t);
+      }
 
-    // Accumulate across all potential threads (like original impl)
-    for(size_t t = 0; t < pool_threads; ++t)
+      // Signal completion of this job.
+      if (group->remaining.fetch_sub(1, std::memory_order_acq_rel) == 1)
+      {
+        std::unique_lock<std::mutex> lock(group->mutex);
+        group->cv.notify_one();
+      }
+    });
+  }
+
+  // Wait for all jobs for this parallel_for call to finish.
+  {
+    std::unique_lock<std::mutex> lock(group->mutex);
+    group->cv.wait(lock, [&group]()
     {
-      accum_func(t);
-    }
+      return group->remaining.load(std::memory_order_acquire) == 0;
+    });
+  }
 
-    return true;
+  // Accumulate across all potential threads (same as original implementation).
+  for (size_t t = 0; t < pool_threads; ++t)
+  {
+    accum_func(t);
   }
-}
 
+  return true;
+}
 
 #endif
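
Aside (not part of the diff): the overload rewritten above is libigl's four-callback parallel_for(loop_size, prep_func, func, accum_func, min_parallel), where prep_func receives the number of potential threads, func receives (index, thread id), and accum_func is called serially per thread id. A minimal caller sketch under that assumption; the summing task and the names sum_of_squares and partial are illustrative, not from this commit:

#include <igl/parallel_for.h>
#include <cstddef>
#include <vector>

double sum_of_squares(const std::vector<double> & X)
{
  std::vector<double> partial;
  double total = 0;
  igl::parallel_for(
    X.size(),
    // prep_func: size one accumulator slot per potential thread
    [&](const size_t nt){ partial.assign(nt, 0.0); },
    // func: thread t owns partial[t], so no locking is needed here
    [&](const size_t i, const size_t t){ partial[t] += X[i] * X[i]; },
    // accum_func: runs serially after the loop, once per potential thread
    [&](const size_t t){ total += partial[t]; },
    /*min_parallel=*/1000);
  return total;
}

Per-thread partial results plus a serial accumulation pass is exactly why prep_func must be told the thread count up front, which is the semantics the rewritten branch preserves via prep_func(pool_threads).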
 
 

include/igl/predicates/find_intersections.cpp (+5 −3)

@@ -52,11 +52,11 @@ IGL_INLINE bool igl::predicates::find_intersections(
   const bool self_test = (&V1 == &V2) && (&F1 == &F2);
   if(stinker){ printf("%s\n",self_test?"🍎&(V1,F1) == 🍎&(V2,F2)":"🍎≠🍊"); }
 
+  std::atomic<bool> found_any(false);
   int num_if = 0;
-  // mutex
   std::mutex append_mutex;
-  const auto append_intersection = 
-    [&IF,&CP,&num_if,&append_mutex]( const int f1, const int f2, const bool coplanar = false)
+  const auto append_intersection =
+  [&IF,&CP,&num_if,&append_mutex,&found_any](const int f1, const int f2, const bool coplanar = false)
   {
     std::lock_guard<std::mutex> lock(append_mutex);
     if(num_if >= IF.rows())
@@ -67,8 +67,10 @@ IGL_INLINE bool igl::predicates::find_intersections(
     CP(num_if) = coplanar;
     IF.row(num_if) << f1,f2;
     num_if++;
+    found_any.store(true, std::memory_order_release);
   };
 
+
   // Returns corner in ith face opposite of shared edge; -1 otherwise
   const auto shared_edge = [&F1](const int f, const int g)->int
   {
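
Aside (not part of the diff): the change above pairs a mutex-guarded append with an atomic found_any flag published via a release store, so other threads can cheaply check whether any intersection has been recorded without taking the lock. A standalone sketch of that pattern; the container and the names pairs and append_pair are illustrative, not libigl API:

#include <atomic>
#include <mutex>
#include <utility>
#include <vector>

std::atomic<bool> found_any(false);
std::mutex append_mutex;
std::vector<std::pair<int,int>> pairs;

void append_pair(const int f1, const int f2)
{
  // The mutex serializes all writers of the shared output buffer.
  std::lock_guard<std::mutex> lock(append_mutex);
  pairs.emplace_back(f1, f2);
  // Release store: a thread that later observes found_any == true via an
  // acquire load is guaranteed to also see the row appended above.
  found_any.store(true, std::memory_order_release);
}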
   {