Browse Source

Merge pull request #108655 from gongpha/manifold-76208dc

Update manifold to upstream commit 76208dc
Thaddeus Crews 3 months ago
parent
commit
2ef2e24582

+ 1 - 1
thirdparty/README.md

@@ -642,7 +642,7 @@ See `linuxbsd_headers/README.md`.
 ## manifold
 
 - Upstream: https://github.com/elalish/manifold
-- Version: 3.1.1 (2f4741e0b1de44d6d461b869e481351335340b44, 2025)
+- Version: git (76208dc02b069d2be50ed2d8a9279ee5622fa5fd, 2025)
 - License: Apache 2.0
 
 File extracted from upstream source:

+ 1 - 1
thirdparty/manifold/include/manifold/manifold.h

@@ -69,7 +69,7 @@ class CsgLeafNode;
  *
  * If you don't have merge vectors, you can create them with the Merge() method,
  * however this will fail if the mesh is not already manifold within the set
- * tolerance. For maximum reliablility, always store the merge vectors with the
+ * tolerance. For maximum reliability, always store the merge vectors with the
  * mesh, e.g. using the EXT_mesh_manifold extension in glTF.
  *
  * You can have any number of arbitrary floating-point properties per vertex,

+ 77 - 28
thirdparty/manifold/src/boolean3.cpp

@@ -15,7 +15,9 @@
 #include "boolean3.h"
 
 #include <limits>
+#include <unordered_set>
 
+#include "disjoint_sets.h"
 #include "parallel.h"
 
 #if (MANIFOLD_PAR == 1)
@@ -326,7 +328,6 @@ struct Kernel12Tmp {
 struct Kernel12Recorder {
   using Local = Kernel12Tmp;
   Kernel12& k12;
-  VecView<const TmpEdge> tmpedges;
   bool forward;
 
 #if MANIFOLD_PAR == 1
@@ -338,7 +339,6 @@ struct Kernel12Recorder {
 #endif
 
   void record(int queryIdx, int leafIdx, Local& tmp) {
-    queryIdx = tmpedges[queryIdx].halfedgeIdx;
     const auto [x12, v12] = k12(queryIdx, leafIdx);
     if (std::isfinite(v12[0])) {
       if (forward)
@@ -394,29 +394,30 @@ std::tuple<Vec<int>, Vec<vec3>> Intersect12(const Manifold::Impl& inP,
   Kernel11 k11{inP.vertPos_,  inQ.vertPos_, inP.halfedge_,
                inQ.halfedge_, expandP,      inP.vertNormal_};
 
-  Vec<TmpEdge> tmpedges = CreateTmpEdges(a.halfedge_);
-  Vec<Box> AEdgeBB(tmpedges.size());
-  for_each_n(autoPolicy(tmpedges.size(), 1e5), countAt(0), tmpedges.size(),
-             [&](const int e) {
-               AEdgeBB[e] = Box(a.vertPos_[tmpedges[e].first],
-                                a.vertPos_[tmpedges[e].second]);
-             });
   Kernel12 k12{a.halfedge_, b.halfedge_, a.vertPos_, forward, k02, k11};
-  Kernel12Recorder recorder{k12, tmpedges, forward, {}};
-
-  b.collider_.Collisions<false, Box, Kernel12Recorder>(AEdgeBB.cview(),
-                                                       recorder);
+  Kernel12Recorder recorder{k12, forward, {}};
+  auto f = [&a](int i) {
+    return a.halfedge_[i].IsForward()
+               ? Box(a.vertPos_[a.halfedge_[i].startVert],
+                     a.vertPos_[a.halfedge_[i].endVert])
+               : Box();
+  };
+  b.collider_.Collisions<false, decltype(f), Kernel12Recorder>(
+      f, a.halfedge_.size(), recorder);
 
   Kernel12Tmp result = recorder.get();
   p1q2 = std::move(result.p1q2_);
   auto x12 = std::move(result.x12_);
   auto v12 = std::move(result.v12_);
-  // sort p1q2
+  // sort p1q2 according to edges
   Vec<size_t> i12(p1q2.size());
   sequence(i12.begin(), i12.end());
+
+  int index = forward ? 0 : 1;
   stable_sort(i12.begin(), i12.end(), [&](int a, int b) {
-    return p1q2[a][0] < p1q2[b][0] ||
-           (p1q2[a][0] == p1q2[b][0] && p1q2[a][1] < p1q2[b][1]);
+    return p1q2[a][index] < p1q2[b][index] ||
+           (p1q2[a][index] == p1q2[b][index] &&
+            p1q2[a][1 - index] < p1q2[b][1 - index]);
   });
   Permute(p1q2, i12);
   Permute(x12, i12);
@@ -425,23 +426,70 @@ std::tuple<Vec<int>, Vec<vec3>> Intersect12(const Manifold::Impl& inP,
 };
 
 Vec<int> Winding03(const Manifold::Impl& inP, const Manifold::Impl& inQ,
-                   double expandP, bool forward) {
+                   const VecView<std::array<int, 2>> p1q2, double expandP,
+                   bool forward) {
   ZoneScoped;
-  // verts that are not shadowed (not in p0q2) have winding number zero.
-  // a: 0 (vertex), b: 2 (face)
   const Manifold::Impl& a = forward ? inP : inQ;
   const Manifold::Impl& b = forward ? inQ : inP;
+  Vec<int> brokenHalfedges;
+  int index = forward ? 0 : 1;
+
+  DisjointSets uA(a.vertPos_.size());
+  for_each(autoPolicy(a.halfedge_.size()), countAt(0),
+           countAt(a.halfedge_.size()), [&](int edge) {
+             const Halfedge& he = a.halfedge_[edge];
+             if (!he.IsForward()) return;
+             // check if the edge is broken
+             auto it = std::lower_bound(
+                 p1q2.begin(), p1q2.end(), edge,
+                 [index](const std::array<int, 2>& collisionPair, int e) {
+                   return collisionPair[index] < e;
+                 });
+             if (it == p1q2.end() || (*it)[index] != edge)
+               uA.unite(he.startVert, he.endVert);
+           });
+
+  // find components, the hope is the number of components should be small
+  std::unordered_set<int> components;
+#if (MANIFOLD_PAR == 1)
+  if (a.vertPos_.size() > 1e5) {
+    tbb::combinable<std::unordered_set<int>> componentsShared;
+    for_each(autoPolicy(a.vertPos_.size()), countAt(0),
+             countAt(a.vertPos_.size()),
+             [&](int v) { componentsShared.local().insert(uA.find(v)); });
+    componentsShared.combine_each([&](const std::unordered_set<int>& data) {
+      components.insert(data.begin(), data.end());
+    });
+  } else
+#endif
+  {
+    for (size_t v = 0; v < a.vertPos_.size(); v++)
+      components.insert(uA.find(v));
+  }
+  Vec<int> verts;
+  verts.reserve(components.size());
+  for (int c : components) verts.push_back(c);
+
   Vec<int> w03(a.NumVert(), 0);
   Kernel02 k02{a.vertPos_, b.halfedge_,     b.vertPos_,
                expandP,    inP.vertNormal_, forward};
-  auto f = [&](int a, int b) {
-    const auto [s02, z02] = k02(a, b);
-    if (std::isfinite(z02)) AtomicAdd(w03[a], s02 * (!forward ? -1 : 1));
+  auto recorderf = [&](int i, int b) {
+    const auto [s02, z02] = k02(verts[i], b);
+    if (std::isfinite(z02)) w03[verts[i]] += s02 * (!forward ? -1 : 1);
   };
-  auto recorder = MakeSimpleRecorder(f);
-  b.collider_.Collisions<false>(a.vertPos_.cview(), recorder);
+  auto recorder = MakeSimpleRecorder(recorderf);
+  auto f = [&](int i) { return a.vertPos_[verts[i]]; };
+  b.collider_.Collisions<false, decltype(f), decltype(recorder)>(
+      f, verts.size(), recorder);
+  // flood fill
+  for_each(autoPolicy(w03.size()), countAt(0), countAt(w03.size()),
+           [&](size_t i) {
+             size_t root = uA.find(i);
+             if (root == i) return;
+             w03[i] = w03[root];
+           });
   return w03;
-};
+}
 }  // namespace
 
 namespace manifold {
@@ -481,9 +529,10 @@ Boolean3::Boolean3(const Manifold::Impl& inP, const Manifold::Impl& inQ,
     return;
   }
 
-  // Sum up the winding numbers of all vertices.
-  w03_ = Winding03(inP, inQ, expandP_, true);
-  w30_ = Winding03(inP, inQ, expandP_, false);
+  // Compute winding numbers of all vertices using flood fill
+  // Vertices on the same connected component have the same winding number
+  w03_ = Winding03(inP, inQ, p1q2_, expandP_, true);
+  w30_ = Winding03(inP, inQ, p2q1_, expandP_, false);
 
 #ifdef MANIFOLD_DEBUG
   intersections.Stop();

+ 0 - 1
thirdparty/manifold/src/boolean3.h

@@ -43,7 +43,6 @@
  */
 
 namespace manifold {
-
 /** @ingroup Private */
 class Boolean3 {
  public:

+ 1 - 0
thirdparty/manifold/src/boolean_result.cpp

@@ -663,6 +663,7 @@ void CreateProperties(Manifold::Impl& outR, const Manifold::Impl& inP,
 }
 
 void ReorderHalfedges(VecView<Halfedge>& halfedges) {
+  ZoneScoped;
   // halfedges in the same face are added in non-deterministic order, so we have
   // to reorder them for determinism
 

+ 18 - 5
thirdparty/manifold/src/collider.h

@@ -157,9 +157,9 @@ struct CreateRadixTree {
   }
 };
 
-template <typename T, const bool selfCollision, typename Recorder>
+template <typename F, const bool selfCollision, typename Recorder>
 struct FindCollision {
-  VecView<const T> queries;
+  F& f;
   VecView<const Box> nodeBBox_;
   VecView<const std::pair<int, int>> internalChildren_;
   Recorder& recorder;
@@ -167,7 +167,7 @@ struct FindCollision {
   using Local = typename Recorder::Local;
 
   inline int RecordCollision(int node, const int queryIdx, Local& local) {
-    bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
+    bool overlaps = nodeBBox_[node].DoesOverlap(f(queryIdx));
     if (overlaps && IsLeaf(node)) {
       int leafIdx = Node2Leaf(node);
       if (!selfCollision || leafIdx != queryIdx) {
@@ -324,12 +324,25 @@ class Collider {
     ZoneScoped;
     using collider_internal::FindCollision;
     if (internalChildren_.empty()) return;
+    auto f = [queriesIn](const int i) { return queriesIn[i]; };
     for_each_n(parallel ? autoPolicy(queriesIn.size(),
                                      collider_internal::kSequentialThreshold)
                         : ExecutionPolicy::Seq,
                countAt(0), queriesIn.size(),
-               FindCollision<T, selfCollision, Recorder>{
-                   queriesIn, nodeBBox_, internalChildren_, recorder});
+               FindCollision<decltype(f), selfCollision, Recorder>{
+                   f, nodeBBox_, internalChildren_, recorder});
+  }
+
+  template <const bool selfCollision = false, typename F, typename Recorder>
+  void Collisions(F f, int n, Recorder& recorder, bool parallel = true) const {
+    ZoneScoped;
+    using collider_internal::FindCollision;
+    if (internalChildren_.empty()) return;
+    for_each_n(parallel ? autoPolicy(n, collider_internal::kSequentialThreshold)
+                        : ExecutionPolicy::Seq,
+               countAt(0), n,
+               FindCollision<decltype(f), selfCollision, Recorder>{
+                   f, nodeBBox_, internalChildren_, recorder});
   }
 
   static uint32_t MortonCode(vec3 position, Box bBox) {

+ 3 - 2
thirdparty/manifold/src/constructors.cpp

@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "csg_tree.h"
+#include "disjoint_sets.h"
 #include "impl.h"
 #include "manifold/manifold.h"
 #include "manifold/polygon.h"
@@ -466,11 +467,11 @@ Manifold Manifold::Compose(const std::vector<Manifold>& manifolds) {
  */
 std::vector<Manifold> Manifold::Decompose() const {
   ZoneScoped;
-  UnionFind<> uf(NumVert());
+  DisjointSets uf(NumVert());
   // Graph graph;
   auto pImpl_ = GetCsgLeafNode().GetImpl();
   for (const Halfedge& halfedge : pImpl_->halfedge_) {
-    if (halfedge.IsForward()) uf.unionXY(halfedge.startVert, halfedge.endVert);
+    if (halfedge.IsForward()) uf.unite(halfedge.startVert, halfedge.endVert);
   }
   std::vector<int> componentIndices;
   const int numComponents = uf.connectedComponents(componentIndices);

+ 4 - 2
thirdparty/manifold/src/cross_section/cross_section.cpp

@@ -78,6 +78,9 @@ C2::JoinType jt(CrossSection::JoinType jointype) {
     case CrossSection::JoinType::Miter:
       jt = C2::JoinType::Miter;
       break;
+    case CrossSection::JoinType::Bevel:
+      jt = C2::JoinType::Bevel;
+      break;
   };
   return jt;
 }
@@ -675,8 +678,7 @@ CrossSection CrossSection::Offset(double delta, JoinType jointype,
     // (radius) in order to get back the same number of segments in Clipper2:
     // steps_per_360 = PI / acos(1 - arc_tol / abs_delta)
     const double abs_delta = std::fabs(delta);
-    const double scaled_delta = abs_delta * std::pow(10, precision_);
-    arc_tol = (std::cos(Clipper2Lib::PI / n) - 1) * -scaled_delta;
+    arc_tol = (std::cos(Clipper2Lib::PI / n) - 1) * -abs_delta;
   }
   auto ps =
       C2::InflatePaths(GetPaths()->paths_, delta, jt(jointype),

+ 121 - 0
thirdparty/manifold/src/disjoint_sets.h

@@ -0,0 +1,121 @@
+// from https://github.com/wjakob/dset, changed to add connected component
+// computation
+//
+// Copyright (c) 2015 Wenzel Jakob <[email protected]>
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+#pragma once
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+
+class DisjointSets {
+ public:
+  DisjointSets(uint32_t size) : mData(size) {
+    for (uint32_t i = 0; i < size; ++i) mData[i] = (uint32_t)i;
+  }
+
+  uint32_t find(uint32_t id) const {
+    while (id != parent(id)) {
+      uint64_t value = mData[id];
+      uint32_t new_parent = parent((uint32_t)value);
+      uint64_t new_value = (value & 0xFFFFFFFF00000000ULL) | new_parent;
+      /* Try to update parent (may fail, that's ok) */
+      if (value != new_value) mData[id].compare_exchange_weak(value, new_value);
+      id = new_parent;
+    }
+    return id;
+  }
+
+  bool same(uint32_t id1, uint32_t id2) const {
+    for (;;) {
+      id1 = find(id1);
+      id2 = find(id2);
+      if (id1 == id2) return true;
+      if (parent(id1) == id1) return false;
+    }
+  }
+
+  uint32_t unite(uint32_t id1, uint32_t id2) {
+    for (;;) {
+      id1 = find(id1);
+      id2 = find(id2);
+
+      if (id1 == id2) return id1;
+
+      uint32_t r1 = rank(id1), r2 = rank(id2);
+
+      if (r1 > r2 || (r1 == r2 && id1 < id2)) {
+        std::swap(r1, r2);
+        std::swap(id1, id2);
+      }
+
+      uint64_t oldEntry = ((uint64_t)r1 << 32) | id1;
+      uint64_t newEntry = ((uint64_t)r1 << 32) | id2;
+
+      if (!mData[id1].compare_exchange_strong(oldEntry, newEntry)) continue;
+
+      if (r1 == r2) {
+        oldEntry = ((uint64_t)r2 << 32) | id2;
+        newEntry = ((uint64_t)(r2 + 1) << 32) | id2;
+        /* Try to update the rank (may fail, retry if rank = 0) */
+        if (!mData[id2].compare_exchange_strong(oldEntry, newEntry) && r2 == 0)
+          continue;
+      }
+
+      break;
+    }
+    return id2;
+  }
+
+  uint32_t size() const { return (uint32_t)mData.size(); }
+
+  uint32_t rank(uint32_t id) const {
+    return ((uint32_t)(mData[id] >> 32)) & 0x7FFFFFFFu;
+  }
+
+  uint32_t parent(uint32_t id) const { return (uint32_t)mData[id]; }
+
+  int connectedComponents(std::vector<int>& components) {
+    components.resize(mData.size());
+    int lonelyNodes = 0;
+    std::unordered_map<uint32_t, int> toLabel;
+    for (size_t i = 0; i < mData.size(); ++i) {
+      // we optimize for connected component of size 1
+      // no need to put them into the hashmap
+      auto iParent = find(i);
+      if (rank(iParent) == 0) {
+        components[i] = static_cast<int>(toLabel.size()) + lonelyNodes++;
+        continue;
+      }
+      auto iter = toLabel.find(iParent);
+      if (iter == toLabel.end()) {
+        auto s = static_cast<uint32_t>(toLabel.size()) + lonelyNodes;
+        toLabel.insert(std::make_pair(iParent, s));
+        components[i] = s;
+      } else {
+        components[i] = iter->second;
+      }
+    }
+    return toLabel.size() + lonelyNodes;
+  }
+
+  mutable std::vector<std::atomic<uint64_t>> mData;
+};

+ 112 - 35
thirdparty/manifold/src/impl.cpp

@@ -21,6 +21,7 @@
 #include <optional>
 
 #include "csg_tree.h"
+#include "disjoint_sets.h"
 #include "hashtable.h"
 #include "manifold/optional_assert.h"
 #include "mesh_fixes.h"
@@ -135,10 +136,10 @@ struct UpdateMeshID {
 
 int GetLabels(std::vector<int>& components,
               const Vec<std::pair<int, int>>& edges, int numNodes) {
-  UnionFind<> uf(numNodes);
+  DisjointSets uf(numNodes);
   for (auto edge : edges) {
     if (edge.first == -1 || edge.second == -1) continue;
-    uf.unionXY(edge.first, edge.second);
+    uf.unite(edge.first, edge.second);
   }
 
   return uf.connectedComponents(components);
@@ -147,6 +148,10 @@ int GetLabels(std::vector<int>& components,
 
 namespace manifold {
 
+#if (MANIFOLD_PAR == 1)
+tbb::task_arena gc_arena(1, 1, tbb::task_arena::priority::low);
+#endif
+
 std::atomic<uint32_t> Manifold::Impl::meshIDCounter_(1);
 
 uint32_t Manifold::Impl::ReserveIDs(uint32_t n) {
@@ -311,6 +316,7 @@ void Manifold::Impl::DedupePropVerts() {
   for_each_n(autoPolicy(halfedge_.size(), 1e4), countAt(0), halfedge_.size(),
              [&vert2vert, numProp, this](const int edgeIdx) {
                const Halfedge edge = halfedge_[edgeIdx];
+               if (edge.pairedHalfedge < 0) return;
                const int edgeFace = edgeIdx / 3;
                const int pairFace = edge.pairedHalfedge / 3;
 
@@ -346,6 +352,39 @@ void Manifold::Impl::DedupePropVerts() {
 
 constexpr int kRemovedHalfedge = -2;
 
+struct HalfedgePairData {
+  int largeVert;
+  int tri;
+  int edgeIndex;
+
+  bool operator<(const HalfedgePairData& other) const {
+    return largeVert < other.largeVert ||
+           (largeVert == other.largeVert && tri < other.tri);
+  }
+};
+
+template <bool useProp, typename F>
+struct PrepHalfedges {
+  VecView<Halfedge> halfedges;
+  const VecView<ivec3> triProp;
+  const VecView<ivec3> triVert;
+  F& f;
+
+  void operator()(const int tri) {
+    const ivec3& props = triProp[tri];
+    for (const int i : {0, 1, 2}) {
+      const int j = Next3(i);
+      const int k = Next3(j);
+      const int e = 3 * tri + i;
+      const int v0 = useProp ? props[i] : triVert[tri][i];
+      const int v1 = useProp ? props[j] : triVert[tri][j];
+      DEBUG_ASSERT(v0 != v1, logicErr, "topological degeneracy");
+      halfedges[e] = {v0, v1, -1, props[i]};
+      f(e, v0, v1);
+    }
+  }
+};
+
 /**
  * Create the halfedge_ data structure from a list of triangles. If the optional
  * prop2vert array is missing, it's assumed these triangles are are pointing to
@@ -361,35 +400,77 @@ void Manifold::Impl::CreateHalfedges(const Vec<ivec3>& triProp,
   // drop the old value first to avoid copy
   halfedge_.clear(true);
   halfedge_.resize_nofill(numHalfedge);
-  Vec<uint64_t> edge(numHalfedge);
-  Vec<int> ids(numHalfedge);
   auto policy = autoPolicy(numTri, 1e5);
-  sequence(ids.begin(), ids.end());
-  for_each_n(policy, countAt(0), numTri,
-             [this, &edge, &triProp, &triVert](const int tri) {
-               const ivec3& props = triProp[tri];
-               for (const int i : {0, 1, 2}) {
-                 const int j = (i + 1) % 3;
-                 const int e = 3 * tri + i;
-                 const int v0 = triVert.empty() ? props[i] : triVert[tri][i];
-                 const int v1 = triVert.empty() ? props[j] : triVert[tri][j];
-                 DEBUG_ASSERT(v0 != v1, logicErr, "topological degeneracy");
-                 halfedge_[e] = {v0, v1, -1, props[i]};
-                 // Sort the forward halfedges in front of the backward ones
-                 // by setting the highest-order bit.
-                 edge[e] = uint64_t(v0 < v1 ? 1 : 0) << 63 |
-                           ((uint64_t)std::min(v0, v1)) << 32 |
-                           std::max(v0, v1);
-               }
-             });
-  // Stable sort is required here so that halfedges from the same face are
-  // paired together (the triangles were created in face order). In some
-  // degenerate situations the triangulator can add the same internal edge in
-  // two different faces, causing this edge to not be 2-manifold. These are
-  // fixed by duplicating verts in CleanupTopology.
-  stable_sort(ids.begin(), ids.end(), [&edge](const int& a, const int& b) {
-    return edge[a] < edge[b];
-  });
+
+  int vertCount = static_cast<int>(vertPos_.size());
+  Vec<int> ids(numHalfedge);
+  {
+    ZoneScopedN("PrepHalfedges");
+    if (vertCount < (1 << 18)) {
+      // For small vertex count, it is faster to just do sorting
+      Vec<uint64_t> edge(numHalfedge);
+      auto setEdge = [&edge](int e, int v0, int v1) {
+        edge[e] = static_cast<uint64_t>(v0 < v1 ? 1 : 0) << 63 |
+                  (static_cast<uint64_t>(std::min(v0, v1))) << 32 |
+                  static_cast<uint64_t>(std::max(v0, v1));
+      };
+      if (triVert.empty()) {
+        for_each_n(policy, countAt(0), numTri,
+                   PrepHalfedges<true, decltype(setEdge)>{halfedge_, triProp,
+                                                          triVert, setEdge});
+      } else {
+        for_each_n(policy, countAt(0), numTri,
+                   PrepHalfedges<false, decltype(setEdge)>{halfedge_, triProp,
+                                                           triVert, setEdge});
+      }
+      sequence(ids.begin(), ids.end());
+      stable_sort(ids.begin(), ids.end(), [&edge](const int& a, const int& b) {
+        return edge[a] < edge[b];
+      });
+    } else {
+      // For larger vertex count, we separate the ids into slices for halfedges
+      // with the same smaller vertex.
+      // We first copy them there (as HalfedgePairData), and then do sorting
+      // locally for each slice.
+      // This helps with memory locality, and is faster for larger meshes.
+      Vec<HalfedgePairData> entries(numHalfedge);
+      Vec<int> offsets(vertCount * 2, 0);
+      auto setOffset = [&offsets, vertCount](int _e, int v0, int v1) {
+        const int offset = v0 > v1 ? 0 : vertCount;
+        AtomicAdd(offsets[std::min(v0, v1) + offset], 1);
+      };
+      if (triVert.empty()) {
+        for_each_n(policy, countAt(0), numTri,
+                   PrepHalfedges<true, decltype(setOffset)>{
+                       halfedge_, triProp, triVert, setOffset});
+      } else {
+        for_each_n(policy, countAt(0), numTri,
+                   PrepHalfedges<false, decltype(setOffset)>{
+                       halfedge_, triProp, triVert, setOffset});
+      }
+      exclusive_scan(offsets.begin(), offsets.end(), offsets.begin());
+      for_each_n(policy, countAt(0), numTri,
+                 [this, &offsets, &entries, vertCount](const int tri) {
+                   for (const int i : {0, 1, 2}) {
+                     const int e = 3 * tri + i;
+                     const int v0 = halfedge_[e].startVert;
+                     const int v1 = halfedge_[e].endVert;
+                     const int offset = v0 > v1 ? 0 : vertCount;
+                     const int start = std::min(v0, v1);
+                     const int index = AtomicAdd(offsets[start + offset], 1);
+                     entries[index] = {std::max(v0, v1), tri, e};
+                   }
+                 });
+      for_each_n(policy, countAt(0), offsets.size(), [&](const int v) {
+        int start = v == 0 ? 0 : offsets[v - 1];
+        int end = offsets[v];
+        for (int i = start; i < end; ++i) ids[i] = i;
+        std::sort(ids.begin() + start, ids.begin() + end,
+                  [&entries](int a, int b) { return entries[a] < entries[b]; });
+        for (int i = start; i < end; ++i) ids[i] = entries[ids[i]].edgeIndex;
+      });
+    }
+  }
 
   // Mark opposed triangles for removal - this may strand unreferenced verts
   // which are removed later by RemoveUnreferencedVerts() and Finish().
@@ -415,7 +496,7 @@ void Manifold::Impl::CreateHalfedges(const Vec<ivec3>& triProp,
     }
     if (i + 1 == segmentEnd) return consecutiveStart;
     Halfedge& h1 = halfedge_[ids[i + 1]];
-    if (h0.startVert == h1.startVert && h0.endVert == h1.endVert)
+    if (h1.startVert == h0.startVert && h1.endVert == h0.endVert)
       return consecutiveStart;
     return i + 1;
   };
@@ -450,10 +531,6 @@ void Manifold::Impl::CreateHalfedges(const Vec<ivec3>& triProp,
   for (int i = 0; i < numEdge; ++i)
     consecutiveStart = body(i, consecutiveStart, numEdge);
 #endif
-
-  // Once sorted, the first half of the range is the forward halfedges, which
-  // correspond to their backward pair at the same offset in the second half
-  // of the range.
   for_each_n(policy, countAt(0), numEdge, [this, &ids, numEdge](int i) {
     const int pair0 = ids[i];
     const int pair1 = ids[i + numEdge];

+ 2 - 1
thirdparty/manifold/src/impl.h

@@ -167,7 +167,8 @@ struct Manifold::Impl {
       runIndex.push_back(runEnd);
     }
 
-    const auto startID = Impl::ReserveIDs(meshGL.runOriginalID.size());
+    const auto startID =
+        Impl::ReserveIDs(std::max(1_uz, meshGL.runOriginalID.size()));
     auto runOriginalID = meshGL.runOriginalID;
     if (runOriginalID.empty()) {
       runOriginalID.push_back(startID);

+ 2 - 1
thirdparty/manifold/src/polygon.cpp

@@ -553,7 +553,8 @@ class EarClip {
 
   // Apply func to each un-clipped vert in a polygon and return an un-clipped
   // vert.
-  VertItrC Loop(VertItr first, std::function<void(VertItr)> func) const {
+  template <typename F>
+  VertItrC Loop(VertItr first, F func) const {
     VertItr v = first;
     do {
       if (Clipped(v)) {

+ 5 - 4
thirdparty/manifold/src/sort.cpp

@@ -15,6 +15,7 @@
 #include <atomic>
 #include <set>
 
+#include "disjoint_sets.h"
 #include "impl.h"
 #include "parallel.h"
 #include "shared.h"
@@ -152,17 +153,17 @@ bool MergeMeshGLP(MeshGLP<Precision, I>& mesh) {
   Permute(openVerts, vertNew2Old);
 
   Collider collider(vertBox, vertMorton);
-  UnionFind<> uf(numVert);
+  DisjointSets uf(numVert);
 
   auto f = [&uf, &openVerts](int a, int b) {
-    return uf.unionXY(openVerts[a], openVerts[b]);
+    return uf.unite(openVerts[a], openVerts[b]);
   };
   auto recorder = MakeSimpleRecorder(f);
   collider.Collisions<true>(vertBox.cview(), recorder, false);
 
   for (size_t i = 0; i < mesh.mergeFromVert.size(); ++i) {
-    uf.unionXY(static_cast<int>(mesh.mergeFromVert[i]),
-               static_cast<int>(mesh.mergeToVert[i]));
+    uf.unite(static_cast<int>(mesh.mergeFromVert[i]),
+             static_cast<int>(mesh.mergeToVert[i]));
   }
 
   mesh.mergeToVert.clear();

+ 6 - 4
thirdparty/manifold/src/tree2d.cpp

@@ -37,14 +37,16 @@ namespace manifold {
 // Recursive sorting is not the most efficient, but simple and guaranteed to
 // result in a balanced tree.
 void BuildTwoDTreeImpl(VecView<PolyVert> points, bool sortX) {
-  using CmpFn = std::function<bool(const PolyVert&, const PolyVert&)>;
-  CmpFn cmpx = [](const PolyVert& a, const PolyVert& b) {
+  auto cmpx = [](const PolyVert& a, const PolyVert& b) {
     return a.pos.x < b.pos.x;
   };
-  CmpFn cmpy = [](const PolyVert& a, const PolyVert& b) {
+  auto cmpy = [](const PolyVert& a, const PolyVert& b) {
     return a.pos.y < b.pos.y;
   };
-  manifold::stable_sort(points.begin(), points.end(), sortX ? cmpx : cmpy);
+  if (sortX)
+    manifold::stable_sort(points.begin(), points.end(), cmpx);
+  else
+    manifold::stable_sort(points.begin(), points.end(), cmpy);
   if (points.size() < 2) return;
   BuildTwoDTreeImpl(points.view(0, points.size() / 2), !sortX);
   BuildTwoDTreeImpl(points.view(points.size() / 2 + 1), !sortX);

+ 1 - 1
thirdparty/manifold/src/tree2d.h

@@ -44,7 +44,7 @@ void QueryTwoDTree(VecView<PolyVert> points, Rect r, F f) {
   int stackPointer = 0;
 
   while (1) {
-    if (currentView.size() <= 2) {
+    if (currentView.size() <= 8) {
       for (const auto& p : currentView)
         if (r.Contains(p.pos)) f(p);
       if (--stackPointer < 0) break;

+ 0 - 58
thirdparty/manifold/src/utils.h

@@ -17,7 +17,6 @@
 #include <atomic>
 #include <memory>
 #include <mutex>
-#include <unordered_map>
 
 #include "manifold/common.h"
 #include "vec.h"
@@ -136,63 +135,6 @@ class ConcurrentSharedPtr {
       std::make_shared<std::recursive_mutex>();
 };
 
-template <typename I = int, typename R = unsigned char>
-struct UnionFind {
-  Vec<I> parents;
-  // we do union by rank
-  // note that we shift rank by 1, rank 0 means it is not connected to anything
-  // else
-  Vec<R> ranks;
-
-  UnionFind(I numNodes) : parents(numNodes), ranks(numNodes, 0) {
-    sequence(parents.begin(), parents.end());
-  }
-
-  I find(I x) {
-    while (parents[x] != x) {
-      parents[x] = parents[parents[x]];
-      x = parents[x];
-    }
-    return x;
-  }
-
-  void unionXY(I x, I y) {
-    if (x == y) return;
-    if (ranks[x] == 0) ranks[x] = 1;
-    if (ranks[y] == 0) ranks[y] = 1;
-    x = find(x);
-    y = find(y);
-    if (x == y) return;
-    if (ranks[x] < ranks[y]) std::swap(x, y);
-    if (ranks[x] == ranks[y]) ranks[x]++;
-    parents[y] = x;
-  }
-
-  I connectedComponents(std::vector<I>& components) {
-    components.resize(parents.size());
-    I lonelyNodes = 0;
-    std::unordered_map<I, I> toLabel;
-    for (size_t i = 0; i < parents.size(); ++i) {
-      // we optimize for connected component of size 1
-      // no need to put them into the hashmap
-      if (ranks[i] == 0) {
-        components[i] = static_cast<I>(toLabel.size()) + lonelyNodes++;
-        continue;
-      }
-      parents[i] = find(i);
-      auto iter = toLabel.find(parents[i]);
-      if (iter == toLabel.end()) {
-        I s = static_cast<I>(toLabel.size()) + lonelyNodes;
-        toLabel.insert(std::make_pair(parents[i], s));
-        components[i] = s;
-      } else {
-        components[i] = iter->second;
-      }
-    }
-    return toLabel.size() + lonelyNodes;
-  }
-};
-
 template <typename T>
 struct Identity {
   T operator()(T v) const { return v; }

+ 24 - 10
thirdparty/manifold/src/vec.h

@@ -26,6 +26,10 @@
 
 namespace manifold {
 
+#if (MANIFOLD_PAR == 1)
+extern tbb::task_arena gc_arena;
+#endif
+
 template <typename T>
 class Vec;
 
@@ -92,8 +96,7 @@ class Vec : public VecView<T> {
 
   ~Vec() {
     if (this->ptr_ != nullptr) {
-      TracyFreeS(this->ptr_, 3);
-      free(this->ptr_);
+      free_async(this->ptr_, capacity_);
     }
     this->ptr_ = nullptr;
     this->size_ = 0;
@@ -103,8 +106,7 @@ class Vec : public VecView<T> {
   Vec<T>& operator=(const Vec<T>& other) {
     if (&other == this) return *this;
     if (this->ptr_ != nullptr) {
-      TracyFreeS(this->ptr_, 3);
-      free(this->ptr_);
+      free_async(this->ptr_, capacity_);
     }
     this->size_ = other.size_;
     capacity_ = other.size_;
@@ -120,8 +122,7 @@ class Vec : public VecView<T> {
   Vec<T>& operator=(Vec<T>&& other) {
     if (&other == this) return *this;
     if (this->ptr_ != nullptr) {
-      TracyFreeS(this->ptr_, 3);
-      free(this->ptr_);
+      free_async(this->ptr_, capacity_);
     }
     this->size_ = other.size_;
     capacity_ = other.capacity_;
@@ -166,8 +167,7 @@ class Vec : public VecView<T> {
         manifold::copy(autoPolicy(this->size_), this->ptr_,
                        this->ptr_ + this->size_, newBuffer);
       if (this->ptr_ != nullptr) {
-        TracyFreeS(this->ptr_, 3);
-        free(this->ptr_);
+        free_async(this->ptr_, capacity_);
       }
       this->ptr_ = newBuffer;
       capacity_ = n;
@@ -208,8 +208,7 @@ class Vec : public VecView<T> {
       manifold::copy(this->ptr_, this->ptr_ + this->size_, newBuffer);
     }
     if (this->ptr_ != nullptr) {
-      TracyFreeS(this->ptr_, 3);
-      free(this->ptr_);
+      free_async(this->ptr_, capacity_);
     }
     this->ptr_ = newBuffer;
     capacity_ = this->size_;
@@ -221,5 +220,20 @@ class Vec : public VecView<T> {
   size_t capacity_ = 0;
 
   static_assert(std::is_trivially_destructible<T>::value);
+
+  static void free_async(T* ptr, size_t size) {
+    // Only do async free if the size is large, because otherwise we may be able
+    // to reuse the allocation, and the deallocation probably won't trigger
+    // munmap.
+    // Currently it is set to 64 pages (4kB page).
+    constexpr size_t ASYNC_FREE_THRESHOLD = 1 << 18;
+    TracyFreeS(ptr, 3);
+#if (MANIFOLD_PAR == 1)
+    if (size * sizeof(T) > ASYNC_FREE_THRESHOLD)
+      gc_arena.enqueue([ptr]() { free(ptr); });
+    else
+#endif
+      free(ptr);
+  }
 };
 }  // namespace manifold