Add CSG boolean operators using elalish/manifold.

Uses MeshGL64 for higher floating-point precision.

Co-Authored-By: 31 <[email protected]>
Co-Authored-By: Claudio Z <[email protected]>
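
For context, the sketch below shows how a boolean operation and the MeshGL64 read-back look with the bundled elalish/manifold API. It is a minimal, standalone example with assumed shapes, sizes, and variable names; it is not code from this commit.

#include <manifold/manifold.h>

#include <cstdio>

int main() {
	using namespace manifold;

	// Two primitive solids; the sizes and segment count are arbitrary example values.
	Manifold box = Manifold::Cube({2.0, 2.0, 2.0}, /*center=*/true);
	Manifold ball = Manifold::Sphere(1.25, /*circularSegments=*/64);

	// CSG subtraction; operator- is manifold's Subtract boolean.
	Manifold carved = box - ball;

	// MeshGL64 exposes double-precision vertex properties (and 64-bit indices),
	// which is the extra precision the commit message refers to.
	MeshGL64 mesh = carved.GetMeshGL64();
	std::printf("verts=%zu tris=%zu numProp=%d\n",
			mesh.vertProperties.size() / mesh.numProp,
			mesh.triVerts.size() / 3,
			(int)mesh.numProp);
	return 0;
}

The engine-side counterpart is the _unpack_manifold() helper added to csg_shape.cpp later in this diff, which walks the same MeshGL64 structure (runIndex, triVerts, vertProperties) to rebuild a CSGBrush.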
K. S. Ernest (iFire) Lee, 3 years ago
commit fda444bb01
45 files changed, 20001 additions and 1624 deletions
  1. COPYRIGHT.txt (+5, -0)
  2. modules/csg/SCsub (+36, -1)
  3. modules/csg/csg.cpp (+3, -1425)
  4. modules/csg/csg.h (+8, -140)
  5. modules/csg/csg_shape.cpp (+195, -58)
  6. thirdparty/README.md (+12, -0)
  7. thirdparty/manifold/AUTHORS (+10, -0)
  8. thirdparty/manifold/LICENSE (+201, -0)
  9. thirdparty/manifold/include/manifold/common.h (+650, -0)
  10. thirdparty/manifold/include/manifold/iters.h (+314, -0)
  11. thirdparty/manifold/include/manifold/linalg.h (+2601, -0)
  12. thirdparty/manifold/include/manifold/manifold.h (+435, -0)
  13. thirdparty/manifold/include/manifold/optional_assert.h (+66, -0)
  14. thirdparty/manifold/include/manifold/polygon.h (+62, -0)
  15. thirdparty/manifold/include/manifold/vec_view.h (+151, -0)
  16. thirdparty/manifold/src/boolean3.cpp (+599, -0)
  17. thirdparty/manifold/src/boolean3.h (+60, -0)
  18. thirdparty/manifold/src/boolean_result.cpp (+889, -0)
  19. thirdparty/manifold/src/collider.h (+382, -0)
  20. thirdparty/manifold/src/constructors.cpp (+503, -0)
  21. thirdparty/manifold/src/cross_section/cross_section.cpp (+789, -0)
  22. thirdparty/manifold/src/csg_tree.cpp (+643, -0)
  23. thirdparty/manifold/src/csg_tree.h (+108, -0)
  24. thirdparty/manifold/src/edge_op.cpp (+696, -0)
  25. thirdparty/manifold/src/face_op.cpp (+319, -0)
  26. thirdparty/manifold/src/hashtable.h (+168, -0)
  27. thirdparty/manifold/src/impl.cpp (+686, -0)
  28. thirdparty/manifold/src/impl.h (+352, -0)
  29. thirdparty/manifold/src/manifold.cpp (+1038, -0)
  30. thirdparty/manifold/src/mesh_fixes.h (+65, -0)
  31. thirdparty/manifold/src/parallel.h (+1125, -0)
  32. thirdparty/manifold/src/polygon.cpp (+1010, -0)
  33. thirdparty/manifold/src/properties.cpp (+387, -0)
  34. thirdparty/manifold/src/quickhull.cpp (+860, -0)
  35. thirdparty/manifold/src/quickhull.h (+288, -0)
  36. thirdparty/manifold/src/sdf.cpp (+533, -0)
  37. thirdparty/manifold/src/shared.h (+219, -0)
  38. thirdparty/manifold/src/smoothing.cpp (+1003, -0)
  39. thirdparty/manifold/src/sort.cpp (+517, -0)
  40. thirdparty/manifold/src/sparse.h (+225, -0)
  41. thirdparty/manifold/src/subdivision.cpp (+809, -0)
  42. thirdparty/manifold/src/svd.h (+308, -0)
  43. thirdparty/manifold/src/tri_dist.h (+225, -0)
  44. thirdparty/manifold/src/utils.h (+227, -0)
  45. thirdparty/manifold/src/vec.h (+219, -0)

COPYRIGHT.txt (+5, -0)

@@ -334,6 +334,11 @@ Comment: WebP codec
 Copyright: 2010, Google Inc.
 License: BSD-3-clause
 
+Files: ./thirdparty/manifold/
+Comment: Manifold
+Copyright: 2020-2024, The Manifold Authors
+License: Apache-2.0
+
 Files: ./thirdparty/mbedtls/
 Comment: Mbed TLS
 Copyright: The Mbed TLS Contributors

modules/csg/SCsub (+36, -1)

@@ -6,7 +6,42 @@ Import("env_modules")
 
 env_csg = env_modules.Clone()
 
-# Godot source files
+env_csg.Append(CPPDEFINES=("MANIFOLD_PAR", "-1"))
+
+# Thirdparty source files
+
+thirdparty_obj = []
+
+thirdparty_dir = "#thirdparty/manifold/"
+thirdparty_sources = [
+    "src/boolean_result.cpp",
+    "src/boolean3.cpp",
+    "src/constructors.cpp",
+    "src/csg_tree.cpp",
+    "src/edge_op.cpp",
+    "src/face_op.cpp",
+    "src/impl.cpp",
+    "src/manifold.cpp",
+    "src/polygon.cpp",
+    "src/properties.cpp",
+    "src/quickhull.cpp",
+    "src/smoothing.cpp",
+    "src/sort.cpp",
+    "src/subdivision.cpp",
+]
+
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+env_csg.Prepend(
+    CPPPATH=[
+        thirdparty_dir + "include",
+    ]
+)
+env_thirdparty = env_csg.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env.modules_sources += thirdparty_obj
+
+# Godot's own source files
 env_csg.add_source_files(env.modules_sources, "*.cpp")
 if env.editor_build:
     env_csg.add_source_files(env.modules_sources, "editor/*.cpp")

modules/csg/csg.cpp (+3, -1425)

@@ -33,167 +33,13 @@
 #include "core/math/geometry_2d.h"
 #include "core/math/math_funcs.h"
 #include "core/templates/sort_array.h"
+#include "scene/resources/mesh_data_tool.h"
+#include "scene/resources/surface_tool.h"
 
-// Static helper functions.
-
-inline static bool is_snapable(const Vector3 &p_point1, const Vector3 &p_point2, real_t p_distance) {
-	return p_point2.distance_squared_to(p_point1) < p_distance * p_distance;
-}
-
-inline static Vector2 interpolate_segment_uv(const Vector2 p_segment_points[2], const Vector2 p_uvs[2], const Vector2 &p_interpolation_point) {
-	if (p_segment_points[0].is_equal_approx(p_segment_points[1])) {
-		return p_uvs[0];
-	}
-
-	float segment_length = p_segment_points[0].distance_to(p_segment_points[1]);
-	float distance = p_segment_points[0].distance_to(p_interpolation_point);
-	float fraction = distance / segment_length;
-
-	return p_uvs[0].lerp(p_uvs[1], fraction);
-}
-
-inline static Vector2 interpolate_triangle_uv(const Vector2 p_vertices[3], const Vector2 p_uvs[3], const Vector2 &p_interpolation_point) {
-	if (p_interpolation_point.is_equal_approx(p_vertices[0])) {
-		return p_uvs[0];
-	}
-	if (p_interpolation_point.is_equal_approx(p_vertices[1])) {
-		return p_uvs[1];
-	}
-	if (p_interpolation_point.is_equal_approx(p_vertices[2])) {
-		return p_uvs[2];
-	}
-
-	Vector2 edge1 = p_vertices[1] - p_vertices[0];
-	Vector2 edge2 = p_vertices[2] - p_vertices[0];
-	Vector2 interpolation = p_interpolation_point - p_vertices[0];
-
-	float edge1_on_edge1 = edge1.dot(edge1);
-	float edge1_on_edge2 = edge1.dot(edge2);
-	float edge2_on_edge2 = edge2.dot(edge2);
-	float inter_on_edge1 = interpolation.dot(edge1);
-	float inter_on_edge2 = interpolation.dot(edge2);
-	float scale = (edge1_on_edge1 * edge2_on_edge2 - edge1_on_edge2 * edge1_on_edge2);
-	if (scale == 0) {
-		return p_uvs[0];
-	}
-
-	float v = (edge2_on_edge2 * inter_on_edge1 - edge1_on_edge2 * inter_on_edge2) / scale;
-	float w = (edge1_on_edge1 * inter_on_edge2 - edge1_on_edge2 * inter_on_edge1) / scale;
-	float u = 1.0f - v - w;
-
-	return p_uvs[0] * u + p_uvs[1] * v + p_uvs[2] * w;
-}
-
-static inline bool ray_intersects_triangle(const Vector3 &p_from, const Vector3 &p_dir, const Vector3 p_vertices[3], float p_tolerance, Vector3 &r_intersection_point) {
-	Vector3 edge1 = p_vertices[1] - p_vertices[0];
-	Vector3 edge2 = p_vertices[2] - p_vertices[0];
-	Vector3 h = p_dir.cross(edge2);
-	real_t a = edge1.dot(h);
-	// Check if ray is parallel to triangle.
-	if (Math::is_zero_approx(a)) {
-		return false;
-	}
-	real_t f = 1.0 / a;
-
-	Vector3 s = p_from - p_vertices[0];
-	real_t u = f * s.dot(h);
-	if (u < 0.0 - p_tolerance || u > 1.0 + p_tolerance) {
-		return false;
-	}
-
-	Vector3 q = s.cross(edge1);
-	real_t v = f * p_dir.dot(q);
-	if (v < 0.0 - p_tolerance || u + v > 1.0 + p_tolerance) {
-		return false;
-	}
-
-	// Ray intersects triangle.
-	// Calculate distance.
-	real_t t = f * edge2.dot(q);
-	// Confirm triangle is in front of ray.
-	if (t >= p_tolerance) {
-		r_intersection_point = p_from + p_dir * t;
-		return true;
-	} else {
-		return false;
-	}
-}
-
-inline bool is_point_in_triangle(const Vector3 &p_point, const Vector3 p_vertices[3], int p_shifted = 0) {
-	real_t det = p_vertices[0].dot(p_vertices[1].cross(p_vertices[2]));
-
-	// If determinant is, zero try shift the triangle and the point.
-	if (Math::is_zero_approx(det)) {
-		if (p_shifted > 2) {
-			// Triangle appears degenerate, so ignore it.
-			return false;
-		}
-		Vector3 shift_by;
-		shift_by[p_shifted] = 1;
-		Vector3 shifted_point = p_point + shift_by;
-		Vector3 shifted_vertices[3] = { p_vertices[0] + shift_by, p_vertices[1] + shift_by, p_vertices[2] + shift_by };
-		return is_point_in_triangle(shifted_point, shifted_vertices, p_shifted + 1);
-	}
-
-	// Find the barycentric coordinates of the point with respect to the vertices.
-	real_t lambda[3];
-	lambda[0] = p_vertices[1].cross(p_vertices[2]).dot(p_point) / det;
-	lambda[1] = p_vertices[2].cross(p_vertices[0]).dot(p_point) / det;
-	lambda[2] = p_vertices[0].cross(p_vertices[1]).dot(p_point) / det;
-
-	// Point is in the plane if all lambdas sum to 1.
-	if (!Math::is_equal_approx(lambda[0] + lambda[1] + lambda[2], 1)) {
-		return false;
-	}
-
-	// Point is inside the triangle if all lambdas are positive.
-	if (lambda[0] < 0 || lambda[1] < 0 || lambda[2] < 0) {
-		return false;
-	}
-
-	return true;
-}
-
-inline static bool is_triangle_degenerate(const Vector2 p_vertices[3], real_t p_vertex_snap2) {
-	real_t det = p_vertices[0].x * p_vertices[1].y - p_vertices[0].x * p_vertices[2].y +
-			p_vertices[0].y * p_vertices[2].x - p_vertices[0].y * p_vertices[1].x +
-			p_vertices[1].x * p_vertices[2].y - p_vertices[1].y * p_vertices[2].x;
-
-	return det < p_vertex_snap2;
-}
-
-inline static bool are_segments_parallel(const Vector2 p_segment1_points[2], const Vector2 p_segment2_points[2], float p_vertex_snap2) {
-	Vector2 segment1 = p_segment1_points[1] - p_segment1_points[0];
-	Vector2 segment2 = p_segment2_points[1] - p_segment2_points[0];
-	real_t segment1_length2 = segment1.dot(segment1);
-	real_t segment2_length2 = segment2.dot(segment2);
-	real_t segment_onto_segment = segment2.dot(segment1);
-
-	if (segment1_length2 < p_vertex_snap2 || segment2_length2 < p_vertex_snap2) {
-		return true;
-	}
-
-	real_t max_separation2;
-	if (segment1_length2 > segment2_length2) {
-		max_separation2 = segment2_length2 - segment_onto_segment * segment_onto_segment / segment1_length2;
-	} else {
-		max_separation2 = segment1_length2 - segment_onto_segment * segment_onto_segment / segment2_length2;
-	}
-
-	return max_separation2 < p_vertex_snap2;
-}
+#include "thirdparty/manifold/include/manifold/manifold.h"
 
 // CSGBrush
 
-void CSGBrush::_regen_face_aabbs() {
-	for (int i = 0; i < faces.size(); i++) {
-		faces.write[i].aabb = AABB();
-		faces.write[i].aabb.position = faces[i].vertices[0];
-		faces.write[i].aabb.expand_to(faces[i].vertices[1]);
-		faces.write[i].aabb.expand_to(faces[i].vertices[2]);
-	}
-}
-
 void CSGBrush::build_from_faces(const Vector<Vector3> &p_vertices, const Vector<Vector2> &p_uvs, const Vector<bool> &p_smooth, const Vector<Ref<Material>> &p_materials, const Vector<bool> &p_flip_faces) {
 	faces.clear();
 
@@ -277,1271 +123,3 @@ void CSGBrush::copy_from(const CSGBrush &p_brush, const Transform3D &p_xform) {
 
 	_regen_face_aabbs();
 }
-
-// CSGBrushOperation
-
-void CSGBrushOperation::merge_brushes(Operation p_operation, const CSGBrush &p_brush_a, const CSGBrush &p_brush_b, CSGBrush &r_merged_brush, float p_vertex_snap) {
-	// Check for face collisions and add necessary faces.
-	Build2DFaceCollection build2DFaceCollection;
-	for (int i = 0; i < p_brush_a.faces.size(); i++) {
-		for (int j = 0; j < p_brush_b.faces.size(); j++) {
-			if (p_brush_a.faces[i].aabb.intersects_inclusive(p_brush_b.faces[j].aabb)) {
-				update_faces(p_brush_a, i, p_brush_b, j, build2DFaceCollection, p_vertex_snap);
-			}
-		}
-	}
-
-	// Add faces to MeshMerge.
-	MeshMerge mesh_merge;
-	mesh_merge.vertex_snap = p_vertex_snap;
-
-	for (int i = 0; i < p_brush_a.faces.size(); i++) {
-		Ref<Material> material;
-		if (p_brush_a.faces[i].material != -1) {
-			material = p_brush_a.materials[p_brush_a.faces[i].material];
-		}
-
-		if (build2DFaceCollection.build2DFacesA.has(i)) {
-			build2DFaceCollection.build2DFacesA[i].addFacesToMesh(mesh_merge, p_brush_a.faces[i].smooth, p_brush_a.faces[i].invert, material, false);
-		} else {
-			Vector3 points[3];
-			Vector2 uvs[3];
-			for (int j = 0; j < 3; j++) {
-				points[j] = p_brush_a.faces[i].vertices[j];
-				uvs[j] = p_brush_a.faces[i].uvs[j];
-			}
-			mesh_merge.add_face(points, uvs, p_brush_a.faces[i].smooth, p_brush_a.faces[i].invert, material, false);
-		}
-	}
-
-	for (int i = 0; i < p_brush_b.faces.size(); i++) {
-		Ref<Material> material;
-		if (p_brush_b.faces[i].material != -1) {
-			material = p_brush_b.materials[p_brush_b.faces[i].material];
-		}
-
-		if (build2DFaceCollection.build2DFacesB.has(i)) {
-			build2DFaceCollection.build2DFacesB[i].addFacesToMesh(mesh_merge, p_brush_b.faces[i].smooth, p_brush_b.faces[i].invert, material, true);
-		} else {
-			Vector3 points[3];
-			Vector2 uvs[3];
-			for (int j = 0; j < 3; j++) {
-				points[j] = p_brush_b.faces[i].vertices[j];
-				uvs[j] = p_brush_b.faces[i].uvs[j];
-			}
-			mesh_merge.add_face(points, uvs, p_brush_b.faces[i].smooth, p_brush_b.faces[i].invert, material, true);
-		}
-	}
-
-	// Mark faces that ended up inside the intersection.
-	mesh_merge.mark_inside_faces();
-
-	// Create new brush and fill with new faces.
-	r_merged_brush.faces.clear();
-
-	switch (p_operation) {
-		case OPERATION_UNION: {
-			int outside_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (mesh_merge.faces[i].inside) {
-					continue;
-				}
-				outside_count++;
-			}
-
-			r_merged_brush.faces.resize(outside_count);
-
-			outside_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (mesh_merge.faces[i].inside) {
-					continue;
-				}
-
-				for (int j = 0; j < 3; j++) {
-					r_merged_brush.faces.write[outside_count].vertices[j] = mesh_merge.points[mesh_merge.faces[i].points[j]];
-					r_merged_brush.faces.write[outside_count].uvs[j] = mesh_merge.faces[i].uvs[j];
-				}
-
-				r_merged_brush.faces.write[outside_count].smooth = mesh_merge.faces[i].smooth;
-				r_merged_brush.faces.write[outside_count].invert = mesh_merge.faces[i].invert;
-				r_merged_brush.faces.write[outside_count].material = mesh_merge.faces[i].material_idx;
-				outside_count++;
-			}
-
-			r_merged_brush._regen_face_aabbs();
-
-		} break;
-
-		case OPERATION_INTERSECTION: {
-			int inside_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (!mesh_merge.faces[i].inside) {
-					continue;
-				}
-				inside_count++;
-			}
-
-			r_merged_brush.faces.resize(inside_count);
-
-			inside_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (!mesh_merge.faces[i].inside) {
-					continue;
-				}
-
-				for (int j = 0; j < 3; j++) {
-					r_merged_brush.faces.write[inside_count].vertices[j] = mesh_merge.points[mesh_merge.faces[i].points[j]];
-					r_merged_brush.faces.write[inside_count].uvs[j] = mesh_merge.faces[i].uvs[j];
-				}
-
-				r_merged_brush.faces.write[inside_count].smooth = mesh_merge.faces[i].smooth;
-				r_merged_brush.faces.write[inside_count].invert = mesh_merge.faces[i].invert;
-				r_merged_brush.faces.write[inside_count].material = mesh_merge.faces[i].material_idx;
-				inside_count++;
-			}
-
-			r_merged_brush._regen_face_aabbs();
-
-		} break;
-
-		case OPERATION_SUBTRACTION: {
-			int face_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (mesh_merge.faces[i].from_b && !mesh_merge.faces[i].inside) {
-					continue;
-				}
-				if (!mesh_merge.faces[i].from_b && mesh_merge.faces[i].inside) {
-					continue;
-				}
-				face_count++;
-			}
-
-			r_merged_brush.faces.resize(face_count);
-
-			face_count = 0;
-
-			for (int i = 0; i < mesh_merge.faces.size(); i++) {
-				if (mesh_merge.faces[i].from_b && !mesh_merge.faces[i].inside) {
-					continue;
-				}
-				if (!mesh_merge.faces[i].from_b && mesh_merge.faces[i].inside) {
-					continue;
-				}
-
-				for (int j = 0; j < 3; j++) {
-					r_merged_brush.faces.write[face_count].vertices[j] = mesh_merge.points[mesh_merge.faces[i].points[j]];
-					r_merged_brush.faces.write[face_count].uvs[j] = mesh_merge.faces[i].uvs[j];
-				}
-
-				if (mesh_merge.faces[i].from_b) {
-					//invert facing of insides of B
-					SWAP(r_merged_brush.faces.write[face_count].vertices[1], r_merged_brush.faces.write[face_count].vertices[2]);
-					SWAP(r_merged_brush.faces.write[face_count].uvs[1], r_merged_brush.faces.write[face_count].uvs[2]);
-				}
-
-				r_merged_brush.faces.write[face_count].smooth = mesh_merge.faces[i].smooth;
-				r_merged_brush.faces.write[face_count].invert = mesh_merge.faces[i].invert;
-				r_merged_brush.faces.write[face_count].material = mesh_merge.faces[i].material_idx;
-				face_count++;
-			}
-
-			r_merged_brush._regen_face_aabbs();
-
-		} break;
-	}
-
-	// Update the list of materials.
-	r_merged_brush.materials.resize(mesh_merge.materials.size());
-	for (const KeyValue<Ref<Material>, int> &E : mesh_merge.materials) {
-		r_merged_brush.materials.write[E.value] = E.key;
-	}
-}
-
-// CSGBrushOperation::MeshMerge
-
-// Use a limit to speed up bvh and limit the depth.
-#define BVH_LIMIT 8
-
-int CSGBrushOperation::MeshMerge::_create_bvh(FaceBVH *r_facebvhptr, FaceBVH **r_facebvhptrptr, int p_from, int p_size, int p_depth, int &r_max_depth, int &r_max_alloc) {
-	if (p_depth > r_max_depth) {
-		r_max_depth = p_depth;
-	}
-
-	if (p_size == 0) {
-		return -1;
-	}
-
-	if (p_size <= BVH_LIMIT) {
-		for (int i = 0; i < p_size - 1; i++) {
-			r_facebvhptrptr[p_from + i]->next = r_facebvhptrptr[p_from + i + 1] - r_facebvhptr;
-		}
-		return r_facebvhptrptr[p_from] - r_facebvhptr;
-	}
-
-	AABB aabb;
-	aabb = r_facebvhptrptr[p_from]->aabb;
-	for (int i = 1; i < p_size; i++) {
-		aabb.merge_with(r_facebvhptrptr[p_from + i]->aabb);
-	}
-
-	int li = aabb.get_longest_axis_index();
-
-	switch (li) {
-		case Vector3::AXIS_X: {
-			SortArray<FaceBVH *, FaceBVHCmpX> sort_x;
-			sort_x.nth_element(0, p_size, p_size / 2, &r_facebvhptrptr[p_from]);
-			//sort_x.sort(&p_bb[p_from],p_size);
-		} break;
-
-		case Vector3::AXIS_Y: {
-			SortArray<FaceBVH *, FaceBVHCmpY> sort_y;
-			sort_y.nth_element(0, p_size, p_size / 2, &r_facebvhptrptr[p_from]);
-			//sort_y.sort(&p_bb[p_from],p_size);
-		} break;
-
-		case Vector3::AXIS_Z: {
-			SortArray<FaceBVH *, FaceBVHCmpZ> sort_z;
-			sort_z.nth_element(0, p_size, p_size / 2, &r_facebvhptrptr[p_from]);
-			//sort_z.sort(&p_bb[p_from],p_size);
-		} break;
-	}
-
-	int left = _create_bvh(r_facebvhptr, r_facebvhptrptr, p_from, p_size / 2, p_depth + 1, r_max_depth, r_max_alloc);
-	int right = _create_bvh(r_facebvhptr, r_facebvhptrptr, p_from + p_size / 2, p_size - p_size / 2, p_depth + 1, r_max_depth, r_max_alloc);
-
-	int index = r_max_alloc++;
-	FaceBVH *_new = &r_facebvhptr[index];
-	_new->aabb = aabb;
-	_new->center = aabb.get_center();
-	_new->face = -1;
-	_new->left = left;
-	_new->right = right;
-	_new->next = -1;
-
-	return index;
-}
-
-void CSGBrushOperation::MeshMerge::_add_distance(List<IntersectionDistance> &r_intersectionsA, List<IntersectionDistance> &r_intersectionsB, bool p_from_B, real_t p_distance_squared, bool p_is_conormal) const {
-	List<IntersectionDistance> &intersections = p_from_B ? r_intersectionsB : r_intersectionsA;
-
-	// Check if distance exists.
-	for (const IntersectionDistance E : intersections) {
-		if (E.is_conormal == p_is_conormal && Math::is_equal_approx(E.distance_squared, p_distance_squared)) {
-			return;
-		}
-	}
-	IntersectionDistance distance;
-	distance.is_conormal = p_is_conormal;
-	distance.distance_squared = p_distance_squared;
-	intersections.push_back(distance);
-}
-
-bool CSGBrushOperation::MeshMerge::_bvh_inside(FaceBVH *r_facebvhptr, int p_max_depth, int p_bvh_first, int p_face_idx) const {
-	Face face = faces[p_face_idx];
-	Vector3 face_points[3] = {
-		points[face.points[0]],
-		points[face.points[1]],
-		points[face.points[2]]
-	};
-	Vector3 face_center = (face_points[0] + face_points[1] + face_points[2]) / 3.0;
-	Vector3 face_normal = Plane(face_points[0], face_points[1], face_points[2]).normal;
-
-	uint32_t *stack = (uint32_t *)alloca(sizeof(int) * p_max_depth);
-
-	enum {
-		TEST_AABB_BIT = 0,
-		VISIT_LEFT_BIT = 1,
-		VISIT_RIGHT_BIT = 2,
-		VISIT_DONE_BIT = 3,
-		VISITED_BIT_SHIFT = 29,
-		NODE_IDX_MASK = (1 << VISITED_BIT_SHIFT) - 1,
-		VISITED_BIT_MASK = ~NODE_IDX_MASK
-	};
-
-	List<IntersectionDistance> intersectionsA;
-	List<IntersectionDistance> intersectionsB;
-
-	Intersection closest_intersection;
-	closest_intersection.found = false;
-
-	int level = 0;
-	int pos = p_bvh_first;
-	stack[0] = pos;
-
-	while (true) {
-		uint32_t node = stack[level] & NODE_IDX_MASK;
-		const FaceBVH *current_facebvhptr = &(r_facebvhptr[node]);
-		bool done = false;
-
-		switch (stack[level] >> VISITED_BIT_SHIFT) {
-			case TEST_AABB_BIT: {
-				if (current_facebvhptr->face >= 0) {
-					while (current_facebvhptr) {
-						if (p_face_idx != current_facebvhptr->face &&
-								current_facebvhptr->aabb.intersects_ray(face_center, face_normal)) {
-							const Face &current_face = faces[current_facebvhptr->face];
-							Vector3 current_points[3] = {
-								points[current_face.points[0]],
-								points[current_face.points[1]],
-								points[current_face.points[2]]
-							};
-							Vector3 current_normal = Plane(current_points[0], current_points[1], current_points[2]).normal;
-							Vector3 intersection_point;
-							// Check if faces are co-planar.
-							if (current_normal.is_equal_approx(face_normal) &&
-									is_point_in_triangle(face_center, current_points)) {
-								// Only add an intersection if not a B face.
-								if (!face.from_b) {
-									_add_distance(intersectionsA, intersectionsB, current_face.from_b, 0, true);
-								}
-							} else if (ray_intersects_triangle(face_center, face_normal, current_points, CMP_EPSILON, intersection_point)) {
-								real_t distance_squared = face_center.distance_squared_to(intersection_point);
-								real_t inner = current_normal.dot(face_normal);
-								// If the faces are perpendicular, ignore this face.
-								// The triangles on the side should be intersected and result in the correct behavior.
-								if (!Math::is_zero_approx(inner)) {
-									_add_distance(intersectionsA, intersectionsB, current_face.from_b, distance_squared, inner > 0.0f);
-								}
-							}
-
-							if (face.from_b != current_face.from_b) {
-								if (current_normal.is_equal_approx(face_normal) &&
-										is_point_in_triangle(face_center, current_points)) {
-									// Only add an intersection if not a B face.
-									if (!face.from_b) {
-										closest_intersection.found = true;
-										closest_intersection.conormal = 1.0f;
-										closest_intersection.distance_squared = 0.0f;
-										closest_intersection.origin_angle = -FLT_MAX;
-									}
-								} else if (ray_intersects_triangle(face_center, face_normal, current_points, CMP_EPSILON, intersection_point)) {
-									Intersection potential_intersection;
-									potential_intersection.found = true;
-									potential_intersection.conormal = face_normal.dot(current_normal);
-									potential_intersection.distance_squared = face_center.distance_squared_to(intersection_point);
-									potential_intersection.origin_angle = Math::abs(potential_intersection.conormal);
-									real_t intersection_dist_from_face = face_normal.dot(intersection_point - face_center);
-									for (int i = 0; i < 3; i++) {
-										real_t point_dist_from_face = face_normal.dot(current_points[i] - face_center);
-										if (!Math::is_equal_approx(point_dist_from_face, intersection_dist_from_face) &&
-												point_dist_from_face < intersection_dist_from_face) {
-											potential_intersection.origin_angle = -potential_intersection.origin_angle;
-											break;
-										}
-									}
-									if (potential_intersection.conormal != 0.0f) {
-										if (!closest_intersection.found) {
-											closest_intersection = potential_intersection;
-										} else if (!Math::is_equal_approx(potential_intersection.distance_squared, closest_intersection.distance_squared) &&
-												potential_intersection.distance_squared < closest_intersection.distance_squared) {
-											closest_intersection = potential_intersection;
-										} else if (Math::is_equal_approx(potential_intersection.distance_squared, closest_intersection.distance_squared)) {
-											if (potential_intersection.origin_angle < closest_intersection.origin_angle) {
-												closest_intersection = potential_intersection;
-											}
-										}
-									}
-								}
-							}
-						}
-
-						if (current_facebvhptr->next != -1) {
-							current_facebvhptr = &r_facebvhptr[current_facebvhptr->next];
-						} else {
-							current_facebvhptr = nullptr;
-						}
-					}
-
-					stack[level] = (VISIT_DONE_BIT << VISITED_BIT_SHIFT) | node;
-
-				} else {
-					bool valid = current_facebvhptr->aabb.intersects_ray(face_center, face_normal);
-
-					if (!valid) {
-						stack[level] = (VISIT_DONE_BIT << VISITED_BIT_SHIFT) | node;
-					} else {
-						stack[level] = (VISIT_LEFT_BIT << VISITED_BIT_SHIFT) | node;
-					}
-				}
-				continue;
-			}
-
-			case VISIT_LEFT_BIT: {
-				stack[level] = (VISIT_RIGHT_BIT << VISITED_BIT_SHIFT) | node;
-				stack[level + 1] = current_facebvhptr->left | TEST_AABB_BIT;
-				level++;
-				continue;
-			}
-
-			case VISIT_RIGHT_BIT: {
-				stack[level] = (VISIT_DONE_BIT << VISITED_BIT_SHIFT) | node;
-				stack[level + 1] = current_facebvhptr->right | TEST_AABB_BIT;
-				level++;
-				continue;
-			}
-
-			case VISIT_DONE_BIT: {
-				if (level == 0) {
-					done = true;
-					break;
-				} else {
-					level--;
-				}
-				continue;
-			}
-		}
-
-		if (done) {
-			break;
-		}
-	}
-
-	if (!closest_intersection.found) {
-		return false;
-	} else {
-		return closest_intersection.conormal > 0.0f;
-	}
-}
-
-void CSGBrushOperation::MeshMerge::mark_inside_faces() {
-	// Mark faces that are inside. This helps later do the boolean ops when merging.
-	// This approach is very brute force with a bunch of optimizations,
-	// such as BVH and pre AABB intersection test.
-
-	Vector<FaceBVH> bvhvec;
-	bvhvec.resize(faces.size() * 3); // Will never be larger than this (TODO: Make better)
-	FaceBVH *facebvh = bvhvec.ptrw();
-
-	AABB aabb_a;
-	AABB aabb_b;
-
-	bool first_a = true;
-	bool first_b = true;
-
-	for (int i = 0; i < faces.size(); i++) {
-		facebvh[i].left = -1;
-		facebvh[i].right = -1;
-		facebvh[i].face = i;
-		facebvh[i].aabb.position = points[faces[i].points[0]];
-		facebvh[i].aabb.expand_to(points[faces[i].points[1]]);
-		facebvh[i].aabb.expand_to(points[faces[i].points[2]]);
-		facebvh[i].center = facebvh[i].aabb.get_center();
-		facebvh[i].aabb.grow_by(vertex_snap);
-		facebvh[i].next = -1;
-
-		if (faces[i].from_b) {
-			if (first_b) {
-				aabb_b = facebvh[i].aabb;
-				first_b = false;
-			} else {
-				aabb_b.merge_with(facebvh[i].aabb);
-			}
-		} else {
-			if (first_a) {
-				aabb_a = facebvh[i].aabb;
-				first_a = false;
-			} else {
-				aabb_a.merge_with(facebvh[i].aabb);
-			}
-		}
-	}
-
-	AABB intersection_aabb = aabb_a.intersection(aabb_b);
-
-	// Check if shape AABBs intersect.
-	if (intersection_aabb.size == Vector3()) {
-		return;
-	}
-
-	Vector<FaceBVH *> bvhtrvec;
-	bvhtrvec.resize(faces.size());
-	FaceBVH **bvhptr = bvhtrvec.ptrw();
-	for (int i = 0; i < faces.size(); i++) {
-		bvhptr[i] = &facebvh[i];
-	}
-
-	int max_depth = 0;
-	int max_alloc = faces.size();
-	_create_bvh(facebvh, bvhptr, 0, faces.size(), 1, max_depth, max_alloc);
-
-	for (int i = 0; i < faces.size(); i++) {
-		// Check if face AABB intersects the intersection AABB.
-		if (!intersection_aabb.intersects_inclusive(facebvh[i].aabb)) {
-			continue;
-		}
-
-		if (_bvh_inside(facebvh, max_depth, max_alloc - 1, i)) {
-			faces.write[i].inside = true;
-		}
-	}
-}
-
-void CSGBrushOperation::MeshMerge::add_face(const Vector3 p_points[3], const Vector2 p_uvs[3], bool p_smooth, bool p_invert, const Ref<Material> &p_material, bool p_from_b) {
-	int indices[3];
-	for (int i = 0; i < 3; i++) {
-		VertexKey vk;
-		vk.x = int((double(p_points[i].x) + double(vertex_snap) * 0.31234) / double(vertex_snap));
-		vk.y = int((double(p_points[i].y) + double(vertex_snap) * 0.31234) / double(vertex_snap));
-		vk.z = int((double(p_points[i].z) + double(vertex_snap) * 0.31234) / double(vertex_snap));
-
-		int res;
-		if (snap_cache.lookup(vk, res)) {
-			indices[i] = res;
-		} else {
-			indices[i] = points.size();
-			points.push_back(p_points[i]);
-			snap_cache.set(vk, indices[i]);
-		}
-	}
-
-	// Don't add degenerate faces.
-	if (indices[0] == indices[2] || indices[0] == indices[1] || indices[1] == indices[2]) {
-		return;
-	}
-
-	MeshMerge::Face face;
-	face.from_b = p_from_b;
-	face.inside = false;
-	face.smooth = p_smooth;
-	face.invert = p_invert;
-
-	if (p_material.is_valid()) {
-		if (!materials.has(p_material)) {
-			face.material_idx = materials.size();
-			materials[p_material] = face.material_idx;
-		} else {
-			face.material_idx = materials[p_material];
-		}
-	} else {
-		face.material_idx = -1;
-	}
-
-	for (int k = 0; k < 3; k++) {
-		face.points[k] = indices[k];
-		face.uvs[k] = p_uvs[k];
-	}
-
-	faces.push_back(face);
-}
-
-// CSGBrushOperation::Build2DFaces
-
-int CSGBrushOperation::Build2DFaces::_get_point_idx(const Vector2 &p_point) {
-	for (int vertex_idx = 0; vertex_idx < vertices.size(); ++vertex_idx) {
-		if (vertices[vertex_idx].point.distance_squared_to(p_point) < vertex_snap2) {
-			return vertex_idx;
-		}
-	}
-	return -1;
-}
-
-int CSGBrushOperation::Build2DFaces::_add_vertex(const Vertex2D &p_vertex) {
-	// Check if vertex exists.
-	int vertex_id = _get_point_idx(p_vertex.point);
-	if (vertex_id != -1) {
-		return vertex_id;
-	}
-
-	vertices.push_back(p_vertex);
-	return vertices.size() - 1;
-}
-
-void CSGBrushOperation::Build2DFaces::_add_vertex_idx_sorted(Vector<int> &r_vertex_indices, int p_new_vertex_index) {
-	if (p_new_vertex_index >= 0 && !r_vertex_indices.has(p_new_vertex_index)) {
-		ERR_FAIL_COND_MSG(p_new_vertex_index >= vertices.size(), "Invalid vertex index.");
-
-		// The first vertex.
-		if (r_vertex_indices.size() == 0) {
-			// Simply add it.
-			r_vertex_indices.push_back(p_new_vertex_index);
-			return;
-		}
-
-		// The second vertex.
-		if (r_vertex_indices.size() == 1) {
-			Vector2 first_point = vertices[r_vertex_indices[0]].point;
-			Vector2 new_point = vertices[p_new_vertex_index].point;
-
-			// Sort along the axis with the greatest difference.
-			int axis = 0;
-			if (Math::abs(new_point.x - first_point.x) < Math::abs(new_point.y - first_point.y)) {
-				axis = 1;
-			}
-
-			// Add it to the beginning or the end appropriately.
-			if (new_point[axis] < first_point[axis]) {
-				r_vertex_indices.insert(0, p_new_vertex_index);
-			} else {
-				r_vertex_indices.push_back(p_new_vertex_index);
-			}
-
-			return;
-		}
-
-		// Third or later vertices.
-		Vector2 first_point = vertices[r_vertex_indices[0]].point;
-		Vector2 last_point = vertices[r_vertex_indices[r_vertex_indices.size() - 1]].point;
-		Vector2 new_point = vertices[p_new_vertex_index].point;
-
-		// Determine axis being sorted against i.e. the axis with the greatest difference.
-		int axis = 0;
-		if (Math::abs(last_point.x - first_point.x) < Math::abs(last_point.y - first_point.y)) {
-			axis = 1;
-		}
-
-		// Insert the point at the appropriate index.
-		for (int insert_idx = 0; insert_idx < r_vertex_indices.size(); ++insert_idx) {
-			Vector2 insert_point = vertices[r_vertex_indices[insert_idx]].point;
-			if (new_point[axis] < insert_point[axis]) {
-				r_vertex_indices.insert(insert_idx, p_new_vertex_index);
-				return;
-			}
-		}
-
-		// New largest, add it to the end.
-		r_vertex_indices.push_back(p_new_vertex_index);
-	}
-}
-
-void CSGBrushOperation::Build2DFaces::_merge_faces(const Vector<int> &p_segment_indices) {
-	int segments = p_segment_indices.size() - 1;
-	if (segments < 2) {
-		return;
-	}
-
-	// Faces around an inner vertex are merged by moving the inner vertex to the first vertex.
-	for (int sorted_idx = 1; sorted_idx < segments; ++sorted_idx) {
-		int closest_idx = 0;
-		int inner_idx = p_segment_indices[sorted_idx];
-
-		if (sorted_idx > segments / 2) {
-			// Merge to other segment end.
-			closest_idx = segments;
-			// Reverse the merge order.
-			inner_idx = p_segment_indices[segments + segments / 2 - sorted_idx];
-		}
-
-		// Find the mergeable faces.
-		Vector<int> merge_faces_idx;
-		Vector<Face2D> merge_faces;
-		Vector<int> merge_faces_inner_vertex_idx;
-		for (int face_idx = 0; face_idx < faces.size(); ++face_idx) {
-			for (int face_vertex_idx = 0; face_vertex_idx < 3; ++face_vertex_idx) {
-				if (faces[face_idx].vertex_idx[face_vertex_idx] == inner_idx) {
-					merge_faces_idx.push_back(face_idx);
-					merge_faces.push_back(faces[face_idx]);
-					merge_faces_inner_vertex_idx.push_back(face_vertex_idx);
-				}
-			}
-		}
-
-		Vector<int> degenerate_points;
-
-		// Create the new faces.
-		for (int merge_idx = 0; merge_idx < merge_faces.size(); ++merge_idx) {
-			int outer_edge_idx[2];
-			outer_edge_idx[0] = merge_faces[merge_idx].vertex_idx[(merge_faces_inner_vertex_idx[merge_idx] + 1) % 3];
-			outer_edge_idx[1] = merge_faces[merge_idx].vertex_idx[(merge_faces_inner_vertex_idx[merge_idx] + 2) % 3];
-
-			// Skip flattened faces.
-			if (outer_edge_idx[0] == p_segment_indices[closest_idx] ||
-					outer_edge_idx[1] == p_segment_indices[closest_idx]) {
-				continue;
-			}
-
-			//Don't create degenerate triangles.
-			Vector2 edge1[2] = {
-				vertices[outer_edge_idx[0]].point,
-				vertices[p_segment_indices[closest_idx]].point
-			};
-			Vector2 edge2[2] = {
-				vertices[outer_edge_idx[1]].point,
-				vertices[p_segment_indices[closest_idx]].point
-			};
-			if (are_segments_parallel(edge1, edge2, vertex_snap2)) {
-				if (!degenerate_points.find(outer_edge_idx[0])) {
-					degenerate_points.push_back(outer_edge_idx[0]);
-				}
-				if (!degenerate_points.find(outer_edge_idx[1])) {
-					degenerate_points.push_back(outer_edge_idx[1]);
-				}
-				continue;
-			}
-
-			// Create new faces.
-			Face2D new_face;
-			new_face.vertex_idx[0] = p_segment_indices[closest_idx];
-			new_face.vertex_idx[1] = outer_edge_idx[0];
-			new_face.vertex_idx[2] = outer_edge_idx[1];
-			faces.push_back(new_face);
-		}
-
-		// Delete the old faces in reverse index order.
-		merge_faces_idx.sort();
-		merge_faces_idx.reverse();
-		for (int i = 0; i < merge_faces_idx.size(); ++i) {
-			faces.remove_at(merge_faces_idx[i]);
-		}
-
-		if (degenerate_points.size() == 0) {
-			continue;
-		}
-
-		// Split faces using degenerate points.
-		for (int face_idx = 0; face_idx < faces.size(); ++face_idx) {
-			Face2D face = faces[face_idx];
-			Vertex2D face_vertices[3] = {
-				vertices[face.vertex_idx[0]],
-				vertices[face.vertex_idx[1]],
-				vertices[face.vertex_idx[2]]
-			};
-			Vector2 face_points[3] = {
-				face_vertices[0].point,
-				face_vertices[1].point,
-				face_vertices[2].point
-			};
-
-			for (int point_idx = 0; point_idx < degenerate_points.size(); ++point_idx) {
-				int degenerate_idx = degenerate_points[point_idx];
-				Vector2 point_2D = vertices[degenerate_idx].point;
-
-				// Check if point is existing face vertex.
-				bool existing = false;
-				for (int i = 0; i < 3; ++i) {
-					if (face_vertices[i].point.distance_squared_to(point_2D) < vertex_snap2) {
-						existing = true;
-						break;
-					}
-				}
-				if (existing) {
-					continue;
-				}
-
-				// Check if point is on each edge.
-				for (int face_edge_idx = 0; face_edge_idx < 3; ++face_edge_idx) {
-					Vector2 edge_points[2] = {
-						face_points[face_edge_idx],
-						face_points[(face_edge_idx + 1) % 3]
-					};
-					Vector2 closest_point = Geometry2D::get_closest_point_to_segment(point_2D, edge_points);
-
-					if (point_2D.distance_squared_to(closest_point) < vertex_snap2) {
-						int opposite_vertex_idx = face.vertex_idx[(face_edge_idx + 2) % 3];
-
-						// If new vertex snaps to degenerate vertex, just delete this face.
-						if (degenerate_idx == opposite_vertex_idx) {
-							faces.remove_at(face_idx);
-							// Update index.
-							--face_idx;
-							break;
-						}
-
-						// Create two new faces around the new edge and remove this face.
-						// The new edge is the last edge.
-						Face2D left_face;
-						left_face.vertex_idx[0] = degenerate_idx;
-						left_face.vertex_idx[1] = face.vertex_idx[(face_edge_idx + 1) % 3];
-						left_face.vertex_idx[2] = opposite_vertex_idx;
-						Face2D right_face;
-						right_face.vertex_idx[0] = opposite_vertex_idx;
-						right_face.vertex_idx[1] = face.vertex_idx[face_edge_idx];
-						right_face.vertex_idx[2] = degenerate_idx;
-						faces.remove_at(face_idx);
-						faces.insert(face_idx, right_face);
-						faces.insert(face_idx, left_face);
-
-						// Don't check against the new faces.
-						++face_idx;
-
-						// No need to check other edges.
-						break;
-					}
-				}
-			}
-		}
-	}
-}
-
-void CSGBrushOperation::Build2DFaces::_find_edge_intersections(const Vector2 p_segment_points[2], Vector<int> &r_segment_indices) {
-	LocalVector<Vector<Vector2>> processed_edges;
-
-	// For each face.
-	for (int face_idx = 0; face_idx < faces.size(); ++face_idx) {
-		Face2D face = faces[face_idx];
-		Vertex2D face_vertices[3] = {
-			vertices[face.vertex_idx[0]],
-			vertices[face.vertex_idx[1]],
-			vertices[face.vertex_idx[2]]
-		};
-
-		// Check each edge.
-		for (int face_edge_idx = 0; face_edge_idx < 3; ++face_edge_idx) {
-			Vector<Vector2> edge_points_and_uvs = {
-				face_vertices[face_edge_idx].point,
-				face_vertices[(face_edge_idx + 1) % 3].point,
-				face_vertices[face_edge_idx].uv,
-				face_vertices[(face_edge_idx + 1) % 3].uv
-			};
-
-			Vector2 edge_points[2] = {
-				edge_points_and_uvs[0],
-				edge_points_and_uvs[1],
-			};
-			Vector2 edge_uvs[2] = {
-				edge_points_and_uvs[2],
-				edge_points_and_uvs[3],
-			};
-
-			// Check if edge has already been processed.
-			if (processed_edges.has(edge_points_and_uvs)) {
-				continue;
-			}
-
-			processed_edges.push_back(edge_points_and_uvs);
-
-			// First check if the ends of the segment are on the edge.
-			Vector2 intersection_point;
-
-			bool on_edge = false;
-			for (int edge_point_idx = 0; edge_point_idx < 2; ++edge_point_idx) {
-				intersection_point = Geometry2D::get_closest_point_to_segment(p_segment_points[edge_point_idx], edge_points);
-				if (p_segment_points[edge_point_idx].distance_squared_to(intersection_point) < vertex_snap2) {
-					on_edge = true;
-					break;
-				}
-			}
-
-			// Else check if the segment intersects the edge.
-			if (on_edge || Geometry2D::segment_intersects_segment(p_segment_points[0], p_segment_points[1], edge_points[0], edge_points[1], &intersection_point)) {
-				// Check if intersection point is an edge point.
-				if ((edge_points[0].distance_squared_to(intersection_point) < vertex_snap2) ||
-						(edge_points[1].distance_squared_to(intersection_point) < vertex_snap2)) {
-					continue;
-				}
-
-				// Check if edge exists, by checking if the intersecting segment is parallel to the edge.
-				if (are_segments_parallel(p_segment_points, edge_points, vertex_snap2)) {
-					continue;
-				}
-
-				// Add the intersection point as a new vertex.
-				Vertex2D new_vertex;
-				new_vertex.point = intersection_point;
-				new_vertex.uv = interpolate_segment_uv(edge_points, edge_uvs, intersection_point);
-				int new_vertex_idx = _add_vertex(new_vertex);
-				int opposite_vertex_idx = face.vertex_idx[(face_edge_idx + 2) % 3];
-				_add_vertex_idx_sorted(r_segment_indices, new_vertex_idx);
-
-				// If new vertex snaps to opposite vertex, just delete this face.
-				if (new_vertex_idx == opposite_vertex_idx) {
-					faces.remove_at(face_idx);
-					// Update index.
-					--face_idx;
-					break;
-				}
-
-				// If opposite point is on the segment, add its index to segment indices too.
-				Vector2 closest_point = Geometry2D::get_closest_point_to_segment(vertices[opposite_vertex_idx].point, p_segment_points);
-				if (vertices[opposite_vertex_idx].point.distance_squared_to(closest_point) < vertex_snap2) {
-					_add_vertex_idx_sorted(r_segment_indices, opposite_vertex_idx);
-				}
-
-				// Create two new faces around the new edge and remove this face.
-				// The new edge is the last edge.
-				Face2D left_face;
-				left_face.vertex_idx[0] = new_vertex_idx;
-				left_face.vertex_idx[1] = face.vertex_idx[(face_edge_idx + 1) % 3];
-				left_face.vertex_idx[2] = opposite_vertex_idx;
-				Face2D right_face;
-				right_face.vertex_idx[0] = opposite_vertex_idx;
-				right_face.vertex_idx[1] = face.vertex_idx[face_edge_idx];
-				right_face.vertex_idx[2] = new_vertex_idx;
-				faces.remove_at(face_idx);
-				faces.insert(face_idx, right_face);
-				faces.insert(face_idx, left_face);
-
-				// Check against the new faces.
-				--face_idx;
-				break;
-			}
-		}
-	}
-}
-
-int CSGBrushOperation::Build2DFaces::_insert_point(const Vector2 &p_point) {
-	int new_vertex_idx = -1;
-
-	for (int face_idx = 0; face_idx < faces.size(); ++face_idx) {
-		Face2D face = faces[face_idx];
-		Vertex2D face_vertices[3] = {
-			vertices[face.vertex_idx[0]],
-			vertices[face.vertex_idx[1]],
-			vertices[face.vertex_idx[2]]
-		};
-		Vector2 points[3] = {
-			face_vertices[0].point,
-			face_vertices[1].point,
-			face_vertices[2].point
-		};
-		Vector2 uvs[3] = {
-			face_vertices[0].uv,
-			face_vertices[1].uv,
-			face_vertices[2].uv
-		};
-
-		// Skip degenerate triangles.
-		if (is_triangle_degenerate(points, vertex_snap2)) {
-			continue;
-		}
-
-		// Check if point is existing face vertex.
-		for (int i = 0; i < 3; ++i) {
-			if (face_vertices[i].point.distance_squared_to(p_point) < vertex_snap2) {
-				return face.vertex_idx[i];
-			}
-		}
-
-		// Check if point is on each edge.
-		bool on_edge = false;
-		for (int face_edge_idx = 0; face_edge_idx < 3; ++face_edge_idx) {
-			Vector2 edge_points[2] = {
-				points[face_edge_idx],
-				points[(face_edge_idx + 1) % 3]
-			};
-			Vector2 edge_uvs[2] = {
-				uvs[face_edge_idx],
-				uvs[(face_edge_idx + 1) % 3]
-			};
-
-			Vector2 closest_point = Geometry2D::get_closest_point_to_segment(p_point, edge_points);
-			if (p_point.distance_squared_to(closest_point) < vertex_snap2) {
-				on_edge = true;
-
-				// Add the point as a new vertex.
-				Vertex2D new_vertex;
-				new_vertex.point = p_point;
-				new_vertex.uv = interpolate_segment_uv(edge_points, edge_uvs, p_point);
-				new_vertex_idx = _add_vertex(new_vertex);
-				int opposite_vertex_idx = face.vertex_idx[(face_edge_idx + 2) % 3];
-
-				// If new vertex snaps to opposite vertex, just delete this face.
-				if (new_vertex_idx == opposite_vertex_idx) {
-					faces.remove_at(face_idx);
-					// Update index.
-					--face_idx;
-					break;
-				}
-
-				// Don't create degenerate triangles.
-				Vector2 split_edge1[2] = { vertices[new_vertex_idx].point, edge_points[0] };
-				Vector2 split_edge2[2] = { vertices[new_vertex_idx].point, edge_points[1] };
-				Vector2 new_edge[2] = { vertices[new_vertex_idx].point, vertices[opposite_vertex_idx].point };
-				if (are_segments_parallel(split_edge1, new_edge, vertex_snap2) &&
-						are_segments_parallel(split_edge2, new_edge, vertex_snap2)) {
-					break;
-				}
-
-				// Create two new faces around the new edge and remove this face.
-				// The new edge is the last edge.
-				Face2D left_face;
-				left_face.vertex_idx[0] = new_vertex_idx;
-				left_face.vertex_idx[1] = face.vertex_idx[(face_edge_idx + 1) % 3];
-				left_face.vertex_idx[2] = opposite_vertex_idx;
-				Face2D right_face;
-				right_face.vertex_idx[0] = opposite_vertex_idx;
-				right_face.vertex_idx[1] = face.vertex_idx[face_edge_idx];
-				right_face.vertex_idx[2] = new_vertex_idx;
-				faces.remove_at(face_idx);
-				faces.insert(face_idx, right_face);
-				faces.insert(face_idx, left_face);
-
-				// Don't check against the new faces.
-				++face_idx;
-
-				// No need to check other edges.
-				break;
-			}
-		}
-
-		// If not on an edge, check if the point is inside the face.
-		if (!on_edge && Geometry2D::is_point_in_triangle(p_point, face_vertices[0].point, face_vertices[1].point, face_vertices[2].point)) {
-			// Add the point as a new vertex.
-			Vertex2D new_vertex;
-			new_vertex.point = p_point;
-			new_vertex.uv = interpolate_triangle_uv(points, uvs, p_point);
-			new_vertex_idx = _add_vertex(new_vertex);
-
-			// Create three new faces around this point and remove this face.
-			// The new vertex is the last vertex.
-			for (int i = 0; i < 3; ++i) {
-				// Don't create degenerate triangles.
-				Vector2 new_points[3] = { points[i], points[(i + 1) % 3], vertices[new_vertex_idx].point };
-				if (is_triangle_degenerate(new_points, vertex_snap2)) {
-					continue;
-				}
-
-				Face2D new_face;
-				new_face.vertex_idx[0] = face.vertex_idx[i];
-				new_face.vertex_idx[1] = face.vertex_idx[(i + 1) % 3];
-				new_face.vertex_idx[2] = new_vertex_idx;
-				faces.push_back(new_face);
-			}
-			faces.remove_at(face_idx);
-
-			// No need to check other faces.
-			break;
-		}
-	}
-
-	return new_vertex_idx;
-}
-
-void CSGBrushOperation::Build2DFaces::insert(const CSGBrush &p_brush, int p_face_idx) {
-	// Find edge points that cross the plane and face points that are in the plane.
-	// Map those points to 2D.
-	// Create new faces from those points.
-
-	Vector2 points_2D[3];
-	int points_count = 0;
-
-	for (int i = 0; i < 3; i++) {
-		Vector3 point_3D = p_brush.faces[p_face_idx].vertices[i];
-
-		if (plane.has_point(point_3D)) {
-			// Point is in the plane, add it.
-			Vector3 point_2D = plane.project(point_3D);
-			point_2D = to_2D.xform(point_2D);
-			points_2D[points_count++] = Vector2(point_2D.x, point_2D.y);
-
-		} else {
-			Vector3 next_point_3D = p_brush.faces[p_face_idx].vertices[(i + 1) % 3];
-
-			if (plane.has_point(next_point_3D)) {
-				continue; // Next point is in plane, it will be added separately.
-			}
-			if (plane.is_point_over(point_3D) == plane.is_point_over(next_point_3D)) {
-				continue; // Both points on the same side of the plane, ignore.
-			}
-
-			// Edge crosses the plane, find and add the intersection point.
-			Vector3 point_2D;
-			if (plane.intersects_segment(point_3D, next_point_3D, &point_2D)) {
-				point_2D = to_2D.xform(point_2D);
-				points_2D[points_count++] = Vector2(point_2D.x, point_2D.y);
-			}
-		}
-	}
-
-	Vector<int> segment_indices;
-	Vector2 segment[2];
-	int inserted_index[3] = { -1, -1, -1 };
-
-	// Insert points.
-	for (int i = 0; i < points_count; ++i) {
-		inserted_index[i] = _insert_point(points_2D[i]);
-	}
-
-	if (points_count == 2) {
-		// Insert a single segment.
-		segment[0] = points_2D[0];
-		segment[1] = points_2D[1];
-		_find_edge_intersections(segment, segment_indices);
-		for (int i = 0; i < 2; ++i) {
-			_add_vertex_idx_sorted(segment_indices, inserted_index[i]);
-		}
-		_merge_faces(segment_indices);
-	}
-
-	if (points_count == 3) {
-		// Insert three segments.
-		for (int edge_idx = 0; edge_idx < 3; ++edge_idx) {
-			segment[0] = points_2D[edge_idx];
-			segment[1] = points_2D[(edge_idx + 1) % 3];
-			_find_edge_intersections(segment, segment_indices);
-			for (int i = 0; i < 2; ++i) {
-				_add_vertex_idx_sorted(segment_indices, inserted_index[(edge_idx + i) % 3]);
-			}
-			_merge_faces(segment_indices);
-			segment_indices.clear();
-		}
-	}
-}
-
-void CSGBrushOperation::Build2DFaces::addFacesToMesh(MeshMerge &r_mesh_merge, bool p_smooth, bool p_invert, const Ref<Material> &p_material, bool p_from_b) {
-	for (int face_idx = 0; face_idx < faces.size(); ++face_idx) {
-		Face2D face = faces[face_idx];
-		Vertex2D fv[3] = {
-			vertices[face.vertex_idx[0]],
-			vertices[face.vertex_idx[1]],
-			vertices[face.vertex_idx[2]]
-		};
-
-		// Convert 2D vertex points to 3D.
-		Vector3 points_3D[3];
-		Vector2 uvs[3];
-		for (int i = 0; i < 3; ++i) {
-			Vector3 point_2D(fv[i].point.x, fv[i].point.y, 0);
-			points_3D[i] = to_3D.xform(point_2D);
-			uvs[i] = fv[i].uv;
-		}
-
-		r_mesh_merge.add_face(points_3D, uvs, p_smooth, p_invert, p_material, p_from_b);
-	}
-}
-
-CSGBrushOperation::Build2DFaces::Build2DFaces(const CSGBrush &p_brush, int p_face_idx, float p_vertex_snap2) :
-		vertex_snap2(p_vertex_snap2 * p_vertex_snap2) {
-	// Convert 3D vertex points to 2D.
-	Vector3 points_3D[3] = {
-		p_brush.faces[p_face_idx].vertices[0],
-		p_brush.faces[p_face_idx].vertices[1],
-		p_brush.faces[p_face_idx].vertices[2],
-	};
-
-	plane = Plane(points_3D[0], points_3D[1], points_3D[2]);
-	to_3D.origin = points_3D[0];
-	to_3D.basis.set_column(2, plane.normal);
-	to_3D.basis.set_column(0, (points_3D[1] - points_3D[2]).normalized());
-	to_3D.basis.set_column(1, to_3D.basis.get_column(0).cross(to_3D.basis.get_column(2)).normalized());
-	to_2D = to_3D.affine_inverse();
-
-	Face2D face;
-	for (int i = 0; i < 3; i++) {
-		Vertex2D vertex;
-		Vector3 point_2D = to_2D.xform(points_3D[i]);
-		vertex.point.x = point_2D.x;
-		vertex.point.y = point_2D.y;
-		vertex.uv = p_brush.faces[p_face_idx].uvs[i];
-		vertices.push_back(vertex);
-		face.vertex_idx[i] = i;
-	}
-	faces.push_back(face);
-}
-
-void CSGBrushOperation::update_faces(const CSGBrush &p_brush_a, const int p_face_idx_a, const CSGBrush &p_brush_b, const int p_face_idx_b, Build2DFaceCollection &p_collection, float p_vertex_snap) {
-	Vector3 vertices_a[3] = {
-		p_brush_a.faces[p_face_idx_a].vertices[0],
-		p_brush_a.faces[p_face_idx_a].vertices[1],
-		p_brush_a.faces[p_face_idx_a].vertices[2],
-	};
-
-	Vector3 vertices_b[3] = {
-		p_brush_b.faces[p_face_idx_b].vertices[0],
-		p_brush_b.faces[p_face_idx_b].vertices[1],
-		p_brush_b.faces[p_face_idx_b].vertices[2],
-	};
-
-	// Don't use degenerate faces.
-	bool has_degenerate = false;
-	if (is_snapable(vertices_a[0], vertices_a[1], p_vertex_snap) ||
-			is_snapable(vertices_a[0], vertices_a[2], p_vertex_snap) ||
-			is_snapable(vertices_a[1], vertices_a[2], p_vertex_snap)) {
-		p_collection.build2DFacesA[p_face_idx_a] = Build2DFaces();
-		has_degenerate = true;
-	}
-
-	if (is_snapable(vertices_b[0], vertices_b[1], p_vertex_snap) ||
-			is_snapable(vertices_b[0], vertices_b[2], p_vertex_snap) ||
-			is_snapable(vertices_b[1], vertices_b[2], p_vertex_snap)) {
-		p_collection.build2DFacesB[p_face_idx_b] = Build2DFaces();
-		has_degenerate = true;
-	}
-	if (has_degenerate) {
-		return;
-	}
-
-	// Ensure B has points either side of or in the plane of A.
-	int over_count = 0, under_count = 0;
-	Plane plane_a(vertices_a[0], vertices_a[1], vertices_a[2]);
-	ERR_FAIL_COND_MSG(plane_a.normal == Vector3(), "Couldn't form plane from Brush A face.");
-
-	for (int i = 0; i < 3; i++) {
-		if (plane_a.has_point(vertices_b[i])) {
-			// In plane.
-		} else if (plane_a.is_point_over(vertices_b[i])) {
-			over_count++;
-		} else {
-			under_count++;
-		}
-	}
-	// If all points under or over the plane, there is no intersection.
-	if (over_count == 3 || under_count == 3) {
-		return;
-	}
-
-	// Ensure A has points either side of or in the plane of B.
-	over_count = 0;
-	under_count = 0;
-	Plane plane_b(vertices_b[0], vertices_b[1], vertices_b[2]);
-	ERR_FAIL_COND_MSG(plane_b.normal == Vector3(), "Couldn't form plane from Brush B face.");
-
-	for (int i = 0; i < 3; i++) {
-		if (plane_b.has_point(vertices_a[i])) {
-			// In plane.
-		} else if (plane_b.is_point_over(vertices_a[i])) {
-			over_count++;
-		} else {
-			under_count++;
-		}
-	}
-	// If all points under or over the plane, there is no intersection.
-	if (over_count == 3 || under_count == 3) {
-		return;
-	}
-
-	// Check for intersection using the SAT theorem.
-	{
-		// Edge pair cross product combinations.
-		for (int i = 0; i < 3; i++) {
-			Vector3 axis_a = (vertices_a[i] - vertices_a[(i + 1) % 3]).normalized();
-
-			for (int j = 0; j < 3; j++) {
-				Vector3 axis_b = (vertices_b[j] - vertices_b[(j + 1) % 3]).normalized();
-
-				Vector3 sep_axis = axis_a.cross(axis_b);
-				if (sep_axis == Vector3()) {
-					continue; //colineal
-				}
-				sep_axis.normalize();
-
-				real_t min_a = 1e20, max_a = -1e20;
-				real_t min_b = 1e20, max_b = -1e20;
-
-				for (int k = 0; k < 3; k++) {
-					real_t d = sep_axis.dot(vertices_a[k]);
-					min_a = MIN(min_a, d);
-					max_a = MAX(max_a, d);
-					d = sep_axis.dot(vertices_b[k]);
-					min_b = MIN(min_b, d);
-					max_b = MAX(max_b, d);
-				}
-
-				min_b -= (max_a - min_a) * 0.5;
-				max_b += (max_a - min_a) * 0.5;
-
-				real_t dmin = min_b - (min_a + max_a) * 0.5;
-				real_t dmax = max_b - (min_a + max_a) * 0.5;
-
-				if (dmin > CMP_EPSILON || dmax < -CMP_EPSILON) {
-					return; // Does not contain zero, so they don't overlap.
-				}
-			}
-		}
-	}
-
-	// If we're still here, the faces probably intersect, so add new faces.
-	if (!p_collection.build2DFacesA.has(p_face_idx_a)) {
-		p_collection.build2DFacesA[p_face_idx_a] = Build2DFaces(p_brush_a, p_face_idx_a, p_vertex_snap);
-	}
-	p_collection.build2DFacesA[p_face_idx_a].insert(p_brush_b, p_face_idx_b);
-
-	if (!p_collection.build2DFacesB.has(p_face_idx_b)) {
-		p_collection.build2DFacesB[p_face_idx_b] = Build2DFaces(p_brush_b, p_face_idx_b, p_vertex_snap);
-	}
-	p_collection.build2DFacesB[p_face_idx_b].insert(p_brush_a, p_face_idx_a);
-}

modules/csg/csg.h (+8, -140)

@@ -55,150 +55,18 @@ struct CSGBrush {
 	Vector<Face> faces;
 	Vector<Ref<Material>> materials;
 
-	inline void _regen_face_aabbs();
+	inline void _regen_face_aabbs() {
+		for (int i = 0; i < faces.size(); i++) {
+			faces.write[i].aabb = AABB();
+			faces.write[i].aabb.position = faces[i].vertices[0];
+			faces.write[i].aabb.expand_to(faces[i].vertices[1]);
+			faces.write[i].aabb.expand_to(faces[i].vertices[2]);
+		}
+	}
 
 	// Create a brush from faces.
 	void build_from_faces(const Vector<Vector3> &p_vertices, const Vector<Vector2> &p_uvs, const Vector<bool> &p_smooth, const Vector<Ref<Material>> &p_materials, const Vector<bool> &p_invert_faces);
 	void copy_from(const CSGBrush &p_brush, const Transform3D &p_xform);
 };
 
-struct CSGBrushOperation {
-	enum Operation {
-		OPERATION_UNION,
-		OPERATION_INTERSECTION,
-		OPERATION_SUBTRACTION,
-	};
-
-	void merge_brushes(Operation p_operation, const CSGBrush &p_brush_a, const CSGBrush &p_brush_b, CSGBrush &r_merged_brush, float p_vertex_snap);
-
-	struct MeshMerge {
-		struct Face {
-			bool from_b = false;
-			bool inside = false;
-			int points[3] = {};
-			Vector2 uvs[3];
-			bool smooth = false;
-			bool invert = false;
-			int material_idx = 0;
-		};
-
-		struct FaceBVH {
-			int face = 0;
-			int left = 0;
-			int right = 0;
-			int next = 0;
-			Vector3 center;
-			AABB aabb;
-		};
-
-		struct FaceBVHCmpX {
-			_FORCE_INLINE_ bool operator()(const FaceBVH *p_left, const FaceBVH *p_right) const {
-				return p_left->center.x < p_right->center.x;
-			}
-		};
-
-		struct FaceBVHCmpY {
-			_FORCE_INLINE_ bool operator()(const FaceBVH *p_left, const FaceBVH *p_right) const {
-				return p_left->center.y < p_right->center.y;
-			}
-		};
-		struct FaceBVHCmpZ {
-			_FORCE_INLINE_ bool operator()(const FaceBVH *p_left, const FaceBVH *p_right) const {
-				return p_left->center.z < p_right->center.z;
-			}
-		};
-
-		struct VertexKey {
-			int32_t x, y, z;
-			_FORCE_INLINE_ bool operator<(const VertexKey &p_key) const {
-				if (x == p_key.x) {
-					if (y == p_key.y) {
-						return z < p_key.z;
-					} else {
-						return y < p_key.y;
-					}
-				} else {
-					return x < p_key.x;
-				}
-			}
-
-			_FORCE_INLINE_ bool operator==(const VertexKey &p_key) const {
-				return (x == p_key.x && y == p_key.y && z == p_key.z);
-			}
-		};
-
-		struct VertexKeyHash {
-			static _FORCE_INLINE_ uint32_t hash(const VertexKey &p_vk) {
-				uint32_t h = hash_murmur3_one_32(p_vk.x);
-				h = hash_murmur3_one_32(p_vk.y, h);
-				h = hash_murmur3_one_32(p_vk.z, h);
-				return h;
-			}
-		};
-		struct Intersection {
-			bool found = false;
-			real_t conormal = FLT_MAX;
-			real_t distance_squared = FLT_MAX;
-			real_t origin_angle = FLT_MAX;
-		};
-
-		struct IntersectionDistance {
-			bool is_conormal;
-			real_t distance_squared;
-		};
-
-		Vector<Vector3> points;
-		Vector<Face> faces;
-		HashMap<Ref<Material>, int> materials;
-		HashMap<Vector3, int> vertex_map;
-		OAHashMap<VertexKey, int, VertexKeyHash> snap_cache;
-		float vertex_snap = 0.0;
-
-		inline void _add_distance(List<IntersectionDistance> &r_intersectionsA, List<IntersectionDistance> &r_intersectionsB, bool p_from_B, real_t p_distance, bool p_is_conormal) const;
-		inline bool _bvh_inside(FaceBVH *r_facebvhptr, int p_max_depth, int p_bvh_first, int p_face_idx) const;
-		inline int _create_bvh(FaceBVH *r_facebvhptr, FaceBVH **r_facebvhptrptr, int p_from, int p_size, int p_depth, int &r_max_depth, int &r_max_alloc);
-
-		void add_face(const Vector3 p_points[3], const Vector2 p_uvs[3], bool p_smooth, bool p_invert, const Ref<Material> &p_material, bool p_from_b);
-		void mark_inside_faces();
-	};
-
-	struct Build2DFaces {
-		struct Vertex2D {
-			Vector2 point;
-			Vector2 uv;
-		};
-
-		struct Face2D {
-			int vertex_idx[3] = {};
-		};
-
-		Vector<Vertex2D> vertices;
-		Vector<Face2D> faces;
-		Plane plane;
-		Transform3D to_2D;
-		Transform3D to_3D;
-		float vertex_snap2 = 0.0;
-
-		inline int _get_point_idx(const Vector2 &p_point);
-		inline int _add_vertex(const Vertex2D &p_vertex);
-		inline void _add_vertex_idx_sorted(Vector<int> &r_vertex_indices, int p_new_vertex_index);
-		inline void _merge_faces(const Vector<int> &p_segment_indices);
-		inline void _find_edge_intersections(const Vector2 p_segment_points[2], Vector<int> &r_segment_indices);
-		inline int _insert_point(const Vector2 &p_point);
-
-		void insert(const CSGBrush &p_brush, int p_brush_face);
-		void addFacesToMesh(MeshMerge &r_mesh_merge, bool p_smooth, bool p_invert, const Ref<Material> &p_material, bool p_from_b);
-
-		Build2DFaces() {}
-		Build2DFaces(const CSGBrush &p_brush, int p_brush_face, float p_vertex_snap2);
-	};
-
-	struct Build2DFaceCollection {
-		HashMap<int, Build2DFaces> build2DFacesA;
-		HashMap<int, Build2DFaces> build2DFacesB;
-	};
-
-	void update_faces(const CSGBrush &p_brush_a, const int p_face_idx_a, const CSGBrush &p_brush_b, const int p_face_idx_b, Build2DFaceCollection &p_collection, float p_vertex_snap);
-};
-
 #endif // CSG_H

+ 195 - 58
modules/csg/csg_shape.cpp

@@ -32,6 +32,8 @@
 
 #include "core/math/geometry_2d.h"
 
+#include <manifold/manifold.h>
+
 void CSGShape3D::set_use_collision(bool p_enable) {
 	if (use_collision == p_enable) {
 		return;
@@ -167,78 +169,213 @@ void CSGShape3D::_make_dirty(bool p_parent_removing) {
 	dirty = true;
 }
 
-CSGBrush *CSGShape3D::_get_brush() {
-	if (dirty) {
-		if (brush) {
-			memdelete(brush);
+enum ManifoldProperty {
+	MANIFOLD_PROPERTY_POSITION_X = 0,
+	MANIFOLD_PROPERTY_POSITION_Y,
+	MANIFOLD_PROPERTY_POSITION_Z,
+	MANIFOLD_PROPERTY_INVERT,
+	MANIFOLD_PROPERTY_SMOOTH_GROUP,
+	MANIFOLD_PROPERTY_UV_X_0,
+	MANIFOLD_PROPERTY_UV_Y_0,
+	MANIFOLD_PROPERTY_MAX
+};
+
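+// MeshGL64 stores an interleaved block of `numProp` doubles per vertex in
+// `vertProperties`; the ManifoldProperty indices above select a channel, so
+// vertex `v`'s channel `c` lives at `vertProperties[v * numProp + c]`.
+// _unpack_manifold() reads those channels back into CSGBrush faces.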
+static void _unpack_manifold(
+		const manifold::Manifold &p_manifold,
+		const HashMap<int32_t, Ref<Material>> &p_mesh_materials,
+		CSGBrush *r_mesh_merge) {
+	manifold::MeshGL64 mesh = p_manifold.GetMeshGL64();
+
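+	// Swap the second and third vertex of each triangle to flip the winding
+	// between Manifold's convention and the order CSGBrush expects.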
+	constexpr int32_t order[3] = { 0, 2, 1 };
+
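+	// Each run of triangles in the mesh shares one originalID, and therefore one material.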
+	for (size_t run_i = 0; run_i < mesh.runIndex.size() - 1; run_i++) {
+		uint32_t original_id = -1;
+		if (run_i < mesh.runOriginalID.size()) {
+			original_id = mesh.runOriginalID[run_i];
 		}
-		brush = nullptr;
 
-		CSGBrush *n = _build_brush();
+		Ref<Material> material;
+		if (p_mesh_materials.has(original_id)) {
+			material = p_mesh_materials[original_id];
+		}
+		// Find or reserve a material ID in the brush.
+		int32_t material_id = r_mesh_merge->materials.find(material);
+		if (material_id == -1) {
+			material_id = r_mesh_merge->materials.size();
+			r_mesh_merge->materials.push_back(material);
+		}
 
-		for (int i = 0; i < get_child_count(); i++) {
-			CSGShape3D *child = Object::cast_to<CSGShape3D>(get_child(i));
-			if (!child) {
-				continue;
-			}
-			if (!child->is_visible()) {
-				continue;
+		size_t begin = mesh.runIndex[run_i];
+		size_t end = mesh.runIndex[run_i + 1];
+		for (size_t vert_i = begin; vert_i < end; vert_i += 3) {
+			CSGBrush::Face face;
+			face.material = material_id;
+			int32_t first_property_index = mesh.triVerts[vert_i + order[0]];
+			face.smooth = mesh.vertProperties[first_property_index * mesh.numProp + MANIFOLD_PROPERTY_SMOOTH_GROUP] > 0.5f;
+			face.invert = mesh.vertProperties[first_property_index * mesh.numProp + MANIFOLD_PROPERTY_INVERT] > 0.5f;
+
+			for (int32_t tri_order_i = 0; tri_order_i < 3; tri_order_i++) {
+				int32_t property_i = mesh.triVerts[vert_i + order[tri_order_i]];
+				ERR_FAIL_COND_MSG(property_i * mesh.numProp >= mesh.vertProperties.size(), "Invalid index into vertex properties");
+				face.vertices[tri_order_i] = Vector3(
+						mesh.vertProperties[property_i * mesh.numProp + MANIFOLD_PROPERTY_POSITION_X],
+						mesh.vertProperties[property_i * mesh.numProp + MANIFOLD_PROPERTY_POSITION_Y],
+						mesh.vertProperties[property_i * mesh.numProp + MANIFOLD_PROPERTY_POSITION_Z]);
+				face.uvs[tri_order_i] = Vector2(
+						mesh.vertProperties[property_i * mesh.numProp + MANIFOLD_PROPERTY_UV_X_0],
+						mesh.vertProperties[property_i * mesh.numProp + MANIFOLD_PROPERTY_UV_Y_0]);
 			}
+			r_mesh_merge->faces.push_back(face);
+		}
+	}
 
-			CSGBrush *n2 = child->_get_brush();
-			if (!n2) {
-				continue;
-			}
-			if (!n) {
-				n = memnew(CSGBrush);
+	r_mesh_merge->_regen_face_aabbs();
+}
 
-				n->copy_from(*n2, child->get_transform());
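+// Builds a manifold::Manifold from a CSGBrush, emitting one triangle run per
+// material so the material can be recovered after boolean operations.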
+static void _pack_manifold(
+		const CSGBrush *const p_mesh_merge,
+		manifold::Manifold &r_manifold,
+		HashMap<int32_t, Ref<Material>> &p_mesh_materials,
+		float p_snap) {
+	ERR_FAIL_NULL_MSG(p_mesh_merge, "p_mesh_merge is null");
 
-			} else {
-				CSGBrush *nn = memnew(CSGBrush);
-				CSGBrush *nn2 = memnew(CSGBrush);
-				nn2->copy_from(*n2, child->get_transform());
-
-				CSGBrushOperation bop;
-
-				switch (child->get_operation()) {
-					case CSGShape3D::OPERATION_UNION:
-						bop.merge_brushes(CSGBrushOperation::OPERATION_UNION, *n, *nn2, *nn, snap);
-						break;
-					case CSGShape3D::OPERATION_INTERSECTION:
-						bop.merge_brushes(CSGBrushOperation::OPERATION_INTERSECTION, *n, *nn2, *nn, snap);
-						break;
-					case CSGShape3D::OPERATION_SUBTRACTION:
-						bop.merge_brushes(CSGBrushOperation::OPERATION_SUBTRACTION, *n, *nn2, *nn, snap);
-						break;
-				}
-				memdelete(n);
-				memdelete(nn2);
-				n = nn;
-			}
+	HashMap<uint32_t, Vector<CSGBrush::Face>> faces_by_material;
+	for (int face_i = 0; face_i < p_mesh_merge->faces.size(); face_i++) {
+		const CSGBrush::Face &face = p_mesh_merge->faces[face_i];
+		faces_by_material[face.material].push_back(face);
+	}
+
+	manifold::MeshGL64 mesh;
+	mesh.tolerance = p_snap;
+	mesh.numProp = MANIFOLD_PROPERTY_MAX;
+	mesh.runOriginalID.reserve(faces_by_material.size());
+	mesh.runIndex.reserve(faces_by_material.size() + 1);
+	mesh.vertProperties.reserve(p_mesh_merge->faces.size() * 3 * MANIFOLD_PROPERTY_MAX);
+
+	// Make a run of triangles for each material.
+	for (const KeyValue<uint32_t, Vector<CSGBrush::Face>> &E : faces_by_material) {
+		const uint32_t material_id = E.key;
+		const Vector<CSGBrush::Face> &faces = E.value;
+		mesh.runIndex.push_back(mesh.triVerts.size());
+
+		// Associate the material with a reserved Manifold ID.
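+		// ReserveIDs() hands out an originalID that Manifold carries through its
+		// boolean operations, so _unpack_manifold() can map each output run back
+		// to the material stored in p_mesh_materials.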
+		uint32_t reserved_id = r_manifold.ReserveIDs(1);
+		mesh.runOriginalID.push_back(reserved_id);
+		Ref<Material> material;
+		if (material_id < p_mesh_merge->materials.size()) {
+			material = p_mesh_merge->materials[material_id];
 		}
 
-		if (n) {
-			AABB aabb;
-			for (int i = 0; i < n->faces.size(); i++) {
-				for (int j = 0; j < 3; j++) {
-					if (i == 0 && j == 0) {
-						aabb.position = n->faces[i].vertices[j];
-					} else {
-						aabb.expand_to(n->faces[i].vertices[j]);
-					}
-				}
+		p_mesh_materials.insert(reserved_id, material);
+		for (const CSGBrush::Face &face : faces) {
+			for (int32_t tri_order_i = 0; tri_order_i < 3; tri_order_i++) {
+				constexpr int32_t order[3] = { 0, 2, 1 };
+				int i = order[tri_order_i];
+
+				mesh.triVerts.push_back(mesh.vertProperties.size() / MANIFOLD_PROPERTY_MAX);
+
+				size_t begin = mesh.vertProperties.size();
+				mesh.vertProperties.resize(mesh.vertProperties.size() + MANIFOLD_PROPERTY_MAX);
+				// Add the vertex properties.
+				// Index by the ManifoldProperty constants rather than push_back, for clarity.
+				double *vert = &mesh.vertProperties[begin];
+				vert[MANIFOLD_PROPERTY_POSITION_X] = face.vertices[i].x;
+				vert[MANIFOLD_PROPERTY_POSITION_Y] = face.vertices[i].y;
+				vert[MANIFOLD_PROPERTY_POSITION_Z] = face.vertices[i].z;
+				vert[MANIFOLD_PROPERTY_UV_X_0] = face.uvs[i].x;
+				vert[MANIFOLD_PROPERTY_UV_Y_0] = face.uvs[i].y;
+				vert[MANIFOLD_PROPERTY_SMOOTH_GROUP] = face.smooth ? 1.0f : 0.0f;
+				vert[MANIFOLD_PROPERTY_INVERT] = face.invert ? 1.0f : 0.0f;
 			}
-			node_aabb = aabb;
-		} else {
-			node_aabb = AABB();
 		}
+	}
+	// runIndex needs an explicit end value.
+	mesh.runIndex.push_back(mesh.triVerts.size());
+	ERR_FAIL_COND_MSG(mesh.vertProperties.size() % mesh.numProp != 0, "Invalid vertex properties size.");
+	mesh.Merge();
+	r_manifold = manifold::Manifold(mesh);
+	manifold::Manifold::Error err = r_manifold.Status();
+	if (err != manifold::Manifold::Error::NoError) {
+		print_error(String("Manifold creation from mesh failed: " + itos((int)err)));
+	}
+}
 
-		brush = n;
-
-		dirty = false;
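+// Small helper that maps CSGShape3D's operation enum onto manifold::OpType.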
+struct ManifoldOperation {
+	manifold::Manifold manifold;
+	manifold::OpType operation;
+	static manifold::OpType convert_csg_op(CSGShape3D::Operation op) {
+		switch (op) {
+			case CSGShape3D::OPERATION_SUBTRACTION:
+				return manifold::OpType::Subtract;
+			case CSGShape3D::OPERATION_INTERSECTION:
+				return manifold::OpType::Intersect;
+			default:
+				return manifold::OpType::Add;
+		}
 	}
+	ManifoldOperation() :
+			operation(manifold::OpType::Add) {}
+	ManifoldOperation(const manifold::Manifold &m, manifold::OpType op) :
+			manifold(m), operation(op) {}
+};
 
+CSGBrush *CSGShape3D::_get_brush() {
+	if (!dirty) {
+		return brush;
+	}
+	if (brush) {
+		memdelete(brush);
+	}
+	brush = nullptr;
+	CSGBrush *n = _build_brush();
+	HashMap<int32_t, Ref<Material>> mesh_materials;
+	manifold::Manifold root_manifold;
+	_pack_manifold(n, root_manifold, mesh_materials, get_snap());
+	manifold::OpType current_op = ManifoldOperation::convert_csg_op(get_operation());
+	std::vector<manifold::Manifold> manifolds;
+	manifolds.push_back(root_manifold);
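+	// Batch consecutive children that share the same boolean operation and
+	// evaluate them together with BatchBoolean(); when the operation changes,
+	// collapse the batch so far into a single manifold and start a new batch.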
+	for (int i = 0; i < get_child_count(); i++) {
+		CSGShape3D *child = Object::cast_to<CSGShape3D>(get_child(i));
+		if (!child || !child->is_visible()) {
+			continue;
+		}
+		CSGBrush *child_brush = child->_get_brush();
+		if (!child_brush) {
+			continue;
+		}
+		CSGBrush transformed_brush;
+		transformed_brush.copy_from(*child_brush, child->get_transform());
+		manifold::Manifold child_manifold;
+		_pack_manifold(&transformed_brush, child_manifold, mesh_materials, get_snap());
+		manifold::OpType child_operation = ManifoldOperation::convert_csg_op(child->get_operation());
+		if (child_operation != current_op) {
+			manifold::Manifold result = manifold::Manifold::BatchBoolean(manifolds, current_op);
+			manifolds.clear();
+			manifolds.push_back(result);
+			current_op = child_operation;
+		}
+		manifolds.push_back(child_manifold);
+	}
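+	// Evaluate the final batch and unpack the result back into a CSGBrush.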
+	if (!manifolds.empty()) {
+		manifold::Manifold manifold_result = manifold::Manifold::BatchBoolean(manifolds, current_op);
+		if (n) {
+			memdelete(n);
+		}
+		n = memnew(CSGBrush);
+		_unpack_manifold(manifold_result, mesh_materials, n);
+	}
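+	// Recompute the node's local AABB from the merged faces.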
+	AABB aabb;
+	if (n && !n->faces.is_empty()) {
+		aabb.position = n->faces[0].vertices[0];
+		for (const CSGBrush::Face &face : n->faces) {
+			for (int i = 0; i < 3; ++i) {
+				aabb.expand_to(face.vertices[i]);
+			}
+		}
+	}
+	node_aabb = aabb;
+	brush = n;
+	dirty = false;
 	return brush;
 }
 

+ 12 - 0
thirdparty/README.md

@@ -546,6 +546,18 @@ Patch `godot-node-debug-fix.patch` workarounds shadowing of Godot's Node class
 in the MSVC debugger.
 
 
+## manifold
+
+- Upstream: https://github.com/elalish/manifold
+- Version: 3.0.0 (5d127e57fbfb89225a8e905d0d914ccc86c139c8, 2024)
+- License: Apache 2.0
+
+Files extracted from upstream source:
+
+- `src/`
+- `AUTHORS`, `LICENSE`
+
+
 ## mbedtls
 
 - Upstream: https://github.com/Mbed-TLS/mbedtls

+ 10 - 0
thirdparty/manifold/AUTHORS

@@ -0,0 +1,10 @@
+# This is the list of Manifold's significant contributors.
+#
+# This does not necessarily list everyone who has contributed code,
+# especially since many employees of one corporation may be contributing.
+# To see the full list of contributors, see the revision history in
+# source control.
+Emmett Lalish <elalish>
+Chun Kit LAM <pca006132>
+Geoff deRosenroll <geoffder>
+Google LLC

+ 201 - 0
thirdparty/manifold/LICENSE

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 650 - 0
thirdparty/manifold/include/manifold/common.h

@@ -0,0 +1,650 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <limits>
+#include <vector>
+
+#ifdef MANIFOLD_DEBUG
+#include <chrono>
+#endif
+
+#include "manifold/linalg.h"
+
+namespace manifold {
+/** @addtogroup Math
+ * @ingroup Core
+ * @brief Simple math operations.
+ * */
+
+/** @addtogroup LinAlg
+ *  @{
+ */
+namespace la = linalg;
+using vec2 = la::vec<double, 2>;
+using vec3 = la::vec<double, 3>;
+using vec4 = la::vec<double, 4>;
+using bvec4 = la::vec<bool, 4>;
+using mat2 = la::mat<double, 2, 2>;
+using mat3x2 = la::mat<double, 3, 2>;
+using mat4x2 = la::mat<double, 4, 2>;
+using mat2x3 = la::mat<double, 2, 3>;
+using mat3 = la::mat<double, 3, 3>;
+using mat4x3 = la::mat<double, 4, 3>;
+using mat3x4 = la::mat<double, 3, 4>;
+using mat4 = la::mat<double, 4, 4>;
+using ivec2 = la::vec<int, 2>;
+using ivec3 = la::vec<int, 3>;
+using ivec4 = la::vec<int, 4>;
+using quat = la::vec<double, 4>;
+/** @} */
+
+/** @addtogroup Scalar
+ * @ingroup Math
+ *  @brief Simple scalar operations.
+ *  @{
+ */
+
+constexpr double kPi = 3.14159265358979323846264338327950288;
+constexpr double kTwoPi = 6.28318530717958647692528676655900576;
+constexpr double kHalfPi = 1.57079632679489661923132169163975144;
+
+/**
+ * Convert degrees to radians.
+ *
+ * @param a Angle in degrees.
+ */
+constexpr double radians(double a) { return a * kPi / 180; }
+
+/**
+ * Convert radians to degrees.
+ *
+ * @param a Angle in radians.
+ */
+constexpr double degrees(double a) { return a * 180 / kPi; }
+
+/**
+ * Performs smooth Hermite interpolation between 0 and 1 when edge0 < x < edge1.
+ *
+ * @param edge0 Specifies the value of the lower edge of the Hermite function.
+ * @param edge1 Specifies the value of the upper edge of the Hermite function.
+ * @param a Specifies the source value for interpolation.
+ */
+constexpr double smoothstep(double edge0, double edge1, double a) {
+  const double x = la::clamp((a - edge0) / (edge1 - edge0), 0, 1);
+  return x * x * (3 - 2 * x);
+}
+
+/**
+ * Sine function where multiples of 90 degrees come out exact.
+ *
+ * @param x Angle in degrees.
+ */
+inline double sind(double x) {
+  if (!la::isfinite(x)) return sin(x);
+  if (x < 0.0) return -sind(-x);
+  int quo;
+  x = remquo(fabs(x), 90.0, &quo);
+  switch (quo % 4) {
+    case 0:
+      return sin(radians(x));
+    case 1:
+      return cos(radians(x));
+    case 2:
+      return -sin(radians(x));
+    case 3:
+      return -cos(radians(x));
+  }
+  return 0.0;
+}
+
+/**
+ * Cosine function where multiples of 90 degrees come out exact.
+ *
+ * @param x Angle in degrees.
+ */
+inline double cosd(double x) { return sind(x + 90.0); }
+/** @} */
+
+/** @addtogroup Structs
+ * @ingroup Core
+ * @brief Miscellaneous data structures for interfacing with this library.
+ *  @{
+ */
+
+/**
+ * @brief Single polygon contour, wound CCW. First and last point are implicitly
+ * connected. Should ensure all input is
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid).
+ */
+using SimplePolygon = std::vector<vec2>;
+
+/**
+ * @brief Set of polygons with holes. Order of contours is arbitrary. Can
+ * contain any depth of nested holes and any number of separate polygons. Should
+ * ensure all input is
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid).
+ */
+using Polygons = std::vector<SimplePolygon>;
+
+/**
+ * @brief Defines which edges to sharpen and how much for the Manifold.Smooth()
+ * constructor.
+ */
+struct Smoothness {
+  /// The halfedge index = 3 * tri + i, referring to Mesh.triVerts[tri][i].
+  size_t halfedge;
+  /// A value between 0 and 1, where 0 is sharp and 1 is the default and the
+  /// curvature is interpolated between these values. The two paired halfedges
+  /// can have different values while maintaining C-1 continuity (except for 0).
+  double smoothness;
+};
+
+/**
+ * @brief Axis-aligned 3D box, primarily for bounding.
+ */
+struct Box {
+  vec3 min = vec3(std::numeric_limits<double>::infinity());
+  vec3 max = vec3(-std::numeric_limits<double>::infinity());
+
+  /**
+   * Default constructor is an empty box with inverted infinite bounds, so that
+   * Union() with any point or box yields exactly that point or box.
+   */
+  constexpr Box() {}
+
+  /**
+   * Creates a box that contains the two given points.
+   */
+  constexpr Box(const vec3 p1, const vec3 p2) {
+    min = la::min(p1, p2);
+    max = la::max(p1, p2);
+  }
+
+  /**
+   * Returns the dimensions of the Box.
+   */
+  constexpr vec3 Size() const { return max - min; }
+
+  /**
+   * Returns the center point of the Box.
+   */
+  constexpr vec3 Center() const { return 0.5 * (max + min); }
+
+  /**
+   * Returns the absolute-largest coordinate value of any contained
+   * point.
+   */
+  constexpr double Scale() const {
+    vec3 absMax = la::max(la::abs(min), la::abs(max));
+    return la::max(absMax.x, la::max(absMax.y, absMax.z));
+  }
+
+  /**
+   * Does this box contain (includes equal) the given point?
+   */
+  constexpr bool Contains(const vec3& p) const {
+    return la::all(la::gequal(p, min)) && la::all(la::gequal(max, p));
+  }
+
+  /**
+   * Does this box contain (includes equal) the given box?
+   */
+  constexpr bool Contains(const Box& box) const {
+    return la::all(la::gequal(box.min, min)) &&
+           la::all(la::gequal(max, box.max));
+  }
+
+  /**
+   * Expand this box to include the given point.
+   */
+  void Union(const vec3 p) {
+    min = la::min(min, p);
+    max = la::max(max, p);
+  }
+
+  /**
+   * Expand this box to include the given box.
+   */
+  constexpr Box Union(const Box& box) const {
+    Box out;
+    out.min = la::min(min, box.min);
+    out.max = la::max(max, box.max);
+    return out;
+  }
+
+  /**
+   * Transform the given box by the given axis-aligned affine transform.
+   *
+   * Ensure the transform passed in is axis-aligned (rotations are all
+   * multiples of 90 degrees), or else the resulting bounding box will no longer
+   * bound properly.
+   */
+  constexpr Box Transform(const mat3x4& transform) const {
+    Box out;
+    vec3 minT = transform * vec4(min, 1.0);
+    vec3 maxT = transform * vec4(max, 1.0);
+    out.min = la::min(minT, maxT);
+    out.max = la::max(minT, maxT);
+    return out;
+  }
+
+  /**
+   * Shift this box by the given vector.
+   */
+  constexpr Box operator+(vec3 shift) const {
+    Box out;
+    out.min = min + shift;
+    out.max = max + shift;
+    return out;
+  }
+
+  /**
+   * Shift this box in-place by the given vector.
+   */
+  Box& operator+=(vec3 shift) {
+    min += shift;
+    max += shift;
+    return *this;
+  }
+
+  /**
+   * Scale this box by the given vector.
+   */
+  constexpr Box operator*(vec3 scale) const {
+    Box out;
+    out.min = min * scale;
+    out.max = max * scale;
+    return out;
+  }
+
+  /**
+   * Scale this box in-place by the given vector.
+   */
+  Box& operator*=(vec3 scale) {
+    min *= scale;
+    max *= scale;
+    return *this;
+  }
+
+  /**
+   * Does this box overlap the one given (including equality)?
+   */
+  constexpr bool DoesOverlap(const Box& box) const {
+    return min.x <= box.max.x && min.y <= box.max.y && min.z <= box.max.z &&
+           max.x >= box.min.x && max.y >= box.min.y && max.z >= box.min.z;
+  }
+
+  /**
+   * Does the given point project within the XY extent of this box
+   * (including equality)?
+   */
+  constexpr bool DoesOverlap(vec3 p) const {  // projected in z
+    return p.x <= max.x && p.x >= min.x && p.y <= max.y && p.y >= min.y;
+  }
+
+  /**
+   * Does this box have finite bounds?
+   */
+  constexpr bool IsFinite() const {
+    return la::all(la::isfinite(min)) && la::all(la::isfinite(max));
+  }
+};
+
+/**
+ * @brief Axis-aligned 2D box, primarily for bounding.
+ */
+struct Rect {
+  vec2 min = vec2(std::numeric_limits<double>::infinity());
+  vec2 max = vec2(-std::numeric_limits<double>::infinity());
+
+  /**
+   * Default constructor is an empty rectangle.
+   */
+  constexpr Rect() {}
+
+  /**
+   * Create a rectangle that contains the two given points.
+   */
+  constexpr Rect(const vec2 a, const vec2 b) {
+    min = la::min(a, b);
+    max = la::max(a, b);
+  }
+
+  /** @name Information
+   *  Details of the rectangle
+   */
+  ///@{
+
+  /**
+   * Return the dimensions of the rectangle.
+   */
+  constexpr vec2 Size() const { return max - min; }
+
+  /**
+   * Return the area of the rectangle.
+   */
+  constexpr double Area() const {
+    auto sz = Size();
+    return sz.x * sz.y;
+  }
+
+  /**
+   * Returns the absolute-largest coordinate value of any contained
+   * point.
+   */
+  constexpr double Scale() const {
+    vec2 absMax = la::max(la::abs(min), la::abs(max));
+    return la::max(absMax.x, absMax.y);
+  }
+
+  /**
+   * Returns the center point of the rectangle.
+   */
+  constexpr vec2 Center() const { return 0.5 * (max + min); }
+
+  /**
+   * Does this rectangle contain (includes on border) the given point?
+   */
+  constexpr bool Contains(const vec2& p) const {
+    return la::all(la::gequal(p, min)) && la::all(la::gequal(max, p));
+  }
+
+  /**
+   * Does this rectangle contain (includes equal) the given rectangle?
+   */
+  constexpr bool Contains(const Rect& rect) const {
+    return la::all(la::gequal(rect.min, min)) &&
+           la::all(la::gequal(max, rect.max));
+  }
+
+  /**
+   * Does this rectangle overlap the one given (including equality)?
+   */
+  constexpr bool DoesOverlap(const Rect& rect) const {
+    return min.x <= rect.max.x && min.y <= rect.max.y && max.x >= rect.min.x &&
+           max.y >= rect.min.y;
+  }
+
+  /**
+   * Is the rectangle empty (containing no space)?
+   */
+  constexpr bool IsEmpty() const { return max.y <= min.y || max.x <= min.x; };
+
+  /**
+   * Does this rectangle have finite bounds?
+   */
+  constexpr bool IsFinite() const {
+    return la::all(la::isfinite(min)) && la::all(la::isfinite(max));
+  }
+
+  ///@}
+
+  /** @name Modification
+   */
+  ///@{
+
+  /**
+   * Expand this rectangle (in place) to include the given point.
+   */
+  void Union(const vec2 p) {
+    min = la::min(min, p);
+    max = la::max(max, p);
+  }
+
+  /**
+   * Expand this rectangle to include the given Rect.
+   */
+  constexpr Rect Union(const Rect& rect) const {
+    Rect out;
+    out.min = la::min(min, rect.min);
+    out.max = la::max(max, rect.max);
+    return out;
+  }
+
+  /**
+   * Shift this rectangle by the given vector.
+   */
+  constexpr Rect operator+(const vec2 shift) const {
+    Rect out;
+    out.min = min + shift;
+    out.max = max + shift;
+    return out;
+  }
+
+  /**
+   * Shift this rectangle in-place by the given vector.
+   */
+  Rect& operator+=(const vec2 shift) {
+    min += shift;
+    max += shift;
+    return *this;
+  }
+
+  /**
+   * Scale this rectangle by the given vector.
+   */
+  constexpr Rect operator*(const vec2 scale) const {
+    Rect out;
+    out.min = min * scale;
+    out.max = max * scale;
+    return out;
+  }
+
+  /**
+   * Scale this rectangle in-place by the given vector.
+   */
+  Rect& operator*=(const vec2 scale) {
+    min *= scale;
+    max *= scale;
+    return *this;
+  }
+
+  /**
+   * Transform the rectangle by the given axis-aligned affine transform.
+   *
+   * Ensure the transform passed in is axis-aligned (rotations are all
+   * multiples of 90 degrees), or else the resulting rectangle will no longer
+   * bound properly.
+   */
+  constexpr Rect Transform(const mat2x3& m) const {
+    Rect rect;
+    rect.min = m * vec3(min, 1);
+    rect.max = m * vec3(max, 1);
+    return rect;
+  }
+  ///@}
+};
+
+/**
+ * @brief Boolean operation type: Add (Union), Subtract (Difference), and
+ * Intersect.
+ */
+enum class OpType { Add, Subtract, Intersect };
+
+constexpr int DEFAULT_SEGMENTS = 0;
+constexpr double DEFAULT_ANGLE = 10.0;
+constexpr double DEFAULT_LENGTH = 1.0;
+/**
+ * @brief These static properties control how circular shapes are quantized by
+ * default on construction.
+ *
+ * If circularSegments is specified, it takes
+ * precedence. If it is zero, then instead the minimum is used of the segments
+ * calculated based on edge length and angle, rounded up to the nearest
+ * multiple of four. To get numbers not divisible by four, circularSegments
+ * must be specified.
+ */
+class Quality {
+ private:
+  inline static int circularSegments_ = DEFAULT_SEGMENTS;
+  inline static double circularAngle_ = DEFAULT_ANGLE;
+  inline static double circularEdgeLength_ = DEFAULT_LENGTH;
+
+ public:
+  /**
+   * Sets an angle constraint on the default number of circular segments for the
+   * CrossSection::Circle(), Manifold::Cylinder(), Manifold::Sphere(), and
+   * Manifold::Revolve() constructors. The number of segments will be rounded up
+   * to the nearest factor of four.
+   *
+   * @param angle The minimum angle in degrees between consecutive segments. The
+   * angle will increase if the segments hit the minimum edge length.
+   * Default is 10 degrees.
+   */
+  static void SetMinCircularAngle(double angle) {
+    if (angle <= 0) return;
+    circularAngle_ = angle;
+  }
+
+  /**
+   * Sets a length constraint on the default number of circular segments for the
+   * CrossSection::Circle(), Manifold::Cylinder(), Manifold::Sphere(), and
+   * Manifold::Revolve() constructors. The number of segments will be rounded up
+   * to the nearest factor of four.
+   *
+   * @param length The minimum length of segments. The length will
+   * increase if the segments hit the minimum angle. Default is 1.0.
+   */
+  static void SetMinCircularEdgeLength(double length) {
+    if (length <= 0) return;
+    circularEdgeLength_ = length;
+  }
+
+  /**
+   * Sets the default number of circular segments for the
+   * CrossSection::Circle(), Manifold::Cylinder(), Manifold::Sphere(), and
+   * Manifold::Revolve() constructors. Overrides the edge length and angle
+   * constraints and sets the number of segments to exactly this value.
+   *
+   * @param number Number of circular segments. Default is 0, meaning no
+   * constraint is applied.
+   */
+  static void SetCircularSegments(int number) {
+    if (number < 3 && number != 0) return;
+    circularSegments_ = number;
+  }
+
+  /**
+   * Determine the result of the SetMinCircularAngle(),
+   * SetMinCircularEdgeLength(), and SetCircularSegments() defaults.
+   *
+   * @param radius For a given radius of circle, determine how many default
+   * segments there will be.
+   */
+  static int GetCircularSegments(double radius) {
+    if (circularSegments_ > 0) return circularSegments_;
+    int nSegA = 360.0 / circularAngle_;
+    int nSegL = 2.0 * radius * kPi / circularEdgeLength_;
+    int nSeg = fmin(nSegA, nSegL) + 3;
+    nSeg -= nSeg % 4;
+    return std::max(nSeg, 3);
+  }
+
+  /**
+   * Resets the circular construction parameters to their defaults if
+   * SetMinCircularAngle, SetMinCircularEdgeLength, or SetCircularSegments have
+   * been called.
+   */
+  static void ResetToDefaults() {
+    circularSegments_ = DEFAULT_SEGMENTS;
+    circularAngle_ = DEFAULT_ANGLE;
+    circularEdgeLength_ = DEFAULT_LENGTH;
+  }
+};
+/** @} */
+
+/** @addtogroup Debug
+ * @ingroup Optional
+ * @{
+ */
+
+/**
+ * @brief Global parameters that control debugging output. Only has an
+ * effect when compiled with the MANIFOLD_DEBUG flag.
+ */
+struct ExecutionParams {
+  /// Perform extra sanity checks and assertions on the intermediate data
+  /// structures.
+  bool intermediateChecks = false;
+  /// Verbose output primarily of the Boolean, including timing info and vector
+  /// sizes.
+  bool verbose = false;
+  /// If processOverlaps is false, a geometric check will be performed to assert
+  /// all triangles are CCW.
+  bool processOverlaps = true;
+  /// Suppresses printed errors regarding CW triangles. Has no effect if
+  /// processOverlaps is true.
+  bool suppressErrors = false;
+  /// Perform optional but recommended triangle cleanups in SimplifyTopology()
+  bool cleanupTriangles = true;
+};
+/** @} */
+
+#ifdef MANIFOLD_DEBUG
+inline std::ostream& operator<<(std::ostream& stream, const Box& box) {
+  return stream << "min: " << box.min << ", "
+                << "max: " << box.max;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const Rect& box) {
+  return stream << "min: " << box.min << ", "
+                << "max: " << box.max;
+}
+
+/**
+ * Print the contents of this vector to standard output. Only exists if compiled
+ * with MANIFOLD_DEBUG flag.
+ */
+template <typename T>
+void Dump(const std::vector<T>& vec) {
+  std::cout << "Vec = " << std::endl;
+  for (size_t i = 0; i < vec.size(); ++i) {
+    std::cout << i << ", " << vec[i] << ", " << std::endl;
+  }
+  std::cout << std::endl;
+}
+
+template <typename T>
+void Diff(const std::vector<T>& a, const std::vector<T>& b) {
+  std::cout << "Diff = " << std::endl;
+  if (a.size() != b.size()) {
+    std::cout << "a and b must have the same length, aborting Diff"
+              << std::endl;
+    return;
+  }
+  for (size_t i = 0; i < a.size(); ++i) {
+    if (a[i] != b[i])
+      std::cout << i << ": " << a[i] << ", " << b[i] << std::endl;
+  }
+  std::cout << std::endl;
+}
+
+struct Timer {
+  std::chrono::high_resolution_clock::time_point start, end;
+
+  void Start() { start = std::chrono::high_resolution_clock::now(); }
+
+  void Stop() { end = std::chrono::high_resolution_clock::now(); }
+
+  float Elapsed() {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
+        .count();
+  }
+  void Print(std::string message) {
+    std::cout << "----------- " << std::round(Elapsed()) << " ms for "
+              << message << std::endl;
+  }
+};
+#endif
+}  // namespace manifold

+ 314 - 0
thirdparty/manifold/include/manifold/iters.h

@@ -0,0 +1,314 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <iterator>
+#include <type_traits>
+
+namespace manifold {
+
+template <typename F, typename Iter>
+struct TransformIterator {
+ private:
+  Iter iter;
+  F f;
+
+ public:
+  using pointer = void;
+  using reference = std::invoke_result_t<
+      F, typename std::iterator_traits<std::remove_const_t<Iter>>::value_type>;
+  using difference_type =
+      typename std::iterator_traits<std::remove_const_t<Iter>>::difference_type;
+  using value_type = reference;
+  using iterator_category = typename std::iterator_traits<
+      std::remove_const_t<Iter>>::iterator_category;
+
+  constexpr TransformIterator(Iter iter, F f) : iter(iter), f(f) {}
+
+  TransformIterator& operator=(const TransformIterator& other) {
+    if (this == &other) return *this;
+    // don't copy function, should be the same
+    iter = other.iter;
+    return *this;
+  }
+
+  constexpr reference operator*() const { return f(*iter); }
+
+  constexpr reference operator[](size_t i) const { return f(iter[i]); }
+
+  // prefix increment
+  TransformIterator& operator++() {
+    iter += 1;
+    return *this;
+  }
+
+  // postfix
+  TransformIterator operator++(int) {
+    auto old = *this;
+    operator++();
+    return old;
+  }
+
+  // prefix decrement
+  TransformIterator& operator--() {
+    iter -= 1;
+    return *this;
+  }
+
+  // postfix
+  TransformIterator operator--(int) {
+    auto old = *this;
+    operator--();
+    return old;
+  }
+
+  constexpr TransformIterator operator+(size_t n) const {
+    return TransformIterator(iter + n, f);
+  }
+
+  TransformIterator& operator+=(size_t n) {
+    iter += n;
+    return *this;
+  }
+
+  constexpr TransformIterator operator-(size_t n) const {
+    return TransformIterator(iter - n, f);
+  }
+
+  TransformIterator& operator-=(size_t n) {
+    iter -= n;
+    return *this;
+  }
+
+  constexpr bool operator==(TransformIterator other) const {
+    return iter == other.iter;
+  }
+
+  constexpr bool operator!=(TransformIterator other) const {
+    return !(iter == other.iter);
+  }
+
+  constexpr bool operator<(TransformIterator other) const {
+    return iter < other.iter;
+  }
+
+  constexpr difference_type operator-(TransformIterator other) const {
+    return iter - other.iter;
+  }
+
+  constexpr operator TransformIterator<F, const Iter>() const {
+    return TransformIterator<F, const Iter>(iter, f);
+  }
+};
+
+template <typename T>
+struct CountingIterator {
+ private:
+  T counter;
+
+ public:
+  using pointer = void;
+  using reference = T;
+  using difference_type = std::make_signed_t<T>;
+  using value_type = T;
+  using iterator_category = std::random_access_iterator_tag;
+
+  constexpr CountingIterator(T counter) : counter(counter) {}
+
+  constexpr value_type operator*() const { return counter; }
+  constexpr value_type operator[](T i) const { return counter + i; }
+
+  // prefix increment
+  CountingIterator& operator++() {
+    counter += 1;
+    return *this;
+  }
+
+  // postfix
+  CountingIterator operator++(int) {
+    auto old = *this;
+    operator++();
+    return old;
+  }
+
+  // prefix decrement
+  CountingIterator& operator--() {
+    counter -= 1;
+    return *this;
+  }
+
+  // postfix
+  CountingIterator operator--(int) {
+    auto old = *this;
+    operator--();
+    return old;
+  }
+
+  constexpr CountingIterator operator+(T n) const {
+    return CountingIterator(counter + n);
+  }
+
+  CountingIterator& operator+=(T n) {
+    counter += n;
+    return *this;
+  }
+
+  constexpr CountingIterator operator-(T n) const {
+    return CountingIterator(counter - n);
+  }
+
+  CountingIterator& operator-=(T n) {
+    counter -= n;
+    return *this;
+  }
+
+  constexpr friend bool operator==(CountingIterator a, CountingIterator b) {
+    return a.counter == b.counter;
+  }
+
+  constexpr friend bool operator!=(CountingIterator a, CountingIterator b) {
+    return a.counter != b.counter;
+  }
+
+  constexpr friend bool operator<(CountingIterator a, CountingIterator b) {
+    return a.counter < b.counter;
+  }
+
+  constexpr friend difference_type operator-(CountingIterator a,
+                                             CountingIterator b) {
+    return a.counter - b.counter;
+  }
+
+  constexpr operator CountingIterator<const T>() const {
+    return CountingIterator(counter);
+  }
+};
+
+constexpr CountingIterator<size_t> countAt(size_t i) {
+  return CountingIterator(i);
+}
+
+template <typename Iter>
+struct StridedRange {
+ private:
+  struct StridedRangeIter {
+   private:
+    Iter iter;
+    size_t stride;
+
+   public:
+    using pointer =
+        typename std::iterator_traits<std::remove_const_t<Iter>>::pointer;
+    using reference =
+        typename std::iterator_traits<std::remove_const_t<Iter>>::reference;
+    using difference_type = typename std::iterator_traits<
+        std::remove_const_t<Iter>>::difference_type;
+    using value_type =
+        typename std::iterator_traits<std::remove_const_t<Iter>>::value_type;
+    using iterator_category = typename std::iterator_traits<
+        std::remove_const_t<Iter>>::iterator_category;
+
+    constexpr StridedRangeIter(Iter iter, int stride)
+        : iter(iter), stride(stride) {}
+
+    constexpr reference operator*() { return *iter; }
+
+    constexpr std::add_const_t<reference> operator*() const { return *iter; }
+
+    constexpr reference operator[](size_t i) { return iter[i * stride]; }
+
+    constexpr std::add_const_t<reference> operator[](size_t i) const {
+      return iter[i * stride];
+    }
+
+    // prefix increment
+    StridedRangeIter& operator++() {
+      iter += stride;
+      return *this;
+    }
+
+    // postfix
+    StridedRangeIter operator++(int) {
+      auto old = *this;
+      operator++();
+      return old;
+    }
+
+    // prefix decrement
+    StridedRangeIter& operator--() {
+      iter -= stride;
+      return *this;
+    }
+
+    // postfix
+    StridedRangeIter operator--(int) {
+      auto old = *this;
+      operator--();
+      return old;
+    }
+
+    constexpr StridedRangeIter operator+(size_t n) const {
+      return StridedRangeIter(iter + n * stride, stride);
+    }
+
+    StridedRangeIter& operator+=(size_t n) {
+      iter += n * stride;
+      return *this;
+    }
+
+    constexpr StridedRangeIter operator-(size_t n) const {
+      return StridedRangeIter(iter - n * stride, stride);
+    }
+
+    StridedRangeIter& operator-=(size_t n) {
+      iter -= n * stride;
+      return *this;
+    }
+
+    constexpr friend bool operator==(StridedRangeIter a, StridedRangeIter b) {
+      return a.iter == b.iter;
+    }
+
+    constexpr friend bool operator!=(StridedRangeIter a, StridedRangeIter b) {
+      return !(a.iter == b.iter);
+    }
+
+    constexpr friend bool operator<(StridedRangeIter a, StridedRangeIter b) {
+      return a.iter < b.iter;
+    }
+
+    constexpr friend difference_type operator-(StridedRangeIter a,
+                                               StridedRangeIter b) {
+      // note that this is not well-defined if a.stride != b.stride...
+      return (a.iter - b.iter) / a.stride;
+    }
+  };
+  Iter _start, _end;
+  const size_t stride;
+
+ public:
+  constexpr StridedRange(Iter start, Iter end, size_t stride)
+      : _start(start), _end(end), stride(stride) {}
+
+  constexpr StridedRangeIter begin() const {
+    return StridedRangeIter(_start, stride);
+  }
+
+  constexpr StridedRangeIter end() const {
+    return StridedRangeIter(_start, stride) +
+           ((std::distance(_start, _end) + (stride - 1)) / stride);
+  }
+};
+
+}  // namespace manifold

+ 2601 - 0
thirdparty/manifold/include/manifold/linalg.h

@@ -0,0 +1,2601 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Based on linalg.h - 2.2 - Single-header public domain linear algebra library
+//
+// The intent of this library is to provide the bulk of the functionality
+// you need to write programs that frequently use small, fixed-size vectors
+// and matrices, in domains such as computational geometry or computer
+// graphics. It strives for terse, readable source code.
+//
+// The original author of this software is Sterling Orsten, and its permanent
+// home is <http://github.com/sgorsten/linalg/>. If you find this software
+// useful, an acknowledgement in your source text and/or product documentation
+// is appreciated, but not required.
+//
+// The author acknowledges significant insights and contributions by:
+//     Stan Melax <http://github.com/melax/>
+//     Dimitri Diakopoulos <http://github.com/ddiakopoulos/>
+//
+// Some features are deprecated. Define LINALG_FORWARD_COMPATIBLE to remove
+// them.
+
+#pragma once
+#ifndef LINALG_H
+#define LINALG_H
+
+#include <array>        // For std::array
+#include <cmath>        // For various unary math functions, such as std::sqrt
+#include <cstdint>      // For implementing namespace linalg::aliases
+#include <cstdlib>      // To resolve std::abs ambiguity on clang
+#include <functional>   // For std::hash declaration
+#include <iosfwd>       // For forward definitions of std::ostream
+#include <type_traits>  // For std::enable_if, std::is_same, std::declval
+
+#ifdef MANIFOLD_DEBUG
+#include <iomanip>
+#include <iostream>
+#endif
+
+// In Visual Studio 2015, `constexpr` applied to a member function implies
+// `const`, which causes ambiguous overload resolution
+#if defined(_MSC_VER) && (_MSC_VER <= 1900)
+#define LINALG_CONSTEXPR14
+#else
+#define LINALG_CONSTEXPR14 constexpr
+#endif
+
+namespace linalg {
+// Small, fixed-length vector type, consisting of exactly M elements of type T,
+// and presumed to be a column-vector unless otherwise noted.
+template <class T, int M>
+struct vec;
+
+// Small, fixed-size matrix type, consisting of exactly M rows and N columns of
+// type T, stored in column-major order.
+template <class T, int M, int N>
+struct mat;
+
+// Specialize converter<T,U> with a function application operator that converts
+// type U to type T to enable implicit conversions
+template <class T, class U>
+struct converter {};
+namespace detail {
+template <class T, class U>
+using conv_t = typename std::enable_if<!std::is_same<T, U>::value,
+                                       decltype(converter<T, U>{}(
+                                           std::declval<U>()))>::type;
+
+// Trait for retrieving scalar type of any linear algebra object
+template <class A>
+struct scalar_type {};
+template <class T, int M>
+struct scalar_type<vec<T, M>> {
+  using type = T;
+};
+template <class T, int M, int N>
+struct scalar_type<mat<T, M, N>> {
+  using type = T;
+};
+
+// Type returned by the compare(...) function which supports all six comparison
+// operators against 0
+template <class T>
+struct ord {
+  T a, b;
+};
+template <class T>
+constexpr bool operator==(const ord<T> &o, std::nullptr_t) {
+  return o.a == o.b;
+}
+template <class T>
+constexpr bool operator!=(const ord<T> &o, std::nullptr_t) {
+  return !(o.a == o.b);
+}
+template <class T>
+constexpr bool operator<(const ord<T> &o, std::nullptr_t) {
+  return o.a < o.b;
+}
+template <class T>
+constexpr bool operator>(const ord<T> &o, std::nullptr_t) {
+  return o.b < o.a;
+}
+template <class T>
+constexpr bool operator<=(const ord<T> &o, std::nullptr_t) {
+  return !(o.b < o.a);
+}
+template <class T>
+constexpr bool operator>=(const ord<T> &o, std::nullptr_t) {
+  return !(o.a < o.b);
+}
+
+// Patterns which can be used with the compare(...) function
+template <class A, class B>
+struct any_compare {};
+template <class T>
+struct any_compare<vec<T, 1>, vec<T, 1>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const vec<T, 1> &a, const vec<T, 1> &b) const {
+    return ord<T>{a.x, b.x};
+  }
+};
+template <class T>
+struct any_compare<vec<T, 2>, vec<T, 2>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const vec<T, 2> &a, const vec<T, 2> &b) const {
+    return !(a.x == b.x) ? ord<T>{a.x, b.x} : ord<T>{a.y, b.y};
+  }
+};
+template <class T>
+struct any_compare<vec<T, 3>, vec<T, 3>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const vec<T, 3> &a, const vec<T, 3> &b) const {
+    return !(a.x == b.x)   ? ord<T>{a.x, b.x}
+           : !(a.y == b.y) ? ord<T>{a.y, b.y}
+                           : ord<T>{a.z, b.z};
+  }
+};
+template <class T>
+struct any_compare<vec<T, 4>, vec<T, 4>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const vec<T, 4> &a, const vec<T, 4> &b) const {
+    return !(a.x == b.x)   ? ord<T>{a.x, b.x}
+           : !(a.y == b.y) ? ord<T>{a.y, b.y}
+           : !(a.z == b.z) ? ord<T>{a.z, b.z}
+                           : ord<T>{a.w, b.w};
+  }
+};
+template <class T, int M>
+struct any_compare<mat<T, M, 1>, mat<T, M, 1>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const mat<T, M, 1> &a,
+                              const mat<T, M, 1> &b) const {
+    return compare(a.x, b.x);
+  }
+};
+template <class T, int M>
+struct any_compare<mat<T, M, 2>, mat<T, M, 2>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const mat<T, M, 2> &a,
+                              const mat<T, M, 2> &b) const {
+    return a.x != b.x ? compare(a.x, b.x) : compare(a.y, b.y);
+  }
+};
+template <class T, int M>
+struct any_compare<mat<T, M, 3>, mat<T, M, 3>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const mat<T, M, 3> &a,
+                              const mat<T, M, 3> &b) const {
+    return a.x != b.x   ? compare(a.x, b.x)
+           : a.y != b.y ? compare(a.y, b.y)
+                        : compare(a.z, b.z);
+  }
+};
+template <class T, int M>
+struct any_compare<mat<T, M, 4>, mat<T, M, 4>> {
+  using type = ord<T>;
+  constexpr ord<T> operator()(const mat<T, M, 4> &a,
+                              const mat<T, M, 4> &b) const {
+    return a.x != b.x   ? compare(a.x, b.x)
+           : a.y != b.y ? compare(a.y, b.y)
+           : a.z != b.z ? compare(a.z, b.z)
+                        : compare(a.w, b.w);
+  }
+};
+
+// Helper for compile-time index-based access to members of vector and matrix
+// types
+template <int I>
+struct getter;
+template <>
+struct getter<0> {
+  template <class A>
+  constexpr auto operator()(A &a) const -> decltype(a.x) {
+    return a.x;
+  }
+};
+template <>
+struct getter<1> {
+  template <class A>
+  constexpr auto operator()(A &a) const -> decltype(a.y) {
+    return a.y;
+  }
+};
+template <>
+struct getter<2> {
+  template <class A>
+  constexpr auto operator()(A &a) const -> decltype(a.z) {
+    return a.z;
+  }
+};
+template <>
+struct getter<3> {
+  template <class A>
+  constexpr auto operator()(A &a) const -> decltype(a.w) {
+    return a.w;
+  }
+};
+
+// Stand-in for std::integer_sequence/std::make_integer_sequence
+template <int... I>
+struct seq {};
+template <int A, int N>
+struct make_seq_impl;
+template <int A>
+struct make_seq_impl<A, 0> {
+  using type = seq<>;
+};
+template <int A>
+struct make_seq_impl<A, 1> {
+  using type = seq<A + 0>;
+};
+template <int A>
+struct make_seq_impl<A, 2> {
+  using type = seq<A + 0, A + 1>;
+};
+template <int A>
+struct make_seq_impl<A, 3> {
+  using type = seq<A + 0, A + 1, A + 2>;
+};
+template <int A>
+struct make_seq_impl<A, 4> {
+  using type = seq<A + 0, A + 1, A + 2, A + 3>;
+};
+template <int A, int B>
+using make_seq = typename make_seq_impl<A, B - A>::type;
+template <class T, int M, int... I>
+vec<T, sizeof...(I)> constexpr swizzle(const vec<T, M> &v, seq<I...> i) {
+  return {getter<I>{}(v)...};
+}
+template <class T, int M, int N, int... I, int... J>
+mat<T, sizeof...(I), sizeof...(J)> constexpr swizzle(const mat<T, M, N> &m,
+                                                     seq<I...> i, seq<J...> j) {
+  return {swizzle(getter<J>{}(m), i)...};
+}
+
+// SFINAE helpers to determine result of function application
+template <class F, class... T>
+using ret_t = decltype(std::declval<F>()(std::declval<T>()...));
+
+// SFINAE helper which is defined if all provided types are scalars
+struct empty {};
+template <class... T>
+struct scalars;
+template <>
+struct scalars<> {
+  using type = void;
+};
+template <class T, class... U>
+struct scalars<T, U...> : std::conditional<std::is_arithmetic<T>::value,
+                                           scalars<U...>, empty>::type {};
+template <class... T>
+using scalars_t = typename scalars<T...>::type;
+
+// Helpers which indicate how apply(F, ...) should be called for various
+// arguments
+template <class F, class Void, class... T>
+struct apply {};  // Patterns which contain only vectors or scalars
+template <class F, int M, class A>
+struct apply<F, scalars_t<>, vec<A, M>> {
+  using type = vec<ret_t<F, A>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a) {
+    return {f(getter<I>{}(a))...};
+  }
+};
+template <class F, int M, class A, class B>
+struct apply<F, scalars_t<>, vec<A, M>, vec<B, M>> {
+  using type = vec<ret_t<F, A, B>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a,
+                             const vec<B, M> &b) {
+    return {f(getter<I>{}(a), getter<I>{}(b))...};
+  }
+};
+template <class F, int M, class A, class B>
+struct apply<F, scalars_t<B>, vec<A, M>, B> {
+  using type = vec<ret_t<F, A, B>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a, B b) {
+    return {f(getter<I>{}(a), b)...};
+  }
+};
+template <class F, int M, class A, class B>
+struct apply<F, scalars_t<A>, A, vec<B, M>> {
+  using type = vec<ret_t<F, A, B>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, A a, const vec<B, M> &b) {
+    return {f(a, getter<I>{}(b))...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<>, vec<A, M>, vec<B, M>, vec<C, M>> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a,
+                             const vec<B, M> &b, const vec<C, M> &c) {
+    return {f(getter<I>{}(a), getter<I>{}(b), getter<I>{}(c))...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<C>, vec<A, M>, vec<B, M>, C> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a,
+                             const vec<B, M> &b, C c) {
+    return {f(getter<I>{}(a), getter<I>{}(b), c)...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<B>, vec<A, M>, B, vec<C, M>> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a, B b,
+                             const vec<C, M> &c) {
+    return {f(getter<I>{}(a), b, getter<I>{}(c))...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<B, C>, vec<A, M>, B, C> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, const vec<A, M> &a, B b, C c) {
+    return {f(getter<I>{}(a), b, c)...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<A>, A, vec<B, M>, vec<C, M>> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, A a, const vec<B, M> &b,
+                             const vec<C, M> &c) {
+    return {f(a, getter<I>{}(b), getter<I>{}(c))...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<A, C>, A, vec<B, M>, C> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, A a, const vec<B, M> &b, C c) {
+    return {f(a, getter<I>{}(b), c)...};
+  }
+};
+template <class F, int M, class A, class B, class C>
+struct apply<F, scalars_t<A, B>, A, B, vec<C, M>> {
+  using type = vec<ret_t<F, A, B, C>, M>;
+  enum { size = M, mm = 0 };
+  template <int... I>
+  static constexpr type impl(seq<I...>, F f, A a, B b, const vec<C, M> &c) {
+    return {f(a, b, getter<I>{}(c))...};
+  }
+};
+template <class F, int M, int N, class A>
+struct apply<F, scalars_t<>, mat<A, M, N>> {
+  using type = mat<ret_t<F, A>, M, N>;
+  enum { size = N, mm = 0 };
+  template <int... J>
+  static constexpr type impl(seq<J...>, F f, const mat<A, M, N> &a) {
+    return {apply<F, void, vec<A, M>>::impl(make_seq<0, M>{}, f,
+                                            getter<J>{}(a))...};
+  }
+};
+template <class F, int M, int N, class A, class B>
+struct apply<F, scalars_t<>, mat<A, M, N>, mat<B, M, N>> {
+  using type = mat<ret_t<F, A, B>, M, N>;
+  enum { size = N, mm = 1 };
+  template <int... J>
+  static constexpr type impl(seq<J...>, F f, const mat<A, M, N> &a,
+                             const mat<B, M, N> &b) {
+    return {apply<F, void, vec<A, M>, vec<B, M>>::impl(
+        make_seq<0, M>{}, f, getter<J>{}(a), getter<J>{}(b))...};
+  }
+};
+template <class F, int M, int N, class A, class B>
+struct apply<F, scalars_t<B>, mat<A, M, N>, B> {
+  using type = mat<ret_t<F, A, B>, M, N>;
+  enum { size = N, mm = 0 };
+  template <int... J>
+  static constexpr type impl(seq<J...>, F f, const mat<A, M, N> &a, B b) {
+    return {apply<F, void, vec<A, M>, B>::impl(make_seq<0, M>{}, f,
+                                               getter<J>{}(a), b)...};
+  }
+};
+template <class F, int M, int N, class A, class B>
+struct apply<F, scalars_t<A>, A, mat<B, M, N>> {
+  using type = mat<ret_t<F, A, B>, M, N>;
+  enum { size = N, mm = 0 };
+  template <int... J>
+  static constexpr type impl(seq<J...>, F f, A a, const mat<B, M, N> &b) {
+    return {apply<F, void, A, vec<B, M>>::impl(make_seq<0, M>{}, f, a,
+                                               getter<J>{}(b))...};
+  }
+};
+template <class F, class... A>
+struct apply<F, scalars_t<A...>, A...> {
+  using type = ret_t<F, A...>;
+  enum { size = 0, mm = 0 };
+  static constexpr type impl(seq<>, F f, A... a) { return f(a...); }
+};
+
+// Function objects for selecting between alternatives
+struct min {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const ->
+      typename std::remove_reference<decltype(a < b ? a : b)>::type {
+    return a < b ? a : b;
+  }
+};
+struct max {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const ->
+      typename std::remove_reference<decltype(a < b ? b : a)>::type {
+    return a < b ? b : a;
+  }
+};
+struct clamp {
+  template <class A, class B, class C>
+  constexpr auto operator()(A a, B b, C c) const ->
+      typename std::remove_reference<decltype(a < b   ? b
+                                              : a < c ? a
+                                                      : c)>::type {
+    return a < b ? b : a < c ? a : c;
+  }
+};
+struct select {
+  template <class A, class B, class C>
+  constexpr auto operator()(A a, B b, C c) const ->
+      typename std::remove_reference<decltype(a ? b : c)>::type {
+    return a ? b : c;
+  }
+};
+struct lerp {
+  template <class A, class B, class C>
+  constexpr auto operator()(A a, B b,
+                            C c) const -> decltype(a * (1 - c) + b * c) {
+    return a * (1 - c) + b * c;
+  }
+};
+
+// Function objects for applying operators
+struct op_pos {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(+a) {
+    return +a;
+  }
+};
+struct op_neg {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(-a) {
+    return -a;
+  }
+};
+struct op_not {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(!a) {
+    return !a;
+  }
+};
+struct op_cmp {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(~(a)) {
+    return ~a;
+  }
+};
+struct op_mul {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a * b) {
+    return a * b;
+  }
+};
+struct op_div {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a / b) {
+    return a / b;
+  }
+};
+struct op_mod {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a % b) {
+    return a % b;
+  }
+};
+struct op_add {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a + b) {
+    return a + b;
+  }
+};
+struct op_sub {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a - b) {
+    return a - b;
+  }
+};
+struct op_lsh {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a << b) {
+    return a << b;
+  }
+};
+struct op_rsh {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a >> b) {
+    return a >> b;
+  }
+};
+struct op_lt {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a < b) {
+    return a < b;
+  }
+};
+struct op_gt {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a > b) {
+    return a > b;
+  }
+};
+struct op_le {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a <= b) {
+    return a <= b;
+  }
+};
+struct op_ge {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a >= b) {
+    return a >= b;
+  }
+};
+struct op_eq {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a == b) {
+    return a == b;
+  }
+};
+struct op_ne {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a != b) {
+    return a != b;
+  }
+};
+struct op_int {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a & b) {
+    return a & b;
+  }
+};
+struct op_xor {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a ^ b) {
+    return a ^ b;
+  }
+};
+struct op_un {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a | b) {
+    return a | b;
+  }
+};
+struct op_and {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a && b) {
+    return a && b;
+  }
+};
+struct op_or {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(a || b) {
+    return a || b;
+  }
+};
+
+// Function objects for applying standard library math functions
+struct std_isfinite {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::isfinite(a)) {
+    return std::isfinite(a);
+  }
+};
+struct std_abs {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::abs(a)) {
+    return std::abs(a);
+  }
+};
+struct std_floor {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::floor(a)) {
+    return std::floor(a);
+  }
+};
+struct std_ceil {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::ceil(a)) {
+    return std::ceil(a);
+  }
+};
+struct std_exp {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::exp(a)) {
+    return std::exp(a);
+  }
+};
+struct std_log {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::log(a)) {
+    return std::log(a);
+  }
+};
+struct std_log2 {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::log2(a)) {
+    return std::log2(a);
+  }
+};
+struct std_log10 {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::log10(a)) {
+    return std::log10(a);
+  }
+};
+struct std_sqrt {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::sqrt(a)) {
+    return std::sqrt(a);
+  }
+};
+struct std_sin {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::sin(a)) {
+    return std::sin(a);
+  }
+};
+struct std_cos {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::cos(a)) {
+    return std::cos(a);
+  }
+};
+struct std_tan {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::tan(a)) {
+    return std::tan(a);
+  }
+};
+struct std_asin {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::asin(a)) {
+    return std::asin(a);
+  }
+};
+struct std_acos {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::acos(a)) {
+    return std::acos(a);
+  }
+};
+struct std_atan {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::atan(a)) {
+    return std::atan(a);
+  }
+};
+struct std_sinh {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::sinh(a)) {
+    return std::sinh(a);
+  }
+};
+struct std_cosh {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::cosh(a)) {
+    return std::cosh(a);
+  }
+};
+struct std_tanh {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::tanh(a)) {
+    return std::tanh(a);
+  }
+};
+struct std_round {
+  template <class A>
+  constexpr auto operator()(A a) const -> decltype(std::round(a)) {
+    return std::round(a);
+  }
+};
+struct std_fmod {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(std::fmod(a, b)) {
+    return std::fmod(a, b);
+  }
+};
+struct std_pow {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(std::pow(a, b)) {
+    return std::pow(a, b);
+  }
+};
+struct std_atan2 {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(std::atan2(a, b)) {
+    return std::atan2(a, b);
+  }
+};
+struct std_copysign {
+  template <class A, class B>
+  constexpr auto operator()(A a, B b) const -> decltype(std::copysign(a, b)) {
+    return std::copysign(a, b);
+  }
+};
+}  // namespace detail
+
+/** @addtogroup LinAlg
+ * @ingroup Math
+ */
+
+/** @addtogroup vec
+ * @ingroup LinAlg
+ * @brief `linalg::vec<T,M>` defines a fixed-length vector containing exactly
+   `M` elements of type `T`.
+
+This data structure can be used to store a wide variety of types of data,
+including geometric vectors, points, homogeneous coordinates, plane equations,
+colors, texture coordinates, or any other situation where you need to manipulate
+a small sequence of numbers. As such, `vec<T,M>` is supported by a set of
+algebraic and component-wise functions, as well as a set of standard reductions.
+
+`vec<T,M>`:
+- is
+  [`DefaultConstructible`](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible):
+  ```cpp
+  float3 v; // v contains 0,0,0
+  ```
+- is constructible from `M` elements of type `T`:
+  ```cpp
+  float3 v {1,2,3}; // v contains 1,2,3
+  ```
+- is
+  [`CopyConstructible`](https://en.cppreference.com/w/cpp/named_req/CopyConstructible)
+  and
+  [`CopyAssignable`](https://en.cppreference.com/w/cpp/named_req/CopyAssignable):
+  ```cpp
+  float3 v {1,2,3}; // v contains 1,2,3
+  float3 u {v};     // u contains 1,2,3
+  float3 w;         // w contains 0,0,0
+  w = u;            // w contains 1,2,3
+  ```
+- is
+  [`EqualityComparable`](https://en.cppreference.com/w/cpp/named_req/EqualityComparable)
+  and
+  [`LessThanComparable`](https://en.cppreference.com/w/cpp/named_req/LessThanComparable):
+  ```cpp
+  if(v == u) cout << "v and u contain equal elements in the same positions" << endl;
+  if(v < u) cout << "v precedes u lexicographically" << endl;
+  ```
+- is **explicitly** constructible from a single element of type `T`:
+  ```cpp
+  float3 v = float3{4}; // v contains 4,4,4
+  ```
+- is **explicitly** constructible from a `vec<U,M>` of some other type `U`:
+  ```cpp
+  float3 v {1.1f,2.3f,3.5f}; // v contains 1.1,2.3,3.5
+  int3 u = int3{v};          // u contains 1,2,3
+  ```
+- has fields `x,y,z,w`:
+  ```cpp
+  float y = point.y;    // y contains second element of point
+  pixel.w = 0.5;        // fourth element of pixel set to 0.5
+  float s = tc.x;       // s contains first element of tc
+  ```
+- supports indexing:
+  ```cpp
+  float x = v[0]; // x contains first element of v
+  v[2] = 5;       // third element of v set to 5
+  ```
+- supports unary operators `+`, `-`, `!` and `~` in component-wise fashion:
+  ```cpp
+  auto v = -float2{2,3}; // v is float2{-2,-3}
+  ```
+- supports binary operators `+`, `-`, `*`, `/`, `%`, `|`, `&`, `^`, `<<` and
+  `>>` in component-wise fashion:
+  ```cpp
+  auto v = float2{1,1} + float2{2,3}; // v is float2{3,4}
+  ```
+- supports binary operators with a scalar on the left or the right:
+  ```cpp
+  auto v = 2 * float3{1,2,3}; // v is float3{2,4,6}
+  auto u = float3{1,2,3} + 1; // u is float3{2,3,4}
+  ```
+- supports operators `+=`, `-=`, `*=`, `/=`, `%=`, `|=`, `&=`, `^=`, `<<=` and
+  `>>=` with vectors or scalars on the right:
+  ```cpp
+  float2 v {1,2}; v *= 3; // v is float2{3,6}
+  ```
+- supports operations on mixed element types:
+  ```cpp
+  auto v = float3{1,2,3} + int3{4,5,6}; // v is float3{5,7,9}
+  ```
+- supports [range-based
+  for](https://en.cppreference.com/w/cpp/language/range-for):
+  ```cpp
+  for(auto elem : float3{1,2,3}) cout << elem << ' '; // prints "1 2 3 "
+  ```
+- has a flat memory layout:
+  ```cpp
+  float3 v {1,2,3};
+  float * p = v.data(); // &v[i] == p+i
+  p[1] = 4; // v contains 1,4,3
+  ```
+ *  @{
+ */
+template <class T>
+struct vec<T, 1> {
+  T x;
+  constexpr vec() : x() {}
+  constexpr vec(const T &x_) : x(x_) {}
+  // NOTE: vec<T,1> does NOT have a constructor from a pointer, as that could
+  // conflict with initializing its single element from zero.
+  template <class U>
+  constexpr explicit vec(const vec<U, 1> &v) : vec(static_cast<T>(v.x)) {}
+  constexpr const T &operator[](int i) const { return x; }
+  LINALG_CONSTEXPR14 T &operator[](int i) { return x; }
+
+  template <class U, class = detail::conv_t<vec, U>>
+  constexpr vec(const U &u) : vec(converter<vec, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, vec>>
+  constexpr operator U() const {
+    return converter<U, vec>{}(*this);
+  }
+};
+template <class T>
+struct vec<T, 2> {
+  T x, y;
+  constexpr vec() : x(), y() {}
+  constexpr vec(const T &x_, const T &y_) : x(x_), y(y_) {}
+  constexpr explicit vec(const T &s) : vec(s, s) {}
+  constexpr explicit vec(const T *p) : vec(p[0], p[1]) {}
+  template <class U, int N>
+  constexpr explicit vec(const vec<U, N> &v)
+      : vec(static_cast<T>(v.x), static_cast<T>(v.y)) {
+    static_assert(
+        N >= 2,
+        "You must give extra arguments if your input vector is shorter.");
+  }
+  constexpr const T &operator[](int i) const { return i == 0 ? x : y; }
+  LINALG_CONSTEXPR14 T &operator[](int i) { return i == 0 ? x : y; }
+
+  template <class U, class = detail::conv_t<vec, U>>
+  constexpr vec(const U &u) : vec(converter<vec, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, vec>>
+  constexpr operator U() const {
+    return converter<U, vec>{}(*this);
+  }
+};
+template <class T>
+struct vec<T, 3> {
+  T x, y, z;
+  constexpr vec() : x(), y(), z() {}
+  constexpr vec(const T &x_, const T &y_, const T &z_) : x(x_), y(y_), z(z_) {}
+  constexpr vec(const vec<T, 2> &xy, const T &z_) : vec(xy.x, xy.y, z_) {}
+  constexpr explicit vec(const T &s) : vec(s, s, s) {}
+  constexpr explicit vec(const T *p) : vec(p[0], p[1], p[2]) {}
+  template <class U, int N>
+  constexpr explicit vec(const vec<U, N> &v)
+      : vec(static_cast<T>(v.x), static_cast<T>(v.y), static_cast<T>(v.z)) {
+    static_assert(
+        N >= 3,
+        "You must give extra arguments if your input vector is shorter.");
+  }
+  constexpr const T &operator[](int i) const {
+    return i == 0 ? x : i == 1 ? y : z;
+  }
+  LINALG_CONSTEXPR14 T &operator[](int i) {
+    return i == 0 ? x : i == 1 ? y : z;
+  }
+  constexpr const vec<T, 2> &xy() const {
+    return *reinterpret_cast<const vec<T, 2> *>(this);
+  }
+  vec<T, 2> &xy() { return *reinterpret_cast<vec<T, 2> *>(this); }
+
+  template <class U, class = detail::conv_t<vec, U>>
+  constexpr vec(const U &u) : vec(converter<vec, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, vec>>
+  constexpr operator U() const {
+    return converter<U, vec>{}(*this);
+  }
+};
+template <class T>
+struct vec<T, 4> {
+  T x, y, z, w;
+  constexpr vec() : x(), y(), z(), w() {}
+  constexpr vec(const T &x_, const T &y_, const T &z_, const T &w_)
+      : x(x_), y(y_), z(z_), w(w_) {}
+  constexpr vec(const vec<T, 2> &xy, const T &z_, const T &w_)
+      : vec(xy.x, xy.y, z_, w_) {}
+  constexpr vec(const vec<T, 3> &xyz, const T &w_)
+      : vec(xyz.x, xyz.y, xyz.z, w_) {}
+  constexpr explicit vec(const T &s) : vec(s, s, s, s) {}
+  constexpr explicit vec(const T *p) : vec(p[0], p[1], p[2], p[3]) {}
+  template <class U, int N>
+  constexpr explicit vec(const vec<U, N> &v)
+      : vec(static_cast<T>(v.x), static_cast<T>(v.y), static_cast<T>(v.z),
+            static_cast<T>(v.w)) {
+    static_assert(
+        N >= 4,
+        "You must give extra arguments if your input vector is shorter.");
+  }
+  constexpr const T &operator[](int i) const {
+    return i == 0 ? x : i == 1 ? y : i == 2 ? z : w;
+  }
+  LINALG_CONSTEXPR14 T &operator[](int i) {
+    return i == 0 ? x : i == 1 ? y : i == 2 ? z : w;
+  }
+  constexpr const vec<T, 2> &xy() const {
+    return *reinterpret_cast<const vec<T, 2> *>(this);
+  }
+  constexpr const vec<T, 3> &xyz() const {
+    return *reinterpret_cast<const vec<T, 3> *>(this);
+  }
+  vec<T, 2> &xy() { return *reinterpret_cast<vec<T, 2> *>(this); }
+  vec<T, 3> &xyz() { return *reinterpret_cast<vec<T, 3> *>(this); }
+
+  template <class U, class = detail::conv_t<vec, U>>
+  constexpr vec(const U &u) : vec(converter<vec, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, vec>>
+  constexpr operator U() const {
+    return converter<U, vec>{}(*this);
+  }
+};
+/** @} */
+
+/** @addtogroup mat
+ * @ingroup LinAlg
+ * @brief `linalg::mat<T,M,N>` defines a fixed-size matrix containing exactly
+   `M` rows and `N` columns of type `T`, in column-major order.
+
+This data structure is supported by a set of algebraic and component-wise
+functions, as well as a set of standard reductions.
+
+`mat<T,M,N>`:
+- is
+  [`DefaultConstructible`](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible):
+  ```cpp
+  float2x2 m; // m contains columns 0,0; 0,0
+  ```
+- is constructible from `N` columns of type `vec<T,M>`:
+  ```cpp
+  float2x2 m {{1,2},{3,4}}; // m contains columns 1,2; 3,4
+  ```
+- is constructible from `linalg::identity`:
+  ```cpp
+  float3x3 m = linalg::identity; // m contains columns 1,0,0; 0,1,0; 0,0,1
+  ```
+- is
+  [`CopyConstructible`](https://en.cppreference.com/w/cpp/named_req/CopyConstructible)
+  and
+  [`CopyAssignable`](https://en.cppreference.com/w/cpp/named_req/CopyAssignable):
+  ```cpp
+  float2x2 m {{1,2},{3,4}}; // m contains columns 1,2; 3,4
+  float2x2 n {m};           // n contains columns 1,2; 3,4
+  float2x2 p;               // p contains columns 0,0; 0,0
+  p = n;                    // p contains columns 1,2; 3,4
+  ```
+- is
+  [`EqualityComparable`](https://en.cppreference.com/w/cpp/named_req/EqualityComparable)
+  and
+  [`LessThanComparable`](https://en.cppreference.com/w/cpp/named_req/LessThanComparable):
+  ```cpp
+  if(m == n) cout << "m and n contain equal elements in the same positions" << endl;
+  if(m < n) cout << "m precedes n lexicographically when compared in column-major order" << endl;
+  ```
+- is **explicitly** constructible from a single element of type `T`:
+  ```cpp
+  float2x2 m {5}; // m contains columns 5,5; 5,5
+  ```
+- is **explicitly** constructible from a `mat<U,M,N>` of some other type `U`:
+  ```cpp
+  float2x2 m {int2x2{{5,6},{7,8}}}; // m contains columns 5,6; 7,8
+  ```
+- supports indexing into *columns*:
+  ```cpp
+  float2x3 m {{1,2},{3,4},{5,6}}; // m contains columns 1,2; 3,4; 5,6
+  float2 c = m[0];                // c contains 1,2
+  m[1]     = {7,8};               // m contains columns 1,2; 7,8; 5,6
+  ```
+- supports retrieval (but not assignment) of rows:
+  ```cpp
+  float2x3 m {{1,2},{3,4},{5,6}}; // m contains columns 1,2; 3,4; 5,6
+  float3 r = m.row(1);            // r contains 2,4,6
+  ```
+
+- supports unary operators `+`, `-`, `!` and `~` in component-wise fashion:
+  ```cpp
+  float2x2 m {{1,2},{3,4}}; // m contains columns 1,2; 3,4
+  float2x2 n = -m;          // n contains columns -1,-2; -3,-4
+  ```
+- supports binary operators `+`, `-`, `*`, `/`, `%`, `|`, `&`, `^`, `<<` and
+  `>>` in component-wise fashion:
+  ```cpp
+  float2x2 a {{0,0},{2,2}}; // a contains columns 0,0; 2,2
+  float2x2 b {{1,2},{1,2}}; // b contains columns 1,2; 1,2
+  float2x2 c = a + b;       // c contains columns 1,2; 3,4
+  ```
+
+- supports binary operators with a scalar on the left or the right:
+  ```cpp
+  auto m = 2 * float2x2{{1,2},{3,4}}; // m is float2x2{{2,4},{6,8}}
+  ```
+
+- supports operators `+=`, `-=`, `*=`, `/=`, `%=`, `|=`, `&=`, `^=`, `<<=` and
+  `>>=` with matrices or scalars on the right:
+  ```cpp
+  float2x2 v {{5,4},{3,2}};
+  v *= 3; // v is float2x2{{15,12},{9,6}}
+  ```
+
+- supports operations on mixed element types:
+
+- supports [range-based
+  for](https://en.cppreference.com/w/cpp/language/range-for) over columns
+
+- has a flat memory layout
+ *  @{
+ */
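+// Illustrative example for the mixed-element-type support noted above (not part
+// of the upstream documentation); it relies only on the float2x2/int2x2 aliases
+// already used in this file's examples:
+//   auto m = float2x2{{1, 2}, {3, 4}} + int2x2{{1, 1}, {1, 1}};
+//   // m is float2x2{{2, 3}, {4, 5}}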
+template <class T, int M>
+struct mat<T, M, 1> {
+  typedef vec<T, M> V;
+  V x;
+  constexpr mat() : x() {}
+  constexpr mat(const V &x_) : x(x_) {}
+  constexpr explicit mat(const T &s) : x(s) {}
+  constexpr explicit mat(const T *p) : x(p + M * 0) {}
+  template <class U>
+  constexpr explicit mat(const mat<U, M, 1> &m) : mat(V(m.x)) {}
+  constexpr vec<T, 1> row(int i) const { return {x[i]}; }
+  constexpr const V &operator[](int j) const { return x; }
+  LINALG_CONSTEXPR14 V &operator[](int j) { return x; }
+
+  template <class U, class = detail::conv_t<mat, U>>
+  constexpr mat(const U &u) : mat(converter<mat, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, mat>>
+  constexpr operator U() const {
+    return converter<U, mat>{}(*this);
+  }
+};
+template <class T, int M>
+struct mat<T, M, 2> {
+  typedef vec<T, M> V;
+  V x, y;
+  constexpr mat() : x(), y() {}
+  constexpr mat(const V &x_, const V &y_) : x(x_), y(y_) {}
+  constexpr explicit mat(const T &s) : x(s), y(s) {}
+  constexpr explicit mat(const T *p) : x(p + M * 0), y(p + M * 1) {}
+  template <class U, int N, int P>
+  constexpr explicit mat(const mat<U, N, P> &m) : mat(V(m.x), V(m.y)) {
+    static_assert(P >= 2, "Input matrix dimensions must be at least as big.");
+  }
+  constexpr vec<T, 2> row(int i) const { return {x[i], y[i]}; }
+  constexpr const V &operator[](int j) const { return j == 0 ? x : y; }
+  LINALG_CONSTEXPR14 V &operator[](int j) { return j == 0 ? x : y; }
+
+  template <class U, class = detail::conv_t<mat, U>>
+  constexpr mat(const U &u) : mat(converter<mat, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, mat>>
+  constexpr operator U() const {
+    return converter<U, mat>{}(*this);
+  }
+};
+template <class T, int M>
+struct mat<T, M, 3> {
+  typedef vec<T, M> V;
+  V x, y, z;
+  constexpr mat() : x(), y(), z() {}
+  constexpr mat(const V &x_, const V &y_, const V &z_) : x(x_), y(y_), z(z_) {}
+  constexpr mat(const mat<T, M, 2> &m_, const V &z_)
+      : x(m_.x), y(m_.y), z(z_) {}
+  constexpr explicit mat(const T &s) : x(s), y(s), z(s) {}
+  constexpr explicit mat(const T *p)
+      : x(p + M * 0), y(p + M * 1), z(p + M * 2) {}
+  template <class U, int N, int P>
+  constexpr explicit mat(const mat<U, N, P> &m) : mat(V(m.x), V(m.y), V(m.z)) {
+    static_assert(P >= 3, "Input matrix dimensions must be at least as big.");
+  }
+  constexpr vec<T, 3> row(int i) const { return {x[i], y[i], z[i]}; }
+  constexpr const V &operator[](int j) const {
+    return j == 0 ? x : j == 1 ? y : z;
+  }
+  LINALG_CONSTEXPR14 V &operator[](int j) {
+    return j == 0 ? x : j == 1 ? y : z;
+  }
+
+  template <class U, class = detail::conv_t<mat, U>>
+  constexpr mat(const U &u) : mat(converter<mat, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, mat>>
+  constexpr operator U() const {
+    return converter<U, mat>{}(*this);
+  }
+};
+template <class T, int M>
+struct mat<T, M, 4> {
+  typedef vec<T, M> V;
+  V x, y, z, w;
+  constexpr mat() : x(), y(), z(), w() {}
+  constexpr mat(const V &x_, const V &y_, const V &z_, const V &w_)
+      : x(x_), y(y_), z(z_), w(w_) {}
+  constexpr mat(const mat<T, M, 3> &m_, const V &w_)
+      : x(m_.x), y(m_.y), z(m_.z), w(w_) {}
+  constexpr explicit mat(const T &s) : x(s), y(s), z(s), w(s) {}
+  constexpr explicit mat(const T *p)
+      : x(p + M * 0), y(p + M * 1), z(p + M * 2), w(p + M * 3) {}
+  template <class U, int N, int P>
+  constexpr explicit mat(const mat<U, N, P> &m)
+      : mat(V(m.x), V(m.y), V(m.z), V(m.w)) {
+    static_assert(P >= 4, "Input matrix dimensions must be at least as big.");
+  }
+
+  constexpr vec<T, 4> row(int i) const { return {x[i], y[i], z[i], w[i]}; }
+  constexpr const V &operator[](int j) const {
+    return j == 0 ? x : j == 1 ? y : j == 2 ? z : w;
+  }
+  LINALG_CONSTEXPR14 V &operator[](int j) {
+    return j == 0 ? x : j == 1 ? y : j == 2 ? z : w;
+  }
+
+  template <class U, class = detail::conv_t<mat, U>>
+  constexpr mat(const U &u) : mat(converter<mat, U>{}(u)) {}
+  template <class U, class = detail::conv_t<U, mat>>
+  constexpr operator U() const {
+    return converter<U, mat>{}(*this);
+  }
+};
+/** @} */
+
+/** @addtogroup identity
+ * @ingroup LinAlg
+ * @brief Define a type which will convert to the multiplicative identity of any
+ * square matrix (and to a partial identity for the non-square sizes below).
+ *  @{
+ */
+struct identity_t {
+  constexpr explicit identity_t(int) {}
+};
+template <class T>
+struct converter<mat<T, 1, 1>, identity_t> {
+  constexpr mat<T, 1, 1> operator()(identity_t) const { return {vec<T, 1>{1}}; }
+};
+template <class T>
+struct converter<mat<T, 2, 2>, identity_t> {
+  constexpr mat<T, 2, 2> operator()(identity_t) const {
+    return {{1, 0}, {0, 1}};
+  }
+};
+template <class T>
+struct converter<mat<T, 2, 3>, identity_t> {
+  constexpr mat<T, 2, 3> operator()(identity_t) const {
+    return {{1, 0}, {0, 1}, {0, 0}};
+  }
+};
+template <class T>
+struct converter<mat<T, 3, 3>, identity_t> {
+  constexpr mat<T, 3, 3> operator()(identity_t) const {
+    return {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+  }
+};
+template <class T>
+struct converter<mat<T, 3, 4>, identity_t> {
+  constexpr mat<T, 3, 4> operator()(identity_t) const {
+    return {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 0, 0}};
+  }
+};
+template <class T>
+struct converter<mat<T, 4, 4>, identity_t> {
+  constexpr mat<T, 4, 4> operator()(identity_t) const {
+    return {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}};
+  }
+};
+constexpr identity_t identity{1};
+/** @} */
+
+/** @addtogroup fold
+ * @ingroup LinAlg
+ * @brief Produce a scalar by applying f(A,B) -> A to adjacent pairs of elements
+ * from a vec/mat in left-to-right/column-major order (matching the
+ * associativity of arithmetic and logical operators).
+ *  @{
+ */
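+// Illustrative example (not from the upstream docs), using the library's own
+// function objects and the float3 alias used throughout this documentation:
+//   fold(detail::op_add{}, 0.0f, float3{1, 2, 3})   // ((0+1)+2)+3 == 6
+//   fold(detail::op_mul{}, 1.0f, float3{2, 3, 4})   // ((1*2)*3)*4 == 24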
+template <class F, class A, class B>
+constexpr A fold(F f, A a, const vec<B, 1> &b) {
+  return f(a, b.x);
+}
+template <class F, class A, class B>
+constexpr A fold(F f, A a, const vec<B, 2> &b) {
+  return f(f(a, b.x), b.y);
+}
+template <class F, class A, class B>
+constexpr A fold(F f, A a, const vec<B, 3> &b) {
+  return f(f(f(a, b.x), b.y), b.z);
+}
+template <class F, class A, class B>
+constexpr A fold(F f, A a, const vec<B, 4> &b) {
+  return f(f(f(f(a, b.x), b.y), b.z), b.w);
+}
+template <class F, class A, class B, int M>
+constexpr A fold(F f, A a, const mat<B, M, 1> &b) {
+  return fold(f, a, b.x);
+}
+template <class F, class A, class B, int M>
+constexpr A fold(F f, A a, const mat<B, M, 2> &b) {
+  return fold(f, fold(f, a, b.x), b.y);
+}
+template <class F, class A, class B, int M>
+constexpr A fold(F f, A a, const mat<B, M, 3> &b) {
+  return fold(f, fold(f, fold(f, a, b.x), b.y), b.z);
+}
+template <class F, class A, class B, int M>
+constexpr A fold(F f, A a, const mat<B, M, 4> &b) {
+  return fold(f, fold(f, fold(f, fold(f, a, b.x), b.y), b.z), b.w);
+}
+/** @} */
+
+/** @addtogroup apply
+ * @ingroup LinAlg
+ * @brief apply(f,...) applies the provided function in an elementwise fashion
+ * to its arguments, producing an object of the same dimensions.
+ *  @{
+ */
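+// Illustrative examples (not from the upstream docs): apply() maps a callable
+// over matching elements and broadcasts any scalar arguments.
+//   apply([](float x) { return x * x; }, float3{1, 2, 3})   // float3{1, 4, 9}
+//   apply(detail::op_add{}, float3{1, 2, 3}, 10.0f)         // float3{11, 12, 13}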
+
+// Type aliases for the result of calling apply(...) with various arguments;
+// these can be used with return-type SFINAE to constrain overload sets.
+template <class F, class... A>
+using apply_t = typename detail::apply<F, void, A...>::type;
+template <class F, class... A>
+using mm_apply_t = typename std::enable_if<detail::apply<F, void, A...>::mm,
+                                           apply_t<F, A...>>::type;
+template <class F, class... A>
+using no_mm_apply_t = typename std::enable_if<!detail::apply<F, void, A...>::mm,
+                                              apply_t<F, A...>>::type;
+template <class A>
+using scalar_t =
+    typename detail::scalar_type<A>::type;  // Underlying scalar type when
+                                            // performing elementwise operations
+
+// apply(f,...) applies the provided function in an elementwise fashion to its
+// arguments, producing an object of the same dimensions
+template <class F, class... A>
+constexpr apply_t<F, A...> apply(F func, const A &...args) {
+  return detail::apply<F, void, A...>::impl(
+      detail::make_seq<0, detail::apply<F, void, A...>::size>{}, func, args...);
+}
+
+// map(a,f) is equivalent to apply(f,a)
+template <class A, class F>
+constexpr apply_t<F, A> map(const A &a, F func) {
+  return apply(func, a);
+}
+
+// zip(a,b,f) is equivalent to apply(f,a,b)
+template <class A, class B, class F>
+constexpr apply_t<F, A, B> zip(const A &a, const B &b, F func) {
+  return apply(func, a, b);
+}
+/** @} */
+
+/** @addtogroup comparison_ops
+ * @ingroup LinAlg
+ * @brief Relational operators are defined to compare the elements of two
+ * vectors or matrices lexicographically, in column-major order.
+ *  @{
+ */
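+// Illustrative example (not from the upstream docs): compare() returns an
+// ord<T> holding the first pair of differing elements, so any of the six
+// relational operators against 0 can be applied to the result.
+//   compare(float3{1, 2, 3}, float3{1, 2, 4}) < 0   // true
+//   float3{1, 2, 3} < float3{1, 2, 4}               // true (same comparison)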
+template <class A, class B>
+constexpr typename detail::any_compare<A, B>::type compare(const A &a,
+                                                           const B &b) {
+  return detail::any_compare<A, B>()(a, b);
+}
+template <class A, class B>
+constexpr auto operator==(const A &a,
+                          const B &b) -> decltype(compare(a, b) == 0) {
+  return compare(a, b) == 0;
+}
+template <class A, class B>
+constexpr auto operator!=(const A &a,
+                          const B &b) -> decltype(compare(a, b) != 0) {
+  return compare(a, b) != 0;
+}
+template <class A, class B>
+constexpr auto operator<(const A &a,
+                         const B &b) -> decltype(compare(a, b) < 0) {
+  return compare(a, b) < 0;
+}
+template <class A, class B>
+constexpr auto operator>(const A &a,
+                         const B &b) -> decltype(compare(a, b) > 0) {
+  return compare(a, b) > 0;
+}
+template <class A, class B>
+constexpr auto operator<=(const A &a,
+                          const B &b) -> decltype(compare(a, b) <= 0) {
+  return compare(a, b) <= 0;
+}
+template <class A, class B>
+constexpr auto operator>=(const A &a,
+                          const B &b) -> decltype(compare(a, b) >= 0) {
+  return compare(a, b) >= 0;
+}
+/** @} */
+
+/** @addtogroup reductions
+ * @ingroup LinAlg
+ * @brief Functions for coalescing scalar values.
+ *  @{
+ */
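+// Illustrative examples (not from the upstream docs):
+//   sum(float3{1, 2, 3})                   // 6
+//   maxelem(float3{1, 5, 3})               // 5
+//   all(vec<bool, 3>{true, false, true})   // false
+//   argmax(float3{1, 5, 3})                // 1 (index of the largest element)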
+template <class A>
+constexpr bool any(const A &a) {
+  return fold(detail::op_or{}, false, a);
+}
+template <class A>
+constexpr bool all(const A &a) {
+  return fold(detail::op_and{}, true, a);
+}
+template <class A>
+constexpr scalar_t<A> sum(const A &a) {
+  return fold(detail::op_add{}, scalar_t<A>(0), a);
+}
+template <class A>
+constexpr scalar_t<A> product(const A &a) {
+  return fold(detail::op_mul{}, scalar_t<A>(1), a);
+}
+template <class A>
+constexpr scalar_t<A> minelem(const A &a) {
+  return fold(detail::min{}, a.x, a);
+}
+template <class A>
+constexpr scalar_t<A> maxelem(const A &a) {
+  return fold(detail::max{}, a.x, a);
+}
+template <class T, int M>
+int argmin(const vec<T, M> &a) {
+  int j = 0;
+  for (int i = 1; i < M; ++i)
+    if (a[i] < a[j]) j = i;
+  return j;
+}
+template <class T, int M>
+int argmax(const vec<T, M> &a) {
+  int j = 0;
+  for (int i = 1; i < M; ++i)
+    if (a[i] > a[j]) j = i;
+  return j;
+}
+/** @} */
+
+/** @addtogroup unary_ops
+ * @ingroup LinAlg
+ * @brief Unary operators are defined component-wise for linalg types.
+ *  @{
+ */
+template <class A>
+constexpr apply_t<detail::op_pos, A> operator+(const A &a) {
+  return apply(detail::op_pos{}, a);
+}
+template <class A>
+constexpr apply_t<detail::op_neg, A> operator-(const A &a) {
+  return apply(detail::op_neg{}, a);
+}
+template <class A>
+constexpr apply_t<detail::op_cmp, A> operator~(const A &a) {
+  return apply(detail::op_cmp{}, a);
+}
+template <class A>
+constexpr apply_t<detail::op_not, A> operator!(const A &a) {
+  return apply(detail::op_not{}, a);
+}
+/** @} */
+
+/** @addtogroup binary_ops
+ * @ingroup LinAlg
+ * @brief Binary operators are defined component-wise for linalg types, EXCEPT
+ * for `operator *`, which does standard matrix multiplication, scalar
+ * multiplication, and component-wise multiplication for same-size vectors. Use
+ * `cmul` for the matrix Hadamard product.
+ *  @{
+ */
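+// Illustrative examples (not from the upstream docs), assuming the float2 and
+// float2x2 aliases used in the documentation above:
+//   float2x2{{1, 0}, {0, 1}} * float2{3, 4}   // matrix product -> float2{3, 4}
+//   float2{1, 2} * float2{3, 4}               // same-size vectors -> float2{3, 8}
+//   cmul(float2x2{{1, 2}, {3, 4}},
+//        float2x2{{2, 2}, {2, 2}})            // Hadamard product -> {{2, 4}, {6, 8}}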
+template <class A, class B>
+constexpr apply_t<detail::op_add, A, B> operator+(const A &a, const B &b) {
+  return apply(detail::op_add{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_sub, A, B> operator-(const A &a, const B &b) {
+  return apply(detail::op_sub{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_mul, A, B> cmul(const A &a, const B &b) {
+  return apply(detail::op_mul{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_div, A, B> operator/(const A &a, const B &b) {
+  return apply(detail::op_div{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_mod, A, B> operator%(const A &a, const B &b) {
+  return apply(detail::op_mod{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_un, A, B> operator|(const A &a, const B &b) {
+  return apply(detail::op_un{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_xor, A, B> operator^(const A &a, const B &b) {
+  return apply(detail::op_xor{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_int, A, B> operator&(const A &a, const B &b) {
+  return apply(detail::op_int{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_lsh, A, B> operator<<(const A &a, const B &b) {
+  return apply(detail::op_lsh{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_rsh, A, B> operator>>(const A &a, const B &b) {
+  return apply(detail::op_rsh{}, a, b);
+}
+
+// Binary `operator *` represents the algebraic matrix product - use cmul(a, b)
+// for the Hadamard (component-wise) product.
+template <class A, class B>
+constexpr auto operator*(const A &a, const B &b) {
+  return mul(a, b);
+}
+
+// A binary assignment operator a $= b is always defined as though it were
+// explicitly written a = a $ b.
+template <class A, class B>
+constexpr auto operator+=(A &a, const B &b) -> decltype(a = a + b) {
+  return a = a + b;
+}
+template <class A, class B>
+constexpr auto operator-=(A &a, const B &b) -> decltype(a = a - b) {
+  return a = a - b;
+}
+template <class A, class B>
+constexpr auto operator*=(A &a, const B &b) -> decltype(a = a * b) {
+  return a = a * b;
+}
+template <class A, class B>
+constexpr auto operator/=(A &a, const B &b) -> decltype(a = a / b) {
+  return a = a / b;
+}
+template <class A, class B>
+constexpr auto operator%=(A &a, const B &b) -> decltype(a = a % b) {
+  return a = a % b;
+}
+template <class A, class B>
+constexpr auto operator|=(A &a, const B &b) -> decltype(a = a | b) {
+  return a = a | b;
+}
+template <class A, class B>
+constexpr auto operator^=(A &a, const B &b) -> decltype(a = a ^ b) {
+  return a = a ^ b;
+}
+template <class A, class B>
+constexpr auto operator&=(A &a, const B &b) -> decltype(a = a & b) {
+  return a = a & b;
+}
+template <class A, class B>
+constexpr auto operator<<=(A &a, const B &b) -> decltype(a = a << b) {
+  return a = a << b;
+}
+template <class A, class B>
+constexpr auto operator>>=(A &a, const B &b) -> decltype(a = a >> b) {
+  return a = a >> b;
+}
+/** @} */
+
+/** @addtogroup swizzles
+ * @ingroup LinAlg
+ * @brief Swizzles and subobjects.
+ *  @{
+ */
+/**
+ * @brief Returns a vector containing the specified ordered indices, e.g.
+ * linalg::swizzle<1, 2, 0>(vec4(4, 5, 6, 7)) == vec3(5, 6, 4)
+ */
+template <int... I, class T, int M>
+constexpr vec<T, sizeof...(I)> swizzle(const vec<T, M> &a) {
+  return {detail::getter<I>{}(a)...};
+}
+/**
+ * @brief Returns a vector containing the specified index range, e.g.
+ * linalg::subvec<1, 4>(vec4(4, 5, 6, 7)) == vec3(5, 6, 7)
+ */
+template <int I0, int I1, class T, int M>
+constexpr vec<T, I1 - I0> subvec(const vec<T, M> &a) {
+  return detail::swizzle(a, detail::make_seq<I0, I1>{});
+}
+/**
+ * @brief Returns a matrix containing the specified row and column range:
+ * linalg::submat<rowStart, colStart, rowEnd, colEnd>
+ */
+template <int I0, int J0, int I1, int J1, class T, int M, int N>
+constexpr mat<T, I1 - I0, J1 - J0> submat(const mat<T, M, N> &a) {
+  return detail::swizzle(a, detail::make_seq<I0, I1>{},
+                         detail::make_seq<J0, J1>{});
+}
+/** @} */
+
+/** @addtogroup unary_STL
+ * @ingroup LinAlg
+ * @brief Component-wise standard library math functions.
+ *  @{
+ */
+template <class A>
+constexpr apply_t<detail::std_isfinite, A> isfinite(const A &a) {
+  return apply(detail::std_isfinite{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_abs, A> abs(const A &a) {
+  return apply(detail::std_abs{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_floor, A> floor(const A &a) {
+  return apply(detail::std_floor{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_ceil, A> ceil(const A &a) {
+  return apply(detail::std_ceil{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_exp, A> exp(const A &a) {
+  return apply(detail::std_exp{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_log, A> log(const A &a) {
+  return apply(detail::std_log{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_log2, A> log2(const A &a) {
+  return apply(detail::std_log2{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_log10, A> log10(const A &a) {
+  return apply(detail::std_log10{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_sqrt, A> sqrt(const A &a) {
+  return apply(detail::std_sqrt{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_sin, A> sin(const A &a) {
+  return apply(detail::std_sin{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_cos, A> cos(const A &a) {
+  return apply(detail::std_cos{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_tan, A> tan(const A &a) {
+  return apply(detail::std_tan{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_asin, A> asin(const A &a) {
+  return apply(detail::std_asin{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_acos, A> acos(const A &a) {
+  return apply(detail::std_acos{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_atan, A> atan(const A &a) {
+  return apply(detail::std_atan{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_sinh, A> sinh(const A &a) {
+  return apply(detail::std_sinh{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_cosh, A> cosh(const A &a) {
+  return apply(detail::std_cosh{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_tanh, A> tanh(const A &a) {
+  return apply(detail::std_tanh{}, a);
+}
+template <class A>
+constexpr apply_t<detail::std_round, A> round(const A &a) {
+  return apply(detail::std_round{}, a);
+}
+/** @} */
+
+/** @addtogroup binary_STL
+ * @ingroup LinAlg
+ * @brief Component-wise standard library math functions. Either argument can be
+ * a vector or a scalar.
+ *  @{
+ */
+template <class A, class B>
+constexpr apply_t<detail::std_fmod, A, B> fmod(const A &a, const B &b) {
+  return apply(detail::std_fmod{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::std_pow, A, B> pow(const A &a, const B &b) {
+  return apply(detail::std_pow{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::std_atan2, A, B> atan2(const A &a, const B &b) {
+  return apply(detail::std_atan2{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::std_copysign, A, B> copysign(const A &a, const B &b) {
+  return apply(detail::std_copysign{}, a, b);
+}
+/** @} */
+
+/** @addtogroup relational
+ * @ingroup LinAlg
+ * @brief Component-wise relational functions on vectors. Either argument can be
+ * a vector or a scalar.
+ *  @{
+ */
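+// Illustrative example (not from the upstream docs): relational functions
+// return a vector of bools, one per component.
+//   less(float3{1, 5, 3}, 4.0f)   // vec<bool, 3>{true, false, true}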
+template <class A, class B>
+constexpr apply_t<detail::op_eq, A, B> equal(const A &a, const B &b) {
+  return apply(detail::op_eq{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_ne, A, B> nequal(const A &a, const B &b) {
+  return apply(detail::op_ne{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_lt, A, B> less(const A &a, const B &b) {
+  return apply(detail::op_lt{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_gt, A, B> greater(const A &a, const B &b) {
+  return apply(detail::op_gt{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_le, A, B> lequal(const A &a, const B &b) {
+  return apply(detail::op_le{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::op_ge, A, B> gequal(const A &a, const B &b) {
+  return apply(detail::op_ge{}, a, b);
+}
+/** @} */
+
+/** @addtogroup selection
+ * @ingroup LinAlg
+ * @brief Component-wise selection functions on vectors. Either argument can be
+ * a vector or a scalar.
+ *  @{
+ */
+template <class A, class B>
+constexpr apply_t<detail::min, A, B> min(const A &a, const B &b) {
+  return apply(detail::min{}, a, b);
+}
+template <class A, class B>
+constexpr apply_t<detail::max, A, B> max(const A &a, const B &b) {
+  return apply(detail::max{}, a, b);
+}
+/**
+ * @brief Clamps the components of x between l and h, provided l[i] < h[i].
+ */
+template <class X, class L, class H>
+constexpr apply_t<detail::clamp, X, L, H> clamp(const X &x, const L &l,
+                                                const H &h) {
+  return apply(detail::clamp{}, x, l, h);
+}
+/**
+ * @brief Returns the component from a if the corresponding component of p is
+ * true and from b otherwise.
+ */
+template <class P, class A, class B>
+constexpr apply_t<detail::select, P, A, B> select(const P &p, const A &a,
+                                                  const B &b) {
+  return apply(detail::select{}, p, a, b);
+}
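+// Illustrative example (not from the upstream docs): combining a component-wise
+// predicate with select().
+//   select(less(float3{1, 5, 3}, 4.0f), float3{1, 5, 3}, float3{0, 0, 0})
+//   // -> float3{1, 0, 3}: components of the first vector where the predicate
+//   // is true, components of the second where it is false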
+/**
+ * @brief Linear interpolation from a to b as t goes from 0 -> 1. Values beyond
+ * [a, b] will result if t is outside [0, 1].
+ */
+template <class A, class B, class T>
+constexpr apply_t<detail::lerp, A, B, T> lerp(const A &a, const B &b,
+                                              const T &t) {
+  return apply(detail::lerp{}, a, b, t);
+}
+/** @} */
+
+/** @addtogroup vec_algebra
+ * @ingroup LinAlg
+ * @brief Support for vector algebra.
+ *  @{
+ */
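+// Illustrative examples (not from the upstream docs):
+//   dot(float3{1, 0, 0}, float3{0, 1, 0})     // 0
+//   cross(float3{1, 0, 0}, float3{0, 1, 0})   // float3{0, 0, 1}
+//   length(float3{3, 4, 0})                   // 5
+//   normalize(float3{3, 4, 0})                // float3{0.6, 0.8, 0}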
+/**
+ * @brief shorthand for `cross({a.x,a.y,0}, {b.x,b.y,0}).z`
+ */
+template <class T>
+constexpr T cross(const vec<T, 2> &a, const vec<T, 2> &b) {
+  return a.x * b.y - a.y * b.x;
+}
+/**
+ * @brief shorthand for `cross({0,0,a.z}, {b.x,b.y,0}).xy()`
+ */
+template <class T>
+constexpr vec<T, 2> cross(T a, const vec<T, 2> &b) {
+  return {-a * b.y, a * b.x};
+}
+/**
+ * @brief shorthand for `cross({a.x,a.y,0}, {0,0,b.z}).xy()`
+ */
+template <class T>
+constexpr vec<T, 2> cross(const vec<T, 2> &a, T b) {
+  return {a.y * b, -a.x * b};
+}
+/**
+ * @brief the [cross or vector
+ * product](https://en.wikipedia.org/wiki/Cross_product) of vectors `a` and `b`
+ */
+template <class T>
+constexpr vec<T, 3> cross(const vec<T, 3> &a, const vec<T, 3> &b) {
+  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
+}
+/**
+ * @brief [dot or inner product](https://en.wikipedia.org/wiki/Dot_product) of
+ * vectors `a` and `b`
+ */
+template <class T, int M>
+constexpr T dot(const vec<T, M> &a, const vec<T, M> &b) {
+  return sum(a * b);
+}
+/**
+ * @brief *square* of the length or magnitude of vector `a`
+ */
+template <class T, int M>
+constexpr T length2(const vec<T, M> &a) {
+  return dot(a, a);
+}
+/**
+ * @brief length or magnitude of a vector `a`
+ */
+template <class T, int M>
+T length(const vec<T, M> &a) {
+  return std::sqrt(length2(a));
+}
+/**
+ * @brief unit length vector in the same direction as `a` (undefined for
+ * zero-length vectors)
+ */
+template <class T, int M>
+vec<T, M> normalize(const vec<T, M> &a) {
+  return a / length(a);
+}
+/**
+ * @brief *square* of the [Euclidean
+ * distance](https://en.wikipedia.org/wiki/Euclidean_distance) between points
+ * `a` and `b`
+ */
+template <class T, int M>
+constexpr T distance2(const vec<T, M> &a, const vec<T, M> &b) {
+  return length2(b - a);
+}
+/**
+ * @brief [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance)
+ * between points `a` and `b`
+ */
+template <class T, int M>
+T distance(const vec<T, M> &a, const vec<T, M> &b) {
+  return length(b - a);
+}
+/**
+ * @brief Return the angle in radians between two unit vectors.
+ */
+template <class T, int M>
+T uangle(const vec<T, M> &a, const vec<T, M> &b) {
+  T d = dot(a, b);
+  return d > 1 ? 0 : std::acos(d < -1 ? -1 : d);
+}
+/**
+ * @brief Return the angle in radians between two non-unit vectors.
+ */
+template <class T, int M>
+T angle(const vec<T, M> &a, const vec<T, M> &b) {
+  return uangle(normalize(a), normalize(b));
+}
+/**
+ * @brief vector `v` rotated counter-clockwise by the angle `a` in
+ * [radians](https://en.wikipedia.org/wiki/Radian)
+ */
+template <class T>
+vec<T, 2> rot(T a, const vec<T, 2> &v) {
+  const T s = std::sin(a), c = std::cos(a);
+  return {v.x * c - v.y * s, v.x * s + v.y * c};
+}
+/**
+ * @brief vector `v` rotated counter-clockwise by the angle `a` in
+ * [radians](https://en.wikipedia.org/wiki/Radian) around the X axis
+ */
+template <class T>
+vec<T, 3> rotx(T a, const vec<T, 3> &v) {
+  const T s = std::sin(a), c = std::cos(a);
+  return {v.x, v.y * c - v.z * s, v.y * s + v.z * c};
+}
+/**
+ * @brief vector `v` rotated counter-clockwise by the angle `a` in
+ * [radians](https://en.wikipedia.org/wiki/Radian) around the Y axis
+ */
+template <class T>
+vec<T, 3> roty(T a, const vec<T, 3> &v) {
+  const T s = std::sin(a), c = std::cos(a);
+  return {v.x * c + v.z * s, v.y, -v.x * s + v.z * c};
+}
+/**
+ * @brief vector `v` rotated counter-clockwise by the angle `a` in
+ * [radians](https://en.wikipedia.org/wiki/Radian) around the Z axis
+ */
+template <class T>
+vec<T, 3> rotz(T a, const vec<T, 3> &v) {
+  const T s = std::sin(a), c = std::cos(a);
+  return {v.x * c - v.y * s, v.x * s + v.y * c, v.z};
+}
+/**
+ * @brief shorthand for `normalize(lerp(a,b,t))`
+ */
+template <class T, int M>
+vec<T, M> nlerp(const vec<T, M> &a, const vec<T, M> &b, T t) {
+  return normalize(lerp(a, b, t));
+}
+/**
+ * @brief [spherical linear interpolation](https://en.wikipedia.org/wiki/Slerp)
+ * between unit vectors `a` and `b` (undefined for non-unit vectors) by
+ * parameter `t`
+ */
+template <class T, int M>
+vec<T, M> slerp(const vec<T, M> &a, const vec<T, M> &b, T t) {
+  T th = uangle(a, b);
+  return th == 0 ? a
+                 : a * (std::sin(th * (1 - t)) / std::sin(th)) +
+                       b * (std::sin(th * t) / std::sin(th));
+}
+/** @} */
+
+/** @addtogroup quaternions
+ * @ingroup LinAlg
+ * @brief Support for quaternion algebra using 4D vectors of arbitrary length,
+ * representing xi + yj + zk + w.
+ *  @{
+ */
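+// Illustrative examples (not from the upstream docs): quaternions are stored as
+// {x, y, z, w}, so the multiplicative identity is float4{0, 0, 0, 1}.
+//   qconj(float4{1, 2, 3, 4})                      // float4{-1, -2, -3, 4}
+//   qmul(float4{0, 0, 0, 1}, float4{1, 2, 3, 4})   // float4{1, 2, 3, 4}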
+/**
+ * @brief
+ * [conjugate](https://en.wikipedia.org/wiki/Quaternion#Conjugation,_the_norm,_and_reciprocal)
+ * of quaternion `q`
+ */
+template <class T>
+constexpr vec<T, 4> qconj(const vec<T, 4> &q) {
+  return {-q.x, -q.y, -q.z, q.w};
+}
+/**
+ * @brief [inverse or
+ * reciprocal](https://en.wikipedia.org/wiki/Quaternion#Conjugation,_the_norm,_and_reciprocal)
+ * of quaternion `q` (undefined for zero-length quaternions)
+ */
+template <class T>
+vec<T, 4> qinv(const vec<T, 4> &q) {
+  return qconj(q) / length2(q);
+}
+/**
+ * @brief
+ * [exponential](https://en.wikipedia.org/wiki/Quaternion#Exponential,_logarithm,_and_power_functions)
+ * of quaternion `q`
+ */
+template <class T>
+vec<T, 4> qexp(const vec<T, 4> &q) {
+  const auto v = q.xyz();
+  const auto vv = length(v);
+  return std::exp(q.w) *
+         vec<T, 4>{v * (vv > 0 ? std::sin(vv) / vv : 0), std::cos(vv)};
+}
+/**
+ * @brief
+ * [logarithm](https://en.wikipedia.org/wiki/Quaternion#Exponential,_logarithm,_and_power_functions)
+ * of quaternion `q`
+ */
+template <class T>
+vec<T, 4> qlog(const vec<T, 4> &q) {
+  const auto v = q.xyz();
+  const auto vv = length(v), qq = length(q);
+  return {v * (vv > 0 ? std::acos(q.w / qq) / vv : 0), std::log(qq)};
+}
+/**
+ * @brief quaternion `q` raised to the exponent `p`
+ */
+template <class T>
+vec<T, 4> qpow(const vec<T, 4> &q, const T &p) {
+  const auto v = q.xyz();
+  const auto vv = length(v), qq = length(q), th = std::acos(q.w / qq);
+  return std::pow(qq, p) *
+         vec<T, 4>{v * (vv > 0 ? std::sin(p * th) / vv : 0), std::cos(p * th)};
+}
+/**
+ * @brief [Hamilton
+ * product](https://en.wikipedia.org/wiki/Quaternion#Hamilton_product) of
+ * quaternions `a` and `b`
+ */
+template <class T>
+constexpr vec<T, 4> qmul(const vec<T, 4> &a, const vec<T, 4> &b) {
+  return {a.x * b.w + a.w * b.x + a.y * b.z - a.z * b.y,
+          a.y * b.w + a.w * b.y + a.z * b.x - a.x * b.z,
+          a.z * b.w + a.w * b.z + a.x * b.y - a.y * b.x,
+          a.w * b.w - a.x * b.x - a.y * b.y - a.z * b.z};
+}
+/**
+ * @brief Multiply as many input quaternions together as desired.
+ */
+template <class T, class... R>
+constexpr vec<T, 4> qmul(const vec<T, 4> &a, R... r) {
+  return qmul(a, qmul(r...));
+}
+/** @} */
+
+/** @addtogroup quaternion_rotation
+ * @ingroup LinAlg
+ * @brief Support for 3D spatial rotations using normalized quaternions.
+ *  @{
+ */
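+// Illustrative example (not from the upstream docs): rotate the x axis by
+// 90 degrees (pi/2 radians) about the z axis; the result is approximately the
+// y axis.
+//   float4 q = rotation_quat(float3{0, 0, 1}, 1.5707963f);
+//   float3 v = qrot(q, float3{1, 0, 0});   // ~ float3{0, 1, 0}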
+/**
+ * @brief efficient shorthand for `qrot(q, {1,0,0})`
+ */
+template <class T>
+constexpr vec<T, 3> qxdir(const vec<T, 4> &q) {
+  return {q.w * q.w + q.x * q.x - q.y * q.y - q.z * q.z,
+          (q.x * q.y + q.z * q.w) * 2, (q.z * q.x - q.y * q.w) * 2};
+}
+/**
+ * @brief efficient shorthand for `qrot(q, {0,1,0})`
+ */
+template <class T>
+constexpr vec<T, 3> qydir(const vec<T, 4> &q) {
+  return {(q.x * q.y - q.z * q.w) * 2,
+          q.w * q.w - q.x * q.x + q.y * q.y - q.z * q.z,
+          (q.y * q.z + q.x * q.w) * 2};
+}
+/**
+ * @brief efficient shorthand for `qrot(q, {0,0,1})`
+ */
+template <class T>
+constexpr vec<T, 3> qzdir(const vec<T, 4> &q) {
+  return {(q.z * q.x + q.y * q.w) * 2, (q.y * q.z - q.x * q.w) * 2,
+          q.w * q.w - q.x * q.x - q.y * q.y + q.z * q.z};
+}
+/**
+ * @brief Create an equivalent mat3 rotation matrix from the input quaternion.
+ */
+template <class T>
+constexpr mat<T, 3, 3> qmat(const vec<T, 4> &q) {
+  return {qxdir(q), qydir(q), qzdir(q)};
+}
+/**
+ * @brief Rotate a vector by a quaternion.
+ */
+template <class T>
+constexpr vec<T, 3> qrot(const vec<T, 4> &q, const vec<T, 3> &v) {
+  return qxdir(q) * v.x + qydir(q) * v.y + qzdir(q) * v.z;
+}
+/**
+ * @brief Return the angle in radians of the axis-angle representation of the
+ * input normalized quaternion.
+ */
+template <class T>
+T qangle(const vec<T, 4> &q) {
+  return std::atan2(length(q.xyz()), q.w) * 2;
+}
+/**
+ * @brief Return the normalized axis of the axis-angle representation of the
+ * input normalized quaternion.
+ */
+template <class T>
+vec<T, 3> qaxis(const vec<T, 4> &q) {
+  return normalize(q.xyz());
+}
+/**
+ * @brief Linear interpolation that takes the shortest path; this is not
+ * geometrically sensible, so consider qslerp instead.
+ */
+template <class T>
+vec<T, 4> qnlerp(const vec<T, 4> &a, const vec<T, 4> &b, T t) {
+  return nlerp(a, dot(a, b) < 0 ? -b : b, t);
+}
+/**
+ * @brief Spherical linear interpolation that takes the shortest path.
+ */
+template <class T>
+vec<T, 4> qslerp(const vec<T, 4> &a, const vec<T, 4> &b, T t) {
+  return slerp(a, dot(a, b) < 0 ? -b : b, t);
+}
+/**
+ * @brief Returns a normalized quaternion representing a rotation by angle in
+ * radians about the provided axis.
+ */
+template <class T>
+vec<T, 4> constexpr rotation_quat(const vec<T, 3> &axis, T angle) {
+  return {axis * std::sin(angle / 2), std::cos(angle / 2)};
+}
+/**
+ * @brief Returns a normalized quaternion representing the shortest rotation
+ * from orig vector to dest vector.
+ */
+template <class T>
+vec<T, 4> rotation_quat(const vec<T, 3> &orig, const vec<T, 3> &dest);
+/**
+ * @brief Returns a normalized quaternion representing the input rotation
+ * matrix, which should be orthonormal.
+ */
+template <class T>
+vec<T, 4> rotation_quat(const mat<T, 3, 3> &m);
+/** @} */
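+
+// Illustrative example (a sketch, assuming T = double): rotate the x-axis a
+// quarter turn about z using the functions above.
+//   vec<double, 4> q = rotation_quat(vec<double, 3>{0, 0, 1}, 1.5707963267948966);
+//   vec<double, 3> r = qrot(q, vec<double, 3>{1, 0, 0});  // r ~= {0, 1, 0}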
+
+/** @addtogroup mat_algebra
+ * @ingroup LinAlg
+ * @brief Support for matrix algebra.
+ *  @{
+ */
+template <class T, int M>
+constexpr vec<T, M> mul(const vec<T, M> &a, const T &b) {
+  return cmul(a, b);
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const T &b, const vec<T, M> &a) {
+  return cmul(b, a);
+}
+template <class T, int M, int N>
+constexpr mat<T, M, N> mul(const mat<T, M, N> &a, const T &b) {
+  return cmul(a, b);
+}
+template <class T, int M, int N>
+constexpr mat<T, M, N> mul(const T &b, const mat<T, M, N> &a) {
+  return cmul(b, a);
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const vec<T, M> &a, const vec<T, M> &b) {
+  return cmul(a, b);
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const mat<T, M, 1> &a, const vec<T, 1> &b) {
+  return a.x * b.x;
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const mat<T, M, 2> &a, const vec<T, 2> &b) {
+  return a.x * b.x + a.y * b.y;
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const mat<T, M, 3> &a, const vec<T, 3> &b) {
+  return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+template <class T, int M>
+constexpr vec<T, M> mul(const mat<T, M, 4> &a, const vec<T, 4> &b) {
+  return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+template <class T, int M, int N>
+constexpr mat<T, M, 1> mul(const mat<T, M, N> &a, const mat<T, N, 1> &b) {
+  return {mul(a, b.x)};
+}
+template <class T, int M, int N>
+constexpr mat<T, M, 2> mul(const mat<T, M, N> &a, const mat<T, N, 2> &b) {
+  return {mul(a, b.x), mul(a, b.y)};
+}
+template <class T, int M, int N>
+constexpr mat<T, M, 3> mul(const mat<T, M, N> &a, const mat<T, N, 3> &b) {
+  return {mul(a, b.x), mul(a, b.y), mul(a, b.z)};
+}
+template <class T, int M, int N>
+constexpr mat<T, M, 4> mul(const mat<T, M, N> &a, const mat<T, N, 4> &b) {
+  return {mul(a, b.x), mul(a, b.y), mul(a, b.z), mul(a, b.w)};
+}
+template <class T, int M, int N, int P>
+constexpr vec<T, M> mul(const mat<T, M, N> &a, const mat<T, N, P> &b,
+                        const vec<T, P> &c) {
+  return mul(mul(a, b), c);
+}
+template <class T, int M, int N, int P, int Q>
+constexpr mat<T, M, Q> mul(const mat<T, M, N> &a, const mat<T, N, P> &b,
+                           const mat<T, P, Q> &c) {
+  return mul(mul(a, b), c);
+}
+template <class T, int M, int N, int P, int Q>
+constexpr vec<T, M> mul(const mat<T, M, N> &a, const mat<T, N, P> &b,
+                        const mat<T, P, Q> &c, const vec<T, Q> &d) {
+  return mul(mul(a, b, c), d);
+}
+template <class T, int M, int N, int P, int Q, int R>
+constexpr mat<T, M, R> mul(const mat<T, M, N> &a, const mat<T, N, P> &b,
+                           const mat<T, P, Q> &c, const mat<T, Q, R> &d) {
+  return mul(mul(a, b, c), d);
+}
+template <class T, int M>
+constexpr mat<T, M, 1> outerprod(const vec<T, M> &a, const vec<T, 1> &b) {
+  return {a * b.x};
+}
+template <class T, int M>
+constexpr mat<T, M, 2> outerprod(const vec<T, M> &a, const vec<T, 2> &b) {
+  return {a * b.x, a * b.y};
+}
+template <class T, int M>
+constexpr mat<T, M, 3> outerprod(const vec<T, M> &a, const vec<T, 3> &b) {
+  return {a * b.x, a * b.y, a * b.z};
+}
+template <class T, int M>
+constexpr mat<T, M, 4> outerprod(const vec<T, M> &a, const vec<T, 4> &b) {
+  return {a * b.x, a * b.y, a * b.z, a * b.w};
+}
+template <class T>
+constexpr vec<T, 1> diagonal(const mat<T, 1, 1> &a) {
+  return {a.x.x};
+}
+template <class T>
+constexpr vec<T, 2> diagonal(const mat<T, 2, 2> &a) {
+  return {a.x.x, a.y.y};
+}
+template <class T>
+constexpr vec<T, 3> diagonal(const mat<T, 3, 3> &a) {
+  return {a.x.x, a.y.y, a.z.z};
+}
+template <class T>
+constexpr vec<T, 4> diagonal(const mat<T, 4, 4> &a) {
+  return {a.x.x, a.y.y, a.z.z, a.w.w};
+}
+template <class T, int N>
+constexpr T trace(const mat<T, N, N> &a) {
+  return sum(diagonal(a));
+}
+template <class T, int M>
+constexpr mat<T, M, 1> transpose(const mat<T, 1, M> &m) {
+  return {m.row(0)};
+}
+template <class T, int M>
+constexpr mat<T, M, 2> transpose(const mat<T, 2, M> &m) {
+  return {m.row(0), m.row(1)};
+}
+template <class T, int M>
+constexpr mat<T, M, 3> transpose(const mat<T, 3, M> &m) {
+  return {m.row(0), m.row(1), m.row(2)};
+}
+template <class T, int M>
+constexpr mat<T, M, 4> transpose(const mat<T, 4, M> &m) {
+  return {m.row(0), m.row(1), m.row(2), m.row(3)};
+}
+template <class T, int M>
+constexpr mat<T, 1, M> transpose(const vec<T, M> &m) {
+  return transpose(mat<T, M, 1>(m));
+}
+template <class T>
+constexpr mat<T, 1, 1> adjugate(const mat<T, 1, 1> &a) {
+  return {vec<T, 1>{1}};
+}
+template <class T>
+constexpr mat<T, 2, 2> adjugate(const mat<T, 2, 2> &a) {
+  return {{a.y.y, -a.x.y}, {-a.y.x, a.x.x}};
+}
+template <class T>
+constexpr mat<T, 3, 3> adjugate(const mat<T, 3, 3> &a);
+template <class T>
+constexpr mat<T, 4, 4> adjugate(const mat<T, 4, 4> &a);
+template <class T, int N>
+constexpr mat<T, N, N> comatrix(const mat<T, N, N> &a) {
+  return transpose(adjugate(a));
+}
+template <class T>
+constexpr T determinant(const mat<T, 1, 1> &a) {
+  return a.x.x;
+}
+template <class T>
+constexpr T determinant(const mat<T, 2, 2> &a) {
+  return a.x.x * a.y.y - a.x.y * a.y.x;
+}
+template <class T>
+constexpr T determinant(const mat<T, 3, 3> &a) {
+  return a.x.x * (a.y.y * a.z.z - a.z.y * a.y.z) +
+         a.x.y * (a.y.z * a.z.x - a.z.z * a.y.x) +
+         a.x.z * (a.y.x * a.z.y - a.z.x * a.y.y);
+}
+template <class T>
+constexpr T determinant(const mat<T, 4, 4> &a);
+template <class T, int N>
+constexpr mat<T, N, N> inverse(const mat<T, N, N> &a) {
+  return adjugate(a) / determinant(a);
+}
+/** @} */
+
+/** @addtogroup iterators
+ * @ingroup LinAlg
+ * @brief Vectors and matrices can be used as ranges.
+ *  @{
+ */
+template <class T, int M>
+T *begin(vec<T, M> &a) {
+  return &a.x;
+}
+template <class T, int M>
+const T *begin(const vec<T, M> &a) {
+  return &a.x;
+}
+template <class T, int M>
+T *end(vec<T, M> &a) {
+  return begin(a) + M;
+}
+template <class T, int M>
+const T *end(const vec<T, M> &a) {
+  return begin(a) + M;
+}
+template <class T, int M, int N>
+vec<T, M> *begin(mat<T, M, N> &a) {
+  return &a.x;
+}
+template <class T, int M, int N>
+const vec<T, M> *begin(const mat<T, M, N> &a) {
+  return &a.x;
+}
+template <class T, int M, int N>
+vec<T, M> *end(mat<T, M, N> &a) {
+  return begin(a) + N;
+}
+template <class T, int M, int N>
+const vec<T, M> *end(const mat<T, M, N> &a) {
+  return begin(a) + N;
+}
+/** @} */
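+
+// Illustrative example (a sketch): the begin()/end() overloads above make
+// vectors usable in range-based for loops, e.g.
+//   vec<double, 3> v{1, 2, 3};
+//   for (double &x : v) x *= 2;  // v becomes {2, 4, 6}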
+
+/** @addtogroup transforms
+ * @ingroup LinAlg
+ * @brief Factory functions for 3D spatial transformations.
+ *  @{
+ */
+enum fwd_axis {
+  neg_z,
+  pos_z
+};  // Should projection matrices be generated assuming forward is {0,0,-1} or
+    // {0,0,1}?
+enum z_range {
+  neg_one_to_one,
+  zero_to_one
+};  // Should projection matrices map z into the range of [-1,1] or [0,1]?
+template <class T>
+mat<T, 4, 4> translation_matrix(const vec<T, 3> &translation) {
+  return {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {translation, 1}};
+}
+template <class T>
+mat<T, 4, 4> rotation_matrix(const vec<T, 4> &rotation) {
+  return {{qxdir(rotation), 0},
+          {qydir(rotation), 0},
+          {qzdir(rotation), 0},
+          {0, 0, 0, 1}};
+}
+template <class T>
+mat<T, 4, 4> scaling_matrix(const vec<T, 3> &scaling) {
+  return {{scaling.x, 0, 0, 0},
+          {0, scaling.y, 0, 0},
+          {0, 0, scaling.z, 0},
+          {0, 0, 0, 1}};
+}
+template <class T>
+mat<T, 4, 4> pose_matrix(const vec<T, 4> &q, const vec<T, 3> &p) {
+  return {{qxdir(q), 0}, {qydir(q), 0}, {qzdir(q), 0}, {p, 1}};
+}
+template <class T>
+mat<T, 4, 4> lookat_matrix(const vec<T, 3> &eye, const vec<T, 3> &center,
+                           const vec<T, 3> &view_y_dir, fwd_axis fwd = neg_z);
+template <class T>
+mat<T, 4, 4> frustum_matrix(T x0, T x1, T y0, T y1, T n, T f,
+                            fwd_axis a = neg_z, z_range z = neg_one_to_one);
+template <class T>
+mat<T, 4, 4> perspective_matrix(T fovy, T aspect, T n, T f, fwd_axis a = neg_z,
+                                z_range z = neg_one_to_one) {
+  T y = n * std::tan(fovy / 2), x = y * aspect;
+  return frustum_matrix(-x, x, -y, y, n, f, a, z);
+}
+/** @} */
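+
+// Illustrative example (a sketch): a perspective projection with a roughly
+// 60-degree vertical field of view (fovy is in radians), forward along +z,
+// and depth mapped to [0, 1]:
+//   mat<double, 4, 4> proj =
+//       perspective_matrix(1.0472, 16.0 / 9.0, 0.1, 100.0, pos_z, zero_to_one);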
+
+/** @addtogroup array
+ * @ingroup LinAlg
+ * @brief Provide implicit conversion between linalg::vec<T,M> and
+ * std::array<T,M>.
+ *  @{
+ */
+template <class T>
+struct converter<vec<T, 1>, std::array<T, 1>> {
+  vec<T, 1> operator()(const std::array<T, 1> &a) const { return {a[0]}; }
+};
+template <class T>
+struct converter<vec<T, 2>, std::array<T, 2>> {
+  vec<T, 2> operator()(const std::array<T, 2> &a) const { return {a[0], a[1]}; }
+};
+template <class T>
+struct converter<vec<T, 3>, std::array<T, 3>> {
+  vec<T, 3> operator()(const std::array<T, 3> &a) const {
+    return {a[0], a[1], a[2]};
+  }
+};
+template <class T>
+struct converter<vec<T, 4>, std::array<T, 4>> {
+  vec<T, 4> operator()(const std::array<T, 4> &a) const {
+    return {a[0], a[1], a[2], a[3]};
+  }
+};
+
+template <class T>
+struct converter<std::array<T, 1>, vec<T, 1>> {
+  std::array<T, 1> operator()(const vec<T, 1> &a) const { return {a[0]}; }
+};
+template <class T>
+struct converter<std::array<T, 2>, vec<T, 2>> {
+  std::array<T, 2> operator()(const vec<T, 2> &a) const { return {a[0], a[1]}; }
+};
+template <class T>
+struct converter<std::array<T, 3>, vec<T, 3>> {
+  std::array<T, 3> operator()(const vec<T, 3> &a) const {
+    return {a[0], a[1], a[2]};
+  }
+};
+template <class T>
+struct converter<std::array<T, 4>, vec<T, 4>> {
+  std::array<T, 4> operator()(const vec<T, 4> &a) const {
+    return {a[0], a[1], a[2], a[3]};
+  }
+};
+/** @} */
+
+#ifdef MANIFOLD_DEBUG
+template <class T>
+std::ostream &operator<<(std::ostream &out, const vec<T, 1> &v) {
+  return out << '{' << v[0] << '}';
+}
+template <class T>
+std::ostream &operator<<(std::ostream &out, const vec<T, 2> &v) {
+  return out << '{' << v[0] << ',' << v[1] << '}';
+}
+template <class T>
+std::ostream &operator<<(std::ostream &out, const vec<T, 3> &v) {
+  return out << '{' << v[0] << ',' << v[1] << ',' << v[2] << '}';
+}
+template <class T>
+std::ostream &operator<<(std::ostream &out, const vec<T, 4> &v) {
+  return out << '{' << v[0] << ',' << v[1] << ',' << v[2] << ',' << v[3] << '}';
+}
+
+template <class T, int M>
+std::ostream &operator<<(std::ostream &out, const mat<T, M, 1> &m) {
+  return out << '{' << m[0] << '}';
+}
+template <class T, int M>
+std::ostream &operator<<(std::ostream &out, const mat<T, M, 2> &m) {
+  return out << '{' << m[0] << ',' << m[1] << '}';
+}
+template <class T, int M>
+std::ostream &operator<<(std::ostream &out, const mat<T, M, 3> &m) {
+  return out << '{' << m[0] << ',' << m[1] << ',' << m[2] << '}';
+}
+template <class T, int M>
+std::ostream &operator<<(std::ostream &out, const mat<T, M, 4> &m) {
+  return out << '{' << m[0] << ',' << m[1] << ',' << m[2] << ',' << m[3] << '}';
+}
+#endif
+}  // namespace linalg
+
+namespace std {
+/** @addtogroup hash
+ * @ingroup LinAlg
+ * @brief Provide specializations for std::hash<...> with linalg types.
+ *  @{
+ */
+template <class T>
+struct hash<linalg::vec<T, 1>> {
+  std::size_t operator()(const linalg::vec<T, 1> &v) const {
+    std::hash<T> h;
+    return h(v.x);
+  }
+};
+template <class T>
+struct hash<linalg::vec<T, 2>> {
+  std::size_t operator()(const linalg::vec<T, 2> &v) const {
+    std::hash<T> h;
+    return h(v.x) ^ (h(v.y) << 1);
+  }
+};
+template <class T>
+struct hash<linalg::vec<T, 3>> {
+  std::size_t operator()(const linalg::vec<T, 3> &v) const {
+    std::hash<T> h;
+    return h(v.x) ^ (h(v.y) << 1) ^ (h(v.z) << 2);
+  }
+};
+template <class T>
+struct hash<linalg::vec<T, 4>> {
+  std::size_t operator()(const linalg::vec<T, 4> &v) const {
+    std::hash<T> h;
+    return h(v.x) ^ (h(v.y) << 1) ^ (h(v.z) << 2) ^ (h(v.w) << 3);
+  }
+};
+
+template <class T, int M>
+struct hash<linalg::mat<T, M, 1>> {
+  std::size_t operator()(const linalg::mat<T, M, 1> &v) const {
+    std::hash<linalg::vec<T, M>> h;
+    return h(v.x);
+  }
+};
+template <class T, int M>
+struct hash<linalg::mat<T, M, 2>> {
+  std::size_t operator()(const linalg::mat<T, M, 2> &v) const {
+    std::hash<linalg::vec<T, M>> h;
+    return h(v.x) ^ (h(v.y) << M);
+  }
+};
+template <class T, int M>
+struct hash<linalg::mat<T, M, 3>> {
+  std::size_t operator()(const linalg::mat<T, M, 3> &v) const {
+    std::hash<linalg::vec<T, M>> h;
+    return h(v.x) ^ (h(v.y) << M) ^ (h(v.z) << (M * 2));
+  }
+};
+template <class T, int M>
+struct hash<linalg::mat<T, M, 4>> {
+  std::size_t operator()(const linalg::mat<T, M, 4> &v) const {
+    std::hash<linalg::vec<T, M>> h;
+    return h(v.x) ^ (h(v.y) << M) ^ (h(v.z) << (M * 2)) ^ (h(v.w) << (M * 3));
+  }
+};
+/** @} */
+}  // namespace std
+
+// Definitions of functions too long to be defined inline
+template <class T>
+constexpr linalg::mat<T, 3, 3> linalg::adjugate(const mat<T, 3, 3> &a) {
+  return {{a.y.y * a.z.z - a.z.y * a.y.z, a.z.y * a.x.z - a.x.y * a.z.z,
+           a.x.y * a.y.z - a.y.y * a.x.z},
+          {a.y.z * a.z.x - a.z.z * a.y.x, a.z.z * a.x.x - a.x.z * a.z.x,
+           a.x.z * a.y.x - a.y.z * a.x.x},
+          {a.y.x * a.z.y - a.z.x * a.y.y, a.z.x * a.x.y - a.x.x * a.z.y,
+           a.x.x * a.y.y - a.y.x * a.x.y}};
+}
+
+template <class T>
+constexpr linalg::mat<T, 4, 4> linalg::adjugate(const mat<T, 4, 4> &a) {
+  return {{a.y.y * a.z.z * a.w.w + a.w.y * a.y.z * a.z.w +
+               a.z.y * a.w.z * a.y.w - a.y.y * a.w.z * a.z.w -
+               a.z.y * a.y.z * a.w.w - a.w.y * a.z.z * a.y.w,
+           a.x.y * a.w.z * a.z.w + a.z.y * a.x.z * a.w.w +
+               a.w.y * a.z.z * a.x.w - a.w.y * a.x.z * a.z.w -
+               a.z.y * a.w.z * a.x.w - a.x.y * a.z.z * a.w.w,
+           a.x.y * a.y.z * a.w.w + a.w.y * a.x.z * a.y.w +
+               a.y.y * a.w.z * a.x.w - a.x.y * a.w.z * a.y.w -
+               a.y.y * a.x.z * a.w.w - a.w.y * a.y.z * a.x.w,
+           a.x.y * a.z.z * a.y.w + a.y.y * a.x.z * a.z.w +
+               a.z.y * a.y.z * a.x.w - a.x.y * a.y.z * a.z.w -
+               a.z.y * a.x.z * a.y.w - a.y.y * a.z.z * a.x.w},
+          {a.y.z * a.w.w * a.z.x + a.z.z * a.y.w * a.w.x +
+               a.w.z * a.z.w * a.y.x - a.y.z * a.z.w * a.w.x -
+               a.w.z * a.y.w * a.z.x - a.z.z * a.w.w * a.y.x,
+           a.x.z * a.z.w * a.w.x + a.w.z * a.x.w * a.z.x +
+               a.z.z * a.w.w * a.x.x - a.x.z * a.w.w * a.z.x -
+               a.z.z * a.x.w * a.w.x - a.w.z * a.z.w * a.x.x,
+           a.x.z * a.w.w * a.y.x + a.y.z * a.x.w * a.w.x +
+               a.w.z * a.y.w * a.x.x - a.x.z * a.y.w * a.w.x -
+               a.w.z * a.x.w * a.y.x - a.y.z * a.w.w * a.x.x,
+           a.x.z * a.y.w * a.z.x + a.z.z * a.x.w * a.y.x +
+               a.y.z * a.z.w * a.x.x - a.x.z * a.z.w * a.y.x -
+               a.y.z * a.x.w * a.z.x - a.z.z * a.y.w * a.x.x},
+          {a.y.w * a.z.x * a.w.y + a.w.w * a.y.x * a.z.y +
+               a.z.w * a.w.x * a.y.y - a.y.w * a.w.x * a.z.y -
+               a.z.w * a.y.x * a.w.y - a.w.w * a.z.x * a.y.y,
+           a.x.w * a.w.x * a.z.y + a.z.w * a.x.x * a.w.y +
+               a.w.w * a.z.x * a.x.y - a.x.w * a.z.x * a.w.y -
+               a.w.w * a.x.x * a.z.y - a.z.w * a.w.x * a.x.y,
+           a.x.w * a.y.x * a.w.y + a.w.w * a.x.x * a.y.y +
+               a.y.w * a.w.x * a.x.y - a.x.w * a.w.x * a.y.y -
+               a.y.w * a.x.x * a.w.y - a.w.w * a.y.x * a.x.y,
+           a.x.w * a.z.x * a.y.y + a.y.w * a.x.x * a.z.y +
+               a.z.w * a.y.x * a.x.y - a.x.w * a.y.x * a.z.y -
+               a.z.w * a.x.x * a.y.y - a.y.w * a.z.x * a.x.y},
+          {a.y.x * a.w.y * a.z.z + a.z.x * a.y.y * a.w.z +
+               a.w.x * a.z.y * a.y.z - a.y.x * a.z.y * a.w.z -
+               a.w.x * a.y.y * a.z.z - a.z.x * a.w.y * a.y.z,
+           a.x.x * a.z.y * a.w.z + a.w.x * a.x.y * a.z.z +
+               a.z.x * a.w.y * a.x.z - a.x.x * a.w.y * a.z.z -
+               a.z.x * a.x.y * a.w.z - a.w.x * a.z.y * a.x.z,
+           a.x.x * a.w.y * a.y.z + a.y.x * a.x.y * a.w.z +
+               a.w.x * a.y.y * a.x.z - a.x.x * a.y.y * a.w.z -
+               a.w.x * a.x.y * a.y.z - a.y.x * a.w.y * a.x.z,
+           a.x.x * a.y.y * a.z.z + a.z.x * a.x.y * a.y.z +
+               a.y.x * a.z.y * a.x.z - a.x.x * a.z.y * a.y.z -
+               a.y.x * a.x.y * a.z.z - a.z.x * a.y.y * a.x.z}};
+}
+
+template <class T>
+constexpr T linalg::determinant(const mat<T, 4, 4> &a) {
+  return a.x.x * (a.y.y * a.z.z * a.w.w + a.w.y * a.y.z * a.z.w +
+                  a.z.y * a.w.z * a.y.w - a.y.y * a.w.z * a.z.w -
+                  a.z.y * a.y.z * a.w.w - a.w.y * a.z.z * a.y.w) +
+         a.x.y * (a.y.z * a.w.w * a.z.x + a.z.z * a.y.w * a.w.x +
+                  a.w.z * a.z.w * a.y.x - a.y.z * a.z.w * a.w.x -
+                  a.w.z * a.y.w * a.z.x - a.z.z * a.w.w * a.y.x) +
+         a.x.z * (a.y.w * a.z.x * a.w.y + a.w.w * a.y.x * a.z.y +
+                  a.z.w * a.w.x * a.y.y - a.y.w * a.w.x * a.z.y -
+                  a.z.w * a.y.x * a.w.y - a.w.w * a.z.x * a.y.y) +
+         a.x.w * (a.y.x * a.w.y * a.z.z + a.z.x * a.y.y * a.w.z +
+                  a.w.x * a.z.y * a.y.z - a.y.x * a.z.y * a.w.z -
+                  a.w.x * a.y.y * a.z.z - a.z.x * a.w.y * a.y.z);
+}
+
+template <class T>
+linalg::vec<T, 4> linalg::rotation_quat(const vec<T, 3> &orig,
+                                        const vec<T, 3> &dest) {
+  T cosTheta = dot(orig, dest);
+  if (cosTheta >= 1 - std::numeric_limits<T>::epsilon()) {
+    return {0, 0, 0, 1};
+  }
+  if (cosTheta < -1 + std::numeric_limits<T>::epsilon()) {
+    vec<T, 3> axis = cross(vec<T, 3>(0, 0, 1), orig);
+    if (length2(axis) < std::numeric_limits<T>::epsilon())
+      axis = cross(vec<T, 3>(1, 0, 0), orig);
+    return rotation_quat(normalize(axis),
+                         3.14159265358979323846264338327950288);
+  }
+  vec<T, 3> axis = cross(orig, dest);
+  T s = std::sqrt((1 + cosTheta) * 2);
+  return {axis * (1 / s), s * 0.5};
+}
+
+template <class T>
+linalg::vec<T, 4> linalg::rotation_quat(const mat<T, 3, 3> &m) {
+  const vec<T, 4> q{m.x.x - m.y.y - m.z.z, m.y.y - m.x.x - m.z.z,
+                    m.z.z - m.x.x - m.y.y, m.x.x + m.y.y + m.z.z},
+      s[]{{1, m.x.y + m.y.x, m.z.x + m.x.z, m.y.z - m.z.y},
+          {m.x.y + m.y.x, 1, m.y.z + m.z.y, m.z.x - m.x.z},
+          {m.x.z + m.z.x, m.y.z + m.z.y, 1, m.x.y - m.y.x},
+          {m.y.z - m.z.y, m.z.x - m.x.z, m.x.y - m.y.x, 1}};
+  return copysign(normalize(sqrt(max(T(0), T(1) + q))), s[argmax(q)]);
+}
+
+template <class T>
+linalg::mat<T, 4, 4> linalg::lookat_matrix(const vec<T, 3> &eye,
+                                           const vec<T, 3> &center,
+                                           const vec<T, 3> &view_y_dir,
+                                           fwd_axis a) {
+  const vec<T, 3> f = normalize(center - eye), z = a == pos_z ? f : -f,
+                  x = normalize(cross(view_y_dir, z)), y = cross(z, x);
+  return inverse(mat<T, 4, 4>{{x, 0}, {y, 0}, {z, 0}, {eye, 1}});
+}
+
+template <class T>
+linalg::mat<T, 4, 4> linalg::frustum_matrix(T x0, T x1, T y0, T y1, T n, T f,
+                                            fwd_axis a, z_range z) {
+  const T s = a == pos_z ? T(1) : T(-1), o = z == neg_one_to_one ? n : 0;
+  return {{2 * n / (x1 - x0), 0, 0, 0},
+          {0, 2 * n / (y1 - y0), 0, 0},
+          {-s * (x0 + x1) / (x1 - x0), -s * (y0 + y1) / (y1 - y0),
+           s * (f + o) / (f - n), s},
+          {0, 0, -(n + o) * f / (f - n), 0}};
+}
+#endif

+ 435 - 0
thirdparty/manifold/include/manifold/manifold.h

@@ -0,0 +1,435 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <functional>
+#include <memory>
+
+#include "manifold/common.h"
+#include "manifold/vec_view.h"
+
+namespace manifold {
+
+/**
+ * @ingroup Debug
+ *
+ * Allows modification of the assertions checked in MANIFOLD_DEBUG mode.
+ *
+ * @return ExecutionParams&
+ */
+ExecutionParams& ManifoldParams();
+
+class CsgNode;
+class CsgLeafNode;
+
+/** @addtogroup Core
+ *  @brief The central classes of the library
+ *  @{
+ */
+
+/**
+ * @brief Mesh input/output suitable for pushing directly into graphics
+ * libraries.
+ *
+ * This may not be manifold since the verts are duplicated along property
+ * boundaries that do not match. The additional merge vectors store this missing
+ * information, allowing the manifold to be reconstructed. MeshGL is an alias
+ * for the standard single-precision version. Use MeshGL64 to output the full
+ * double precision that Manifold uses internally.
+ */
+template <typename Precision, typename I = uint32_t>
+struct MeshGLP {
+  /// Number of property vertices
+  I NumVert() const { return vertProperties.size() / numProp; };
+  /// Number of triangles
+  I NumTri() const { return triVerts.size() / 3; };
+  /// Number of properties per vertex, always >= 3.
+  I numProp = 3;
+  /// Flat, GL-style interleaved list of all vertex properties: propVal =
+  /// vertProperties[vert * numProp + propIdx]. The first three properties are
+  /// always the position x, y, z.
+  std::vector<Precision> vertProperties;
+  /// The vertex indices of the three triangle corners in CCW (from the outside)
+  /// order, for each triangle.
+  std::vector<I> triVerts;
+  /// Optional: A list of only the vertex indices that need to be merged to
+  /// reconstruct the manifold.
+  std::vector<I> mergeFromVert;
+  /// Optional: The same length as mergeFromVert, and the corresponding value
+  /// contains the vertex to merge with. It will have an identical position, but
+  /// the other properties may differ.
+  std::vector<I> mergeToVert;
+  /// Optional: Indicates runs of triangles that correspond to a particular
+  /// input mesh instance. The runs encompass all of triVerts and are sorted
+  /// by runOriginalID. Run i begins at triVerts[runIndex[i]] and ends at
+  /// triVerts[runIndex[i+1]]. All runIndex values are divisible by 3. The
+  /// returned runIndex is always one element longer than runOriginalID; as
+  /// input, equal lengths are also accepted, in which case triVerts.size() is
+  /// appended automatically.
+  std::vector<I> runIndex;
+  /// Optional: The OriginalID of the mesh this triangle run came from. This ID
+  /// is ideal for reapplying materials to the output mesh. Multiple runs may
+  /// have the same ID, e.g. representing different copies of the same input
+  /// mesh. If you create an input MeshGL that you want to be able to reference
+  /// as one or more originals, be sure to set unique values from ReserveIDs().
+  std::vector<uint32_t> runOriginalID;
+  /// Optional: For each run, a 3x4 transform is stored representing how the
+  /// corresponding original mesh was transformed to create this triangle run.
+  /// This matrix is stored in column-major order and the length of the overall
+  /// vector is 12 * runOriginalID.size().
+  std::vector<Precision> runTransform;
+  /// Optional: Length NumTri, contains the source face ID this
+  /// triangle comes from. When auto-generated, this ID will be a triangle index
+  /// into the original mesh. This index/ID is purely for external use (e.g.
+  /// recreating polygonal faces) and will not affect Manifold's algorithms.
+  std::vector<I> faceID;
+  /// Optional: The X-Y-Z-W weighted tangent vectors for smooth Refine(). If
+  /// non-empty, must be exactly four times as long as triVerts. Indexed as
+  /// 4 * (3 * tri + i) + j, i < 3, j < 4, representing the tangent along the
+  /// CCW edge that starts at vertex i of triangle tri. If empty, the mesh is
+  /// faceted.
+  std::vector<Precision> halfedgeTangent;
+  /// Tolerance for mesh simplification. When creating a Manifold, the tolerance
+  /// used will be the maximum of this and a baseline tolerance from the size of
+  /// the bounding box. Any edge shorter than tolerance may be collapsed.
+  /// Tolerance may be enlarged when floating point error accumulates.
+  Precision tolerance = 0;
+
+  MeshGLP() = default;
+
+  /**
+   * Updates the mergeFromVert and mergeToVert vectors in order to create a
+   * manifold solid. If the MeshGL is already manifold, no change will occur and
+   * the function will return false. Otherwise, this will merge verts along open
+   * edges within tolerance (the maximum of the MeshGL tolerance and the
+   * baseline bounding-box tolerance), keeping any from the existing merge
+   * vectors, and return true.
+   *
+   * There is no guarantee the result will be manifold - this is a best-effort
+   * helper function designed primarily to aid in the case where a manifold
+   * multi-material MeshGL was produced, but its merge vectors were lost due to
+   * a round-trip through a file format. Constructing a Manifold from the result
+   * will report an error status if it is not manifold.
+   */
+  bool Merge();
+
+  /**
+   * Returns the x, y, z position of the ith vertex.
+   *
+   * @param v vertex index.
+   */
+  la::vec<Precision, 3> GetVertPos(size_t v) const {
+    size_t offset = v * numProp;
+    return la::vec<Precision, 3>(vertProperties[offset],
+                                 vertProperties[offset + 1],
+                                 vertProperties[offset + 2]);
+  }
+
+  /**
+   * Returns the three vertex indices of the ith triangle.
+   *
+   * @param t triangle index.
+   */
+  la::vec<I, 3> GetTriVerts(size_t t) const {
+    size_t offset = 3 * t;
+    return la::vec<I, 3>(triVerts[offset], triVerts[offset + 1],
+                         triVerts[offset + 2]);
+  }
+
+  /**
+   * Returns the x, y, z, w tangent of the ith halfedge.
+   *
+   * @param h halfedge index (3 * triangle_index + [0|1|2]).
+   */
+  la::vec<Precision, 4> GetTangent(size_t h) const {
+    size_t offset = 4 * h;
+    return la::vec<Precision, 4>(
+        halfedgeTangent[offset], halfedgeTangent[offset + 1],
+        halfedgeTangent[offset + 2], halfedgeTangent[offset + 3]);
+  }
+};
+
+/**
+ * @brief Single-precision - ideal for most uses, especially graphics.
+ */
+using MeshGL = MeshGLP<float>;
+/**
+ * @brief Double-precision, 64-bit indices - best for huge meshes.
+ */
+using MeshGL64 = MeshGLP<double, uint64_t>;
+
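+// Illustrative sketch: MeshGL64 preserves Manifold's full double precision
+// across an I/O round trip, e.g.
+//   MeshGL64 mesh = someManifold.GetMeshGL64();
+//   Manifold rebuilt(mesh);
+// where someManifold stands for any previously constructed Manifold.
+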
+/**
+ * @brief This library's internal representation of an oriented, 2-manifold,
+ * triangle mesh - a simple boundary-representation of a solid object. Use this
+ * class to store and operate on solids, and use MeshGL for input and output.
+ *
+ * In addition to storing geometric data, a Manifold can also store an arbitrary
+ * number of vertex properties. These could be anything, e.g. normals, UV
+ * coordinates, colors, etc, but this library is completely agnostic. All
+ * properties are merely float values indexed by channel number. It is up to the
+ * user to associate channel numbers with meaning.
+ *
+ * Manifold allows vertex properties to be shared for efficient storage, or to
+ * have multiple property verts associated with a single geometric vertex,
+ * allowing sudden property changes, e.g. at Boolean intersections, without
+ * sacrificing manifoldness.
+ *
+ * Manifolds also keep track of their relationships to their inputs, via
+ * OriginalIDs and the faceIDs and transforms accessible through MeshGL. This
+ * allows object-level properties to be re-associated with the output after many
+ * operations, particularly useful for materials. Since separate objects'
+ * properties are not mixed, there is no requirement that channels have
+ * consistent meaning between different inputs.
+ */
+class Manifold {
+ public:
+  /** @name Basics
+   *  Copy / move / assignment
+   */
+  ///@{
+  Manifold();
+  ~Manifold();
+  Manifold(const Manifold& other);
+  Manifold& operator=(const Manifold& other);
+  Manifold(Manifold&&) noexcept;
+  Manifold& operator=(Manifold&&) noexcept;
+  ///@}
+
+  /** @name Input & Output
+   *  Create and retrieve arbitrary manifolds
+   */
+  ///@{
+  Manifold(const MeshGL&);
+  Manifold(const MeshGL64&);
+  MeshGL GetMeshGL(int normalIdx = -1) const;
+  MeshGL64 GetMeshGL64(int normalIdx = -1) const;
+  ///@}
+
+  /** @name Constructors
+   *  Topological ops, primitives, and SDF
+   */
+  ///@{
+  std::vector<Manifold> Decompose() const;
+  static Manifold Compose(const std::vector<Manifold>&);
+  static Manifold Tetrahedron();
+  static Manifold Cube(vec3 size = vec3(1.0), bool center = false);
+  static Manifold Cylinder(double height, double radiusLow,
+                           double radiusHigh = -1.0, int circularSegments = 0,
+                           bool center = false);
+  static Manifold Sphere(double radius, int circularSegments = 0);
+  static Manifold LevelSet(std::function<double(vec3)> sdf, Box bounds,
+                           double edgeLength, double level = 0,
+                           double tolerance = -1, bool canParallel = true);
+  ///@}
+
+  /** @name Polygons
+   * 3D to 2D and 2D to 3D
+   */
+  ///@{
+  Polygons Slice(double height = 0) const;
+  Polygons Project() const;
+  static Manifold Extrude(const Polygons& crossSection, double height,
+                          int nDivisions = 0, double twistDegrees = 0.0,
+                          vec2 scaleTop = vec2(1.0));
+  static Manifold Revolve(const Polygons& crossSection,
+                          int circularSegments = 0,
+                          double revolveDegrees = 360.0f);
+  ///@}
+
+  enum class Error {
+    NoError,
+    NonFiniteVertex,
+    NotManifold,
+    VertexOutOfBounds,
+    PropertiesWrongLength,
+    MissingPositionProperties,
+    MergeVectorsDifferentLengths,
+    MergeIndexOutOfBounds,
+    TransformWrongLength,
+    RunIndexWrongLength,
+    FaceIDWrongLength,
+    InvalidConstruction,
+  };
+
+  /** @name Information
+   *  Details of the manifold
+   */
+  ///@{
+  Error Status() const;
+  bool IsEmpty() const;
+  size_t NumVert() const;
+  size_t NumEdge() const;
+  size_t NumTri() const;
+  size_t NumProp() const;
+  size_t NumPropVert() const;
+  Box BoundingBox() const;
+  int Genus() const;
+  double GetTolerance() const;
+  ///@}
+
+  /** @name Measurement
+   */
+  ///@{
+  double SurfaceArea() const;
+  double Volume() const;
+  double MinGap(const Manifold& other, double searchLength) const;
+  ///@}
+
+  /** @name Mesh ID
+   *  Details of the manifold's relation to its input meshes, for the purposes
+   * of reapplying mesh properties.
+   */
+  ///@{
+  int OriginalID() const;
+  Manifold AsOriginal() const;
+  static uint32_t ReserveIDs(uint32_t);
+  ///@}
+
+  /** @name Transformations
+   */
+  ///@{
+  Manifold Translate(vec3) const;
+  Manifold Scale(vec3) const;
+  Manifold Rotate(double xDegrees, double yDegrees = 0.0,
+                  double zDegrees = 0.0) const;
+  Manifold Mirror(vec3) const;
+  Manifold Transform(const mat3x4&) const;
+  Manifold Warp(std::function<void(vec3&)>) const;
+  Manifold WarpBatch(std::function<void(VecView<vec3>)>) const;
+  Manifold SetTolerance(double) const;
+  ///@}
+
+  /** @name Boolean
+   *  Combine two manifolds
+   */
+  ///@{
+  Manifold Boolean(const Manifold& second, OpType op) const;
+  static Manifold BatchBoolean(const std::vector<Manifold>& manifolds,
+                               OpType op);
+  // Boolean operation shorthand
+  Manifold operator+(const Manifold&) const;  // Add (Union)
+  Manifold& operator+=(const Manifold&);
+  Manifold operator-(const Manifold&) const;  // Subtract (Difference)
+  Manifold& operator-=(const Manifold&);
+  Manifold operator^(const Manifold&) const;  // Intersect
+  Manifold& operator^=(const Manifold&);
+  std::pair<Manifold, Manifold> Split(const Manifold&) const;
+  std::pair<Manifold, Manifold> SplitByPlane(vec3 normal,
+                                             double originOffset) const;
+  Manifold TrimByPlane(vec3 normal, double originOffset) const;
+  ///@}
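+
+  // Illustrative example (a sketch, not upstream documentation): the operator
+  // shorthands compose like ordinary arithmetic, e.g.
+  //   Manifold box = Manifold::Cube(vec3(1.0), true);
+  //   Manifold ball = Manifold::Sphere(0.6, 32);
+  //   Manifold notched = box - ball;   // difference
+  //   Manifold merged = box + ball;    // union
+  //   Manifold common = box ^ ball;    // intersection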
+
+  /** @name Properties
+   * Create and modify vertex properties.
+   */
+  ///@{
+  Manifold SetProperties(
+      int numProp,
+      std::function<void(double*, vec3, const double*)> propFunc) const;
+  Manifold CalculateCurvature(int gaussianIdx, int meanIdx) const;
+  Manifold CalculateNormals(int normalIdx, double minSharpAngle = 60) const;
+  ///@}
+
+  /** @name Smoothing
+   * Smooth meshes by calculating tangent vectors and refining to a higher
+   * triangle count.
+   */
+  ///@{
+  Manifold Refine(int) const;
+  Manifold RefineToLength(double) const;
+  Manifold RefineToTolerance(double) const;
+  Manifold SmoothByNormals(int normalIdx) const;
+  Manifold SmoothOut(double minSharpAngle = 60, double minSmoothness = 0) const;
+  static Manifold Smooth(const MeshGL&,
+                         const std::vector<Smoothness>& sharpenedEdges = {});
+  static Manifold Smooth(const MeshGL64&,
+                         const std::vector<Smoothness>& sharpenedEdges = {});
+  ///@}
+
+  /** @name Convex Hull
+   */
+  ///@{
+  Manifold Hull() const;
+  static Manifold Hull(const std::vector<Manifold>& manifolds);
+  static Manifold Hull(const std::vector<vec3>& pts);
+  ///@}
+
+  /** @name Testing Hooks
+   *  These are just for internal testing.
+   */
+  ///@{
+  bool MatchesTriNormals() const;
+  size_t NumDegenerateTris() const;
+  size_t NumOverlaps(const Manifold& second) const;
+  double GetEpsilon() const;
+  ///@}
+
+  struct Impl;
+
+ private:
+  Manifold(std::shared_ptr<CsgNode> pNode_);
+  Manifold(std::shared_ptr<Impl> pImpl_);
+  static Manifold Invalid();
+  mutable std::shared_ptr<CsgNode> pNode_;
+
+  CsgLeafNode& GetCsgLeafNode() const;
+};
+/** @} */
+
+/** @addtogroup Debug
+ *  @ingroup Optional
+ *  @brief Debugging features
+ *
+ * These features require compiler flags to be enabled. Assertions are enabled
+ * with the MANIFOLD_DEBUG flag and then controlled with ExecutionParams.
+ *  @{
+ */
+#ifdef MANIFOLD_DEBUG
+inline std::string ToString(const Manifold::Error& error) {
+  switch (error) {
+    case Manifold::Error::NoError:
+      return "No Error";
+    case Manifold::Error::NonFiniteVertex:
+      return "Non Finite Vertex";
+    case Manifold::Error::NotManifold:
+      return "Not Manifold";
+    case Manifold::Error::VertexOutOfBounds:
+      return "Vertex Out Of Bounds";
+    case Manifold::Error::PropertiesWrongLength:
+      return "Properties Wrong Length";
+    case Manifold::Error::MissingPositionProperties:
+      return "Missing Position Properties";
+    case Manifold::Error::MergeVectorsDifferentLengths:
+      return "Merge Vectors Different Lengths";
+    case Manifold::Error::MergeIndexOutOfBounds:
+      return "Merge Index Out Of Bounds";
+    case Manifold::Error::TransformWrongLength:
+      return "Transform Wrong Length";
+    case Manifold::Error::RunIndexWrongLength:
+      return "Run Index Wrong Length";
+    case Manifold::Error::FaceIDWrongLength:
+      return "Face ID Wrong Length";
+    case Manifold::Error::InvalidConstruction:
+      return "Invalid Construction";
+    default:
+      return "Unknown Error";
+  };
+}
+
+inline std::ostream& operator<<(std::ostream& stream,
+                                const Manifold::Error& error) {
+  return stream << ToString(error);
+}
+#endif
+/** @} */
+}  // namespace manifold

+ 66 - 0
thirdparty/manifold/include/manifold/optional_assert.h

@@ -0,0 +1,66 @@
+// Copyright 2022 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#ifdef MANIFOLD_DEBUG
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+/** @addtogroup Debug
+ * @{
+ */
+struct userErr : public virtual std::runtime_error {
+  using std::runtime_error::runtime_error;
+};
+struct topologyErr : public virtual std::runtime_error {
+  using std::runtime_error::runtime_error;
+};
+struct geometryErr : public virtual std::runtime_error {
+  using std::runtime_error::runtime_error;
+};
+using logicErr = std::logic_error;
+
+template <typename Ex>
+void AssertFail(const char* file, int line, const char* cond, const char* msg) {
+  std::ostringstream output;
+  output << "Error in file: " << file << " (" << line << "): \'" << cond
+         << "\' is false: " << msg;
+  throw Ex(output.str());
+}
+
+template <typename Ex>
+void AssertFail(const char* file, int line, const std::string& cond,
+                const std::string& msg) {
+  std::ostringstream output;
+  output << "Error in file: " << file << " (" << line << "): \'" << cond
+         << "\' is false: " << msg;
+  throw Ex(output.str());
+}
+
+// DEBUG_ASSERT is slightly slower due to the function call, but gives more
+// detailed info.
+#define DEBUG_ASSERT(condition, EX, msg) \
+  if (!(condition)) AssertFail<EX>(__FILE__, __LINE__, #condition, msg);
+// ASSERT has almost no overhead, so it is better suited to frequent checks
+// such as vector bounds checking.
+#define ASSERT(condition, EX) \
+  if (!(condition)) throw(EX);
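+
+// Illustrative usage (a sketch): with MANIFOLD_DEBUG defined,
+//   DEBUG_ASSERT(idx < size, logicErr, "index out of range");
+// throws a std::logic_error that names the file, line, and failed condition,
+// while
+//   ASSERT(idx < size, std::out_of_range("index out of range"));
+// throws the given exception object directly.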
+#else
+#define DEBUG_ASSERT(condition, EX, msg)
+#define ASSERT(condition, EX)
+#endif
+/** @} */

+ 62 - 0
thirdparty/manifold/include/manifold/polygon.h

@@ -0,0 +1,62 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "manifold/common.h"
+
+namespace manifold {
+
+/** @addtogroup Structs
+ *  @{
+ */
+
+/**
+ * @brief Polygon vertex.
+ */
+struct PolyVert {
+  /// X-Y position
+  vec2 pos;
+  /// ID or index into another vertex vector
+  int idx;
+};
+
+/**
+ * @brief Single polygon contour, wound CCW, with indices. First and last point
+ * are implicitly connected. Should ensure all input is
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid).
+ */
+using SimplePolygonIdx = std::vector<PolyVert>;
+
+/**
+ * @brief Set of indexed polygons with holes. Order of contours is arbitrary.
+ * Can contain any depth of nested holes and any number of separate polygons.
+ * Should ensure all input is
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid).
+ */
+using PolygonsIdx = std::vector<SimplePolygonIdx>;
+/** @} */
+
+/** @addtogroup Triangulation
+ *  @ingroup Core
+ * @brief Polygon triangulation
+ *  @{
+ */
+std::vector<ivec3> TriangulateIdx(const PolygonsIdx &polys,
+                                  double epsilon = -1);
+
+std::vector<ivec3> Triangulate(const Polygons &polygons, double epsilon = -1);
+
+ExecutionParams &PolygonParams();
+/** @} */
+}  // namespace manifold

+ 151 - 0
thirdparty/manifold/include/manifold/vec_view.h

@@ -0,0 +1,151 @@
+// Copyright 2023 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include "manifold/optional_assert.h"
+
+namespace manifold {
+
+/**
+ * A non-owning view of a Vec that supports offset (sub-view) operations.
+ * It is invalidated when the original vector is destroyed or changes length.
+ * Roughly equivalent to std::span<T> from C++20.
+ */
+template <typename T>
+class VecView {
+ public:
+  using Iter = T *;
+  using IterC = const T *;
+
+  VecView() : ptr_(nullptr), size_(0) {}
+
+  VecView(T *ptr, size_t size) : ptr_(ptr), size_(size) {}
+
+  VecView(const std::vector<std::remove_cv_t<T>> &v)
+      : ptr_(v.data()), size_(v.size()) {}
+
+  VecView(const VecView &other) {
+    ptr_ = other.ptr_;
+    size_ = other.size_;
+  }
+
+  VecView &operator=(const VecView &other) {
+    ptr_ = other.ptr_;
+    size_ = other.size_;
+    return *this;
+  }
+
+  // allows conversion to a const VecView
+  operator VecView<const T>() const { return {ptr_, size_}; }
+
+  inline const T &operator[](size_t i) const {
+    ASSERT(i < size_, std::out_of_range("Vec out of range"));
+    return ptr_[i];
+  }
+
+  inline T &operator[](size_t i) {
+    ASSERT(i < size_, std::out_of_range("Vec out of range"));
+    return ptr_[i];
+  }
+
+  IterC cbegin() const { return ptr_; }
+  IterC cend() const { return ptr_ + size_; }
+
+  IterC begin() const { return cbegin(); }
+  IterC end() const { return cend(); }
+
+  Iter begin() { return ptr_; }
+  Iter end() { return ptr_ + size_; }
+
+  const T &front() const {
+    ASSERT(size_ != 0,
+           std::out_of_range("Attempt to take the front of an empty vector"));
+    return ptr_[0];
+  }
+
+  const T &back() const {
+    ASSERT(size_ != 0,
+           std::out_of_range("Attempt to take the back of an empty vector"));
+    return ptr_[size_ - 1];
+  }
+
+  T &front() {
+    ASSERT(size_ != 0,
+           std::out_of_range("Attempt to take the front of an empty vector"));
+    return ptr_[0];
+  }
+
+  T &back() {
+    ASSERT(size_ != 0,
+           std::out_of_range("Attempt to take the back of an empty vector"));
+    return ptr_[size_ - 1];
+  }
+
+  size_t size() const { return size_; }
+
+  bool empty() const { return size_ == 0; }
+
+  VecView<T> view(size_t offset = 0,
+                  size_t length = std::numeric_limits<size_t>::max()) {
+    if (length == std::numeric_limits<size_t>::max())
+      length = this->size_ - offset;
+    ASSERT(length >= 0, std::out_of_range("Vec::view out of range"));
+    ASSERT(offset + length <= this->size_ && offset >= 0,
+           std::out_of_range("Vec::view out of range"));
+    return VecView<T>(this->ptr_ + offset, length);
+  }
+
+  VecView<const T> cview(
+      size_t offset = 0,
+      size_t length = std::numeric_limits<size_t>::max()) const {
+    if (length == std::numeric_limits<size_t>::max())
+      length = this->size_ - offset;
+    ASSERT(length >= 0, std::out_of_range("Vec::cview out of range"));
+    ASSERT(offset + length <= this->size_ && offset >= 0,
+           std::out_of_range("Vec::cview out of range"));
+    return VecView<const T>(this->ptr_ + offset, length);
+  }
+
+  VecView<const T> view(
+      size_t offset = 0,
+      size_t length = std::numeric_limits<size_t>::max()) const {
+    return cview(offset, length);
+  }
+
+  T *data() { return this->ptr_; }
+
+  const T *data() const { return this->ptr_; }
+
+#ifdef MANIFOLD_DEBUG
+  void Dump() const {
+    std::cout << "Vec = " << std::endl;
+    for (size_t i = 0; i < size(); ++i) {
+      std::cout << i << ", " << ptr_[i] << ", " << std::endl;
+    }
+    std::cout << std::endl;
+  }
+#endif
+
+ protected:
+  T *ptr_ = nullptr;
+  size_t size_ = 0;
+};
+
+}  // namespace manifold

+ 599 - 0
thirdparty/manifold/src/boolean3.cpp

@@ -0,0 +1,599 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./boolean3.h"
+
+#include <limits>
+
+#include "./parallel.h"
+
+using namespace manifold;
+
+namespace {
+
+// These two functions (Interpolate and Intersect) are the only places where
+// floating-point operations take place in the whole Boolean function. These
+// are carefully designed to minimize rounding error and to eliminate it at edge
+// cases to ensure consistency.
+
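+// Interpolate returns the (y, z) coordinates of the segment pL->pR evaluated
+// at the given x, using the nearer endpoint as the base of each fma to limit
+// rounding error.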
+vec2 Interpolate(vec3 pL, vec3 pR, double x) {
+  const double dxL = x - pL.x;
+  const double dxR = x - pR.x;
+  DEBUG_ASSERT(dxL * dxR <= 0, logicErr,
+               "Boolean manifold error: not in domain");
+  const bool useL = fabs(dxL) < fabs(dxR);
+  const vec3 dLR = pR - pL;
+  const double lambda = (useL ? dxL : dxR) / dLR.x;
+  if (!std::isfinite(lambda) || !std::isfinite(dLR.y) || !std::isfinite(dLR.z))
+    return vec2(pL.y, pL.z);
+  vec2 yz;
+  yz[0] = fma(lambda, dLR.y, useL ? pL.y : pR.y);
+  yz[1] = fma(lambda, dLR.z, useL ? pL.z : pR.z);
+  return yz;
+}
+
+vec4 Intersect(const vec3 &pL, const vec3 &pR, const vec3 &qL, const vec3 &qR) {
+  const double dyL = qL.y - pL.y;
+  const double dyR = qR.y - pR.y;
+  DEBUG_ASSERT(dyL * dyR <= 0, logicErr,
+               "Boolean manifold error: no intersection");
+  const bool useL = fabs(dyL) < fabs(dyR);
+  const double dx = pR.x - pL.x;
+  double lambda = (useL ? dyL : dyR) / (dyL - dyR);
+  if (!std::isfinite(lambda)) lambda = 0.0;
+  vec4 xyzz;
+  xyzz.x = fma(lambda, dx, useL ? pL.x : pR.x);
+  const double pDy = pR.y - pL.y;
+  const double qDy = qR.y - qL.y;
+  const bool useP = fabs(pDy) < fabs(qDy);
+  xyzz.y = fma(lambda, useP ? pDy : qDy,
+               useL ? (useP ? pL.y : qL.y) : (useP ? pR.y : qR.y));
+  xyzz.z = fma(lambda, pR.z - pL.z, useL ? pL.z : pR.z);
+  xyzz.w = fma(lambda, qR.z - qL.z, useL ? qL.z : qR.z);
+  return xyzz;
+}
+
+template <const bool inverted>
+struct CopyFaceEdges {
+  const SparseIndices &p1q1;
+  // x can be either vert or edge (0 or 1).
+  SparseIndices &pXq1;
+  VecView<const Halfedge> halfedgesQ;
+  const size_t offset;
+
+  void operator()(const size_t i) {
+    int idx = 3 * (i + offset);
+    int pX = p1q1.Get(i, inverted);
+    int q2 = p1q1.Get(i, !inverted);
+
+    for (const int j : {0, 1, 2}) {
+      const int q1 = 3 * q2 + j;
+      const Halfedge edge = halfedgesQ[q1];
+      int a = pX;
+      int b = edge.IsForward() ? q1 : edge.pairedHalfedge;
+      if (inverted) std::swap(a, b);
+      pXq1.Set(idx + static_cast<size_t>(j), a, b);
+    }
+  }
+};
+
+SparseIndices Filter11(const Manifold::Impl &inP, const Manifold::Impl &inQ,
+                       const SparseIndices &p1q2, const SparseIndices &p2q1) {
+  ZoneScoped;
+  SparseIndices p1q1(3 * p1q2.size() + 3 * p2q1.size());
+  for_each_n(autoPolicy(p1q2.size(), 1e5), countAt(0_uz), p1q2.size(),
+             CopyFaceEdges<false>({p1q2, p1q1, inQ.halfedge_, 0_uz}));
+  for_each_n(autoPolicy(p2q1.size(), 1e5), countAt(0_uz), p2q1.size(),
+             CopyFaceEdges<true>({p2q1, p1q1, inP.halfedge_, p1q2.size()}));
+  p1q1.Unique();
+  return p1q1;
+}
+
+inline bool Shadows(double p, double q, double dir) {
+  return p == q ? dir < 0 : p < q;
+}
+
+inline std::pair<int, vec2> Shadow01(
+    const int p0, const int q1, VecView<const vec3> vertPosP,
+    VecView<const vec3> vertPosQ, VecView<const Halfedge> halfedgeQ,
+    const double expandP, VecView<const vec3> normalP, const bool reverse) {
+  const int q1s = halfedgeQ[q1].startVert;
+  const int q1e = halfedgeQ[q1].endVert;
+  const double p0x = vertPosP[p0].x;
+  const double q1sx = vertPosQ[q1s].x;
+  const double q1ex = vertPosQ[q1e].x;
+  int s01 = reverse ? Shadows(q1sx, p0x, expandP * normalP[q1s].x) -
+                          Shadows(q1ex, p0x, expandP * normalP[q1e].x)
+                    : Shadows(p0x, q1ex, expandP * normalP[p0].x) -
+                          Shadows(p0x, q1sx, expandP * normalP[p0].x);
+  vec2 yz01(NAN);
+
+  if (s01 != 0) {
+    yz01 = Interpolate(vertPosQ[q1s], vertPosQ[q1e], vertPosP[p0].x);
+    if (reverse) {
+      vec3 diff = vertPosQ[q1s] - vertPosP[p0];
+      const double start2 = la::dot(diff, diff);
+      diff = vertPosQ[q1e] - vertPosP[p0];
+      const double end2 = la::dot(diff, diff);
+      const double dir = start2 < end2 ? normalP[q1s].y : normalP[q1e].y;
+      if (!Shadows(yz01[0], vertPosP[p0].y, expandP * dir)) s01 = 0;
+    } else {
+      if (!Shadows(vertPosP[p0].y, yz01[0], expandP * normalP[p0].y)) s01 = 0;
+    }
+  }
+  return std::make_pair(s01, yz01);
+}
+
+// Monobound quaternary search; see
+// https://github.com/scandum/binary_search/blob/master/README.md
+// Much faster than a standard binary search on large arrays.
+size_t monobound_quaternary_search(VecView<const int64_t> array, int64_t key) {
+  if (array.size() == 0) {
+    return std::numeric_limits<size_t>::max();
+  }
+  size_t bot = 0;
+  size_t top = array.size();
+  while (top >= 65536) {
+    size_t mid = top / 4;
+    top -= mid * 3;
+    if (key < array[bot + mid * 2]) {
+      if (key >= array[bot + mid]) {
+        bot += mid;
+      }
+    } else {
+      bot += mid * 2;
+      if (key >= array[bot + mid]) {
+        bot += mid;
+      }
+    }
+  }
+
+  while (top > 3) {
+    size_t mid = top / 2;
+    if (key >= array[bot + mid]) {
+      bot += mid;
+    }
+    top -= mid;
+  }
+
+  while (top--) {
+    if (key == array[bot + top]) {
+      return bot + top;
+    }
+  }
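+  // Not found: the -1 converts to SIZE_MAX, matching the empty-array case above.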
+  return -1;
+}
+
+struct Kernel11 {
+  VecView<vec4> xyzz;
+  VecView<int> s;
+  VecView<const vec3> vertPosP;
+  VecView<const vec3> vertPosQ;
+  VecView<const Halfedge> halfedgeP;
+  VecView<const Halfedge> halfedgeQ;
+  const double expandP;
+  VecView<const vec3> normalP;
+  const SparseIndices &p1q1;
+
+  void operator()(const size_t idx) {
+    const int p1 = p1q1.Get(idx, false);
+    const int q1 = p1q1.Get(idx, true);
+    vec4 &xyzz11 = xyzz[idx];
+    int &s11 = s[idx];
+
+    // For pRL[k], qRL[k], k==0 is the left and k==1 is the right.
+    int k = 0;
+    vec3 pRL[2], qRL[2];
+    // Either the left or right must shadow, but not both. This ensures the
+    // intersection is between the left and right.
+    bool shadows = false;
+    s11 = 0;
+
+    const int p0[2] = {halfedgeP[p1].startVert, halfedgeP[p1].endVert};
+    for (int i : {0, 1}) {
+      const auto syz01 = Shadow01(p0[i], q1, vertPosP, vertPosQ, halfedgeQ,
+                                  expandP, normalP, false);
+      const int s01 = syz01.first;
+      const vec2 yz01 = syz01.second;
+      // If the value is NaN, then these do not overlap.
+      if (std::isfinite(yz01[0])) {
+        s11 += s01 * (i == 0 ? -1 : 1);
+        if (k < 2 && (k == 0 || (s01 != 0) != shadows)) {
+          shadows = s01 != 0;
+          pRL[k] = vertPosP[p0[i]];
+          qRL[k] = vec3(pRL[k].x, yz01.x, yz01.y);
+          ++k;
+        }
+      }
+    }
+
+    const int q0[2] = {halfedgeQ[q1].startVert, halfedgeQ[q1].endVert};
+    for (int i : {0, 1}) {
+      const auto syz10 = Shadow01(q0[i], p1, vertPosQ, vertPosP, halfedgeP,
+                                  expandP, normalP, true);
+      const int s10 = syz10.first;
+      const vec2 yz10 = syz10.second;
+      // If the value is NaN, then these do not overlap.
+      if (std::isfinite(yz10[0])) {
+        s11 += s10 * (i == 0 ? -1 : 1);
+        if (k < 2 && (k == 0 || (s10 != 0) != shadows)) {
+          shadows = s10 != 0;
+          qRL[k] = vertPosQ[q0[i]];
+          pRL[k] = vec3(qRL[k].x, yz10.x, yz10.y);
+          ++k;
+        }
+      }
+    }
+
+    if (s11 == 0) {  // No intersection
+      xyzz11 = vec4(NAN);
+    } else {
+      DEBUG_ASSERT(k == 2, logicErr, "Boolean manifold error: s11");
+      xyzz11 = Intersect(pRL[0], pRL[1], qRL[0], qRL[1]);
+
+      const int p1s = halfedgeP[p1].startVert;
+      const int p1e = halfedgeP[p1].endVert;
+      vec3 diff = vertPosP[p1s] - vec3(xyzz11);
+      const double start2 = la::dot(diff, diff);
+      diff = vertPosP[p1e] - vec3(xyzz11);
+      const double end2 = la::dot(diff, diff);
+      const double dir = start2 < end2 ? normalP[p1s].z : normalP[p1e].z;
+
+      if (!Shadows(xyzz11.z, xyzz11.w, expandP * dir)) s11 = 0;
+    }
+  }
+};
+
+std::tuple<Vec<int>, Vec<vec4>> Shadow11(SparseIndices &p1q1,
+                                         const Manifold::Impl &inP,
+                                         const Manifold::Impl &inQ,
+                                         double expandP) {
+  ZoneScoped;
+  Vec<int> s11(p1q1.size());
+  Vec<vec4> xyzz11(p1q1.size());
+
+  for_each_n(autoPolicy(p1q1.size(), 1e4), countAt(0_uz), p1q1.size(),
+             Kernel11({xyzz11, s11, inP.vertPos_, inQ.vertPos_, inP.halfedge_,
+                       inQ.halfedge_, expandP, inP.vertNormal_, p1q1}));
+
+  p1q1.KeepFinite(xyzz11, s11);
+
+  return std::make_tuple(s11, xyzz11);
+};
+
+struct Kernel02 {
+  VecView<int> s;
+  VecView<double> z;
+  VecView<const vec3> vertPosP;
+  VecView<const Halfedge> halfedgeQ;
+  VecView<const vec3> vertPosQ;
+  const double expandP;
+  VecView<const vec3> vertNormalP;
+  const SparseIndices &p0q2;
+  const bool forward;
+
+  void operator()(const size_t idx) {
+    const int p0 = p0q2.Get(idx, !forward);
+    const int q2 = p0q2.Get(idx, forward);
+    int &s02 = s[idx];
+    double &z02 = z[idx];
+
+    // For yzzRL[k], k==0 is the left and k==1 is the right.
+    int k = 0;
+    vec3 yzzRL[2];
+    // Either the left or right must shadow, but not both. This ensures the
+    // intersection is between the left and right.
+    bool shadows = false;
+    int closestVert = -1;
+    double minMetric = std::numeric_limits<double>::infinity();
+    s02 = 0;
+
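+    // s02 sums the Shadow01 of vertex p0 against the three forward halfedges
+    // of triangle q2; yzzRL collects the two crossings that bracket the vertex
+    // so the triangle's z-height under the vertex can be interpolated below.
+    // closestVert is only needed in the reverse case, where the perturbation
+    // direction is taken from the nearest vertex normal of the triangle.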
+    const vec3 posP = vertPosP[p0];
+    for (const int i : {0, 1, 2}) {
+      const int q1 = 3 * q2 + i;
+      const Halfedge edge = halfedgeQ[q1];
+      const int q1F = edge.IsForward() ? q1 : edge.pairedHalfedge;
+
+      if (!forward) {
+        const int qVert = halfedgeQ[q1F].startVert;
+        const vec3 diff = posP - vertPosQ[qVert];
+        const double metric = la::dot(diff, diff);
+        if (metric < minMetric) {
+          minMetric = metric;
+          closestVert = qVert;
+        }
+      }
+
+      const auto syz01 = Shadow01(p0, q1F, vertPosP, vertPosQ, halfedgeQ,
+                                  expandP, vertNormalP, !forward);
+      const int s01 = syz01.first;
+      const vec2 yz01 = syz01.second;
+      // If the value is NaN, then these do not overlap.
+      if (std::isfinite(yz01[0])) {
+        s02 += s01 * (forward == edge.IsForward() ? -1 : 1);
+        if (k < 2 && (k == 0 || (s01 != 0) != shadows)) {
+          shadows = s01 != 0;
+          yzzRL[k++] = vec3(yz01[0], yz01[1], yz01[1]);
+        }
+      }
+    }
+
+    if (s02 == 0) {  // No intersection
+      z02 = NAN;
+    } else {
+      DEBUG_ASSERT(k == 2, logicErr, "Boolean manifold error: s02");
+      vec3 vertPos = vertPosP[p0];
+      z02 = Interpolate(yzzRL[0], yzzRL[1], vertPos.y)[1];
+      if (forward) {
+        if (!Shadows(vertPos.z, z02, expandP * vertNormalP[p0].z)) s02 = 0;
+      } else {
+        // DEBUG_ASSERT(closestVert != -1, topologyErr, "No closest vert");
+        if (!Shadows(z02, vertPos.z, expandP * vertNormalP[closestVert].z))
+          s02 = 0;
+      }
+    }
+  }
+};
+
+std::tuple<Vec<int>, Vec<double>> Shadow02(const Manifold::Impl &inP,
+                                           const Manifold::Impl &inQ,
+                                           SparseIndices &p0q2, bool forward,
+                                           double expandP) {
+  ZoneScoped;
+  Vec<int> s02(p0q2.size());
+  Vec<double> z02(p0q2.size());
+
+  auto vertNormalP = forward ? inP.vertNormal_ : inQ.vertNormal_;
+  for_each_n(autoPolicy(p0q2.size(), 1e4), countAt(0_uz), p0q2.size(),
+             Kernel02({s02, z02, inP.vertPos_, inQ.halfedge_, inQ.vertPos_,
+                       expandP, vertNormalP, p0q2, forward}));
+
+  p0q2.KeepFinite(z02, s02);
+
+  return std::make_tuple(s02, z02);
+};
+
+struct Kernel12 {
+  VecView<int> x;
+  VecView<vec3> v;
+  VecView<const int64_t> p0q2;
+  VecView<const int> s02;
+  VecView<const double> z02;
+  VecView<const int64_t> p1q1;
+  VecView<const int> s11;
+  VecView<const vec4> xyzz11;
+  VecView<const Halfedge> halfedgesP;
+  VecView<const Halfedge> halfedgesQ;
+  VecView<const vec3> vertPosP;
+  const bool forward;
+  const SparseIndices &p1q2;
+
+  void operator()(const size_t idx) {
+    int p1 = p1q2.Get(idx, !forward);
+    int q2 = p1q2.Get(idx, forward);
+    int &x12 = x[idx];
+    vec3 &v12 = v[idx];
+
+    // For xzyLR0[k] and xzyLR1[k], k==0 is the left and k==1 is the right.
+    int k = 0;
+    vec3 xzyLR0[2];
+    vec3 xzyLR1[2];
+    // Either the left or right must shadow, but not both. This ensures the
+    // intersection is between the left and right.
+    bool shadows = false;
+    x12 = 0;
+
+    const Halfedge edge = halfedgesP[p1];
+
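+    // x12 accumulates the crossing of edge p1 through face q2 from two kinds
+    // of lookups: the shadows of p1's endpoints over the face (s02, found via
+    // p0q2) and the shadows of the face's three edges against p1 (s11, found
+    // via p1q1); anything absent from those sparse lists contributes zero.
+    // The two bracketing points collected in xzyLR0/xzyLR1 then give the
+    // intersection vertex v12 via Intersect() below.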
+    for (int vert : {edge.startVert, edge.endVert}) {
+      const int64_t key = forward ? SparseIndices::EncodePQ(vert, q2)
+                                  : SparseIndices::EncodePQ(q2, vert);
+      const size_t idx = monobound_quaternary_search(p0q2, key);
+      if (idx != std::numeric_limits<size_t>::max()) {
+        const int s = s02[idx];
+        x12 += s * ((vert == edge.startVert) == forward ? 1 : -1);
+        if (k < 2 && (k == 0 || (s != 0) != shadows)) {
+          shadows = s != 0;
+          xzyLR0[k] = vertPosP[vert];
+          std::swap(xzyLR0[k].y, xzyLR0[k].z);
+          xzyLR1[k] = xzyLR0[k];
+          xzyLR1[k][1] = z02[idx];
+          k++;
+        }
+      }
+    }
+
+    for (const int i : {0, 1, 2}) {
+      const int q1 = 3 * q2 + i;
+      const Halfedge edge = halfedgesQ[q1];
+      const int q1F = edge.IsForward() ? q1 : edge.pairedHalfedge;
+      const int64_t key = forward ? SparseIndices::EncodePQ(p1, q1F)
+                                  : SparseIndices::EncodePQ(q1F, p1);
+      const size_t idx = monobound_quaternary_search(p1q1, key);
+      if (idx !=
+          std::numeric_limits<size_t>::max()) {  // s is implicitly zero for
+                                                 // anything not found
+        const int s = s11[idx];
+        x12 -= s * (edge.IsForward() ? 1 : -1);
+        if (k < 2 && (k == 0 || (s != 0) != shadows)) {
+          shadows = s != 0;
+          const vec4 xyzz = xyzz11[idx];
+          xzyLR0[k][0] = xyzz.x;
+          xzyLR0[k][1] = xyzz.z;
+          xzyLR0[k][2] = xyzz.y;
+          xzyLR1[k] = xzyLR0[k];
+          xzyLR1[k][1] = xyzz.w;
+          if (!forward) std::swap(xzyLR0[k][1], xzyLR1[k][1]);
+          k++;
+        }
+      }
+    }
+
+    if (x12 == 0) {  // No intersection
+      v12 = vec3(NAN);
+    } else {
+      DEBUG_ASSERT(k == 2, logicErr, "Boolean manifold error: v12");
+      const vec4 xzyy = Intersect(xzyLR0[0], xzyLR0[1], xzyLR1[0], xzyLR1[1]);
+      v12.x = xzyy[0];
+      v12.y = xzyy[2];
+      v12.z = xzyy[1];
+    }
+  }
+};
+
+std::tuple<Vec<int>, Vec<vec3>> Intersect12(
+    const Manifold::Impl &inP, const Manifold::Impl &inQ, const Vec<int> &s02,
+    const SparseIndices &p0q2, const Vec<int> &s11, const SparseIndices &p1q1,
+    const Vec<double> &z02, const Vec<vec4> &xyzz11, SparseIndices &p1q2,
+    bool forward) {
+  ZoneScoped;
+  Vec<int> x12(p1q2.size());
+  Vec<vec3> v12(p1q2.size());
+
+  for_each_n(
+      autoPolicy(p1q2.size(), 1e4), countAt(0_uz), p1q2.size(),
+      Kernel12({x12, v12, p0q2.AsVec64(), s02, z02, p1q1.AsVec64(), s11, xyzz11,
+                inP.halfedge_, inQ.halfedge_, inP.vertPos_, forward, p1q2}));
+
+  p1q2.KeepFinite(v12, x12);
+
+  return std::make_tuple(x12, v12);
+};
+
+Vec<int> Winding03(const Manifold::Impl &inP, Vec<int> &vertices, Vec<int> &s02,
+                   bool reverse) {
+  ZoneScoped;
+  // verts that are not shadowed (not in p0q2) have winding number zero.
+  Vec<int> w03(inP.NumVert(), 0);
+  if (vertices.size() <= 1e5) {
+    for_each_n(ExecutionPolicy::Seq, countAt(0), s02.size(),
+               [&w03, &vertices, &s02, reverse](const int i) {
+                 w03[vertices[i]] += s02[i] * (reverse ? -1 : 1);
+               });
+  } else {
+    for_each_n(ExecutionPolicy::Par, countAt(0), s02.size(),
+               [&w03, &vertices, &s02, reverse](const int i) {
+                 AtomicAdd(w03[vertices[i]], s02[i] * (reverse ? -1 : 1));
+               });
+  }
+  return w03;
+};
+}  // namespace
+
+namespace manifold {
+Boolean3::Boolean3(const Manifold::Impl &inP, const Manifold::Impl &inQ,
+                   OpType op)
+    : inP_(inP), inQ_(inQ), expandP_(op == OpType::Add ? 1.0 : -1.0) {
+  // Symbolic perturbation:
+  // Union -> expand inP
+  // Difference, Intersection -> contract inP
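+  // expandP_ is only used to orient the symbolic perturbation in the Shadow
+  // tests below; the expansion/contraction never moves any vertices, it just
+  // breaks ties consistently for geometry that exactly touches.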
+
+#ifdef MANIFOLD_DEBUG
+  Timer broad;
+  broad.Start();
+#endif
+
+  if (inP.IsEmpty() || inQ.IsEmpty() || !inP.bBox_.DoesOverlap(inQ.bBox_)) {
+    PRINT("No overlap, early out");
+    w03_.resize(inP.NumVert(), 0);
+    w30_.resize(inQ.NumVert(), 0);
+    return;
+  }
+
+  // Level 3
+  // Find edge-triangle overlaps (broad phase)
+  p1q2_ = inQ_.EdgeCollisions(inP_);
+  p2q1_ = inP_.EdgeCollisions(inQ_, true);  // inverted
+
+  p1q2_.Sort();
+  PRINT("p1q2 size = " << p1q2_.size());
+
+  p2q1_.Sort();
+  PRINT("p2q1 size = " << p2q1_.size());
+
+  // Level 2
+  // Find vertices that overlap faces in XY-projection
+  SparseIndices p0q2 = inQ.VertexCollisionsZ(inP.vertPos_);
+  p0q2.Sort();
+  PRINT("p0q2 size = " << p0q2.size());
+
+  SparseIndices p2q0 = inP.VertexCollisionsZ(inQ.vertPos_, true);  // inverted
+  p2q0.Sort();
+  PRINT("p2q0 size = " << p2q0.size());
+
+  // Find involved edge pairs from Level 3
+  SparseIndices p1q1 = Filter11(inP_, inQ_, p1q2_, p2q1_);
+  PRINT("p1q1 size = " << p1q1.size());
+
+#ifdef MANIFOLD_DEBUG
+  broad.Stop();
+  Timer intersections;
+  intersections.Start();
+#endif
+
+  // Level 2
+  // Build up XY-projection intersection of two edges, including the z-value for
+  // each edge, keeping only those whose intersection exists.
+  Vec<int> s11;
+  Vec<vec4> xyzz11;
+  std::tie(s11, xyzz11) = Shadow11(p1q1, inP, inQ, expandP_);
+  PRINT("s11 size = " << s11.size());
+
+  // Build up Z-projection of vertices onto triangles, keeping only those that
+  // fall inside the triangle.
+  Vec<int> s02;
+  Vec<double> z02;
+  std::tie(s02, z02) = Shadow02(inP, inQ, p0q2, true, expandP_);
+  PRINT("s02 size = " << s02.size());
+
+  Vec<int> s20;
+  Vec<double> z20;
+  std::tie(s20, z20) = Shadow02(inQ, inP, p2q0, false, expandP_);
+  PRINT("s20 size = " << s20.size());
+
+  // Level 3
+  // Build up the intersection of the edges and triangles, keeping only those
+  // that intersect, and record the direction the edge is passing through the
+  // triangle.
+  std::tie(x12_, v12_) =
+      Intersect12(inP, inQ, s02, p0q2, s11, p1q1, z02, xyzz11, p1q2_, true);
+  PRINT("x12 size = " << x12_.size());
+
+  std::tie(x21_, v21_) =
+      Intersect12(inQ, inP, s20, p2q0, s11, p1q1, z20, xyzz11, p2q1_, false);
+  PRINT("x21 size = " << x21_.size());
+
+  s11.clear();
+  xyzz11.clear();
+  z02.clear();
+  z20.clear();
+
+  Vec<int> p0 = p0q2.Copy(false);
+  p0q2.Resize(0);
+  Vec<int> q0 = p2q0.Copy(true);
+  p2q0.Resize(0);
+  // Sum up the winding numbers of all vertices.
+  w03_ = Winding03(inP, p0, s02, false);
+
+  w30_ = Winding03(inQ, q0, s20, true);
+
+#ifdef MANIFOLD_DEBUG
+  intersections.Stop();
+
+  if (ManifoldParams().verbose) {
+    broad.Print("Broad phase");
+    intersections.Print("Intersections");
+  }
+#endif
+}
+}  // namespace manifold

+ 60 - 0
thirdparty/manifold/src/boolean3.h

@@ -0,0 +1,60 @@
+// Copyright 2020 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "./impl.h"
+
+#ifdef MANIFOLD_DEBUG
+#define PRINT(msg) \
+  if (ManifoldParams().verbose) std::cout << msg << std::endl;
+#else
+#define PRINT(msg)
+#endif
+
+/**
+ * The notation in these files is abbreviated due to the complexity of the
+ * functions involved. The key is that the input manifolds are P and Q, while
+ * the output is R, and these letters in both upper and lower case refer to
+ * these objects. Operations are based on dimensionality: vert: 0, edge: 1,
+ * face: 2, solid: 3. X denotes a winding-number type quantity from the source
+ * paper of this algorithm, while S is closely related but includes only the
+ * subset of X values which "shadow" (are on the correct side of).
+ *
+ * Nearly everything here is a sparse array, where for instance each pair in
+ * p2q1 refers to a face index of P interacting with a halfedge index of Q.
+ * Adjacent arrays like x21 refer to the values of X corresponding to each
+ * sparse index pair.
+ *
+ * Note that many functions are designed to work symmetrically, for instance
+ * for both p2q1 and p1q2. Inside these functions P and Q are marked as though
+ * the function is forward, but it may include a Boolean "reverse" indicating P
+ * and Q have been swapped.
+ */
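+// For example, under the convention above: each pair in p1q2 is an edge
+// index of P interacting with a face index of Q, x12 holds the X value for
+// each such pair, and v12 holds the corresponding intersection vertex. p2q1,
+// x21 and v21 are the mirrored versions with P and Q swapped, while w03 and
+// w30 are the per-vertex winding numbers of P's verts with respect to Q and
+// vice versa.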
+
+namespace manifold {
+
+/** @ingroup Private */
+class Boolean3 {
+ public:
+  Boolean3(const Manifold::Impl& inP, const Manifold::Impl& inQ, OpType op);
+  Manifold::Impl Result(OpType op) const;
+
+ private:
+  const Manifold::Impl &inP_, &inQ_;
+  const double expandP_;
+  SparseIndices p1q2_, p2q1_;
+  Vec<int> x12_, x21_, w03_, w30_;
+  Vec<vec3> v12_, v21_;
+};
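+// Minimal usage sketch (implP, implQ and result are illustrative names, not
+// identifiers from this codebase); the op passed to Result() must be
+// compatible with the op given to the constructor:
+//
+//   Boolean3 boolean(implP, implQ, OpType::Subtract);
+//   Manifold::Impl result = boolean.Result(OpType::Subtract);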
+}  // namespace manifold

+ 889 - 0
thirdparty/manifold/src/boolean_result.cpp

@@ -0,0 +1,889 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <array>
+#include <map>
+
+#include "./boolean3.h"
+#include "./parallel.h"
+#include "./utils.h"
+
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/concurrent_map.h>)
+#define TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS 1
+#include <tbb/concurrent_map.h>
+#include <tbb/parallel_for.h>
+
+template <typename K, typename V>
+using concurrent_map = tbb::concurrent_map<K, V>;
+#else
+template <typename K, typename V>
+// not really concurrent when tbb is disabled
+using concurrent_map = std::map<K, V>;
+#endif
+
+using namespace manifold;
+
+template <>
+struct std::hash<std::pair<int, int>> {
+  size_t operator()(const std::pair<int, int> &p) const {
+    return std::hash<int>()(p.first) ^ std::hash<int>()(p.second);
+  }
+};
+
+namespace {
+
+constexpr int kParallelThreshold = 128;
+
+struct AbsSum {
+  int operator()(int a, int b) const { return abs(a) + abs(b); }
+};
+
+struct DuplicateVerts {
+  VecView<vec3> vertPosR;
+  VecView<const int> inclusion;
+  VecView<const int> vertR;
+  VecView<const vec3> vertPosP;
+
+  void operator()(const int vert) {
+    const int n = std::abs(inclusion[vert]);
+    for (int i = 0; i < n; ++i) {
+      vertPosR[vertR[vert] + i] = vertPosP[vert];
+    }
+  }
+};
+
+template <bool atomic>
+struct CountVerts {
+  VecView<Halfedge> halfedges;
+  VecView<int> count;
+  VecView<const int> inclusion;
+
+  void operator()(size_t i) {
+    if (atomic)
+      AtomicAdd(count[i / 3], std::abs(inclusion[halfedges[i].startVert]));
+    else
+      count[i / 3] += std::abs(inclusion[halfedges[i].startVert]);
+  }
+};
+
+template <const bool inverted, const bool atomic>
+struct CountNewVerts {
+  VecView<int> countP;
+  VecView<int> countQ;
+  VecView<const int> i12;
+  const SparseIndices &pq;
+  VecView<const Halfedge> halfedges;
+
+  void operator()(const int idx) {
+    int edgeP = pq.Get(idx, inverted);
+    int faceQ = pq.Get(idx, !inverted);
+    int inclusion = std::abs(i12[idx]);
+
+    if (atomic) {
+      AtomicAdd(countQ[faceQ], inclusion);
+      const Halfedge half = halfedges[edgeP];
+      AtomicAdd(countP[edgeP / 3], inclusion);
+      AtomicAdd(countP[half.pairedHalfedge / 3], inclusion);
+    } else {
+      countQ[faceQ] += inclusion;
+      const Halfedge half = halfedges[edgeP];
+      countP[edgeP / 3] += inclusion;
+      countP[half.pairedHalfedge / 3] += inclusion;
+    }
+  }
+};
+
+std::tuple<Vec<int>, Vec<int>> SizeOutput(
+    Manifold::Impl &outR, const Manifold::Impl &inP, const Manifold::Impl &inQ,
+    const Vec<int> &i03, const Vec<int> &i30, const Vec<int> &i12,
+    const Vec<int> &i21, const SparseIndices &p1q2, const SparseIndices &p2q1,
+    bool invertQ) {
+  ZoneScoped;
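+  // Count how many halfedges each face of P and Q will contribute to the
+  // result: retained verts weighted by |i03| / |i30|, plus new intersection
+  // verts weighted by |i12| / |i21| (each touching one face of Q and the two
+  // faces of P adjacent to the intersected edge, or vice versa). Faces with a
+  // zero count are dropped, the rest are compacted and their normals gathered
+  // (negated for Q when invertQ), and the counts are prefix-summed into the
+  // faceEdge offsets into outR.halfedge_.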
+  Vec<int> sidesPerFacePQ(inP.NumTri() + inQ.NumTri(), 0);
+  // note: numFaceR <= facePQ2R.size() = sidesPerFacePQ.size() + 1
+
+  auto sidesPerFaceP = sidesPerFacePQ.view(0, inP.NumTri());
+  auto sidesPerFaceQ = sidesPerFacePQ.view(inP.NumTri(), inQ.NumTri());
+
+  if (inP.halfedge_.size() >= 1e5) {
+    for_each(ExecutionPolicy::Par, countAt(0_uz), countAt(inP.halfedge_.size()),
+             CountVerts<true>({inP.halfedge_, sidesPerFaceP, i03}));
+    for_each(ExecutionPolicy::Par, countAt(0_uz), countAt(inQ.halfedge_.size()),
+             CountVerts<true>({inQ.halfedge_, sidesPerFaceQ, i30}));
+  } else {
+    for_each(ExecutionPolicy::Seq, countAt(0_uz), countAt(inP.halfedge_.size()),
+             CountVerts<false>({inP.halfedge_, sidesPerFaceP, i03}));
+    for_each(ExecutionPolicy::Seq, countAt(0_uz), countAt(inQ.halfedge_.size()),
+             CountVerts<false>({inQ.halfedge_, sidesPerFaceQ, i30}));
+  }
+
+  if (i12.size() >= 1e5) {
+    for_each_n(ExecutionPolicy::Par, countAt(0), i12.size(),
+               CountNewVerts<false, true>(
+                   {sidesPerFaceP, sidesPerFaceQ, i12, p1q2, inP.halfedge_}));
+    for_each_n(ExecutionPolicy::Par, countAt(0), i21.size(),
+               CountNewVerts<true, true>(
+                   {sidesPerFaceQ, sidesPerFaceP, i21, p2q1, inQ.halfedge_}));
+  } else {
+    for_each_n(ExecutionPolicy::Seq, countAt(0), i12.size(),
+               CountNewVerts<false, false>(
+                   {sidesPerFaceP, sidesPerFaceQ, i12, p1q2, inP.halfedge_}));
+    for_each_n(ExecutionPolicy::Seq, countAt(0), i21.size(),
+               CountNewVerts<true, false>(
+                   {sidesPerFaceQ, sidesPerFaceP, i21, p2q1, inQ.halfedge_}));
+  }
+
+  Vec<int> facePQ2R(inP.NumTri() + inQ.NumTri() + 1, 0);
+  auto keepFace = TransformIterator(sidesPerFacePQ.begin(),
+                                    [](int x) { return x > 0 ? 1 : 0; });
+
+  inclusive_scan(keepFace, keepFace + sidesPerFacePQ.size(),
+                 facePQ2R.begin() + 1);
+  int numFaceR = facePQ2R.back();
+  facePQ2R.resize(inP.NumTri() + inQ.NumTri());
+
+  outR.faceNormal_.resize(numFaceR);
+
+  Vec<size_t> tmpBuffer(outR.faceNormal_.size());
+  auto faceIds = TransformIterator(countAt(0_uz), [&sidesPerFacePQ](size_t i) {
+    if (sidesPerFacePQ[i] > 0) return i;
+    return std::numeric_limits<size_t>::max();
+  });
+
+  auto next =
+      copy_if(faceIds, faceIds + inP.faceNormal_.size(), tmpBuffer.begin(),
+              [](size_t v) { return v != std::numeric_limits<size_t>::max(); });
+
+  gather(tmpBuffer.begin(), next, inP.faceNormal_.begin(),
+         outR.faceNormal_.begin());
+
+  auto faceIdsQ =
+      TransformIterator(countAt(0_uz), [&sidesPerFacePQ, &inP](size_t i) {
+        if (sidesPerFacePQ[i + inP.faceNormal_.size()] > 0) return i;
+        return std::numeric_limits<size_t>::max();
+      });
+  auto end =
+      copy_if(faceIdsQ, faceIdsQ + inQ.faceNormal_.size(), next,
+              [](size_t v) { return v != std::numeric_limits<size_t>::max(); });
+
+  if (invertQ) {
+    gather(next, end,
+           TransformIterator(inQ.faceNormal_.begin(), Negate<vec3>()),
+           outR.faceNormal_.begin() + std::distance(tmpBuffer.begin(), next));
+  } else {
+    gather(next, end, inQ.faceNormal_.begin(),
+           outR.faceNormal_.begin() + std::distance(tmpBuffer.begin(), next));
+  }
+
+  auto newEnd = remove(sidesPerFacePQ.begin(), sidesPerFacePQ.end(), 0);
+  Vec<int> faceEdge(newEnd - sidesPerFacePQ.begin() + 1, 0);
+  inclusive_scan(sidesPerFacePQ.begin(), newEnd, faceEdge.begin() + 1);
+  outR.halfedge_.resize(faceEdge.back());
+
+  return std::make_tuple(faceEdge, facePQ2R);
+}
+
+struct EdgePos {
+  int vert;
+  double edgePos;
+  bool isStart;
+};
+
+// The thread sanitizer doesn't really know how to check when there are too
+// many mutexes.
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+__attribute__((no_sanitize("thread")))
+#endif
+#endif
+void AddNewEdgeVerts(
+    // we need concurrent_map because we will be adding things concurrently
+    concurrent_map<int, std::vector<EdgePos>> &edgesP,
+    concurrent_map<std::pair<int, int>, std::vector<EdgePos>> &edgesNew,
+    const SparseIndices &p1q2, const Vec<int> &i12, const Vec<int> &v12R,
+    const Vec<Halfedge> &halfedgeP, bool forward) {
+  ZoneScoped;
+  // For each edge of P that intersects a face of Q (p1q2), add this vertex to
+  // P's corresponding edge vector and to the two new edges, which are
+  // intersections between the face of Q and the two faces of P attached to the
+  // edge. The direction and duplicity are given by i12, while v12R remaps to
+  // the output vert index. When forward is false, all is reversed.
+  auto process = [&](std::function<void(size_t)> lock,
+                     std::function<void(size_t)> unlock, size_t i) {
+    const int edgeP = p1q2.Get(i, !forward);
+    const int faceQ = p1q2.Get(i, forward);
+    const int vert = v12R[i];
+    const int inclusion = i12[i];
+
+    Halfedge halfedge = halfedgeP[edgeP];
+    std::pair<int, int> keyRight = {halfedge.pairedHalfedge / 3, faceQ};
+    if (!forward) std::swap(keyRight.first, keyRight.second);
+
+    std::pair<int, int> keyLeft = {edgeP / 3, faceQ};
+    if (!forward) std::swap(keyLeft.first, keyLeft.second);
+
+    bool direction = inclusion < 0;
+    std::hash<std::pair<int, int>> pairHasher;
+    std::array<std::tuple<bool, size_t, std::vector<EdgePos> *>, 3> edges = {
+        std::make_tuple(direction, std::hash<int>{}(edgeP), &edgesP[edgeP]),
+        std::make_tuple(direction ^ !forward,  // revert if not forward
+                        pairHasher(keyRight), &edgesNew[keyRight]),
+        std::make_tuple(direction ^ forward,  // revert if forward
+                        pairHasher(keyLeft), &edgesNew[keyLeft])};
+    for (const auto &tuple : edges) {
+      lock(std::get<1>(tuple));
+      for (int j = 0; j < std::abs(inclusion); ++j)
+        std::get<2>(tuple)->push_back({vert + j, 0.0, std::get<0>(tuple)});
+      unlock(std::get<1>(tuple));
+      direction = !direction;
+    }
+  };
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/tbb.h>)
+  // Parallelize these operations; this requires concurrent_map, so it can only
+  // be enabled when TBB is available.
+  if (p1q2.size() > kParallelThreshold) {
+    // ideally we should have 1 mutex per key, but kParallelThreshold is enough
+    // to avoid contention for most of the cases
+    std::array<std::mutex, kParallelThreshold> mutexes;
+    static tbb::affinity_partitioner ap;
+    auto processFun = std::bind(
+        process, [&](size_t hash) { mutexes[hash % mutexes.size()].lock(); },
+        [&](size_t hash) { mutexes[hash % mutexes.size()].unlock(); },
+        std::placeholders::_1);
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0_uz, p1q2.size(), 32),
+        [&](const tbb::blocked_range<size_t> &range) {
+          for (size_t i = range.begin(); i != range.end(); i++) processFun(i);
+        },
+        ap);
+    return;
+  }
+#endif
+  auto processFun = std::bind(
+      process, [](size_t _) {}, [](size_t _) {}, std::placeholders::_1);
+  for (size_t i = 0; i < p1q2.size(); ++i) processFun(i);
+}
+
+std::vector<Halfedge> PairUp(std::vector<EdgePos> &edgePos) {
+  // Pair start vertices with end vertices to form edges. The choice of pairing
+  // is arbitrary for the manifoldness guarantee, but must be ordered to be
+  // geometrically valid. If the order does not go start-end-start-end... then
+  // the input and output are not geometrically valid and this algorithm becomes
+  // a heuristic.
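+  // Concretely: partition the points into starts and ends, stable-sort each
+  // half along the edge direction, and pair the i-th start with the i-th end.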
+  DEBUG_ASSERT(edgePos.size() % 2 == 0, topologyErr,
+               "Non-manifold edge! Not an even number of points.");
+  size_t nEdges = edgePos.size() / 2;
+  auto middle = std::partition(edgePos.begin(), edgePos.end(),
+                               [](EdgePos x) { return x.isStart; });
+  DEBUG_ASSERT(static_cast<size_t>(middle - edgePos.begin()) == nEdges,
+               topologyErr, "Non-manifold edge!");
+  auto cmp = [](EdgePos a, EdgePos b) { return a.edgePos < b.edgePos; };
+  std::stable_sort(edgePos.begin(), middle, cmp);
+  std::stable_sort(middle, edgePos.end(), cmp);
+  std::vector<Halfedge> edges;
+  for (size_t i = 0; i < nEdges; ++i)
+    edges.push_back({edgePos[i].vert, edgePos[i + nEdges].vert, -1});
+  return edges;
+}
+
+void AppendPartialEdges(Manifold::Impl &outR, Vec<char> &wholeHalfedgeP,
+                        Vec<int> &facePtrR,
+                        concurrent_map<int, std::vector<EdgePos>> &edgesP,
+                        Vec<TriRef> &halfedgeRef, const Manifold::Impl &inP,
+                        const Vec<int> &i03, const Vec<int> &vP2R,
+                        const Vec<int>::IterC faceP2R, bool forward) {
+  ZoneScoped;
+  // Each edge in the map is partially retained; for each of these, look up
+  // their original verts and include them based on their winding number (i03),
+  // while remapping them to the output using vP2R. Use the verts position
+  // projected along the edge vector to pair them up, then distribute these
+  // edges to their faces.
+  Vec<Halfedge> &halfedgeR = outR.halfedge_;
+  const Vec<vec3> &vertPosP = inP.vertPos_;
+  const Vec<Halfedge> &halfedgeP = inP.halfedge_;
+
+  for (auto &value : edgesP) {
+    const int edgeP = value.first;
+    std::vector<EdgePos> &edgePosP = value.second;
+
+    const Halfedge &halfedge = halfedgeP[edgeP];
+    wholeHalfedgeP[edgeP] = false;
+    wholeHalfedgeP[halfedge.pairedHalfedge] = false;
+
+    const int vStart = halfedge.startVert;
+    const int vEnd = halfedge.endVert;
+    const vec3 edgeVec = vertPosP[vEnd] - vertPosP[vStart];
+    // Fill in the edge positions of the old points.
+    for (EdgePos &edge : edgePosP) {
+      edge.edgePos = la::dot(outR.vertPos_[edge.vert], edgeVec);
+    }
+
+    int inclusion = i03[vStart];
+    EdgePos edgePos = {vP2R[vStart],
+                       la::dot(outR.vertPos_[vP2R[vStart]], edgeVec),
+                       inclusion > 0};
+    for (int j = 0; j < std::abs(inclusion); ++j) {
+      edgePosP.push_back(edgePos);
+      ++edgePos.vert;
+    }
+
+    inclusion = i03[vEnd];
+    edgePos = {vP2R[vEnd], la::dot(outR.vertPos_[vP2R[vEnd]], edgeVec),
+               inclusion < 0};
+    for (int j = 0; j < std::abs(inclusion); ++j) {
+      edgePosP.push_back(edgePos);
+      ++edgePos.vert;
+    }
+
+    // sort edges into start/end pairs along length
+    std::vector<Halfedge> edges = PairUp(edgePosP);
+
+    // add halfedges to result
+    const int faceLeftP = edgeP / 3;
+    const int faceLeft = faceP2R[faceLeftP];
+    const int faceRightP = halfedge.pairedHalfedge / 3;
+    const int faceRight = faceP2R[faceRightP];
+    // Negative inclusion means the halfedges are reversed, which means our
+    // reference is now to the endVert instead of the startVert, which is one
+    // position advanced CCW. This is only valid if this is a retained vert; it
+    // will be ignored later if the vert is new.
+    const TriRef forwardRef = {forward ? 0 : 1, -1, faceLeftP};
+    const TriRef backwardRef = {forward ? 0 : 1, -1, faceRightP};
+
+    for (Halfedge e : edges) {
+      const int forwardEdge = facePtrR[faceLeft]++;
+      const int backwardEdge = facePtrR[faceRight]++;
+
+      e.pairedHalfedge = backwardEdge;
+      halfedgeR[forwardEdge] = e;
+      halfedgeRef[forwardEdge] = forwardRef;
+
+      std::swap(e.startVert, e.endVert);
+      e.pairedHalfedge = forwardEdge;
+      halfedgeR[backwardEdge] = e;
+      halfedgeRef[backwardEdge] = backwardRef;
+    }
+  }
+}
+
+void AppendNewEdges(
+    Manifold::Impl &outR, Vec<int> &facePtrR,
+    concurrent_map<std::pair<int, int>, std::vector<EdgePos>> &edgesNew,
+    Vec<TriRef> &halfedgeRef, const Vec<int> &facePQ2R, const int numFaceP) {
+  ZoneScoped;
+  // Pair up each edge's verts and distribute to faces based on indices in key.
+  Vec<Halfedge> &halfedgeR = outR.halfedge_;
+  Vec<vec3> &vertPosR = outR.vertPos_;
+
+  for (auto &value : edgesNew) {
+    const int faceP = value.first.first;
+    const int faceQ = value.first.second;
+    std::vector<EdgePos> &edgePos = value.second;
+
+    Box bbox;
+    for (auto edge : edgePos) {
+      bbox.Union(vertPosR[edge.vert]);
+    }
+    const vec3 size = bbox.Size();
+    // Order the points along their longest dimension.
+    const int i = (size.x > size.y && size.x > size.z) ? 0
+                  : size.y > size.z                    ? 1
+                                                       : 2;
+    for (auto &edge : edgePos) {
+      edge.edgePos = vertPosR[edge.vert][i];
+    }
+
+    // sort edges into start/end pairs along length.
+    std::vector<Halfedge> edges = PairUp(edgePos);
+
+    // add halfedges to result
+    const int faceLeft = facePQ2R[faceP];
+    const int faceRight = facePQ2R[numFaceP + faceQ];
+    const TriRef forwardRef = {0, -1, faceP};
+    const TriRef backwardRef = {1, -1, faceQ};
+    for (Halfedge e : edges) {
+      const int forwardEdge = facePtrR[faceLeft]++;
+      const int backwardEdge = facePtrR[faceRight]++;
+
+      e.pairedHalfedge = backwardEdge;
+      halfedgeR[forwardEdge] = e;
+      halfedgeRef[forwardEdge] = forwardRef;
+
+      std::swap(e.startVert, e.endVert);
+      e.pairedHalfedge = forwardEdge;
+      halfedgeR[backwardEdge] = e;
+      halfedgeRef[backwardEdge] = backwardRef;
+    }
+  }
+}
+
+struct DuplicateHalfedges {
+  VecView<Halfedge> halfedgesR;
+  VecView<TriRef> halfedgeRef;
+  VecView<int> facePtr;
+  VecView<const char> wholeHalfedgeP;
+  VecView<const Halfedge> halfedgesP;
+  VecView<const int> i03;
+  VecView<const int> vP2R;
+  VecView<const int> faceP2R;
+  const bool forward;
+
+  void operator()(const int idx) {
+    if (!wholeHalfedgeP[idx]) return;
+    Halfedge halfedge = halfedgesP[idx];
+    if (!halfedge.IsForward()) return;
+
+    const int inclusion = i03[halfedge.startVert];
+    if (inclusion == 0) return;
+    if (inclusion < 0) {  // reverse
+      std::swap(halfedge.startVert, halfedge.endVert);
+    }
+    halfedge.startVert = vP2R[halfedge.startVert];
+    halfedge.endVert = vP2R[halfedge.endVert];
+    const int faceLeftP = idx / 3;
+    const int newFace = faceP2R[faceLeftP];
+    const int faceRightP = halfedge.pairedHalfedge / 3;
+    const int faceRight = faceP2R[faceRightP];
+    // Negative inclusion means the halfedges are reversed, which means our
+    // reference is now to the endVert instead of the startVert, which is one
+    // position advanced CCW.
+    const TriRef forwardRef = {forward ? 0 : 1, -1, faceLeftP};
+    const TriRef backwardRef = {forward ? 0 : 1, -1, faceRightP};
+
+    for (int i = 0; i < std::abs(inclusion); ++i) {
+      int forwardEdge = AtomicAdd(facePtr[newFace], 1);
+      int backwardEdge = AtomicAdd(facePtr[faceRight], 1);
+      halfedge.pairedHalfedge = backwardEdge;
+
+      halfedgesR[forwardEdge] = halfedge;
+      halfedgesR[backwardEdge] = {halfedge.endVert, halfedge.startVert,
+                                  forwardEdge};
+      halfedgeRef[forwardEdge] = forwardRef;
+      halfedgeRef[backwardEdge] = backwardRef;
+
+      ++halfedge.startVert;
+      ++halfedge.endVert;
+    }
+  }
+};
+
+void AppendWholeEdges(Manifold::Impl &outR, Vec<int> &facePtrR,
+                      Vec<TriRef> &halfedgeRef, const Manifold::Impl &inP,
+                      const Vec<char> wholeHalfedgeP, const Vec<int> &i03,
+                      const Vec<int> &vP2R, VecView<const int> faceP2R,
+                      bool forward) {
+  ZoneScoped;
+  for_each_n(
+      autoPolicy(inP.halfedge_.size()), countAt(0), inP.halfedge_.size(),
+      DuplicateHalfedges({outR.halfedge_, halfedgeRef, facePtrR, wholeHalfedgeP,
+                          inP.halfedge_, i03, vP2R, faceP2R, forward}));
+}
+
+struct MapTriRef {
+  VecView<const TriRef> triRefP;
+  VecView<const TriRef> triRefQ;
+  const int offsetQ;
+
+  void operator()(TriRef &triRef) {
+    const int tri = triRef.tri;
+    const bool PQ = triRef.meshID == 0;
+    triRef = PQ ? triRefP[tri] : triRefQ[tri];
+    if (!PQ) triRef.meshID += offsetQ;
+  }
+};
+
+void UpdateReference(Manifold::Impl &outR, const Manifold::Impl &inP,
+                     const Manifold::Impl &inQ, bool invertQ) {
+  const int offsetQ = Manifold::Impl::meshIDCounter_;
+  for_each_n(
+      autoPolicy(outR.NumTri(), 1e5), outR.meshRelation_.triRef.begin(),
+      outR.NumTri(),
+      MapTriRef({inP.meshRelation_.triRef, inQ.meshRelation_.triRef, offsetQ}));
+
+  for (const auto &pair : inP.meshRelation_.meshIDtransform) {
+    outR.meshRelation_.meshIDtransform[pair.first] = pair.second;
+  }
+  for (const auto &pair : inQ.meshRelation_.meshIDtransform) {
+    outR.meshRelation_.meshIDtransform[pair.first + offsetQ] = pair.second;
+    outR.meshRelation_.meshIDtransform[pair.first + offsetQ].backSide ^=
+        invertQ;
+  }
+}
+
+struct Barycentric {
+  VecView<vec3> uvw;
+  VecView<const TriRef> ref;
+  VecView<const vec3> vertPosP;
+  VecView<const vec3> vertPosQ;
+  VecView<const vec3> vertPosR;
+  VecView<const Halfedge> halfedgeP;
+  VecView<const Halfedge> halfedgeQ;
+  VecView<const Halfedge> halfedgeR;
+  const double epsilon;
+
+  void operator()(const int tri) {
+    const TriRef refPQ = ref[tri];
+    if (halfedgeR[3 * tri].startVert < 0) return;
+
+    const int triPQ = refPQ.tri;
+    const bool PQ = refPQ.meshID == 0;
+    const auto &vertPos = PQ ? vertPosP : vertPosQ;
+    const auto &halfedge = PQ ? halfedgeP : halfedgeQ;
+
+    mat3 triPos;
+    for (const int j : {0, 1, 2})
+      triPos[j] = vertPos[halfedge[3 * triPQ + j].startVert];
+
+    for (const int i : {0, 1, 2}) {
+      const int vert = halfedgeR[3 * tri + i].startVert;
+      uvw[3 * tri + i] = GetBarycentric(vertPosR[vert], triPos, epsilon);
+    }
+  }
+};
+
+void CreateProperties(Manifold::Impl &outR, const Manifold::Impl &inP,
+                      const Manifold::Impl &inQ) {
+  ZoneScoped;
+  const int numPropP = inP.NumProp();
+  const int numPropQ = inQ.NumProp();
+  const int numProp = std::max(numPropP, numPropQ);
+  outR.meshRelation_.numProp = numProp;
+  if (numProp == 0) return;
+
+  const int numTri = outR.NumTri();
+  outR.meshRelation_.triProperties.resize(numTri);
+
+  Vec<vec3> bary(outR.halfedge_.size());
+  for_each_n(autoPolicy(numTri, 1e4), countAt(0), numTri,
+             Barycentric({bary, outR.meshRelation_.triRef, inP.vertPos_,
+                          inQ.vertPos_, outR.vertPos_, inP.halfedge_,
+                          inQ.halfedge_, outR.halfedge_, outR.epsilon_}));
+
+  using Entry = std::pair<ivec3, int>;
+  int idMissProp = outR.NumVert();
+  std::vector<std::vector<Entry>> propIdx(outR.NumVert() + 1);
+  std::vector<int> propMissIdx[2];
+  propMissIdx[0].resize(inQ.NumPropVert(), -1);
+  propMissIdx[1].resize(inP.NumPropVert(), -1);
+
+  outR.meshRelation_.properties.reserve(outR.NumVert() * numProp);
+  int idx = 0;
+
+  for (int tri = 0; tri < numTri; ++tri) {
+    // Skip collapsed triangles
+    if (outR.halfedge_[3 * tri].startVert < 0) continue;
+
+    const TriRef ref = outR.meshRelation_.triRef[tri];
+    const bool PQ = ref.meshID == 0;
+    const int oldNumProp = PQ ? numPropP : numPropQ;
+    const auto &properties =
+        PQ ? inP.meshRelation_.properties : inQ.meshRelation_.properties;
+    const ivec3 &triProp = oldNumProp == 0 ? ivec3(-1)
+                           : PQ ? inP.meshRelation_.triProperties[ref.tri]
+                                : inQ.meshRelation_.triProperties[ref.tri];
+
+    for (const int i : {0, 1, 2}) {
+      const int vert = outR.halfedge_[3 * tri + i].startVert;
+      const vec3 &uvw = bary[3 * tri + i];
+
+      ivec4 key(PQ, idMissProp, -1, -1);
+      if (oldNumProp > 0) {
+        int edge = -2;
+        for (const int j : {0, 1, 2}) {
+          if (uvw[j] == 1) {
+            // On a retained vert, the propVert must also match
+            key[2] = triProp[j];
+            edge = -1;
+            break;
+          }
+          if (uvw[j] == 0) edge = j;
+        }
+        if (edge >= 0) {
+          // On an edge, both propVerts must match
+          const int p0 = triProp[Next3(edge)];
+          const int p1 = triProp[Prev3(edge)];
+          key[1] = vert;
+          key[2] = std::min(p0, p1);
+          key[3] = std::max(p0, p1);
+        } else if (edge == -2) {
+          key[1] = vert;
+        }
+      }
+
+      if (key.y == idMissProp && key.z >= 0) {
+        // only key.x/key.z matters
+        auto &entry = propMissIdx[key.x][key.z];
+        if (entry >= 0) {
+          outR.meshRelation_.triProperties[tri][i] = entry;
+          continue;
+        }
+        entry = idx;
+      } else {
+        auto &bin = propIdx[key.y];
+        bool bFound = false;
+        for (const auto &b : bin) {
+          if (b.first == ivec3(key.x, key.z, key.w)) {
+            bFound = true;
+            outR.meshRelation_.triProperties[tri][i] = b.second;
+            break;
+          }
+        }
+        if (bFound) continue;
+        bin.push_back(std::make_pair(ivec3(key.x, key.z, key.w), idx));
+      }
+
+      outR.meshRelation_.triProperties[tri][i] = idx++;
+      for (int p = 0; p < numProp; ++p) {
+        if (p < oldNumProp) {
+          vec3 oldProps;
+          for (const int j : {0, 1, 2})
+            oldProps[j] = properties[oldNumProp * triProp[j] + p];
+          outR.meshRelation_.properties.push_back(la::dot(uvw, oldProps));
+        } else {
+          outR.meshRelation_.properties.push_back(0);
+        }
+      }
+    }
+  }
+}
+}  // namespace
+
+namespace manifold {
+
+Manifold::Impl Boolean3::Result(OpType op) const {
+#ifdef MANIFOLD_DEBUG
+  Timer assemble;
+  assemble.Start();
+#endif
+
+  DEBUG_ASSERT((expandP_ > 0) == (op == OpType::Add), logicErr,
+               "Result op type not compatible with constructor op type.");
+  const int c1 = op == OpType::Intersect ? 0 : 1;
+  const int c2 = op == OpType::Add ? 1 : 0;
+  const int c3 = op == OpType::Intersect ? 1 : -1;
+
+  if (inP_.status_ != Manifold::Error::NoError) {
+    auto impl = Manifold::Impl();
+    impl.status_ = inP_.status_;
+    return impl;
+  }
+  if (inQ_.status_ != Manifold::Error::NoError) {
+    auto impl = Manifold::Impl();
+    impl.status_ = inQ_.status_;
+    return impl;
+  }
+
+  if (inP_.IsEmpty()) {
+    if (!inQ_.IsEmpty() && op == OpType::Add) {
+      return inQ_;
+    }
+    return Manifold::Impl();
+  } else if (inQ_.IsEmpty()) {
+    if (op == OpType::Intersect) {
+      return Manifold::Impl();
+    }
+    return inP_;
+  }
+
+  const bool invertQ = op == OpType::Subtract;
+
+  // Convert winding numbers to inclusion values based on operation type.
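+  // Roughly, the transforms below give i03 = c1 + c3 * w03 and
+  // i30 = c2 + c3 * w30, so:
+  //   Add:       i03 = 1 - w03, i30 = 1 - w30 (keep each outside the other)
+  //   Subtract:  i03 = 1 - w03, i30 = -w30    (keep P outside Q; Q reversed)
+  //   Intersect: i03 = w03,     i30 = w30     (keep each inside the other)
+  // i12 and i21 get the same sign flip via c3.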
+  Vec<int> i12(x12_.size());
+  Vec<int> i21(x21_.size());
+  Vec<int> i03(w03_.size());
+  Vec<int> i30(w30_.size());
+
+  transform(x12_.begin(), x12_.end(), i12.begin(),
+            [c3](int v) { return c3 * v; });
+  transform(x21_.begin(), x21_.end(), i21.begin(),
+            [c3](int v) { return c3 * v; });
+  transform(w03_.begin(), w03_.end(), i03.begin(),
+            [c1, c3](int v) { return c1 + c3 * v; });
+  transform(w30_.begin(), w30_.end(), i30.begin(),
+            [c2, c3](int v) { return c2 + c3 * v; });
+
+  Vec<int> vP2R(inP_.NumVert());
+  exclusive_scan(i03.begin(), i03.end(), vP2R.begin(), 0, AbsSum());
+  int numVertR = AbsSum()(vP2R.back(), i03.back());
+  const int nPv = numVertR;
+
+  Vec<int> vQ2R(inQ_.NumVert());
+  exclusive_scan(i30.begin(), i30.end(), vQ2R.begin(), numVertR, AbsSum());
+  numVertR = AbsSum()(vQ2R.back(), i30.back());
+  const int nQv = numVertR - nPv;
+
+  Vec<int> v12R(v12_.size());
+  if (v12_.size() > 0) {
+    exclusive_scan(i12.begin(), i12.end(), v12R.begin(), numVertR, AbsSum());
+    numVertR = AbsSum()(v12R.back(), i12.back());
+  }
+  const int n12 = numVertR - nPv - nQv;
+
+  Vec<int> v21R(v21_.size());
+  if (v21_.size() > 0) {
+    exclusive_scan(i21.begin(), i21.end(), v21R.begin(), numVertR, AbsSum());
+    numVertR = AbsSum()(v21R.back(), i21.back());
+  }
+  const int n21 = numVertR - nPv - nQv - n12;
+
+  // Create the output Manifold
+  Manifold::Impl outR;
+
+  if (numVertR == 0) return outR;
+
+  outR.epsilon_ = std::max(inP_.epsilon_, inQ_.epsilon_);
+  outR.tolerance_ = std::max(inP_.tolerance_, inQ_.tolerance_);
+
+  outR.vertPos_.resize(numVertR);
+  // Add vertices, duplicating for inclusion numbers not in [-1, 1].
+  // Retained vertices from P and Q:
+  for_each_n(autoPolicy(inP_.NumVert(), 1e4), countAt(0), inP_.NumVert(),
+             DuplicateVerts({outR.vertPos_, i03, vP2R, inP_.vertPos_}));
+  for_each_n(autoPolicy(inQ_.NumVert(), 1e4), countAt(0), inQ_.NumVert(),
+             DuplicateVerts({outR.vertPos_, i30, vQ2R, inQ_.vertPos_}));
+  // New vertices created from intersections:
+  for_each_n(autoPolicy(i12.size(), 1e4), countAt(0), i12.size(),
+             DuplicateVerts({outR.vertPos_, i12, v12R, v12_}));
+  for_each_n(autoPolicy(i21.size(), 1e4), countAt(0), i21.size(),
+             DuplicateVerts({outR.vertPos_, i21, v21R, v21_}));
+
+  PRINT(nPv << " verts from inP");
+  PRINT(nQv << " verts from inQ");
+  PRINT(n12 << " new verts from edgesP -> facesQ");
+  PRINT(n21 << " new verts from facesP -> edgesQ");
+
+  // Build up new polygonal faces from triangle intersections. At this point the
+  // calculation switches from parallel to serial.
+
+  // Level 3
+
+  // This key is the forward halfedge index of P or Q. Only includes intersected
+  // edges.
+  concurrent_map<int, std::vector<EdgePos>> edgesP, edgesQ;
+  // This key is the face index of <P, Q>
+  concurrent_map<std::pair<int, int>, std::vector<EdgePos>> edgesNew;
+
+  AddNewEdgeVerts(edgesP, edgesNew, p1q2_, i12, v12R, inP_.halfedge_, true);
+  AddNewEdgeVerts(edgesQ, edgesNew, p2q1_, i21, v21R, inQ_.halfedge_, false);
+
+  v12R.clear();
+  v21R.clear();
+
+  // Level 4
+  Vec<int> faceEdge;
+  Vec<int> facePQ2R;
+  std::tie(faceEdge, facePQ2R) =
+      SizeOutput(outR, inP_, inQ_, i03, i30, i12, i21, p1q2_, p2q1_, invertQ);
+
+  i12.clear();
+  i21.clear();
+
+  // This gets incremented for each halfedge that's added to a face so that the
+  // next one knows where to slot in.
+  Vec<int> facePtrR = faceEdge;
+  // Intersected halfedges are marked false.
+  Vec<char> wholeHalfedgeP(inP_.halfedge_.size(), true);
+  Vec<char> wholeHalfedgeQ(inQ_.halfedge_.size(), true);
+  // The halfedgeRef contains the data that will become triRef once the faces
+  // are triangulated.
+  Vec<TriRef> halfedgeRef(2 * outR.NumEdge());
+
+  AppendPartialEdges(outR, wholeHalfedgeP, facePtrR, edgesP, halfedgeRef, inP_,
+                     i03, vP2R, facePQ2R.begin(), true);
+  AppendPartialEdges(outR, wholeHalfedgeQ, facePtrR, edgesQ, halfedgeRef, inQ_,
+                     i30, vQ2R, facePQ2R.begin() + inP_.NumTri(), false);
+
+  edgesP.clear();
+  edgesQ.clear();
+
+  AppendNewEdges(outR, facePtrR, edgesNew, halfedgeRef, facePQ2R,
+                 inP_.NumTri());
+
+  edgesNew.clear();
+
+  AppendWholeEdges(outR, facePtrR, halfedgeRef, inP_, wholeHalfedgeP, i03, vP2R,
+                   facePQ2R.cview(0, inP_.NumTri()), true);
+  AppendWholeEdges(outR, facePtrR, halfedgeRef, inQ_, wholeHalfedgeQ, i30, vQ2R,
+                   facePQ2R.cview(inP_.NumTri(), inQ_.NumTri()), false);
+
+  wholeHalfedgeP.clear();
+  wholeHalfedgeQ.clear();
+  facePtrR.clear();
+  facePQ2R.clear();
+  i03.clear();
+  i30.clear();
+  vP2R.clear();
+  vQ2R.clear();
+
+#ifdef MANIFOLD_DEBUG
+  assemble.Stop();
+  Timer triangulate;
+  triangulate.Start();
+#endif
+
+  // Level 6
+
+  if (ManifoldParams().intermediateChecks)
+    DEBUG_ASSERT(outR.IsManifold(), logicErr, "polygon mesh is not manifold!");
+
+  outR.Face2Tri(faceEdge, halfedgeRef);
+  halfedgeRef.clear();
+  faceEdge.clear();
+
+#ifdef MANIFOLD_DEBUG
+  triangulate.Stop();
+  Timer simplify;
+  simplify.Start();
+#endif
+
+  if (ManifoldParams().intermediateChecks)
+    DEBUG_ASSERT(outR.IsManifold(), logicErr,
+                 "triangulated mesh is not manifold!");
+
+  CreateProperties(outR, inP_, inQ_);
+
+  UpdateReference(outR, inP_, inQ_, invertQ);
+
+  outR.SimplifyTopology();
+
+  if (ManifoldParams().intermediateChecks)
+    DEBUG_ASSERT(outR.Is2Manifold(), logicErr,
+                 "simplified mesh is not 2-manifold!");
+
+#ifdef MANIFOLD_DEBUG
+  simplify.Stop();
+  Timer sort;
+  sort.Start();
+#endif
+
+  outR.Finish();
+  outR.IncrementMeshIDs();
+
+#ifdef MANIFOLD_DEBUG
+  sort.Stop();
+  if (ManifoldParams().verbose) {
+    assemble.Print("Assembly");
+    triangulate.Print("Triangulation");
+    simplify.Print("Simplification");
+    sort.Print("Sorting");
+    std::cout << outR.NumVert() << " verts and " << outR.NumTri() << " tris"
+              << std::endl;
+  }
+#endif
+
+  return outR;
+}
+
+}  // namespace manifold

+ 382 - 0
thirdparty/manifold/src/collider.h

@@ -0,0 +1,382 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "./parallel.h"
+#include "./sparse.h"
+#include "./utils.h"
+#include "./vec.h"
+#include "manifold/common.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#if (MANIFOLD_PAR == 1)
+#include <tbb/combinable.h>
+#endif
+
+namespace manifold {
+
+namespace collider_internal {
+// Adjustable parameters
+constexpr int kInitialLength = 128;
+constexpr int kLengthMultiple = 4;
+constexpr int kSequentialThreshold = 512;
+// Fundamental constants
+constexpr int kRoot = 1;
+
+#ifdef _MSC_VER
+
+#ifndef _WINDEF_
+typedef unsigned long DWORD;
+#endif
+
+uint32_t inline ctz(uint32_t value) {
+  DWORD trailing_zero = 0;
+
+  if (_BitScanForward(&trailing_zero, value)) {
+    return trailing_zero;
+  } else {
+    // ctz(0) is undefined; returning 32 is a safer choice than 0.
+    return 32;
+  }
+}
+
+uint32_t inline clz(uint32_t value) {
+  DWORD leading_zero = 0;
+
+  if (_BitScanReverse(&leading_zero, value)) {
+    return 31 - leading_zero;
+  } else {
+    // Same remarks as above
+    return 32;
+  }
+}
+#endif
+
+constexpr inline bool IsLeaf(int node) { return node % 2 == 0; }
+constexpr inline bool IsInternal(int node) { return node % 2 == 1; }
+constexpr inline int Node2Internal(int node) { return (node - 1) / 2; }
+constexpr inline int Internal2Node(int internal) { return internal * 2 + 1; }
+constexpr inline int Node2Leaf(int node) { return node / 2; }
+constexpr inline int Leaf2Node(int leaf) { return leaf * 2; }
+
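+// Builds a binary radix tree over the Morton-sorted leaves: each internal
+// node covers the contiguous range of leaves sharing a common Morton-code
+// prefix (ties broken by index), and every internal node can be computed
+// independently, in the spirit of Karras-style parallel BVH construction.
+// Even node indices are leaves, odd indices are internal, and the root is
+// node 1 (see the helpers above).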
+struct CreateRadixTree {
+  VecView<int> nodeParent_;
+  VecView<std::pair<int, int>> internalChildren_;
+  const VecView<const uint32_t> leafMorton_;
+
+  int PrefixLength(uint32_t a, uint32_t b) const {
+// count-leading-zeros is used to find the number of identical highest-order
+// bits
+#ifdef _MSC_VER
+    // return __lzcnt(a ^ b);
+    return clz(a ^ b);
+#else
+    return __builtin_clz(a ^ b);
+#endif
+  }
+
+  int PrefixLength(int i, int j) const {
+    if (j < 0 || j >= static_cast<int>(leafMorton_.size())) {
+      return -1;
+    } else {
+      int out;
+      if (leafMorton_[i] == leafMorton_[j])
+        // use index to disambiguate
+        out = 32 +
+              PrefixLength(static_cast<uint32_t>(i), static_cast<uint32_t>(j));
+      else
+        out = PrefixLength(leafMorton_[i], leafMorton_[j]);
+      return out;
+    }
+  }
+
+  int RangeEnd(int i) const {
+    // Determine direction of range (+1 or -1)
+    int dir = PrefixLength(i, i + 1) - PrefixLength(i, i - 1);
+    dir = (dir > 0) - (dir < 0);
+    // Compute conservative range length with exponential increase
+    int commonPrefix = PrefixLength(i, i - dir);
+    int max_length = kInitialLength;
+    while (PrefixLength(i, i + dir * max_length) > commonPrefix)
+      max_length *= kLengthMultiple;
+    // Compute precise range length with binary search
+    int length = 0;
+    for (int step = max_length / 2; step > 0; step /= 2) {
+      if (PrefixLength(i, i + dir * (length + step)) > commonPrefix)
+        length += step;
+    }
+    return i + dir * length;
+  }
+
+  int FindSplit(int first, int last) const {
+    int commonPrefix = PrefixLength(first, last);
+    // Find the furthest object that shares more than commonPrefix bits with the
+    // first one, using binary search.
+    int split = first;
+    int step = last - first;
+    do {
+      step = (step + 1) >> 1;  // divide by 2, rounding up
+      int newSplit = split + step;
+      if (newSplit < last) {
+        int splitPrefix = PrefixLength(first, newSplit);
+        if (splitPrefix > commonPrefix) split = newSplit;
+      }
+    } while (step > 1);
+    return split;
+  }
+
+  void operator()(int internal) {
+    int first = internal;
+    // Find the range of objects with a common prefix
+    int last = RangeEnd(first);
+    if (first > last) std::swap(first, last);
+    // Determine where the next-highest difference occurs
+    int split = FindSplit(first, last);
+    int child1 = split == first ? Leaf2Node(split) : Internal2Node(split);
+    ++split;
+    int child2 = split == last ? Leaf2Node(split) : Internal2Node(split);
+    // Record parent_child relationships.
+    internalChildren_[internal].first = child1;
+    internalChildren_[internal].second = child2;
+    int node = Internal2Node(internal);
+    nodeParent_[child1] = node;
+    nodeParent_[child2] = node;
+  }
+};
+
+template <typename T, const bool selfCollision, typename Recorder>
+struct FindCollision {
+  VecView<const T> queries;
+  VecView<const Box> nodeBBox_;
+  VecView<const std::pair<int, int>> internalChildren_;
+  Recorder recorder;
+
+  inline int RecordCollision(int node, const int queryIdx, SparseIndices& ind) {
+    bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
+    if (overlaps && IsLeaf(node)) {
+      int leafIdx = Node2Leaf(node);
+      if (!selfCollision || leafIdx != queryIdx) {
+        recorder.record(queryIdx, leafIdx, ind);
+      }
+    }
+    return overlaps && IsInternal(node);  // Should traverse into node
+  }
+
+  void operator()(const int queryIdx) {
+    // stack cannot overflow because radix tree has max depth 30 (Morton code) +
+    // 32 (index).
+    int stack[64];
+    int top = -1;
+    // Depth-first search
+    int node = kRoot;
+    SparseIndices& ind = recorder.local();
+    while (1) {
+      int internal = Node2Internal(node);
+      int child1 = internalChildren_[internal].first;
+      int child2 = internalChildren_[internal].second;
+
+      int traverse1 = RecordCollision(child1, queryIdx, ind);
+      int traverse2 = RecordCollision(child2, queryIdx, ind);
+
+      if (!traverse1 && !traverse2) {
+        if (top < 0) break;   // done
+        node = stack[top--];  // get a saved node
+      } else {
+        node = traverse1 ? child1 : child2;  // go here next
+        if (traverse1 && traverse2) {
+          stack[++top] = child2;  // save the other for later
+        }
+      }
+    }
+  }
+};
+
+template <const bool inverted>
+struct SeqCollisionRecorder {
+  SparseIndices& queryTri_;
+  inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
+    if (inverted)
+      ind.Add(leafIdx, queryIdx);
+    else
+      ind.Add(queryIdx, leafIdx);
+  }
+  SparseIndices& local() { return queryTri_; }
+};
+
+#if (MANIFOLD_PAR == 1)
+template <const bool inverted>
+struct ParCollisionRecorder {
+  tbb::combinable<SparseIndices>& store;
+  inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
+    // Add may invoke something in parallel and may return on another thread,
+    // which makes thread-local storage unsafe, so we explicitly forbid
+    // parallelization by passing a flag.
+    if (inverted)
+      ind.Add(leafIdx, queryIdx, true);
+    else
+      ind.Add(queryIdx, leafIdx, true);
+  }
+  SparseIndices& local() { return store.local(); }
+};
+#endif
+
+struct BuildInternalBoxes {
+  VecView<Box> nodeBBox_;
+  VecView<int> counter_;
+  const VecView<int> nodeParent_;
+  const VecView<std::pair<int, int>> internalChildren_;
+
+  void operator()(int leaf) {
+    int node = Leaf2Node(leaf);
+    do {
+      node = nodeParent_[node];
+      int internal = Node2Internal(node);
+      if (AtomicAdd(counter_[internal], 1) == 0) return;
+      nodeBBox_[node] = nodeBBox_[internalChildren_[internal].first].Union(
+          nodeBBox_[internalChildren_[internal].second]);
+    } while (node != kRoot);
+  }
+};
+
+struct TransformBox {
+  const mat3x4 transform;
+  void operator()(Box& box) { box = box.Transform(transform); }
+};
+
+constexpr inline uint32_t SpreadBits3(uint32_t v) {
+  v = 0xFF0000FFu & (v * 0x00010001u);
+  v = 0x0F00F00Fu & (v * 0x00000101u);
+  v = 0xC30C30C3u & (v * 0x00000011u);
+  v = 0x49249249u & (v * 0x00000005u);
+  return v;
+}
+}  // namespace collider_internal
+
+/** @ingroup Private */
+class Collider {
+ public:
+  Collider() {};
+
+  Collider(const VecView<const Box>& leafBB,
+           const VecView<const uint32_t>& leafMorton) {
+    ZoneScoped;
+    DEBUG_ASSERT(leafBB.size() == leafMorton.size(), userErr,
+                 "vectors must be the same length");
+    int num_nodes = 2 * leafBB.size() - 1;
+    // assign and allocate members
+    nodeBBox_.resize(num_nodes);
+    nodeParent_.resize(num_nodes, -1);
+    internalChildren_.resize(leafBB.size() - 1, std::make_pair(-1, -1));
+    // organize tree
+    for_each_n(autoPolicy(NumInternal(), 1e4), countAt(0), NumInternal(),
+               collider_internal::CreateRadixTree(
+                   {nodeParent_, internalChildren_, leafMorton}));
+    UpdateBoxes(leafBB);
+  }
+
+  bool Transform(mat3x4 transform) {
+    ZoneScoped;
+    bool axisAligned = true;
+    for (int row : {0, 1, 2}) {
+      int count = 0;
+      for (int col : {0, 1, 2}) {
+        if (transform[col][row] == 0.0) ++count;
+      }
+      if (count != 2) axisAligned = false;
+    }
+    if (axisAligned) {
+      for_each(autoPolicy(nodeBBox_.size(), 1e5), nodeBBox_.begin(),
+               nodeBBox_.end(),
+               [transform](Box& box) { box = box.Transform(transform); });
+    }
+    return axisAligned;
+  }
+
+  void UpdateBoxes(const VecView<const Box>& leafBB) {
+    ZoneScoped;
+    DEBUG_ASSERT(leafBB.size() == NumLeaves(), userErr,
+                 "must have the same number of updated boxes as original");
+    // copy in leaf node Boxes
+    auto leaves = StridedRange(nodeBBox_.begin(), nodeBBox_.end(), 2);
+    copy(leafBB.cbegin(), leafBB.cend(), leaves.begin());
+    // create global counters
+    Vec<int> counter(NumInternal(), 0);
+    // kernel over leaves to save internal Boxes
+    for_each_n(autoPolicy(NumInternal(), 1e3), countAt(0), NumLeaves(),
+               collider_internal::BuildInternalBoxes(
+                   {nodeBBox_, counter, nodeParent_, internalChildren_}));
+  }
+
+  template <const bool selfCollision = false, const bool inverted = false,
+            typename T>
+  void Collisions(const VecView<const T>& queriesIn,
+                  SparseIndices& queryTri) const {
+    ZoneScoped;
+    using collider_internal::FindCollision;
+#if (MANIFOLD_PAR == 1)
+    if (queriesIn.size() > collider_internal::kSequentialThreshold) {
+      tbb::combinable<SparseIndices> store;
+      for_each_n(
+          ExecutionPolicy::Par, countAt(0), queriesIn.size(),
+          FindCollision<T, selfCollision,
+                        collider_internal::ParCollisionRecorder<inverted>>{
+              queriesIn, nodeBBox_, internalChildren_, {store}});
+
+      std::vector<SparseIndices> tmp;
+      store.combine_each(
+          [&](SparseIndices& ind) { tmp.emplace_back(std::move(ind)); });
+      queryTri.FromIndices(tmp);
+      return;
+    }
+#endif
+    for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
+               FindCollision<T, selfCollision,
+                             collider_internal::SeqCollisionRecorder<inverted>>{
+                   queriesIn, nodeBBox_, internalChildren_, {queryTri}});
+  }
+
+  template <const bool selfCollision = false, const bool inverted = false,
+            typename T>
+  SparseIndices Collisions(const VecView<const T>& queriesIn) const {
+    SparseIndices result;
+    Collisions<selfCollision, inverted, T>(queriesIn, result);
+    return result;
+  }
+
+  static uint32_t MortonCode(vec3 position, Box bBox) {
+    using collider_internal::SpreadBits3;
+    vec3 xyz = (position - bBox.min) / (bBox.max - bBox.min);
+    xyz = la::min(vec3(1023.0), la::max(vec3(0.0), 1024.0 * xyz));
+    uint32_t x = SpreadBits3(static_cast<uint32_t>(xyz.x));
+    uint32_t y = SpreadBits3(static_cast<uint32_t>(xyz.y));
+    uint32_t z = SpreadBits3(static_cast<uint32_t>(xyz.z));
+    return x * 4 + y * 2 + z;
+  }
+
+ private:
+  Vec<Box> nodeBBox_;
+  Vec<int> nodeParent_;
+  // even nodes are leaves, odd nodes are internal, root is 1
+  Vec<std::pair<int, int>> internalChildren_;
+
+  size_t NumInternal() const { return internalChildren_.size(); };
+  size_t NumLeaves() const {
+    return internalChildren_.empty() ? 0 : (NumInternal() + 1);
+  };
+};
+
+}  // namespace manifold
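For orientation, here is a minimal sketch of the conventional "magic bits" 10-bit spread that MortonCode() above relies on. It is assumed to be equivalent in effect to the vendored collider_internal::SpreadBits3 (defined earlier in this file), not a copy of it; the helper names are illustrative only.

// Sketch only: quantized coordinates get their bits spaced three apart so the
// x, y and z bits can be interleaved into one 30-bit Morton code, mirroring
// MortonCode()'s x * 4 + y * 2 + z.
#include <cstdint>

static uint32_t SpreadBits3Sketch(uint32_t v) {
  v &= 0x000003FFu;                  // keep 10 bits
  v = (v | (v << 16)) & 0x030000FFu;
  v = (v | (v << 8)) & 0x0300F00Fu;
  v = (v | (v << 4)) & 0x030C30C3u;
  v = (v | (v << 2)) & 0x09249249u;  // bits now sit at positions 0, 3, 6, ...
  return v;
}

static uint32_t MortonCodeSketch(uint32_t x, uint32_t y, uint32_t z) {
  // x lands in the highest slot of each bit triple, as in Collider::MortonCode.
  return SpreadBits3Sketch(x) * 4 + SpreadBits3Sketch(y) * 2 +
         SpreadBits3Sketch(z);
}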

+ 503 - 0
thirdparty/manifold/src/constructors.cpp

@@ -0,0 +1,503 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./csg_tree.h"
+#include "./impl.h"
+#include "./parallel.h"
+#include "manifold/polygon.h"
+
+namespace manifold {
+/**
+ * Constructs a smooth version of the input mesh by creating tangents; this
+ * method will throw if you have supplied tangents with your mesh already. The
+ * actual triangle resolution is unchanged; use the Refine() method to
+ * interpolate to a higher-resolution curve.
+ *
+ * By default, every edge is calculated for maximum smoothness (very much
+ * approximately), attempting to minimize the maximum mean curvature magnitude.
+ * No higher-order derivatives are considered, as the interpolation is
+ * independent per triangle, only sharing constraints on their boundaries.
+ *
+ * @param meshGL input MeshGL.
+ * @param sharpenedEdges If desired, you can supply a vector of sharpened
+ * halfedges, which should in general be a small subset of all halfedges. Order
+ * of entries doesn't matter, as each one specifies the desired smoothness
+ * (between zero and one, with one the default for all unspecified halfedges)
+ * and the halfedge index (3 * triangle index + [0,1,2] where 0 is the edge
+ * between triVert 0 and 1, etc).
+ *
+ * At a smoothness value of zero, a sharp crease is made. The smoothness is
+ * interpolated along each edge, so the specified value should be thought of as
+ * an average. Where exactly two sharpened edges meet at a vertex, their
+ * tangents are rotated to be colinear so that the sharpened edge can be
+ * continuous. Vertices with only one sharpened edge are completely smooth,
+ * allowing sharpened edges to smoothly vanish at termination. A single vertex
+ * can be sharpened by sharpening all edges that are incident on it, allowing
+ * cones to be formed.
+ */
+Manifold Manifold::Smooth(const MeshGL& meshGL,
+                          const std::vector<Smoothness>& sharpenedEdges) {
+  DEBUG_ASSERT(meshGL.halfedgeTangent.empty(), std::runtime_error,
+               "when supplying tangents, the normal constructor should be used "
+               "rather than Smooth().");
+
+  std::shared_ptr<Impl> impl = std::make_shared<Impl>(meshGL);
+  impl->CreateTangents(impl->UpdateSharpenedEdges(sharpenedEdges));
+  return Manifold(impl);
+}
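A hedged usage sketch for Smooth(): it assumes Manifold::GetMeshGL() is available to export a MeshGL and that Refine() behaves as the comment above describes; neither appears in this excerpt, and the helper name is illustrative.

// Sketch only; GetMeshGL() and Refine() are assumptions, not shown here.
#include "manifold/manifold.h"

using namespace manifold;

Manifold SmoothedSphereSketch() {
  MeshGL coarse = Manifold::Sphere(1.0, 8).GetMeshGL();
  // An empty sharpenedEdges vector keeps every edge maximally smooth; adding
  // Smoothness entries (halfedge index plus a 0..1 value, per the comment
  // above) would introduce creases.
  Manifold smooth = Manifold::Smooth(coarse, {});
  // smooth = smooth.Refine(4);  // interpolate to a higher resolution
  return smooth;
}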
+
+/**
+ * Constructs a smooth version of the input mesh by creating tangents; this
+ * method will throw if you have supplied tangents with your mesh already. The
+ * actual triangle resolution is unchanged; use the Refine() method to
+ * interpolate to a higher-resolution curve.
+ *
+ * By default, every edge is calculated for maximum smoothness (very much
+ * approximately), attempting to minimize the maximum mean curvature magnitude.
+ * No higher-order derivatives are considered, as the interpolation is
+ * independent per triangle, only sharing constraints on their boundaries.
+ *
+ * @param meshGL64 input MeshGL64.
+ * @param sharpenedEdges If desired, you can supply a vector of sharpened
+ * halfedges, which should in general be a small subset of all halfedges. Order
+ * of entries doesn't matter, as each one specifies the desired smoothness
+ * (between zero and one, with one the default for all unspecified halfedges)
+ * and the halfedge index (3 * triangle index + [0,1,2] where 0 is the edge
+ * between triVert 0 and 1, etc).
+ *
+ * At a smoothness value of zero, a sharp crease is made. The smoothness is
+ * interpolated along each edge, so the specified value should be thought of as
+ * an average. Where exactly two sharpened edges meet at a vertex, their
+ * tangents are rotated to be colinear so that the sharpened edge can be
+ * continuous. Vertices with only one sharpened edge are completely smooth,
+ * allowing sharpened edges to smoothly vanish at termination. A single vertex
+ * can be sharpened by sharpening all edges that are incident on it, allowing
+ * cones to be formed.
+ */
+Manifold Manifold::Smooth(const MeshGL64& meshGL64,
+                          const std::vector<Smoothness>& sharpenedEdges) {
+  DEBUG_ASSERT(meshGL64.halfedgeTangent.empty(), std::runtime_error,
+               "when supplying tangents, the normal constructor should be used "
+               "rather than Smooth().");
+
+  std::shared_ptr<Impl> impl = std::make_shared<Impl>(meshGL64);
+  impl->CreateTangents(impl->UpdateSharpenedEdges(sharpenedEdges));
+  return Manifold(impl);
+}
+
+/**
+ * Constructs a tetrahedron centered at the origin with one vertex at (1,1,1)
+ * and the rest at similarly symmetric points.
+ */
+Manifold Manifold::Tetrahedron() {
+  return Manifold(std::make_shared<Impl>(Impl::Shape::Tetrahedron));
+}
+
+/**
+ * Constructs a unit cube (edge lengths all one), by default in the first
+ * octant, touching the origin. If any dimensions in size are negative, or if
+ * all are zero, an empty Manifold will be returned.
+ *
+ * @param size The X, Y, and Z dimensions of the box.
+ * @param center Set to true to shift the center to the origin.
+ */
+Manifold Manifold::Cube(vec3 size, bool center) {
+  if (size.x < 0.0 || size.y < 0.0 || size.z < 0.0 || la::length(size) == 0.) {
+    return Invalid();
+  }
+  mat3x4 m({{size.x, 0.0, 0.0}, {0.0, size.y, 0.0}, {0.0, 0.0, size.z}},
+           center ? (-size / 2.0) : vec3(0.0));
+  return Manifold(std::make_shared<Impl>(Manifold::Impl::Shape::Cube, m));
+}
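A minimal usage sketch for the primitive constructors, relying only on the signatures shown above; the function name is illustrative.

#include "manifold/manifold.h"

using namespace manifold;

void PrimitiveSketch() {
  Manifold tetra = Manifold::Tetrahedron();
  Manifold unitBox = Manifold::Cube(vec3(1.0), false);            // first octant
  Manifold centered = Manifold::Cube(vec3(2.0, 1.0, 0.5), true);  // centered on origin
  Manifold empty = Manifold::Cube(vec3(-1.0, 1.0, 1.0), false);   // negative size -> Invalid()
  (void)tetra; (void)unitBox; (void)centered; (void)empty;
}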
+
+/**
+ * A convenience constructor for the common case of extruding a circle. Can also
+ * form cones if both radii are specified.
+ *
+ * @param height Z-extent
+ * @param radiusLow Radius of bottom circle. Must be positive.
+ * @param radiusHigh Radius of top circle. Can equal zero. Default is equal to
+ * radiusLow.
+ * @param circularSegments How many line segments to use around the circle.
+ * Default is calculated by the static Defaults.
+ * @param center Set to true to shift the center to the origin. Default is
+ * origin at the bottom.
+ */
+Manifold Manifold::Cylinder(double height, double radiusLow, double radiusHigh,
+                            int circularSegments, bool center) {
+  if (height <= 0.0 || radiusLow <= 0.0) {
+    return Invalid();
+  }
+  const double scale = radiusHigh >= 0.0 ? radiusHigh / radiusLow : 1.0;
+  const double radius = fmax(radiusLow, radiusHigh);
+  const int n = circularSegments > 2 ? circularSegments
+                                     : Quality::GetCircularSegments(radius);
+
+  SimplePolygon circle(n);
+  const double dPhi = 360.0 / n;
+  for (int i = 0; i < n; ++i) {
+    circle[i] = {radiusLow * cosd(dPhi * i), radiusLow * sind(dPhi * i)};
+  }
+
+  Manifold cylinder = Manifold::Extrude({circle}, height, 0, 0.0, vec2(scale));
+  if (center)
+    cylinder = cylinder.Translate(vec3(0.0, 0.0, -height / 2.0)).AsOriginal();
+  return cylinder;
+}
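A short sketch covering the cylinder, cone, and frustum cases described above; only the signature shown is assumed.

#include "manifold/manifold.h"

using namespace manifold;

void CylinderSketch() {
  Manifold cylinder = Manifold::Cylinder(2.0, 0.5, 0.5, 32, false);
  Manifold cone = Manifold::Cylinder(2.0, 0.5, 0.0, 32, false);    // radiusHigh == 0
  Manifold frustum = Manifold::Cylinder(2.0, 1.0, 0.25, 0, true);  // segments from Quality defaults
  (void)cylinder; (void)cone; (void)frustum;
}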
+
+/**
+ * Constructs a geodesic sphere of a given radius.
+ *
+ * @param radius Radius of the sphere. Must be positive.
+ * @param circularSegments Number of segments along its
+ * diameter. This number will always be rounded up to the nearest multiple of
+ * four, as this sphere is constructed by refining an octahedron. This means
+ * there is a circle of vertices on each of the three axis planes. Default is
+ * calculated by the static Defaults.
+ */
+Manifold Manifold::Sphere(double radius, int circularSegments) {
+  if (radius <= 0.0) {
+    return Invalid();
+  }
+  int n = circularSegments > 0 ? (circularSegments + 3) / 4
+                               : Quality::GetCircularSegments(radius) / 4;
+  auto pImpl_ = std::make_shared<Impl>(Impl::Shape::Octahedron);
+  pImpl_->Subdivide(
+      [n](vec3 edge, vec4 tangentStart, vec4 tangentEnd) { return n - 1; });
+  for_each_n(autoPolicy(pImpl_->NumVert(), 1e5), pImpl_->vertPos_.begin(),
+             pImpl_->NumVert(), [radius](vec3& v) {
+               v = la::cos(kHalfPi * (1.0 - v));
+               v = radius * la::normalize(v);
+               if (std::isnan(v.x)) v = vec3(0.0);
+             });
+  pImpl_->Finish();
+  // Ignore preceding octahedron.
+  pImpl_->InitializeOriginal();
+  return Manifold(pImpl_);
+}
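A sketch of the segment rounding described above, using only the signature shown.

#include "manifold/manifold.h"

using namespace manifold;

void SphereSketch() {
  Manifold coarse = Manifold::Sphere(1.0, 6);      // rounded up to 8 segments
  Manifold byQuality = Manifold::Sphere(10.0, 0);  // segment count from Quality defaults
  (void)coarse; (void)byQuality;
}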
+
+/**
+ * Constructs a manifold from a set of polygons by extruding them along the
+ * Z-axis.
+ * Note that high twistDegrees with small nDivisions may cause
+ * self-intersection. This is not checked here and it is up to the user to
+ * choose the correct parameters.
+ *
+ * @param crossSection A set of non-overlapping polygons to extrude.
+ * @param height Z-extent of extrusion.
+ * @param nDivisions Number of extra copies of the crossSection to insert into
+ * the shape vertically; especially useful in combination with twistDegrees to
+ * avoid interpolation artifacts. Default is none.
+ * @param twistDegrees Amount to twist the top crossSection relative to the
+ * bottom, interpolated linearly for the divisions in between.
+ * @param scaleTop Amount to scale the top (independently in X and Y). If the
+ * scale is {0, 0}, a pure cone is formed with only a single vertex at the top.
+ * Note that scale is applied after twist.
+ * Default {1, 1}.
+ */
+Manifold Manifold::Extrude(const Polygons& crossSection, double height,
+                           int nDivisions, double twistDegrees, vec2 scaleTop) {
+  ZoneScoped;
+  if (crossSection.size() == 0 || height <= 0.0) {
+    return Invalid();
+  }
+
+  scaleTop.x = std::max(scaleTop.x, 0.0);
+  scaleTop.y = std::max(scaleTop.y, 0.0);
+
+  auto pImpl_ = std::make_shared<Impl>();
+  ++nDivisions;
+  auto& vertPos = pImpl_->vertPos_;
+  Vec<ivec3> triVertsDH;
+  auto& triVerts = triVertsDH;
+  int nCrossSection = 0;
+  bool isCone = scaleTop.x == 0.0 && scaleTop.y == 0.0;
+  size_t idx = 0;
+  PolygonsIdx polygonsIndexed;
+  for (auto& poly : crossSection) {
+    nCrossSection += poly.size();
+    SimplePolygonIdx simpleIndexed;
+    for (const vec2& polyVert : poly) {
+      vertPos.push_back({polyVert.x, polyVert.y, 0.0});
+      simpleIndexed.push_back({polyVert, static_cast<int>(idx++)});
+    }
+    polygonsIndexed.push_back(simpleIndexed);
+  }
+  for (int i = 1; i < nDivisions + 1; ++i) {
+    double alpha = i / double(nDivisions);
+    double phi = alpha * twistDegrees;
+    vec2 scale = la::lerp(vec2(1.0), scaleTop, alpha);
+    mat2 rotation({cosd(phi), sind(phi)}, {-sind(phi), cosd(phi)});
+    mat2 transform = mat2({scale.x, 0.0}, {0.0, scale.y}) * rotation;
+    size_t j = 0;
+    size_t idx = 0;
+    for (const auto& poly : crossSection) {
+      for (size_t vert = 0; vert < poly.size(); ++vert) {
+        size_t offset = idx + nCrossSection * i;
+        size_t thisVert = vert + offset;
+        size_t lastVert = (vert == 0 ? poly.size() : vert) - 1 + offset;
+        if (i == nDivisions && isCone) {
+          triVerts.push_back(ivec3(nCrossSection * i + j,
+                                   lastVert - nCrossSection,
+                                   thisVert - nCrossSection));
+        } else {
+          vec2 pos = transform * poly[vert];
+          vertPos.push_back({pos.x, pos.y, height * alpha});
+          triVerts.push_back(
+              ivec3(thisVert, lastVert, thisVert - nCrossSection));
+          triVerts.push_back(ivec3(lastVert, lastVert - nCrossSection,
+                                   thisVert - nCrossSection));
+        }
+      }
+      ++j;
+      idx += poly.size();
+    }
+  }
+  if (isCone)
+    for (size_t j = 0; j < crossSection.size();
+         ++j)  // Duplicate vertex for Genus
+      vertPos.push_back({0.0, 0.0, height});
+  std::vector<ivec3> top = TriangulateIdx(polygonsIndexed);
+  for (const ivec3& tri : top) {
+    triVerts.push_back({tri[0], tri[2], tri[1]});
+    if (!isCone) triVerts.push_back(tri + nCrossSection * nDivisions);
+  }
+
+  pImpl_->CreateHalfedges(triVertsDH);
+  pImpl_->Finish();
+  pImpl_->InitializeOriginal();
+  pImpl_->CreateFaces();
+  return Manifold(pImpl_);
+}
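A usage sketch for Extrude(), assuming nothing beyond the signature above; the names are illustrative.

#include "manifold/manifold.h"

using namespace manifold;

Manifold TwistedPrismSketch() {
  // A unit square extruded 1 unit up with a 90-degree twist, tapering to half
  // size at the top; the 16 divisions keep the twist from self-intersecting.
  SimplePolygon square = {{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}};
  return Manifold::Extrude({square}, 1.0, 16, 90.0, vec2(0.5, 0.5));
}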
+
+/**
+ * Constructs a manifold from a set of polygons by revolving this cross-section
+ * around its Y-axis and then setting this as the Z-axis of the resulting
+ * manifold. If the polygons cross the Y-axis, only the part on the positive X
+ * side is used. Geometrically valid input will result in geometrically valid
+ * output.
+ *
+ * @param crossSection A set of non-overlapping polygons to revolve.
+ * @param circularSegments Number of segments along its diameter. Default is
+ * calculated by the static Defaults.
+ * @param revolveDegrees Number of degrees to revolve. Default is 360 degrees.
+ */
+Manifold Manifold::Revolve(const Polygons& crossSection, int circularSegments,
+                           double revolveDegrees) {
+  ZoneScoped;
+
+  Polygons polygons;
+  double radius = 0;
+  for (const SimplePolygon& poly : crossSection) {
+    size_t i = 0;
+    while (i < poly.size() && poly[i].x < 0) {
+      ++i;
+    }
+    if (i == poly.size()) {
+      continue;
+    }
+    polygons.push_back({});
+    const size_t start = i;
+    do {
+      if (poly[i].x >= 0) {
+        polygons.back().push_back(poly[i]);
+        radius = std::max(radius, poly[i].x);
+      }
+      const size_t next = i + 1 == poly.size() ? 0 : i + 1;
+      if ((poly[next].x < 0) != (poly[i].x < 0)) {
+        const double y = poly[next].y + poly[next].x *
+                                            (poly[i].y - poly[next].y) /
+                                            (poly[i].x - poly[next].x);
+        polygons.back().push_back({0, y});
+      }
+      i = next;
+    } while (i != start);
+  }
+
+  if (polygons.empty()) {
+    return Invalid();
+  }
+
+  if (revolveDegrees > 360.0) {
+    revolveDegrees = 360.0;
+  }
+  const bool isFullRevolution = revolveDegrees == 360.0;
+
+  const int nDivisions =
+      circularSegments > 2
+          ? circularSegments
+          : Quality::GetCircularSegments(radius) * revolveDegrees / 360;
+
+  auto pImpl_ = std::make_shared<Impl>();
+  auto& vertPos = pImpl_->vertPos_;
+  Vec<ivec3> triVertsDH;
+  auto& triVerts = triVertsDH;
+
+  std::vector<int> startPoses;
+  std::vector<int> endPoses;
+
+  const double dPhi = revolveDegrees / nDivisions;
+  // first and last slice are distinguished if not a full revolution.
+  const int nSlices = isFullRevolution ? nDivisions : nDivisions + 1;
+
+  for (const auto& poly : polygons) {
+    std::size_t nPosVerts = 0;
+    std::size_t nRevolveAxisVerts = 0;
+    for (auto& pt : poly) {
+      if (pt.x > 0) {
+        nPosVerts++;
+      } else {
+        nRevolveAxisVerts++;
+      }
+    }
+
+    for (size_t polyVert = 0; polyVert < poly.size(); ++polyVert) {
+      const size_t startPosIndex = vertPos.size();
+
+      if (!isFullRevolution) startPoses.push_back(startPosIndex);
+
+      const vec2 currPolyVertex = poly[polyVert];
+      const vec2 prevPolyVertex =
+          poly[polyVert == 0 ? poly.size() - 1 : polyVert - 1];
+
+      const int prevStartPosIndex =
+          startPosIndex +
+          (polyVert == 0 ? nRevolveAxisVerts + (nSlices * nPosVerts) : 0) +
+          (prevPolyVertex.x == 0.0 ? -1 : -nSlices);
+
+      for (int slice = 0; slice < nSlices; ++slice) {
+        const double phi = slice * dPhi;
+        if (slice == 0 || currPolyVertex.x > 0) {
+          vertPos.push_back({currPolyVertex.x * cosd(phi),
+                             currPolyVertex.x * sind(phi), currPolyVertex.y});
+        }
+
+        if (isFullRevolution || slice > 0) {
+          const int lastSlice = (slice == 0 ? nDivisions : slice) - 1;
+          if (currPolyVertex.x > 0.0) {
+            triVerts.push_back(ivec3(
+                startPosIndex + slice, startPosIndex + lastSlice,
+                // "Reuse" vertex of first slice if it lies on the revolve axis
+                (prevPolyVertex.x == 0.0 ? prevStartPosIndex
+                                         : prevStartPosIndex + lastSlice)));
+          }
+
+          if (prevPolyVertex.x > 0.0) {
+            triVerts.push_back(
+                ivec3(prevStartPosIndex + lastSlice, prevStartPosIndex + slice,
+                      (currPolyVertex.x == 0.0 ? startPosIndex
+                                               : startPosIndex + slice)));
+          }
+        }
+      }
+      if (!isFullRevolution) endPoses.push_back(vertPos.size() - 1);
+    }
+  }
+
+  // Add front and back triangles if not a full revolution.
+  if (!isFullRevolution) {
+    std::vector<ivec3> frontTriangles = Triangulate(polygons, pImpl_->epsilon_);
+    for (auto& t : frontTriangles) {
+      triVerts.push_back({startPoses[t.x], startPoses[t.y], startPoses[t.z]});
+    }
+
+    for (auto& t : frontTriangles) {
+      triVerts.push_back({endPoses[t.z], endPoses[t.y], endPoses[t.x]});
+    }
+  }
+
+  pImpl_->CreateHalfedges(triVertsDH);
+  pImpl_->Finish();
+  pImpl_->InitializeOriginal();
+  pImpl_->CreateFaces();
+  return Manifold(pImpl_);
+}
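A usage sketch for Revolve(); the helper name is illustrative.

#include "manifold/manifold.h"

using namespace manifold;

void RevolveSketch() {
  // A square profile offset from the Y-axis revolves into a ring; a partial
  // revolve produces a wedge whose flat ends are capped by the front and back
  // triangles added above.
  SimplePolygon profile = {{1.0, 0.0}, {2.0, 0.0}, {2.0, 1.0}, {1.0, 1.0}};
  Manifold ring = Manifold::Revolve({profile}, 48, 360.0);
  Manifold wedge = Manifold::Revolve({profile}, 48, 120.0);
  (void)ring; (void)wedge;
}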
+
+/**
+ * Constructs a new manifold from a vector of other manifolds. This is a purely
+ * topological operation, so care should be taken to avoid creating
+ * overlapping results. It is the inverse operation of Decompose().
+ *
+ * @param manifolds A vector of Manifolds to lazy-union together.
+ */
+Manifold Manifold::Compose(const std::vector<Manifold>& manifolds) {
+  std::vector<std::shared_ptr<CsgLeafNode>> children;
+  for (const auto& manifold : manifolds) {
+    children.push_back(manifold.pNode_->ToLeafNode());
+  }
+  return Manifold(std::make_shared<Impl>(CsgLeafNode::Compose(children)));
+}
+
+/**
+ * This operation returns a vector of Manifolds that are topologically
+ * disconnected. If everything is connected, the vector is length one,
+ * containing a copy of the original. It is the inverse operation of Compose().
+ */
+std::vector<Manifold> Manifold::Decompose() const {
+  ZoneScoped;
+  UnionFind<> uf(NumVert());
+  // Graph graph;
+  auto pImpl_ = GetCsgLeafNode().GetImpl();
+  for (const Halfedge& halfedge : pImpl_->halfedge_) {
+    if (halfedge.IsForward()) uf.unionXY(halfedge.startVert, halfedge.endVert);
+  }
+  std::vector<int> componentIndices;
+  const int numComponents = uf.connectedComponents(componentIndices);
+
+  if (numComponents == 1) {
+    std::vector<Manifold> meshes(1);
+    meshes[0] = *this;
+    return meshes;
+  }
+  Vec<int> vertLabel(componentIndices);
+
+  const int numVert = NumVert();
+  std::vector<Manifold> meshes;
+  for (int i = 0; i < numComponents; ++i) {
+    auto impl = std::make_shared<Impl>();
+    // inherit original object's precision
+    impl->epsilon_ = pImpl_->epsilon_;
+    impl->tolerance_ = pImpl_->tolerance_;
+
+    Vec<int> vertNew2Old(numVert);
+    const int nVert =
+        copy_if(countAt(0), countAt(numVert), vertNew2Old.begin(),
+                [i, &vertLabel](int v) { return vertLabel[v] == i; }) -
+        vertNew2Old.begin();
+    impl->vertPos_.resize(nVert);
+    vertNew2Old.resize(nVert);
+    gather(vertNew2Old.begin(), vertNew2Old.end(), pImpl_->vertPos_.begin(),
+           impl->vertPos_.begin());
+
+    Vec<int> faceNew2Old(NumTri());
+    const auto& halfedge = pImpl_->halfedge_;
+    const int nFace =
+        copy_if(countAt(0_uz), countAt(NumTri()), faceNew2Old.begin(),
+                [i, &vertLabel, &halfedge](int face) {
+                  return vertLabel[halfedge[3 * face].startVert] == i;
+                }) -
+        faceNew2Old.begin();
+
+    if (nFace == 0) continue;
+    faceNew2Old.resize(nFace);
+
+    impl->GatherFaces(*pImpl_, faceNew2Old);
+    impl->ReindexVerts(vertNew2Old, pImpl_->NumVert());
+    impl->Finish();
+
+    meshes.push_back(Manifold(impl));
+  }
+  return meshes;
+}
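A round-trip sketch for Compose() and Decompose(), using only the API shown in this file; the function name is illustrative.

#include <vector>

#include "manifold/manifold.h"

using namespace manifold;

void ComposeDecomposeSketch() {
  Manifold a = Manifold::Cube(vec3(1.0), false);
  Manifold b = Manifold::Cube(vec3(1.0), false).Translate(vec3(2.0, 0.0, 0.0));
  Manifold both = Manifold::Compose({a, b});       // lazy union of disjoint parts
  std::vector<Manifold> parts = both.Decompose();  // two connected components
  (void)parts;
}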
+}  // namespace manifold

+ 789 - 0
thirdparty/manifold/src/cross_section/cross_section.cpp

@@ -0,0 +1,789 @@
+// Copyright 2023 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "manifold/cross_section.h"
+
+#include "../utils.h"
+#include "clipper2/clipper.core.h"
+#include "clipper2/clipper.h"
+#include "clipper2/clipper.offset.h"
+
+namespace C2 = Clipper2Lib;
+
+using namespace manifold;
+
+namespace manifold {
+struct PathImpl {
+  PathImpl(const C2::PathsD paths_) : paths_(paths_) {}
+  operator const C2::PathsD&() const { return paths_; }
+  const C2::PathsD paths_;
+};
+}  // namespace manifold
+
+namespace {
+const int precision_ = 8;
+
+C2::ClipType cliptype_of_op(OpType op) {
+  C2::ClipType ct = C2::ClipType::Union;
+  switch (op) {
+    case OpType::Add:
+      break;
+    case OpType::Subtract:
+      ct = C2::ClipType::Difference;
+      break;
+    case OpType::Intersect:
+      ct = C2::ClipType::Intersection;
+      break;
+  };
+  return ct;
+}
+
+C2::FillRule fr(CrossSection::FillRule fillrule) {
+  C2::FillRule fr = C2::FillRule::EvenOdd;
+  switch (fillrule) {
+    case CrossSection::FillRule::EvenOdd:
+      break;
+    case CrossSection::FillRule::NonZero:
+      fr = C2::FillRule::NonZero;
+      break;
+    case CrossSection::FillRule::Positive:
+      fr = C2::FillRule::Positive;
+      break;
+    case CrossSection::FillRule::Negative:
+      fr = C2::FillRule::Negative;
+      break;
+  };
+  return fr;
+}
+
+C2::JoinType jt(CrossSection::JoinType jointype) {
+  C2::JoinType jt = C2::JoinType::Square;
+  switch (jointype) {
+    case CrossSection::JoinType::Square:
+      break;
+    case CrossSection::JoinType::Round:
+      jt = C2::JoinType::Round;
+      break;
+    case CrossSection::JoinType::Miter:
+      jt = C2::JoinType::Miter;
+      break;
+  };
+  return jt;
+}
+
+vec2 v2_of_pd(const C2::PointD p) { return {p.x, p.y}; }
+
+C2::PointD v2_to_pd(const vec2 v) { return C2::PointD(v.x, v.y); }
+
+C2::PathD pathd_of_contour(const SimplePolygon& ctr) {
+  auto p = C2::PathD();
+  p.reserve(ctr.size());
+  for (auto v : ctr) {
+    p.push_back(v2_to_pd(v));
+  }
+  return p;
+}
+
+C2::PathsD transform(const C2::PathsD ps, const mat2x3 m) {
+  const bool invert = la::determinant(mat2(m)) < 0;
+  auto transformed = C2::PathsD();
+  transformed.reserve(ps.size());
+  for (auto path : ps) {
+    auto sz = path.size();
+    auto s = C2::PathD(sz);
+    for (size_t i = 0; i < sz; ++i) {
+      auto idx = invert ? sz - 1 - i : i;
+      s[idx] = v2_to_pd(m * vec3(path[i].x, path[i].y, 1));
+    }
+    transformed.push_back(s);
+  }
+  return transformed;
+}
+
+std::shared_ptr<const PathImpl> shared_paths(const C2::PathsD& ps) {
+  return std::make_shared<const PathImpl>(ps);
+}
+
+// forward declaration for mutual recursion
+void decompose_hole(const C2::PolyTreeD* outline,
+                    std::vector<C2::PathsD>& polys, C2::PathsD& poly,
+                    size_t n_holes, size_t j);
+
+void decompose_outline(const C2::PolyTreeD* tree,
+                       std::vector<C2::PathsD>& polys, size_t i) {
+  auto n_outlines = tree->Count();
+  if (i < n_outlines) {
+    auto outline = tree->Child(i);
+    auto n_holes = outline->Count();
+    auto poly = C2::PathsD(n_holes + 1);
+    poly[0] = outline->Polygon();
+    decompose_hole(outline, polys, poly, n_holes, 0);
+    polys.push_back(poly);
+    if (i < n_outlines - 1) {
+      decompose_outline(tree, polys, i + 1);
+    }
+  }
+}
+
+void decompose_hole(const C2::PolyTreeD* outline,
+                    std::vector<C2::PathsD>& polys, C2::PathsD& poly,
+                    size_t n_holes, size_t j) {
+  if (j < n_holes) {
+    auto child = outline->Child(j);
+    decompose_outline(child, polys, 0);
+    poly[j + 1] = child->Polygon();
+    decompose_hole(outline, polys, poly, n_holes, j + 1);
+  }
+}
+
+void flatten(const C2::PolyTreeD* tree, C2::PathsD& polys, size_t i) {
+  auto n_outlines = tree->Count();
+  if (i < n_outlines) {
+    auto outline = tree->Child(i);
+    flatten(outline, polys, 0);
+    polys.push_back(outline->Polygon());
+    if (i < n_outlines - 1) {
+      flatten(tree, polys, i + 1);
+    }
+  }
+}
+
+bool V2Lesser(vec2 a, vec2 b) {
+  if (a.x == b.x) return a.y < b.y;
+  return a.x < b.x;
+}
+
+void HullBacktrack(const vec2& pt, std::vector<vec2>& stack) {
+  auto sz = stack.size();
+  while (sz >= 2 && CCW(stack[sz - 2], stack[sz - 1], pt, 0.0) <= 0.0) {
+    stack.pop_back();
+    sz = stack.size();
+  }
+}
+
+// Based on method described here:
+// https://www.hackerearth.com/practice/math/geometry/line-sweep-technique/tutorial/
+// Changed to follow:
+// https://en.wikibooks.org/wiki/Algorithm_Implementation/Geometry/Convex_hull/Monotone_chain
+// This is the same algorithm (Andrew's, also called Monotone Chain).
+C2::PathD HullImpl(SimplePolygon& pts) {
+  size_t len = pts.size();
+  if (len < 3) return C2::PathD();  // not enough points to create a polygon
+  std::sort(pts.begin(), pts.end(), V2Lesser);
+
+  auto lower = std::vector<vec2>{};
+  for (auto& pt : pts) {
+    HullBacktrack(pt, lower);
+    lower.push_back(pt);
+  }
+  auto upper = std::vector<vec2>{};
+  for (auto pt_iter = pts.rbegin(); pt_iter != pts.rend(); pt_iter++) {
+    HullBacktrack(*pt_iter, upper);
+    upper.push_back(*pt_iter);
+  }
+
+  upper.pop_back();
+  lower.pop_back();
+
+  auto path = C2::PathD();
+  path.reserve(lower.size() + upper.size());
+  for (const auto& l : lower) path.push_back(v2_to_pd(l));
+  for (const auto& u : upper) path.push_back(v2_to_pd(u));
+  return path;
+}
+}  // namespace
+
+namespace manifold {
+
+/**
+ * The default constructor is an empty cross-section (containing no contours).
+ */
+CrossSection::CrossSection() {
+  paths_ = std::make_shared<const PathImpl>(C2::PathsD());
+}
+
+CrossSection::~CrossSection() = default;
+CrossSection::CrossSection(CrossSection&&) noexcept = default;
+CrossSection& CrossSection::operator=(CrossSection&&) noexcept = default;
+
+/**
+ * The copy constructor avoids copying the underlying paths vector (sharing
+ * with its parent via shared_ptr); however, subsequent transformations and
+ * their application will not be shared. It is generally recommended to avoid
+ * this, opting instead to simply create CrossSections with the available
+ * const methods.
+ */
+CrossSection::CrossSection(const CrossSection& other) {
+  paths_ = other.paths_;
+  transform_ = other.transform_;
+}
+
+CrossSection& CrossSection::operator=(const CrossSection& other) {
+  if (this != &other) {
+    paths_ = other.paths_;
+    transform_ = other.transform_;
+  }
+  return *this;
+};
+
+// Private, skips unioning.
+CrossSection::CrossSection(std::shared_ptr<const PathImpl> ps) { paths_ = ps; }
+
+/**
+ * Create a 2d cross-section from a single contour. A boolean union operation
+ * (with Positive filling rule by default) is performed to ensure the
+ * resulting CrossSection is free of self-intersections.
+ *
+ * @param contour A closed path outlining the desired cross-section.
+ * @param fillrule The filling rule used to interpret polygon sub-regions
+ * created by self-intersections in contour.
+ */
+CrossSection::CrossSection(const SimplePolygon& contour, FillRule fillrule) {
+  auto ps = C2::PathsD{(pathd_of_contour(contour))};
+  paths_ = shared_paths(C2::Union(ps, fr(fillrule), precision_));
+}
+
+/**
+ * Create a 2d cross-section from a set of contours (complex polygons). A
+ * boolean union operation (with Positive filling rule by default) is
+ * performed to combine overlapping polygons and ensure the resulting
+ * CrossSection is free of intersections.
+ *
+ * @param contours A set of closed paths describing zero or more complex
+ * polygons.
+ * @param fillrule The filling rule used to interpret polygon sub-regions in
+ * contours.
+ */
+CrossSection::CrossSection(const Polygons& contours, FillRule fillrule) {
+  auto ps = C2::PathsD();
+  ps.reserve(contours.size());
+  for (auto ctr : contours) {
+    ps.push_back(pathd_of_contour(ctr));
+  }
+  paths_ = shared_paths(C2::Union(ps, fr(fillrule), precision_));
+}
+
+/**
+ * Create a 2d cross-section from an axis-aligned rectangle (bounding box).
+ *
+ * @param rect An axis-aligned rectangular bounding box.
+ */
+CrossSection::CrossSection(const Rect& rect) {
+  C2::PathD p(4);
+  p[0] = C2::PointD(rect.min.x, rect.min.y);
+  p[1] = C2::PointD(rect.max.x, rect.min.y);
+  p[2] = C2::PointD(rect.max.x, rect.max.y);
+  p[3] = C2::PointD(rect.min.x, rect.max.y);
+  paths_ = shared_paths(C2::PathsD{p});
+}
+
+// Private
+// All access to paths_ should be done through the GetPaths() method, which
+// applies the accumulated transform_
+std::shared_ptr<const PathImpl> CrossSection::GetPaths() const {
+  if (transform_ == mat2x3(la::identity)) {
+    return paths_;
+  }
+  paths_ = shared_paths(::transform(paths_->paths_, transform_));
+  transform_ = mat2x3(la::identity);
+  return paths_;
+}
+
+/**
+ * Constructs a square with the given XY dimensions. By default it is
+ * positioned in the first quadrant, touching the origin. If any dimensions in
+ * size are negative, or if all are zero, an empty CrossSection will be returned.
+ *
+ * @param size The X, and Y dimensions of the square.
+ * @param center Set to true to shift the center to the origin.
+ */
+CrossSection CrossSection::Square(const vec2 size, bool center) {
+  if (size.x < 0.0 || size.y < 0.0 || la::length(size) == 0.0) {
+    return CrossSection();
+  }
+
+  auto p = C2::PathD(4);
+  if (center) {
+    const auto w = size.x / 2;
+    const auto h = size.y / 2;
+    p[0] = C2::PointD(w, h);
+    p[1] = C2::PointD(-w, h);
+    p[2] = C2::PointD(-w, -h);
+    p[3] = C2::PointD(w, -h);
+  } else {
+    const double x = size.x;
+    const double y = size.y;
+    p[0] = C2::PointD(0.0, 0.0);
+    p[1] = C2::PointD(x, 0.0);
+    p[2] = C2::PointD(x, y);
+    p[3] = C2::PointD(0.0, y);
+  }
+  return CrossSection(shared_paths(C2::PathsD{p}));
+}
+
+/**
+ * Constructs a circle of a given radius.
+ *
+ * @param radius Radius of the circle. Must be positive.
+ * @param circularSegments Number of segments along its diameter. Default is
+ * calculated by the static Quality defaults according to the radius.
+ */
+CrossSection CrossSection::Circle(double radius, int circularSegments) {
+  if (radius <= 0.0) {
+    return CrossSection();
+  }
+  int n = circularSegments > 2 ? circularSegments
+                               : Quality::GetCircularSegments(radius);
+  double dPhi = 360.0 / n;
+  auto circle = C2::PathD(n);
+  for (int i = 0; i < n; ++i) {
+    circle[i] = C2::PointD(radius * cosd(dPhi * i), radius * sind(dPhi * i));
+  }
+  return CrossSection(shared_paths(C2::PathsD{circle}));
+}
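A usage sketch for the 2D constructors above, assuming the vendored manifold/cross_section.h header declares these types; the function name is illustrative.

#include "manifold/cross_section.h"

using namespace manifold;

void CrossSectionPrimitivesSketch() {
  CrossSection square = CrossSection::Square(vec2(2.0, 1.0), true);
  CrossSection circle = CrossSection::Circle(1.0, 32);
  SimplePolygon tri = {{0.0, 0.0}, {1.0, 0.0}, {0.0, 1.0}};
  CrossSection triangle(tri, CrossSection::FillRule::Positive);
  (void)square; (void)circle; (void)triangle;
}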
+
+/**
+ * Perform the given boolean operation between this and another CrossSection.
+ */
+CrossSection CrossSection::Boolean(const CrossSection& second,
+                                   OpType op) const {
+  auto ct = cliptype_of_op(op);
+  auto res = C2::BooleanOp(ct, C2::FillRule::Positive, GetPaths()->paths_,
+                           second.GetPaths()->paths_, precision_);
+  return CrossSection(shared_paths(res));
+}
+
+/**
+ * Perform the given boolean operation on a list of CrossSections. In case of
+ * Subtract, all CrossSections in the tail are differenced from the head.
+ */
+CrossSection CrossSection::BatchBoolean(
+    const std::vector<CrossSection>& crossSections, OpType op) {
+  if (crossSections.size() == 0)
+    return CrossSection();
+  else if (crossSections.size() == 1)
+    return crossSections[0];
+
+  auto subjs = crossSections[0].GetPaths();
+  int n_clips = 0;
+  for (size_t i = 1; i < crossSections.size(); ++i) {
+    n_clips += crossSections[i].GetPaths()->paths_.size();
+  }
+  auto clips = C2::PathsD();
+  clips.reserve(n_clips);
+  for (size_t i = 1; i < crossSections.size(); ++i) {
+    auto ps = crossSections[i].GetPaths();
+    clips.insert(clips.end(), ps->paths_.begin(), ps->paths_.end());
+  }
+
+  auto ct = cliptype_of_op(op);
+  auto res = C2::BooleanOp(ct, C2::FillRule::Positive, subjs->paths_, clips,
+                           precision_);
+  return CrossSection(shared_paths(res));
+}
+
+/**
+ * Compute the boolean union between two cross-sections.
+ */
+CrossSection CrossSection::operator+(const CrossSection& Q) const {
+  return Boolean(Q, OpType::Add);
+}
+
+/**
+ * Compute the boolean union between two cross-sections, assigning the result
+ * to the first.
+ */
+CrossSection& CrossSection::operator+=(const CrossSection& Q) {
+  *this = *this + Q;
+  return *this;
+}
+
+/**
+ * Compute the boolean difference of a (clip) cross-section from another
+ * (subject).
+ */
+CrossSection CrossSection::operator-(const CrossSection& Q) const {
+  return Boolean(Q, OpType::Subtract);
+}
+
+/**
+ * Compute the boolean difference of a (clip) cross-section from another
+ * (subject), assigning the result to the subject.
+ */
+CrossSection& CrossSection::operator-=(const CrossSection& Q) {
+  *this = *this - Q;
+  return *this;
+}
+
+/**
+ * Compute the boolean intersection between two cross-sections.
+ */
+CrossSection CrossSection::operator^(const CrossSection& Q) const {
+  return Boolean(Q, OpType::Intersect);
+}
+
+/**
+ * Compute the boolean intersection between two cross-sections, assigning the
+ * result to the first.
+ */
+CrossSection& CrossSection::operator^=(const CrossSection& Q) {
+  *this = *this ^ Q;
+  return *this;
+}
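A sketch exercising the boolean operators and BatchBoolean() defined above; nothing beyond the shown signatures is assumed.

#include "manifold/cross_section.h"

using namespace manifold;

void CrossSectionBooleanSketch() {
  CrossSection a = CrossSection::Square(vec2(2.0, 2.0), true);
  CrossSection b = CrossSection::Circle(1.5, 32);
  CrossSection unioned = a + b;
  CrossSection cutout = a - b;
  CrossSection overlap = a ^ b;
  CrossSection batch = CrossSection::BatchBoolean({a, b}, OpType::Intersect);
  (void)unioned; (void)cutout; (void)overlap; (void)batch;
}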
+
+/**
+ * Construct a CrossSection from a vector of other CrossSections (batch
+ * boolean union).
+ */
+CrossSection CrossSection::Compose(std::vector<CrossSection>& crossSections) {
+  return BatchBoolean(crossSections, OpType::Add);
+}
+
+/**
+ * This operation returns a vector of CrossSections that are topologically
+ * disconnected, each containing one outline contour with zero or more
+ * holes.
+ */
+std::vector<CrossSection> CrossSection::Decompose() const {
+  if (NumContour() < 2) {
+    return std::vector<CrossSection>{CrossSection(*this)};
+  }
+
+  C2::PolyTreeD tree;
+  C2::BooleanOp(C2::ClipType::Union, C2::FillRule::Positive, GetPaths()->paths_,
+                C2::PathsD(), tree, precision_);
+
+  auto polys = std::vector<C2::PathsD>();
+  decompose_outline(&tree, polys, 0);
+
+  auto comps = std::vector<CrossSection>();
+  comps.reserve(polys.size());
+  // reverse the stack while wrapping
+  for (auto poly = polys.rbegin(); poly != polys.rend(); ++poly)
+    comps.emplace_back(CrossSection(shared_paths(*poly)));
+
+  return comps;
+}
+
+/**
+ * Move this CrossSection in space. This operation can be chained. Transforms
+ * are combined and applied lazily.
+ *
+ * @param v The vector to add to every vertex.
+ */
+CrossSection CrossSection::Translate(const vec2 v) const {
+  mat2x3 m({1.0, 0.0},  //
+           {0.0, 1.0},  //
+           {v.x, v.y});
+  return Transform(m);
+}
+
+/**
+ * Applies a (Z-axis) rotation to the CrossSection, in degrees. This operation
+ * can be chained. Transforms are combined and applied lazily.
+ *
+ * @param degrees degrees about the Z-axis to rotate.
+ */
+CrossSection CrossSection::Rotate(double degrees) const {
+  auto s = sind(degrees);
+  auto c = cosd(degrees);
+  mat2x3 m({c, s},   //
+           {-s, c},  //
+           {0.0, 0.0});
+  return Transform(m);
+}
+
+/**
+ * Scale this CrossSection in space. This operation can be chained. Transforms
+ * are combined and applied lazily.
+ *
+ * @param scale The vector to multiply every vertex by per component.
+ */
+CrossSection CrossSection::Scale(const vec2 scale) const {
+  mat2x3 m({scale.x, 0.0},  //
+           {0.0, scale.y},  //
+           {0.0, 0.0});
+  return Transform(m);
+}
+
+/**
+ * Mirror this CrossSection over the arbitrary axis described by the unit form
+ * of the given vector. If the length of the vector is zero, an empty
+ * CrossSection is returned. This operation can be chained. Transforms are
+ * combined and applied lazily.
+ *
+ * @param ax the axis to be mirrored over
+ */
+CrossSection CrossSection::Mirror(const vec2 ax) const {
+  if (la::length(ax) == 0.) {
+    return CrossSection();
+  }
+  auto n = la::normalize(la::abs(ax));
+  auto m = mat2x3(mat2(la::identity) - 2.0 * la::outerprod(n, n), vec2(0.0));
+  return Transform(m);
+}
+
+/**
+ * Transform this CrossSection in space. The first two columns form a 2x2
+ * matrix transform and the last is a translation vector. This operation can
+ * be chained. Transforms are combined and applied lazily.
+ *
+ * @param m The affine transform matrix to apply to all the vertices.
+ */
+CrossSection CrossSection::Transform(const mat2x3& m) const {
+  auto transformed = CrossSection();
+  transformed.transform_ = m * Mat3(transform_);
+  transformed.paths_ = paths_;
+  return transformed;
+}
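A sketch showing how the lazy transforms above compose; the function name is illustrative.

#include "manifold/cross_section.h"

using namespace manifold;

void LazyTransformSketch() {
  // Each call only multiplies the pending matrix; the paths are rewritten once,
  // when GetPaths() is next needed (here, by Area()).
  CrossSection shape = CrossSection::Square(vec2(1.0, 1.0), true)
                           .Rotate(45.0)
                           .Scale(vec2(2.0, 1.0))
                           .Translate(vec2(3.0, 0.0));
  double area = shape.Area();
  (void)area;
}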
+
+/**
+ * Move the vertices of this CrossSection (creating a new one) according to
+ * any arbitrary input function, followed by a union operation (with a
+ * Positive fill rule) that ensures any introduced intersections are not
+ * included in the result.
+ *
+ * @param warpFunc A function that modifies a given vertex position.
+ */
+CrossSection CrossSection::Warp(std::function<void(vec2&)> warpFunc) const {
+  return WarpBatch([&warpFunc](VecView<vec2> vecs) {
+    for (vec2& p : vecs) {
+      warpFunc(p);
+    }
+  });
+}
+
+/**
+ * Same as CrossSection::Warp but calls warpFunc with a VecView (roughly
+ * equivalent to std::span) pointing to all vec2 elements to be modified
+ * in-place.
+ *
+ * @param warpFunc A function that modifies multiple vertex positions.
+ */
+CrossSection CrossSection::WarpBatch(
+    std::function<void(VecView<vec2>)> warpFunc) const {
+  std::vector<vec2> tmp_verts;
+  C2::PathsD paths = GetPaths()->paths_;  // deep copy
+  for (C2::PathD const& path : paths) {
+    for (C2::PointD const& p : path) {
+      tmp_verts.push_back(v2_of_pd(p));
+    }
+  }
+
+  warpFunc(VecView<vec2>(tmp_verts.data(), tmp_verts.size()));
+
+  auto cursor = tmp_verts.begin();
+  for (C2::PathD& path : paths) {
+    for (C2::PointD& p : path) {
+      p = v2_to_pd(*cursor);
+      ++cursor;
+    }
+  }
+
+  return CrossSection(
+      shared_paths(C2::Union(paths, C2::FillRule::Positive, precision_)));
+}
+
+/**
+ * Remove vertices from the contours in this CrossSection that are less than
+ * the specified distance epsilon from an imaginary line that passes through
+ * its two adjacent vertices. Near duplicate vertices and collinear points
+ * will be removed at lower epsilons, with elimination of line segments
+ * becoming increasingly aggressive with larger epsilons.
+ *
+ * It is recommended to apply this function following Offset, in order to
+ * clean up any spurious tiny line segments introduced that do not improve
+ * quality in any meaningful way. This is particularly important if further
+ * offsetting operations are to be performed, which would compound the issue.
+ */
+CrossSection CrossSection::Simplify(double epsilon) const {
+  C2::PolyTreeD tree;
+  C2::BooleanOp(C2::ClipType::Union, C2::FillRule::Positive, GetPaths()->paths_,
+                C2::PathsD(), tree, precision_);
+
+  C2::PathsD polys;
+  flatten(&tree, polys, 0);
+
+  // Filter out contours less than epsilon wide.
+  C2::PathsD filtered;
+  for (C2::PathD poly : polys) {
+    auto area = C2::Area(poly);
+    Rect box;
+    for (auto vert : poly) {
+      box.Union(vec2(vert.x, vert.y));
+    }
+    vec2 size = box.Size();
+    if (std::abs(area) > std::max(size.x, size.y) * epsilon) {
+      filtered.push_back(poly);
+    }
+  }
+
+  auto ps = SimplifyPaths(filtered, epsilon, true);
+  return CrossSection(shared_paths(ps));
+}
+
+/**
+ * Inflate the contours in CrossSection by the specified delta, handling
+ * corners according to the given JoinType.
+ *
+ * @param delta Positive deltas will cause the expansion of outlining contours
+ * to expand, and retraction of inner (hole) contours. Negative deltas will
+ * have the opposite effect.
+ * @param jointype The join type specifying the treatment of contour joins
+ * (corners).
+ * @param miter_limit The maximum distance in multiples of delta that vertices
+ * can be offset from their original positions before squaring is
+ * applied, <B>when the join type is Miter</B> (default is 2, which is the
+ * minimum allowed). See the [Clipper2
+ * MiterLimit](http://www.angusj.com/clipper2/Docs/Units/Clipper.Offset/Classes/ClipperOffset/Properties/MiterLimit.htm)
+ * page for a visual example.
+ * @param circularSegments Number of segments per 360 degrees of
+ * <B>JoinType::Round</B> corners (roughly, the number of vertices that
+ * will be added to each contour). Default is calculated by the static Quality
+ * defaults according to the radius.
+ */
+CrossSection CrossSection::Offset(double delta, JoinType jointype,
+                                  double miter_limit,
+                                  int circularSegments) const {
+  double arc_tol = 0.;
+  if (jointype == JoinType::Round) {
+    int n = circularSegments > 2 ? circularSegments
+                                 : Quality::GetCircularSegments(delta);
+    // This calculates tolerance as a function of circular segments and delta
+    // (radius) in order to get back the same number of segments in Clipper2:
+    // steps_per_360 = PI / acos(1 - arc_tol / abs_delta)
+    const double abs_delta = std::fabs(delta);
+    const double scaled_delta = abs_delta * std::pow(10, precision_);
+    arc_tol = (std::cos(Clipper2Lib::PI / n) - 1) * -scaled_delta;
+  }
+  auto ps =
+      C2::InflatePaths(GetPaths()->paths_, delta, jt(jointype),
+                       C2::EndType::Polygon, miter_limit, precision_, arc_tol);
+  return CrossSection(shared_paths(ps));
+}
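A usage sketch combining Offset() with the Simplify() clean-up recommended above; the function name is illustrative.

#include "manifold/cross_section.h"

using namespace manifold;

CrossSection RoundedSquareSketch() {
  CrossSection square = CrossSection::Square(vec2(2.0, 2.0), true);
  CrossSection rounded =
      square.Offset(0.25, CrossSection::JoinType::Round, 2.0, 0);
  // Drop the tiny segments an offset can introduce before any further offsets.
  return rounded.Simplify(1e-6);
}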
+
+/**
+ * Compute the convex hull enveloping a set of cross-sections.
+ *
+ * @param crossSections A vector of cross-sections over which to compute a
+ * convex hull.
+ */
+CrossSection CrossSection::Hull(
+    const std::vector<CrossSection>& crossSections) {
+  int n = 0;
+  for (auto cs : crossSections) n += cs.NumVert();
+  SimplePolygon pts;
+  pts.reserve(n);
+  for (auto cs : crossSections) {
+    auto paths = cs.GetPaths()->paths_;
+    for (auto path : paths) {
+      for (auto p : path) {
+        pts.push_back(v2_of_pd(p));
+      }
+    }
+  }
+  return CrossSection(shared_paths(C2::PathsD{HullImpl(pts)}));
+}
+
+/**
+ * Compute the convex hull of this cross-section.
+ */
+CrossSection CrossSection::Hull() const {
+  return Hull(std::vector<CrossSection>{*this});
+}
+
+/**
+ * Compute the convex hull of a set of points. If the given points are fewer
+ * than 3, an empty CrossSection will be returned.
+ *
+ * @param pts A vector of 2-dimensional points over which to compute a convex
+ * hull.
+ */
+CrossSection CrossSection::Hull(SimplePolygon pts) {
+  return CrossSection(shared_paths(C2::PathsD{HullImpl(pts)}));
+}
+
+/**
+ * Compute the convex hull of a set of points/polygons. If the given points are
+ * fewer than 3, an empty CrossSection will be returned.
+ *
+ * @param polys A vector of vectors of 2-dimensional points over which to
+ * compute a convex hull.
+ */
+CrossSection CrossSection::Hull(const Polygons polys) {
+  SimplePolygon pts;
+  for (auto poly : polys) {
+    for (auto p : poly) {
+      pts.push_back(p);
+    }
+  }
+  return Hull(pts);
+}
+
+/**
+ * Return the total area covered by complex polygons making up the
+ * CrossSection.
+ */
+double CrossSection::Area() const { return C2::Area(GetPaths()->paths_); }
+
+/**
+ * Return the number of vertices in the CrossSection.
+ */
+int CrossSection::NumVert() const {
+  int n = 0;
+  auto paths = GetPaths()->paths_;
+  for (auto p : paths) {
+    n += p.size();
+  }
+  return n;
+}
+
+/**
+ * Return the number of contours (both outer and inner paths) in the
+ * CrossSection.
+ */
+int CrossSection::NumContour() const { return GetPaths()->paths_.size(); }
+
+/**
+ * Does the CrossSection contain any contours?
+ */
+bool CrossSection::IsEmpty() const { return GetPaths()->paths_.empty(); }
+
+/**
+ * Returns the axis-aligned bounding rectangle of all of this CrossSection's
+ * vertices.
+ */
+Rect CrossSection::Bounds() const {
+  auto r = C2::GetBounds(GetPaths()->paths_);
+  return Rect({r.left, r.bottom}, {r.right, r.top});
+}
+
+/**
+ * Return the contours of this CrossSection as a Polygons.
+ */
+Polygons CrossSection::ToPolygons() const {
+  auto polys = Polygons();
+  auto paths = GetPaths()->paths_;
+  polys.reserve(paths.size());
+  for (auto p : paths) {
+    auto sp = SimplePolygon();
+    sp.reserve(p.size());
+    for (auto v : p) {
+      sp.push_back({v.x, v.y});
+    }
+    polys.push_back(sp);
+  }
+  return polys;
+}
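A sketch of the query helpers above, assuming the Rect and Polygons types come in via the same header; the function name is illustrative.

#include "manifold/cross_section.h"

using namespace manifold;

void CrossSectionQuerySketch() {
  CrossSection ring =
      CrossSection::Circle(2.0, 64) - CrossSection::Circle(1.0, 64);
  double area = ring.Area();         // outer disc minus the hole
  int contours = ring.NumContour();  // 2: one outline plus one hole
  Rect box = ring.Bounds();
  Polygons polys = ring.ToPolygons();
  (void)area; (void)contours; (void)box; (void)polys;
}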
+}  // namespace manifold

+ 643 - 0
thirdparty/manifold/src/csg_tree.cpp

@@ -0,0 +1,643 @@
+// Copyright 2022 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/concurrent_priority_queue.h>)
+#include <tbb/tbb.h>
+#define TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS 1
+#include <tbb/concurrent_priority_queue.h>
+#endif
+
+#include <algorithm>
+#include <variant>
+
+#include "./boolean3.h"
+#include "./csg_tree.h"
+#include "./impl.h"
+#include "./mesh_fixes.h"
+#include "./parallel.h"
+
+constexpr int kParallelThreshold = 4096;
+
+namespace {
+using namespace manifold;
+struct Transform4x3 {
+  mat3x4 transform;
+
+  vec3 operator()(vec3 position) const {
+    return transform * vec4(position, 1.0);
+  }
+};
+
+struct UpdateHalfedge {
+  const int nextVert;
+  const int nextEdge;
+  const int nextFace;
+
+  Halfedge operator()(Halfedge edge) {
+    edge.startVert += nextVert;
+    edge.endVert += nextVert;
+    edge.pairedHalfedge += nextEdge;
+    return edge;
+  }
+};
+
+struct UpdateTriProp {
+  const int nextProp;
+
+  ivec3 operator()(ivec3 tri) {
+    tri += nextProp;
+    return tri;
+  }
+};
+
+struct UpdateMeshIDs {
+  const int offset;
+
+  TriRef operator()(TriRef ref) {
+    ref.meshID += offset;
+    return ref;
+  }
+};
+
+struct CheckOverlap {
+  VecView<const Box> boxes;
+  const size_t i;
+  bool operator()(size_t j) { return boxes[i].DoesOverlap(boxes[j]); }
+};
+
+using SharedImpl = std::variant<std::shared_ptr<const Manifold::Impl>,
+                                std::shared_ptr<Manifold::Impl>>;
+struct GetImplPtr {
+  const Manifold::Impl *operator()(const SharedImpl &p) {
+    if (std::holds_alternative<std::shared_ptr<const Manifold::Impl>>(p)) {
+      return std::get_if<std::shared_ptr<const Manifold::Impl>>(&p)->get();
+    } else {
+      return std::get_if<std::shared_ptr<Manifold::Impl>>(&p)->get();
+    }
+  };
+};
+
+struct MeshCompare {
+  bool operator()(const SharedImpl &a, const SharedImpl &b) {
+    return GetImplPtr()(a)->NumVert() < GetImplPtr()(b)->NumVert();
+  }
+};
+
+}  // namespace
+namespace manifold {
+
+std::shared_ptr<CsgNode> CsgNode::Boolean(
+    const std::shared_ptr<CsgNode> &second, OpType op) {
+  if (auto opNode = std::dynamic_pointer_cast<CsgOpNode>(second)) {
+    // "this" is not a CsgOpNode (which overrides Boolean), but if "second" is
+    // and the operation is commutative, we let it build the tree.
+    if ((op == OpType::Add || op == OpType::Intersect)) {
+      return opNode->Boolean(shared_from_this(), op);
+    }
+  }
+  std::vector<std::shared_ptr<CsgNode>> children({shared_from_this(), second});
+  return std::make_shared<CsgOpNode>(children, op);
+}
+
+std::shared_ptr<CsgNode> CsgNode::Translate(const vec3 &t) const {
+  mat3x4 transform = la::identity;
+  transform[3] += t;
+  return Transform(transform);
+}
+
+std::shared_ptr<CsgNode> CsgNode::Scale(const vec3 &v) const {
+  mat3x4 transform;
+  for (int i : {0, 1, 2}) transform[i][i] = v[i];
+  return Transform(transform);
+}
+
+std::shared_ptr<CsgNode> CsgNode::Rotate(double xDegrees, double yDegrees,
+                                         double zDegrees) const {
+  mat3 rX({1.0, 0.0, 0.0},                        //
+          {0.0, cosd(xDegrees), sind(xDegrees)},  //
+          {0.0, -sind(xDegrees), cosd(xDegrees)});
+  mat3 rY({cosd(yDegrees), 0.0, -sind(yDegrees)},  //
+          {0.0, 1.0, 0.0},                         //
+          {sind(yDegrees), 0.0, cosd(yDegrees)});
+  mat3 rZ({cosd(zDegrees), sind(zDegrees), 0.0},   //
+          {-sind(zDegrees), cosd(zDegrees), 0.0},  //
+          {0.0, 0.0, 1.0});
+  mat3x4 transform(rZ * rY * rX, vec3());
+  return Transform(transform);
+}
+
+CsgLeafNode::CsgLeafNode() : pImpl_(std::make_shared<Manifold::Impl>()) {}
+
+CsgLeafNode::CsgLeafNode(std::shared_ptr<const Manifold::Impl> pImpl_)
+    : pImpl_(pImpl_) {}
+
+CsgLeafNode::CsgLeafNode(std::shared_ptr<const Manifold::Impl> pImpl_,
+                         mat3x4 transform_)
+    : pImpl_(pImpl_), transform_(transform_) {}
+
+std::shared_ptr<const Manifold::Impl> CsgLeafNode::GetImpl() const {
+  if (transform_ == mat3x4(la::identity)) return pImpl_;
+  pImpl_ =
+      std::make_shared<const Manifold::Impl>(pImpl_->Transform(transform_));
+  transform_ = la::identity;
+  return pImpl_;
+}
+
+mat3x4 CsgLeafNode::GetTransform() const { return transform_; }
+
+std::shared_ptr<CsgLeafNode> CsgLeafNode::ToLeafNode() const {
+  return std::make_shared<CsgLeafNode>(*this);
+}
+
+std::shared_ptr<CsgNode> CsgLeafNode::Transform(const mat3x4 &m) const {
+  return std::make_shared<CsgLeafNode>(pImpl_, m * Mat4(transform_));
+}
+
+CsgNodeType CsgLeafNode::GetNodeType() const { return CsgNodeType::Leaf; }
+
+/**
+ * Efficient union of a set of pairwise disjoint meshes.
+ */
+Manifold::Impl CsgLeafNode::Compose(
+    const std::vector<std::shared_ptr<CsgLeafNode>> &nodes) {
+  ZoneScoped;
+  double epsilon = -1;
+  double tolerance = -1;
+  int numVert = 0;
+  int numEdge = 0;
+  int numTri = 0;
+  int numPropVert = 0;
+  std::vector<int> vertIndices;
+  std::vector<int> edgeIndices;
+  std::vector<int> triIndices;
+  std::vector<int> propVertIndices;
+  int numPropOut = 0;
+  for (auto &node : nodes) {
+    if (node->pImpl_->status_ != Manifold::Error::NoError) {
+      Manifold::Impl impl;
+      impl.status_ = node->pImpl_->status_;
+      return impl;
+    }
+    double nodeOldScale = node->pImpl_->bBox_.Scale();
+    double nodeNewScale =
+        node->pImpl_->bBox_.Transform(node->transform_).Scale();
+    double nodeEpsilon = node->pImpl_->epsilon_;
+    nodeEpsilon *= std::max(1.0, nodeNewScale / nodeOldScale);
+    nodeEpsilon = std::max(nodeEpsilon, kPrecision * nodeNewScale);
+    if (!std::isfinite(nodeEpsilon)) nodeEpsilon = -1;
+    epsilon = std::max(epsilon, nodeEpsilon);
+    tolerance = std::max(tolerance, node->pImpl_->tolerance_);
+
+    vertIndices.push_back(numVert);
+    edgeIndices.push_back(numEdge * 2);
+    triIndices.push_back(numTri);
+    propVertIndices.push_back(numPropVert);
+    numVert += node->pImpl_->NumVert();
+    numEdge += node->pImpl_->NumEdge();
+    numTri += node->pImpl_->NumTri();
+    const int numProp = node->pImpl_->NumProp();
+    numPropOut = std::max(numPropOut, numProp);
+    numPropVert +=
+        numProp == 0 ? 1
+                     : node->pImpl_->meshRelation_.properties.size() / numProp;
+  }
+
+  Manifold::Impl combined;
+  combined.epsilon_ = epsilon;
+  combined.tolerance_ = tolerance;
+  combined.vertPos_.resize(numVert);
+  combined.halfedge_.resize(2 * numEdge);
+  combined.faceNormal_.resize(numTri);
+  combined.halfedgeTangent_.resize(2 * numEdge);
+  combined.meshRelation_.triRef.resize(numTri);
+  if (numPropOut > 0) {
+    combined.meshRelation_.numProp = numPropOut;
+    combined.meshRelation_.properties.resize(numPropOut * numPropVert, 0);
+    combined.meshRelation_.triProperties.resize(numTri);
+  }
+  auto policy = autoPolicy(numTri);
+
+  // if we are already parallelizing for each node, do not perform multithreaded
+  // copying as it will slightly hurt performance
+  if (nodes.size() > 1 && policy == ExecutionPolicy::Par)
+    policy = ExecutionPolicy::Seq;
+
+  for_each_n(
+      nodes.size() > 1 ? ExecutionPolicy::Par : ExecutionPolicy::Seq,
+      countAt(0), nodes.size(),
+      [&nodes, &vertIndices, &edgeIndices, &triIndices, &propVertIndices,
+       numPropOut, &combined, policy](int i) {
+        auto &node = nodes[i];
+        copy(node->pImpl_->halfedgeTangent_.begin(),
+             node->pImpl_->halfedgeTangent_.end(),
+             combined.halfedgeTangent_.begin() + edgeIndices[i]);
+        transform(
+            node->pImpl_->halfedge_.begin(), node->pImpl_->halfedge_.end(),
+            combined.halfedge_.begin() + edgeIndices[i],
+            UpdateHalfedge({vertIndices[i], edgeIndices[i], triIndices[i]}));
+
+        if (numPropOut > 0) {
+          auto start =
+              combined.meshRelation_.triProperties.begin() + triIndices[i];
+          if (node->pImpl_->NumProp() > 0) {
+            auto &triProp = node->pImpl_->meshRelation_.triProperties;
+            transform(triProp.begin(), triProp.end(), start,
+                      UpdateTriProp({propVertIndices[i]}));
+
+            const int numProp = node->pImpl_->NumProp();
+            auto &oldProp = node->pImpl_->meshRelation_.properties;
+            auto &newProp = combined.meshRelation_.properties;
+            for (int p = 0; p < numProp; ++p) {
+              auto oldRange =
+                  StridedRange(oldProp.cbegin() + p, oldProp.cend(), numProp);
+              auto newRange = StridedRange(
+                  newProp.begin() + numPropOut * propVertIndices[i] + p,
+                  newProp.end(), numPropOut);
+              copy(oldRange.begin(), oldRange.end(), newRange.begin());
+            }
+          } else {
+            // point all triangles at single new property of zeros.
+            fill(start, start + node->pImpl_->NumTri(),
+                 ivec3(propVertIndices[i]));
+          }
+        }
+
+        if (node->transform_ == mat3x4(la::identity)) {
+          copy(node->pImpl_->vertPos_.begin(), node->pImpl_->vertPos_.end(),
+               combined.vertPos_.begin() + vertIndices[i]);
+          copy(node->pImpl_->faceNormal_.begin(),
+               node->pImpl_->faceNormal_.end(),
+               combined.faceNormal_.begin() + triIndices[i]);
+        } else {
+          // no need to apply the transform to the node, just copy the vertices
+          // and face normals and apply transform on the fly
+          auto vertPosBegin = TransformIterator(
+              node->pImpl_->vertPos_.begin(), Transform4x3({node->transform_}));
+          mat3 normalTransform =
+              la::inverse(la::transpose(mat3(node->transform_)));
+          auto faceNormalBegin =
+              TransformIterator(node->pImpl_->faceNormal_.begin(),
+                                TransformNormals({normalTransform}));
+          copy_n(vertPosBegin, node->pImpl_->vertPos_.size(),
+                 combined.vertPos_.begin() + vertIndices[i]);
+          copy_n(faceNormalBegin, node->pImpl_->faceNormal_.size(),
+                 combined.faceNormal_.begin() + triIndices[i]);
+
+          const bool invert = la::determinant(mat3(node->transform_)) < 0;
+          for_each_n(policy, countAt(0), node->pImpl_->halfedgeTangent_.size(),
+                     TransformTangents{combined.halfedgeTangent_,
+                                       edgeIndices[i], mat3(node->transform_),
+                                       invert, node->pImpl_->halfedgeTangent_,
+                                       node->pImpl_->halfedge_});
+          if (invert)
+            for_each_n(policy, countAt(triIndices[i]), node->pImpl_->NumTri(),
+                       FlipTris({combined.halfedge_}));
+        }
+        // Since the nodes may be copies containing the same meshIDs, it is
+        // important to add an offset so that each node instance gets
+        // unique meshIDs.
+        const int offset = i * Manifold::Impl::meshIDCounter_;
+        transform(node->pImpl_->meshRelation_.triRef.begin(),
+                  node->pImpl_->meshRelation_.triRef.end(),
+                  combined.meshRelation_.triRef.begin() + triIndices[i],
+                  UpdateMeshIDs({offset}));
+      });
+
+  for (size_t i = 0; i < nodes.size(); i++) {
+    auto &node = nodes[i];
+    const int offset = i * Manifold::Impl::meshIDCounter_;
+
+    for (const auto &pair : node->pImpl_->meshRelation_.meshIDtransform) {
+      combined.meshRelation_.meshIDtransform[pair.first + offset] = pair.second;
+    }
+  }
+
+  // required to remove parts that are smaller than the tolerance
+  combined.SimplifyTopology();
+  combined.Finish();
+  combined.IncrementMeshIDs();
+  return combined;
+}
+
+CsgOpNode::CsgOpNode() {}
+
+CsgOpNode::CsgOpNode(const std::vector<std::shared_ptr<CsgNode>> &children,
+                     OpType op)
+    : impl_(Impl{}) {
+  auto impl = impl_.GetGuard();
+  impl->children_ = children;
+  SetOp(op);
+}
+
+CsgOpNode::CsgOpNode(std::vector<std::shared_ptr<CsgNode>> &&children,
+                     OpType op)
+    : impl_(Impl{}) {
+  auto impl = impl_.GetGuard();
+  impl->children_ = children;
+  SetOp(op);
+}
+
+std::shared_ptr<CsgNode> CsgOpNode::Boolean(
+    const std::shared_ptr<CsgNode> &second, OpType op) {
+  std::vector<std::shared_ptr<CsgNode>> children;
+
+  auto isReused = [](const auto &node) { return node->impl_.UseCount() > 1; };
+
+  auto copyChildren = [&](const auto &list, const mat3x4 &transform) {
+    for (const auto &child : list) {
+      children.push_back(child->Transform(transform));
+    }
+  };
+
+  auto self = std::dynamic_pointer_cast<CsgOpNode>(shared_from_this());
+  if (IsOp(op) && !isReused(self)) {
+    auto impl = impl_.GetGuard();
+    copyChildren(impl->children_, transform_);
+  } else {
+    children.push_back(self);
+  }
+
+  auto secondOp = std::dynamic_pointer_cast<CsgOpNode>(second);
+  auto canInlineSecondOp = [&]() {
+    switch (op) {
+      case OpType::Add:
+      case OpType::Intersect:
+        return secondOp->IsOp(op);
+      case OpType::Subtract:
+        return secondOp->IsOp(OpType::Add);
+      default:
+        return false;
+    }
+  };
+
+  if (secondOp && canInlineSecondOp() && !isReused(secondOp)) {
+    auto secondImpl = secondOp->impl_.GetGuard();
+    copyChildren(secondImpl->children_, secondOp->transform_);
+  } else {
+    children.push_back(second);
+  }
+
+  return std::make_shared<CsgOpNode>(children, op);
+}
+
+std::shared_ptr<CsgNode> CsgOpNode::Transform(const mat3x4 &m) const {
+  auto node = std::make_shared<CsgOpNode>();
+  node->impl_ = impl_;
+  node->transform_ = m * Mat4(transform_);
+  node->op_ = op_;
+  return node;
+}
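A minimal standalone sketch of the idea behind Transform() above: a 3x4 affine transform is composed with another by treating each as a 4x4 matrix with an implicit (0, 0, 0, 1) bottom row, keeping only the top three rows of the product. Plain double arrays stand in for Manifold's la:: matrix types.

// Sketch only: composing two 3x4 affine transforms (linear part in the left
// 3x3 block, translation in the last column) by promoting to 4x4.
#include <array>
#include <cstdio>

using Mat3x4 = std::array<std::array<double, 4>, 3>;  // row-major, 3 rows x 4 columns

Mat3x4 ComposeAffine(const Mat3x4& m, const Mat3x4& t) {
  // result = m * [t; 0 0 0 1], keeping only the top 3 rows.
  Mat3x4 out{};
  for (int r = 0; r < 3; ++r) {
    for (int c = 0; c < 4; ++c) {
      double sum = (c == 3) ? m[r][3] : 0.0;  // contribution of t's implicit bottom row
      for (int k = 0; k < 3; ++k) sum += m[r][k] * t[k][c];
      out[r][c] = sum;
    }
  }
  return out;
}

int main() {
  const Mat3x4 translate{{{1, 0, 0, 2}, {0, 1, 0, 0}, {0, 0, 1, 0}}};  // +2 in x
  const Mat3x4 scale{{{3, 0, 0, 0}, {0, 3, 0, 0}, {0, 0, 3, 0}}};      // uniform 3x
  const Mat3x4 combined = ComposeAffine(translate, scale);  // scale first, then translate
  std::printf("x scale %g, x translation %g\n", combined[0][0], combined[0][3]);  // 3 and 2
}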
+
+std::shared_ptr<CsgLeafNode> CsgOpNode::ToLeafNode() const {
+  if (cache_ != nullptr) return cache_;
+  // turn the children into leaf nodes
+  GetChildren();
+  auto impl = impl_.GetGuard();
+  auto &children_ = impl->children_;
+  if (children_.size() > 1) {
+    switch (op_) {
+      case CsgNodeType::Union:
+        BatchUnion();
+        break;
+      case CsgNodeType::Intersection: {
+        std::vector<std::shared_ptr<const Manifold::Impl>> impls;
+        for (auto &child : children_) {
+          impls.push_back(
+              std::dynamic_pointer_cast<CsgLeafNode>(child)->GetImpl());
+        }
+        children_.clear();
+        children_.push_back(std::make_shared<CsgLeafNode>(
+            BatchBoolean(OpType::Intersect, impls)));
+        break;
+      };
+      case CsgNodeType::Difference: {
+        // take the lhs out and treat the remaining nodes as the rhs,
+        // performing the union optimization on them
+        auto lhs = std::dynamic_pointer_cast<CsgLeafNode>(children_.front());
+        children_.erase(children_.begin());
+        BatchUnion();
+        auto rhs = std::dynamic_pointer_cast<CsgLeafNode>(children_.front());
+        children_.clear();
+        Boolean3 boolean(*lhs->GetImpl(), *rhs->GetImpl(), OpType::Subtract);
+        children_.push_back(
+            std::make_shared<CsgLeafNode>(std::make_shared<Manifold::Impl>(
+                boolean.Result(OpType::Subtract))));
+      };
+      case CsgNodeType::Leaf:
+        // unreachable
+        break;
+    }
+  } else if (children_.size() == 0) {
+    return nullptr;
+  }
+  // children_ must contain only one CsgLeafNode now, and its Transform will
+  // give CsgLeafNode as well
+  cache_ = std::dynamic_pointer_cast<CsgLeafNode>(
+      children_.front()->Transform(transform_));
+  return cache_;
+}
+
+/**
+ * Efficient boolean operation on a set of nodes utilizing commutativity of the
+ * operation. Only supports union and intersection.
+ */
+std::shared_ptr<Manifold::Impl> CsgOpNode::BatchBoolean(
+    OpType operation,
+    std::vector<std::shared_ptr<const Manifold::Impl>> &results) {
+  ZoneScoped;
+  auto getImplPtr = GetImplPtr();
+  DEBUG_ASSERT(operation != OpType::Subtract, logicErr,
+               "BatchBoolean doesn't support Difference.");
+  // common cases
+  if (results.size() == 0) return std::make_shared<Manifold::Impl>();
+  if (results.size() == 1)
+    return std::make_shared<Manifold::Impl>(*results.front());
+  if (results.size() == 2) {
+    Boolean3 boolean(*results[0], *results[1], operation);
+    return std::make_shared<Manifold::Impl>(boolean.Result(operation));
+  }
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/tbb.h>)
+  tbb::task_group group;
+  tbb::concurrent_priority_queue<SharedImpl, MeshCompare> queue(results.size());
+  for (auto result : results) {
+    queue.emplace(result);
+  }
+  results.clear();
+  std::function<void()> process = [&]() {
+    while (queue.size() > 1) {
+      SharedImpl a, b;
+      if (!queue.try_pop(a)) continue;
+      if (!queue.try_pop(b)) {
+        queue.push(a);
+        continue;
+      }
+      group.run([&, a, b]() {
+        Boolean3 boolean(*getImplPtr(a), *getImplPtr(b), operation);
+        queue.emplace(
+            std::make_shared<Manifold::Impl>(boolean.Result(operation)));
+        return group.run(process);
+      });
+    }
+  };
+  group.run_and_wait(process);
+  SharedImpl r;
+  queue.try_pop(r);
+  return *std::get_if<std::shared_ptr<Manifold::Impl>>(&r);
+#endif
+  // apply boolean operations starting from smaller meshes
+  // the assumption is that boolean operations on smaller meshes are faster,
+  // due to less data being copied and processed
+  auto cmpFn = MeshCompare();
+  std::make_heap(results.begin(), results.end(), cmpFn);
+  while (results.size() > 1) {
+    std::pop_heap(results.begin(), results.end(), cmpFn);
+    auto a = std::move(results.back());
+    results.pop_back();
+    std::pop_heap(results.begin(), results.end(), cmpFn);
+    auto b = std::move(results.back());
+    results.pop_back();
+    // boolean operation
+    Boolean3 boolean(*a, *b, operation);
+    auto result = std::make_shared<Manifold::Impl>(boolean.Result(operation));
+    if (results.size() == 0) {
+      return result;
+    }
+    results.push_back(result);
+    std::push_heap(results.begin(), results.end(), cmpFn);
+  }
+  return std::make_shared<Manifold::Impl>(*results.front());
+}
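A minimal standalone sketch of the smallest-first reduction used above, with a stand-in Mesh type (just a vertex count) and a placeholder Combine() in place of the Boolean3 operation: pop the two smallest operands off a heap, combine them, and push the result back until one mesh remains.

// Sketch only: reduce N operands pairwise, always combining the two smallest
// first, so the expensive operations run on meshes that are as small as possible.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Mesh {
  size_t numVert;  // stand-in for mesh complexity
};

Mesh Combine(const Mesh& a, const Mesh& b) { return {a.numVert + b.numVert}; }

Mesh ReduceSmallestFirst(std::vector<Mesh> meshes) {
  // Heap comparator chosen so the *smallest* mesh is popped first.
  auto bigger = [](const Mesh& a, const Mesh& b) { return a.numVert > b.numVert; };
  std::make_heap(meshes.begin(), meshes.end(), bigger);
  while (meshes.size() > 1) {
    std::pop_heap(meshes.begin(), meshes.end(), bigger);
    const Mesh a = meshes.back();
    meshes.pop_back();
    std::pop_heap(meshes.begin(), meshes.end(), bigger);
    const Mesh b = meshes.back();
    meshes.pop_back();
    meshes.push_back(Combine(a, b));
    std::push_heap(meshes.begin(), meshes.end(), bigger);
  }
  return meshes.front();
}

int main() {
  const Mesh total = ReduceSmallestFirst({{100}, {8}, {2000}, {40}});
  std::printf("combined verts: %zu\n", total.numVert);  // 2148
}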
+
+/**
+ * Efficient union operation on a set of nodes by doing Compose as much as
+ * possible.
+ * Note: Due to some unknown issues with `Compose`, we are now doing
+ * `BatchBoolean` instead of using `Compose` for non-intersecting manifolds.
+ */
+void CsgOpNode::BatchUnion() const {
+  ZoneScoped;
+  // INVARIANT: children_ is a vector of leaf nodes
+  // kMaxUnionSize is a heuristic that keeps the O(n^2) pairwise disjoint
+  // check from taking too long. If the number of children exceeds this limit,
+  // we operate on chunks of size kMaxUnionSize.
+  constexpr size_t kMaxUnionSize = 1000;
+  auto impl = impl_.GetGuard();
+  auto &children_ = impl->children_;
+  while (children_.size() > 1) {
+    const size_t start = (children_.size() > kMaxUnionSize)
+                             ? (children_.size() - kMaxUnionSize)
+                             : 0;
+    Vec<Box> boxes;
+    boxes.reserve(children_.size() - start);
+    for (size_t i = start; i < children_.size(); i++) {
+      boxes.push_back(std::dynamic_pointer_cast<CsgLeafNode>(children_[i])
+                          ->GetImpl()
+                          ->bBox_);
+    }
+    // partition the children into a set of disjoint sets
+    // each set contains a set of children that are pairwise disjoint
+    std::vector<Vec<size_t>> disjointSets;
+    for (size_t i = 0; i < boxes.size(); i++) {
+      auto lambda = [&boxes, i](const Vec<size_t> &set) {
+        return std::find_if(set.begin(), set.end(), CheckOverlap({boxes, i})) ==
+               set.end();
+      };
+      auto it = std::find_if(disjointSets.begin(), disjointSets.end(), lambda);
+      if (it == disjointSets.end()) {
+        disjointSets.push_back(std::vector<size_t>{i});
+      } else {
+        it->push_back(i);
+      }
+    }
+    // compose each set of disjoint children
+    std::vector<std::shared_ptr<const Manifold::Impl>> impls;
+    for (auto &set : disjointSets) {
+      if (set.size() == 1) {
+        impls.push_back(
+            std::dynamic_pointer_cast<CsgLeafNode>(children_[start + set[0]])
+                ->GetImpl());
+      } else {
+        std::vector<std::shared_ptr<CsgLeafNode>> tmp;
+        for (size_t j : set) {
+          tmp.push_back(
+              std::dynamic_pointer_cast<CsgLeafNode>(children_[start + j]));
+        }
+        impls.push_back(
+            std::make_shared<const Manifold::Impl>(CsgLeafNode::Compose(tmp)));
+      }
+    }
+
+    children_.erase(children_.begin() + start, children_.end());
+    children_.push_back(
+        std::make_shared<CsgLeafNode>(BatchBoolean(OpType::Add, impls)));
+    // Move it to the front: we process from the back, and the newly added
+    // child is likely the most complex, so it should be handled last.
+    std::swap(children_.front(), children_.back());
+  }
+}
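A minimal standalone sketch of the greedy grouping step above: each bounding box joins the first existing group it is disjoint from, otherwise it starts a new group, so every group is pairwise disjoint and can be composed without a boolean. Aabb is a stand-in for Manifold's Box.

// Sketch only: greedily partition axis-aligned boxes into pairwise-disjoint groups.
#include <cstdio>
#include <vector>

struct Aabb {
  double min[3], max[3];
};

bool Overlaps(const Aabb& a, const Aabb& b) {
  for (int i = 0; i < 3; ++i)
    if (a.max[i] < b.min[i] || b.max[i] < a.min[i]) return false;
  return true;
}

std::vector<std::vector<size_t>> DisjointGroups(const std::vector<Aabb>& boxes) {
  std::vector<std::vector<size_t>> groups;
  for (size_t i = 0; i < boxes.size(); ++i) {
    bool placed = false;
    for (auto& group : groups) {
      bool disjoint = true;
      for (size_t j : group) {
        if (Overlaps(boxes[i], boxes[j])) {
          disjoint = false;
          break;
        }
      }
      if (disjoint) {
        group.push_back(i);
        placed = true;
        break;
      }
    }
    if (!placed) groups.push_back({i});
  }
  return groups;
}

int main() {
  // Two overlapping unit boxes plus one far-away box: expect 2 groups.
  const std::vector<Aabb> boxes = {{{0, 0, 0}, {1, 1, 1}},
                                   {{0.5, 0, 0}, {1.5, 1, 1}},
                                   {{10, 10, 10}, {11, 11, 11}}};
  std::printf("groups: %zu\n", DisjointGroups(boxes).size());  // 2
}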
+
+/**
+ * Flatten the children to a list of leaf nodes and return them.
+ * If forceToLeafNodes is true, the list will be guaranteed to be a list of leaf
+ * nodes (i.e. no ops). Otherwise, the list may contain ops. Note that this
+ * function will not apply the transform to children, as they may be shared with
+ * other nodes.
+ */
+std::vector<std::shared_ptr<CsgNode>> &CsgOpNode::GetChildren(
+    bool forceToLeafNodes) const {
+  auto impl = impl_.GetGuard();
+
+  if (forceToLeafNodes && !impl->forcedToLeafNodes_) {
+    impl->forcedToLeafNodes_ = true;
+    for_each(ExecutionPolicy::Par, impl->children_.begin(),
+             impl->children_.end(), [](auto &child) {
+               if (child->GetNodeType() != CsgNodeType::Leaf) {
+                 child = child->ToLeafNode();
+               }
+             });
+  }
+  return impl->children_;
+}
+
+void CsgOpNode::SetOp(OpType op) {
+  switch (op) {
+    case OpType::Add:
+      op_ = CsgNodeType::Union;
+      break;
+    case OpType::Subtract:
+      op_ = CsgNodeType::Difference;
+      break;
+    case OpType::Intersect:
+      op_ = CsgNodeType::Intersection;
+      break;
+  }
+}
+
+bool CsgOpNode::IsOp(OpType op) {
+  switch (op) {
+    case OpType::Add:
+      return op_ == CsgNodeType::Union;
+    case OpType::Subtract:
+      return op_ == CsgNodeType::Difference;
+    case OpType::Intersect:
+      return op_ == CsgNodeType::Intersection;
+    default:
+      return false;
+  }
+}
+
+mat3x4 CsgOpNode::GetTransform() const { return transform_; }
+
+}  // namespace manifold

+ 108 - 0
thirdparty/manifold/src/csg_tree.h

@@ -0,0 +1,108 @@
+// Copyright 2022 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "./utils.h"
+#include "manifold/manifold.h"
+
+namespace manifold {
+
+enum class CsgNodeType { Union, Intersection, Difference, Leaf };
+
+class CsgLeafNode;
+
+class CsgNode : public std::enable_shared_from_this<CsgNode> {
+ public:
+  virtual std::shared_ptr<CsgLeafNode> ToLeafNode() const = 0;
+  virtual std::shared_ptr<CsgNode> Transform(const mat3x4 &m) const = 0;
+  virtual CsgNodeType GetNodeType() const = 0;
+  virtual mat3x4 GetTransform() const = 0;
+
+  virtual std::shared_ptr<CsgNode> Boolean(
+      const std::shared_ptr<CsgNode> &second, OpType op);
+
+  std::shared_ptr<CsgNode> Translate(const vec3 &t) const;
+  std::shared_ptr<CsgNode> Scale(const vec3 &s) const;
+  std::shared_ptr<CsgNode> Rotate(double xDegrees = 0, double yDegrees = 0,
+                                  double zDegrees = 0) const;
+};
+
+class CsgLeafNode final : public CsgNode {
+ public:
+  CsgLeafNode();
+  CsgLeafNode(std::shared_ptr<const Manifold::Impl> pImpl_);
+  CsgLeafNode(std::shared_ptr<const Manifold::Impl> pImpl_, mat3x4 transform_);
+
+  std::shared_ptr<const Manifold::Impl> GetImpl() const;
+
+  std::shared_ptr<CsgLeafNode> ToLeafNode() const override;
+
+  std::shared_ptr<CsgNode> Transform(const mat3x4 &m) const override;
+
+  CsgNodeType GetNodeType() const override;
+
+  mat3x4 GetTransform() const override;
+
+  static Manifold::Impl Compose(
+      const std::vector<std::shared_ptr<CsgLeafNode>> &nodes);
+
+ private:
+  mutable std::shared_ptr<const Manifold::Impl> pImpl_;
+  mutable mat3x4 transform_ = la::identity;
+};
+
+class CsgOpNode final : public CsgNode {
+ public:
+  CsgOpNode();
+
+  CsgOpNode(const std::vector<std::shared_ptr<CsgNode>> &children, OpType op);
+
+  CsgOpNode(std::vector<std::shared_ptr<CsgNode>> &&children, OpType op);
+
+  std::shared_ptr<CsgNode> Boolean(const std::shared_ptr<CsgNode> &second,
+                                   OpType op) override;
+
+  std::shared_ptr<CsgNode> Transform(const mat3x4 &m) const override;
+
+  std::shared_ptr<CsgLeafNode> ToLeafNode() const override;
+
+  CsgNodeType GetNodeType() const override { return op_; }
+
+  mat3x4 GetTransform() const override;
+
+ private:
+  struct Impl {
+    std::vector<std::shared_ptr<CsgNode>> children_;
+    bool forcedToLeafNodes_ = false;
+  };
+  mutable ConcurrentSharedPtr<Impl> impl_ = ConcurrentSharedPtr<Impl>(Impl{});
+  CsgNodeType op_;
+  mat3x4 transform_ = la::identity;
+  // the following fields are for lazy evaluation, so they are mutable
+  mutable std::shared_ptr<CsgLeafNode> cache_ = nullptr;
+
+  void SetOp(OpType);
+  bool IsOp(OpType op);
+
+  static std::shared_ptr<Manifold::Impl> BatchBoolean(
+      OpType operation,
+      std::vector<std::shared_ptr<const Manifold::Impl>> &results);
+
+  void BatchUnion() const;
+
+  std::vector<std::shared_ptr<CsgNode>> &GetChildren(
+      bool forceToLeafNodes = true) const;
+};
+
+}  // namespace manifold

+ 696 - 0
thirdparty/manifold/src/edge_op.cpp

@@ -0,0 +1,696 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./impl.h"
+#include "./parallel.h"
+
+namespace {
+using namespace manifold;
+
+ivec3 TriOf(int edge) {
+  ivec3 triEdge;
+  triEdge[0] = edge;
+  triEdge[1] = NextHalfedge(triEdge[0]);
+  triEdge[2] = NextHalfedge(triEdge[1]);
+  return triEdge;
+}
+
+bool Is01Longest(vec2 v0, vec2 v1, vec2 v2) {
+  const vec2 e[3] = {v1 - v0, v2 - v1, v0 - v2};
+  double l[3];
+  for (int i : {0, 1, 2}) l[i] = la::dot(e[i], e[i]);
+  return l[0] > l[1] && l[0] > l[2];
+}
+
+struct DuplicateEdge {
+  const Halfedge* sortedHalfedge;
+
+  bool operator()(int edge) {
+    const Halfedge& halfedge = sortedHalfedge[edge];
+    const Halfedge& nextHalfedge = sortedHalfedge[edge + 1];
+    return halfedge.startVert == nextHalfedge.startVert &&
+           halfedge.endVert == nextHalfedge.endVert;
+  }
+};
+
+struct ShortEdge {
+  VecView<const Halfedge> halfedge;
+  VecView<const vec3> vertPos;
+  const double tolerance;
+
+  bool operator()(int edge) const {
+    if (halfedge[edge].pairedHalfedge < 0) return false;
+    // Flag short edges
+    const vec3 delta =
+        vertPos[halfedge[edge].endVert] - vertPos[halfedge[edge].startVert];
+    return la::dot(delta, delta) < tolerance * tolerance;
+  }
+};
+
+struct FlagEdge {
+  VecView<const Halfedge> halfedge;
+  VecView<const TriRef> triRef;
+
+  bool operator()(int edge) const {
+    if (halfedge[edge].pairedHalfedge < 0) return false;
+    // Flag redundant edges - those where the startVert is surrounded by only
+    // two original triangles.
+    const TriRef ref0 = triRef[edge / 3];
+    int current = NextHalfedge(halfedge[edge].pairedHalfedge);
+    const TriRef ref1 = triRef[current / 3];
+    while (current != edge) {
+      current = NextHalfedge(halfedge[current].pairedHalfedge);
+      int tri = current / 3;
+      const TriRef ref = triRef[tri];
+      if (!ref.SameFace(ref0) && !ref.SameFace(ref1)) return false;
+    }
+    return true;
+  }
+};
+
+struct SwappableEdge {
+  VecView<const Halfedge> halfedge;
+  VecView<const vec3> vertPos;
+  VecView<const vec3> triNormal;
+  const double tolerance;
+
+  bool operator()(int edge) const {
+    if (halfedge[edge].pairedHalfedge < 0) return false;
+
+    int tri = edge / 3;
+    ivec3 triEdge = TriOf(edge);
+    mat2x3 projection = GetAxisAlignedProjection(triNormal[tri]);
+    vec2 v[3];
+    for (int i : {0, 1, 2})
+      v[i] = projection * vertPos[halfedge[triEdge[i]].startVert];
+    if (CCW(v[0], v[1], v[2], tolerance) > 0 || !Is01Longest(v[0], v[1], v[2]))
+      return false;
+
+    // Switch to neighbor's projection.
+    edge = halfedge[edge].pairedHalfedge;
+    tri = edge / 3;
+    triEdge = TriOf(edge);
+    projection = GetAxisAlignedProjection(triNormal[tri]);
+    for (int i : {0, 1, 2})
+      v[i] = projection * vertPos[halfedge[triEdge[i]].startVert];
+    return CCW(v[0], v[1], v[2], tolerance) > 0 ||
+           Is01Longest(v[0], v[1], v[2]);
+  }
+};
+
+struct SortEntry {
+  int start;
+  int end;
+  size_t index;
+  inline bool operator<(const SortEntry& other) const {
+    return start == other.start ? end < other.end : start < other.start;
+  }
+};
+}  // namespace
+
+namespace manifold {
+
+/**
+ * Duplicates just enough verts to convert an even-manifold to a proper
+ * 2-manifold, splitting non-manifold verts and edges with too many triangles.
+ */
+void Manifold::Impl::CleanupTopology() {
+  if (!halfedge_.size()) return;
+
+  // In the case of a very bad triangulation, it is possible to create pinched
+  // verts. They must be removed before edge collapse.
+  SplitPinchedVerts();
+
+  while (1) {
+    ZoneScopedN("DedupeEdge");
+
+    const size_t nbEdges = halfedge_.size();
+    size_t numFlagged = 0;
+
+    Vec<SortEntry> entries;
+    entries.reserve(nbEdges / 2);
+    for (size_t i = 0; i < nbEdges; ++i) {
+      if (halfedge_[i].IsForward()) {
+        entries.push_back({halfedge_[i].startVert, halfedge_[i].endVert, i});
+      }
+    }
+
+    stable_sort(entries.begin(), entries.end());
+    for (size_t i = 0; i < entries.size() - 1; ++i) {
+      const int h0 = entries[i].index;
+      const int h1 = entries[i + 1].index;
+      if (halfedge_[h0].startVert == halfedge_[h1].startVert &&
+          halfedge_[h0].endVert == halfedge_[h1].endVert) {
+        DedupeEdge(entries[i].index);
+        numFlagged++;
+      }
+    }
+
+    if (numFlagged == 0) break;
+
+#ifdef MANIFOLD_DEBUG
+    if (ManifoldParams().verbose) {
+      std::cout << "found " << numFlagged << " duplicate edges to split"
+                << std::endl;
+    }
+#endif
+  }
+}
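A minimal standalone sketch of the duplicate-edge scan above: collect the forward (start, end) pairs, sort them, and any equal neighbors are duplicated directed edges that would need a DedupeEdge split. EdgeKey is a stand-in for the SortEntry used here.

// Sketch only: find duplicated directed edges by sorting (start, end) pairs.
#include <algorithm>
#include <cstdio>
#include <vector>

struct EdgeKey {
  int start, end;
  bool operator<(const EdgeKey& o) const {
    return start == o.start ? end < o.end : start < o.start;
  }
  bool operator==(const EdgeKey& o) const {
    return start == o.start && end == o.end;
  }
};

int main() {
  std::vector<EdgeKey> edges = {{0, 1}, {1, 2}, {0, 1}, {2, 0}};
  std::sort(edges.begin(), edges.end());
  int duplicates = 0;
  for (size_t i = 0; i + 1 < edges.size(); ++i)
    if (edges[i] == edges[i + 1]) ++duplicates;
  std::printf("duplicate directed edges: %d\n", duplicates);  // 1
}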
+
+/**
+ * Collapses degenerate triangles by removing edges shorter than tolerance_ and
+ * any edge that is preceded by an edge that joins the same two face relations.
+ * It also performs edge swaps on the long edges of degenerate triangles, though
+ * there are some configurations of degenerates that cannot be removed this way.
+ *
+ * Before collapsing edges, the mesh is checked for duplicate edges (more than
+ * one pair of triangles sharing the same edge), which are removed by
+ * duplicating one vert and adding two triangles. These degenerate triangles are
+ * likely to be collapsed again in the subsequent simplification.
+ *
+ * Note when an edge collapse would result in something non-manifold, the
+ * vertices are duplicated in such a way as to remove handles or separate
+ * meshes, thus decreasing the Genus(). It only increases when meshes that have
+ * collapsed to just a pair of triangles are removed entirely.
+ *
+ * Rather than actually removing the edges, this step merely marks them for
+ * removal, by setting vertPos to NaN and halfedge to {-1, -1, -1, -1}.
+ */
+void Manifold::Impl::SimplifyTopology() {
+  if (!halfedge_.size()) return;
+
+  CleanupTopology();
+
+  if (!ManifoldParams().cleanupTriangles) {
+    return;
+  }
+
+  const size_t nbEdges = halfedge_.size();
+  auto policy = autoPolicy(nbEdges, 1e5);
+  size_t numFlagged = 0;
+  Vec<uint8_t> bFlags(nbEdges);
+
+  std::vector<int> scratchBuffer;
+  scratchBuffer.reserve(10);
+  {
+    ZoneScopedN("CollapseShortEdge");
+    numFlagged = 0;
+    ShortEdge se{halfedge_, vertPos_, epsilon_};
+    for_each_n(policy, countAt(0_uz), nbEdges,
+               [&](size_t i) { bFlags[i] = se(i); });
+    for (size_t i = 0; i < nbEdges; ++i) {
+      if (bFlags[i]) {
+        CollapseEdge(i, scratchBuffer);
+        scratchBuffer.resize(0);
+        numFlagged++;
+      }
+    }
+  }
+
+#ifdef MANIFOLD_DEBUG
+  if (ManifoldParams().verbose && numFlagged > 0) {
+    std::cout << "found " << numFlagged << " short edges to collapse"
+              << std::endl;
+  }
+#endif
+
+  {
+    ZoneScopedN("CollapseFlaggedEdge");
+    numFlagged = 0;
+    FlagEdge se{halfedge_, meshRelation_.triRef};
+    for_each_n(policy, countAt(0_uz), nbEdges,
+               [&](size_t i) { bFlags[i] = se(i); });
+    for (size_t i = 0; i < nbEdges; ++i) {
+      if (bFlags[i]) {
+        CollapseEdge(i, scratchBuffer);
+        scratchBuffer.resize(0);
+        numFlagged++;
+      }
+    }
+  }
+
+#ifdef MANIFOLD_DEBUG
+  if (ManifoldParams().verbose && numFlagged > 0) {
+    std::cout << "found " << numFlagged << " colinear edges to collapse"
+              << std::endl;
+  }
+#endif
+
+  {
+    ZoneScopedN("RecursiveEdgeSwap");
+    numFlagged = 0;
+    SwappableEdge se{halfedge_, vertPos_, faceNormal_, tolerance_};
+    for_each_n(policy, countAt(0_uz), nbEdges,
+               [&](size_t i) { bFlags[i] = se(i); });
+    std::vector<int> edgeSwapStack;
+    std::vector<int> visited(halfedge_.size(), -1);
+    int tag = 0;
+    for (size_t i = 0; i < nbEdges; ++i) {
+      if (bFlags[i]) {
+        numFlagged++;
+        tag++;
+        RecursiveEdgeSwap(i, tag, visited, edgeSwapStack, scratchBuffer);
+        while (!edgeSwapStack.empty()) {
+          int last = edgeSwapStack.back();
+          edgeSwapStack.pop_back();
+          RecursiveEdgeSwap(last, tag, visited, edgeSwapStack, scratchBuffer);
+        }
+      }
+    }
+  }
+
+#ifdef MANIFOLD_DEBUG
+  if (ManifoldParams().verbose && numFlagged > 0) {
+    std::cout << "found " << numFlagged << " edges to swap" << std::endl;
+  }
+#endif
+}
+
+// Deduplicate the given 4-manifold edge by duplicating endVert, thus making the
+// edges distinct. Also duplicates startVert if it becomes pinched.
+void Manifold::Impl::DedupeEdge(const int edge) {
+  // Orbit endVert
+  const int startVert = halfedge_[edge].startVert;
+  const int endVert = halfedge_[edge].endVert;
+  int current = halfedge_[NextHalfedge(edge)].pairedHalfedge;
+  while (current != edge) {
+    const int vert = halfedge_[current].startVert;
+    if (vert == startVert) {
+      // Single topological unit needs 2 faces added to be split
+      const int newVert = vertPos_.size();
+      vertPos_.push_back(vertPos_[endVert]);
+      if (vertNormal_.size() > 0) vertNormal_.push_back(vertNormal_[endVert]);
+      current = halfedge_[NextHalfedge(current)].pairedHalfedge;
+      const int opposite = halfedge_[NextHalfedge(edge)].pairedHalfedge;
+
+      UpdateVert(newVert, current, opposite);
+
+      int newHalfedge = halfedge_.size();
+      int newFace = newHalfedge / 3;
+      int oldFace = current / 3;
+      int outsideVert = halfedge_[current].startVert;
+      halfedge_.push_back({endVert, newVert, -1});
+      halfedge_.push_back({newVert, outsideVert, -1});
+      halfedge_.push_back({outsideVert, endVert, -1});
+      PairUp(newHalfedge + 2, halfedge_[current].pairedHalfedge);
+      PairUp(newHalfedge + 1, current);
+      if (meshRelation_.triRef.size() > 0)
+        meshRelation_.triRef.push_back(meshRelation_.triRef[oldFace]);
+      if (meshRelation_.triProperties.size() > 0)
+        meshRelation_.triProperties.push_back(
+            meshRelation_.triProperties[oldFace]);
+      if (faceNormal_.size() > 0) faceNormal_.push_back(faceNormal_[oldFace]);
+
+      newHalfedge += 3;
+      ++newFace;
+      oldFace = opposite / 3;
+      outsideVert = halfedge_[opposite].startVert;
+      halfedge_.push_back({newVert, endVert, -1});
+      halfedge_.push_back({endVert, outsideVert, -1});
+      halfedge_.push_back({outsideVert, newVert, -1});
+      PairUp(newHalfedge + 2, halfedge_[opposite].pairedHalfedge);
+      PairUp(newHalfedge + 1, opposite);
+      PairUp(newHalfedge, newHalfedge - 3);
+      if (meshRelation_.triRef.size() > 0)
+        meshRelation_.triRef.push_back(meshRelation_.triRef[oldFace]);
+      if (meshRelation_.triProperties.size() > 0)
+        meshRelation_.triProperties.push_back(
+            meshRelation_.triProperties[oldFace]);
+      if (faceNormal_.size() > 0) faceNormal_.push_back(faceNormal_[oldFace]);
+
+      break;
+    }
+
+    current = halfedge_[NextHalfedge(current)].pairedHalfedge;
+  }
+
+  if (current == edge) {
+    // Separate topological unit needs no new faces to be split
+    const int newVert = vertPos_.size();
+    vertPos_.push_back(vertPos_[endVert]);
+    if (vertNormal_.size() > 0) vertNormal_.push_back(vertNormal_[endVert]);
+
+    ForVert(NextHalfedge(current), [this, newVert](int e) {
+      halfedge_[e].startVert = newVert;
+      halfedge_[halfedge_[e].pairedHalfedge].endVert = newVert;
+    });
+  }
+
+  // Orbit startVert
+  const int pair = halfedge_[edge].pairedHalfedge;
+  current = halfedge_[NextHalfedge(pair)].pairedHalfedge;
+  while (current != pair) {
+    const int vert = halfedge_[current].startVert;
+    if (vert == endVert) {
+      break;  // Connected: not a pinched vert
+    }
+    current = halfedge_[NextHalfedge(current)].pairedHalfedge;
+  }
+
+  if (current == pair) {
+    // Split the pinched vert the previous split created.
+    const int newVert = vertPos_.size();
+    vertPos_.push_back(vertPos_[endVert]);
+    if (vertNormal_.size() > 0) vertNormal_.push_back(vertNormal_[endVert]);
+
+    ForVert(NextHalfedge(current), [this, newVert](int e) {
+      halfedge_[e].startVert = newVert;
+      halfedge_[halfedge_[e].pairedHalfedge].endVert = newVert;
+    });
+  }
+}
+
+void Manifold::Impl::PairUp(int edge0, int edge1) {
+  halfedge_[edge0].pairedHalfedge = edge1;
+  halfedge_[edge1].pairedHalfedge = edge0;
+}
+
+// Traverses CW around startEdge.endVert from startEdge to endEdge
+// (endEdge.endVert must == startEdge.endVert), updating each edge to point
+// to vert instead.
+void Manifold::Impl::UpdateVert(int vert, int startEdge, int endEdge) {
+  int current = startEdge;
+  while (current != endEdge) {
+    halfedge_[current].endVert = vert;
+    current = NextHalfedge(current);
+    halfedge_[current].startVert = vert;
+    current = halfedge_[current].pairedHalfedge;
+    DEBUG_ASSERT(current != startEdge, logicErr, "infinite loop in decimator!");
+  }
+}
+
+// In the event that the edge collapse would create a non-manifold edge,
+// instead we duplicate the two verts and attach the manifolds the other way
+// across this edge.
+void Manifold::Impl::FormLoop(int current, int end) {
+  int startVert = vertPos_.size();
+  vertPos_.push_back(vertPos_[halfedge_[current].startVert]);
+  int endVert = vertPos_.size();
+  vertPos_.push_back(vertPos_[halfedge_[current].endVert]);
+
+  int oldMatch = halfedge_[current].pairedHalfedge;
+  int newMatch = halfedge_[end].pairedHalfedge;
+
+  UpdateVert(startVert, oldMatch, newMatch);
+  UpdateVert(endVert, end, current);
+
+  halfedge_[current].pairedHalfedge = newMatch;
+  halfedge_[newMatch].pairedHalfedge = current;
+  halfedge_[end].pairedHalfedge = oldMatch;
+  halfedge_[oldMatch].pairedHalfedge = end;
+
+  RemoveIfFolded(end);
+}
+
+void Manifold::Impl::CollapseTri(const ivec3& triEdge) {
+  if (halfedge_[triEdge[1]].pairedHalfedge == -1) return;
+  int pair1 = halfedge_[triEdge[1]].pairedHalfedge;
+  int pair2 = halfedge_[triEdge[2]].pairedHalfedge;
+  halfedge_[pair1].pairedHalfedge = pair2;
+  halfedge_[pair2].pairedHalfedge = pair1;
+  for (int i : {0, 1, 2}) {
+    halfedge_[triEdge[i]] = {-1, -1, -1};
+  }
+}
+
+void Manifold::Impl::RemoveIfFolded(int edge) {
+  const ivec3 tri0edge = TriOf(edge);
+  const ivec3 tri1edge = TriOf(halfedge_[edge].pairedHalfedge);
+  if (halfedge_[tri0edge[1]].pairedHalfedge == -1) return;
+  if (halfedge_[tri0edge[1]].endVert == halfedge_[tri1edge[1]].endVert) {
+    if (halfedge_[tri0edge[1]].pairedHalfedge == tri1edge[2]) {
+      if (halfedge_[tri0edge[2]].pairedHalfedge == tri1edge[1]) {
+        for (int i : {0, 1, 2})
+          vertPos_[halfedge_[tri0edge[i]].startVert] = vec3(NAN);
+      } else {
+        vertPos_[halfedge_[tri0edge[1]].startVert] = vec3(NAN);
+      }
+    } else {
+      if (halfedge_[tri0edge[2]].pairedHalfedge == tri1edge[1]) {
+        vertPos_[halfedge_[tri1edge[1]].startVert] = vec3(NAN);
+      }
+    }
+    PairUp(halfedge_[tri0edge[1]].pairedHalfedge,
+           halfedge_[tri1edge[2]].pairedHalfedge);
+    PairUp(halfedge_[tri0edge[2]].pairedHalfedge,
+           halfedge_[tri1edge[1]].pairedHalfedge);
+    for (int i : {0, 1, 2}) {
+      halfedge_[tri0edge[i]] = {-1, -1, -1};
+      halfedge_[tri1edge[i]] = {-1, -1, -1};
+    }
+  }
+}
+
+// Collapses the given edge by removing startVert. May split the mesh
+// topologically if the collapse would have resulted in a 4-manifold edge. Do
+// not collapse an edge if startVert is pinched - the vert will be marked NaN,
+// but other edges may still be pointing to it.
+void Manifold::Impl::CollapseEdge(const int edge, std::vector<int>& edges) {
+  Vec<TriRef>& triRef = meshRelation_.triRef;
+  Vec<ivec3>& triProp = meshRelation_.triProperties;
+
+  const Halfedge toRemove = halfedge_[edge];
+  if (toRemove.pairedHalfedge < 0) return;
+
+  const int endVert = toRemove.endVert;
+  const ivec3 tri0edge = TriOf(edge);
+  const ivec3 tri1edge = TriOf(toRemove.pairedHalfedge);
+
+  const vec3 pNew = vertPos_[endVert];
+  const vec3 pOld = vertPos_[toRemove.startVert];
+  const vec3 delta = pNew - pOld;
+  const bool shortEdge = la::dot(delta, delta) < tolerance_ * tolerance_;
+
+  // Orbit endVert
+  int current = halfedge_[tri0edge[1]].pairedHalfedge;
+  while (current != tri1edge[2]) {
+    current = NextHalfedge(current);
+    edges.push_back(current);
+    current = halfedge_[current].pairedHalfedge;
+  }
+
+  // Orbit startVert
+  int start = halfedge_[tri1edge[1]].pairedHalfedge;
+  if (!shortEdge) {
+    current = start;
+    TriRef refCheck = triRef[toRemove.pairedHalfedge / 3];
+    vec3 pLast = vertPos_[halfedge_[tri1edge[1]].endVert];
+    while (current != tri0edge[2]) {
+      current = NextHalfedge(current);
+      vec3 pNext = vertPos_[halfedge_[current].endVert];
+      const int tri = current / 3;
+      const TriRef ref = triRef[tri];
+      const mat2x3 projection = GetAxisAlignedProjection(faceNormal_[tri]);
+      // Don't collapse if the edge is not redundant (this may have changed due
+      // to the collapse of neighbors).
+      if (!ref.SameFace(refCheck)) {
+        refCheck = triRef[edge / 3];
+        if (!ref.SameFace(refCheck)) {
+          return;
+        } else {
+          // Don't collapse if the edges separating the faces are not collinear
+          // (can happen when the two faces are coplanar).
+          if (CCW(projection * pOld, projection * pLast, projection * pNew,
+                  epsilon_) != 0)
+            return;
+        }
+      }
+
+      // Don't collapse edge if it would cause a triangle to invert.
+      if (CCW(projection * pNext, projection * pLast, projection * pNew,
+              epsilon_) < 0)
+        return;
+
+      pLast = pNext;
+      current = halfedge_[current].pairedHalfedge;
+    }
+  }
+
+  // Remove toRemove.startVert and replace with endVert.
+  vertPos_[toRemove.startVert] = vec3(NAN);
+  CollapseTri(tri1edge);
+
+  // Orbit startVert
+  const int tri0 = edge / 3;
+  const int tri1 = toRemove.pairedHalfedge / 3;
+  const int triVert0 = (edge + 1) % 3;
+  const int triVert1 = toRemove.pairedHalfedge % 3;
+  current = start;
+  while (current != tri0edge[2]) {
+    current = NextHalfedge(current);
+
+    if (triProp.size() > 0) {
+      // Update the shifted triangles to the vertBary of endVert
+      const int tri = current / 3;
+      const int vIdx = current - 3 * tri;
+      if (triRef[tri].SameFace(triRef[tri0])) {
+        triProp[tri][vIdx] = triProp[tri0][triVert0];
+      } else if (triRef[tri].SameFace(triRef[tri1])) {
+        triProp[tri][vIdx] = triProp[tri1][triVert1];
+      }
+    }
+
+    const int vert = halfedge_[current].endVert;
+    const int next = halfedge_[current].pairedHalfedge;
+    for (size_t i = 0; i < edges.size(); ++i) {
+      if (vert == halfedge_[edges[i]].endVert) {
+        FormLoop(edges[i], current);
+        start = next;
+        edges.resize(i);
+        break;
+      }
+    }
+    current = next;
+  }
+
+  UpdateVert(endVert, start, tri0edge[2]);
+  CollapseTri(tri0edge);
+  RemoveIfFolded(start);
+}
+
+void Manifold::Impl::RecursiveEdgeSwap(const int edge, int& tag,
+                                       std::vector<int>& visited,
+                                       std::vector<int>& edgeSwapStack,
+                                       std::vector<int>& edges) {
+  Vec<TriRef>& triRef = meshRelation_.triRef;
+
+  if (edge < 0) return;
+  const int pair = halfedge_[edge].pairedHalfedge;
+  if (pair < 0) return;
+
+  // avoid infinite recursion
+  if (visited[edge] == tag && visited[pair] == tag) return;
+
+  const ivec3 tri0edge = TriOf(edge);
+  const ivec3 tri1edge = TriOf(pair);
+  const ivec3 perm0 = TriOf(edge % 3);
+  const ivec3 perm1 = TriOf(pair % 3);
+
+  mat2x3 projection = GetAxisAlignedProjection(faceNormal_[edge / 3]);
+  vec2 v[4];
+  for (int i : {0, 1, 2})
+    v[i] = projection * vertPos_[halfedge_[tri0edge[i]].startVert];
+  // Only operate on the long edge of a degenerate triangle.
+  if (CCW(v[0], v[1], v[2], tolerance_) > 0 || !Is01Longest(v[0], v[1], v[2]))
+    return;
+
+  // Switch to neighbor's projection.
+  projection = GetAxisAlignedProjection(faceNormal_[pair / 3]);
+  for (int i : {0, 1, 2})
+    v[i] = projection * vertPos_[halfedge_[tri0edge[i]].startVert];
+  v[3] = projection * vertPos_[halfedge_[tri1edge[2]].startVert];
+
+  auto SwapEdge = [&]() {
+    // The 0-verts are swapped to the opposite 2-verts.
+    const int v0 = halfedge_[tri0edge[2]].startVert;
+    const int v1 = halfedge_[tri1edge[2]].startVert;
+    halfedge_[tri0edge[0]].startVert = v1;
+    halfedge_[tri0edge[2]].endVert = v1;
+    halfedge_[tri1edge[0]].startVert = v0;
+    halfedge_[tri1edge[2]].endVert = v0;
+    PairUp(tri0edge[0], halfedge_[tri1edge[2]].pairedHalfedge);
+    PairUp(tri1edge[0], halfedge_[tri0edge[2]].pairedHalfedge);
+    PairUp(tri0edge[2], tri1edge[2]);
+    // Both triangles are now subsets of the neighboring triangle.
+    const int tri0 = tri0edge[0] / 3;
+    const int tri1 = tri1edge[0] / 3;
+    faceNormal_[tri0] = faceNormal_[tri1];
+    triRef[tri0] = triRef[tri1];
+    const double l01 = la::length(v[1] - v[0]);
+    const double l02 = la::length(v[2] - v[0]);
+    const double a = std::max(0.0, std::min(1.0, l02 / l01));
+    // Update properties if applicable
+    if (meshRelation_.properties.size() > 0) {
+      Vec<ivec3>& triProp = meshRelation_.triProperties;
+      Vec<double>& prop = meshRelation_.properties;
+      triProp[tri0] = triProp[tri1];
+      triProp[tri0][perm0[1]] = triProp[tri1][perm1[0]];
+      triProp[tri0][perm0[0]] = triProp[tri1][perm1[2]];
+      const int numProp = NumProp();
+      const int newProp = prop.size() / numProp;
+      const int propIdx0 = triProp[tri1][perm1[0]];
+      const int propIdx1 = triProp[tri1][perm1[1]];
+      for (int p = 0; p < numProp; ++p) {
+        prop.push_back(a * prop[numProp * propIdx0 + p] +
+                       (1 - a) * prop[numProp * propIdx1 + p]);
+      }
+      triProp[tri1][perm1[0]] = newProp;
+      triProp[tri0][perm0[2]] = newProp;
+    }
+
+    // if the new edge already exists, duplicate the verts and split the mesh.
+    int current = halfedge_[tri1edge[0]].pairedHalfedge;
+    const int endVert = halfedge_[tri1edge[1]].endVert;
+    while (current != tri0edge[1]) {
+      current = NextHalfedge(current);
+      if (halfedge_[current].endVert == endVert) {
+        FormLoop(tri0edge[2], current);
+        RemoveIfFolded(tri0edge[2]);
+        return;
+      }
+      current = halfedge_[current].pairedHalfedge;
+    }
+  };
+
+  // Only operate if the other triangles are not degenerate.
+  if (CCW(v[1], v[0], v[3], tolerance_) <= 0) {
+    if (!Is01Longest(v[1], v[0], v[3])) return;
+    // Two facing, long-edge degenerates can swap.
+    SwapEdge();
+    const vec2 e23 = v[3] - v[2];
+    if (la::dot(e23, e23) < tolerance_ * tolerance_) {
+      tag++;
+      CollapseEdge(tri0edge[2], edges);
+      edges.resize(0);
+    } else {
+      visited[edge] = tag;
+      visited[pair] = tag;
+      edgeSwapStack.insert(edgeSwapStack.end(), {tri1edge[1], tri1edge[0],
+                                                 tri0edge[1], tri0edge[0]});
+    }
+    return;
+  } else if (CCW(v[0], v[3], v[2], tolerance_) <= 0 ||
+             CCW(v[1], v[2], v[3], tolerance_) <= 0) {
+    return;
+  }
+  // Normal path
+  SwapEdge();
+  visited[edge] = tag;
+  visited[pair] = tag;
+  edgeSwapStack.insert(edgeSwapStack.end(),
+                       {halfedge_[tri1edge[0]].pairedHalfedge,
+                        halfedge_[tri0edge[1]].pairedHalfedge});
+}
+
+void Manifold::Impl::SplitPinchedVerts() {
+  ZoneScoped;
+  std::vector<bool> vertProcessed(NumVert(), false);
+  std::vector<bool> halfedgeProcessed(halfedge_.size(), false);
+  for (size_t i = 0; i < halfedge_.size(); ++i) {
+    if (halfedgeProcessed[i]) continue;
+    int vert = halfedge_[i].startVert;
+    if (vertProcessed[vert]) {
+      vertPos_.push_back(vertPos_[vert]);
+      vert = NumVert() - 1;
+    } else {
+      vertProcessed[vert] = true;
+    }
+    ForVert(i, [this, &halfedgeProcessed, vert](int current) {
+      halfedgeProcessed[current] = true;
+      halfedge_[current].startVert = vert;
+      halfedge_[halfedge_[current].pairedHalfedge].endVert = vert;
+    });
+  }
+}
+}  // namespace manifold

+ 319 - 0
thirdparty/manifold/src/face_op.cpp

@@ -0,0 +1,319 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/concurrent_map.h>)
+#include <tbb/tbb.h>
+#define TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS 1
+#include <tbb/concurrent_map.h>
+#endif
+#include <unordered_set>
+
+#include "./impl.h"
+#include "./parallel.h"
+#include "manifold/polygon.h"
+
+namespace manifold {
+
+using GeneralTriangulation = std::function<std::vector<ivec3>(int)>;
+using AddTriangle = std::function<void(int, ivec3, vec3, TriRef)>;
+
+/**
+ * Triangulates the faces. In this case, the halfedge_ vector is not yet a set
+ * of triangles as required by this data structure, but is instead a set of
+ * general faces with the input faceEdge vector having length of the number of
+ * faces + 1. The values are indices into the halfedge_ vector for the first
+ * edge of each face, with the final value being the length of the halfedge_
+ * vector itself. Upon return, halfedge_ has been lengthened and properly
+ * represents the mesh as a set of triangles as usual. In this process the
+ * faceNormal_ values are retained, repeated as necessary.
+ */
+void Manifold::Impl::Face2Tri(const Vec<int>& faceEdge,
+                              const Vec<TriRef>& halfedgeRef) {
+  ZoneScoped;
+  Vec<ivec3> triVerts;
+  Vec<vec3> triNormal;
+  Vec<TriRef>& triRef = meshRelation_.triRef;
+  triRef.resize(0);
+  auto processFace = [&](GeneralTriangulation general, AddTriangle addTri,
+                         int face) {
+    const int firstEdge = faceEdge[face];
+    const int lastEdge = faceEdge[face + 1];
+    const int numEdge = lastEdge - firstEdge;
+    DEBUG_ASSERT(numEdge >= 3, topologyErr, "face has less than three edges.");
+    const vec3 normal = faceNormal_[face];
+
+    if (numEdge == 3) {  // Single triangle
+      int mapping[3] = {halfedge_[firstEdge].startVert,
+                        halfedge_[firstEdge + 1].startVert,
+                        halfedge_[firstEdge + 2].startVert};
+      ivec3 tri(halfedge_[firstEdge].startVert,
+                halfedge_[firstEdge + 1].startVert,
+                halfedge_[firstEdge + 2].startVert);
+      ivec3 ends(halfedge_[firstEdge].endVert, halfedge_[firstEdge + 1].endVert,
+                 halfedge_[firstEdge + 2].endVert);
+      if (ends[0] == tri[2]) {
+        std::swap(tri[1], tri[2]);
+        std::swap(ends[1], ends[2]);
+      }
+      DEBUG_ASSERT(ends[0] == tri[1] && ends[1] == tri[2] && ends[2] == tri[0],
+                   topologyErr, "These 3 edges do not form a triangle!");
+
+      addTri(face, tri, normal, halfedgeRef[firstEdge]);
+    } else if (numEdge == 4) {  // Pair of triangles
+      int mapping[4] = {halfedge_[firstEdge].startVert,
+                        halfedge_[firstEdge + 1].startVert,
+                        halfedge_[firstEdge + 2].startVert,
+                        halfedge_[firstEdge + 3].startVert};
+      const mat2x3 projection = GetAxisAlignedProjection(normal);
+      auto triCCW = [&projection, this](const ivec3 tri) {
+        return CCW(projection * this->vertPos_[tri[0]],
+                   projection * this->vertPos_[tri[1]],
+                   projection * this->vertPos_[tri[2]], epsilon_) >= 0;
+      };
+
+      ivec3 tri0(halfedge_[firstEdge].startVert, halfedge_[firstEdge].endVert,
+                 -1);
+      ivec3 tri1(-1, -1, tri0[0]);
+      for (const int i : {1, 2, 3}) {
+        if (halfedge_[firstEdge + i].startVert == tri0[1]) {
+          tri0[2] = halfedge_[firstEdge + i].endVert;
+          tri1[0] = tri0[2];
+        }
+        if (halfedge_[firstEdge + i].endVert == tri0[0]) {
+          tri1[1] = halfedge_[firstEdge + i].startVert;
+        }
+      }
+      DEBUG_ASSERT(la::all(la::gequal(tri0, ivec3(0))) &&
+                       la::all(la::gequal(tri1, ivec3(0))),
+                   topologyErr, "non-manifold quad!");
+      bool firstValid = triCCW(tri0) && triCCW(tri1);
+      tri0[2] = tri1[1];
+      tri1[2] = tri0[1];
+      bool secondValid = triCCW(tri0) && triCCW(tri1);
+
+      if (!secondValid) {
+        tri0[2] = tri1[0];
+        tri1[2] = tri0[0];
+      } else if (firstValid) {
+        vec3 firstCross = vertPos_[tri0[0]] - vertPos_[tri1[0]];
+        vec3 secondCross = vertPos_[tri0[1]] - vertPos_[tri1[1]];
+        if (la::dot(firstCross, firstCross) <
+            la::dot(secondCross, secondCross)) {
+          tri0[2] = tri1[0];
+          tri1[2] = tri0[0];
+        }
+      }
+
+      for (const auto& tri : {tri0, tri1}) {
+        addTri(face, tri, normal, halfedgeRef[firstEdge]);
+      }
+    } else {  // General triangulation
+      for (const auto& tri : general(face)) {
+        addTri(face, tri, normal, halfedgeRef[firstEdge]);
+      }
+    }
+  };
+  auto generalTriangulation = [&](int face) {
+    const vec3 normal = faceNormal_[face];
+    const mat2x3 projection = GetAxisAlignedProjection(normal);
+    const PolygonsIdx polys =
+        Face2Polygons(halfedge_.cbegin() + faceEdge[face],
+                      halfedge_.cbegin() + faceEdge[face + 1], projection);
+    return TriangulateIdx(polys, epsilon_);
+  };
+#if (MANIFOLD_PAR == 1) && __has_include(<tbb/tbb.h>)
+  tbb::task_group group;
+  // map from face to triangle
+  tbb::concurrent_unordered_map<int, std::vector<ivec3>> results;
+  Vec<size_t> triCount(faceEdge.size());
+  triCount.back() = 0;
+  // precompute number of triangles per face, and launch async tasks to
+  // triangulate complex faces
+  for_each(autoPolicy(faceEdge.size(), 1e5), countAt(0_uz),
+           countAt(faceEdge.size() - 1), [&](size_t face) {
+             triCount[face] = faceEdge[face + 1] - faceEdge[face] - 2;
+             DEBUG_ASSERT(triCount[face] >= 1, topologyErr,
+                          "face has less than three edges.");
+             if (triCount[face] > 2)
+               group.run([&, face] {
+                 std::vector<ivec3> newTris = generalTriangulation(face);
+                 triCount[face] = newTris.size();
+                 results[face] = std::move(newTris);
+               });
+           });
+  group.wait();
+  // prefix sum computation (assign unique index to each face) and preallocation
+  exclusive_scan(triCount.begin(), triCount.end(), triCount.begin(), 0_uz);
+  triVerts.resize(triCount.back());
+  triNormal.resize(triCount.back());
+  triRef.resize(triCount.back());
+
+  auto processFace2 = std::bind(
+      processFace, [&](size_t face) { return std::move(results[face]); },
+      [&](size_t face, ivec3 tri, vec3 normal, TriRef r) {
+        triVerts[triCount[face]] = tri;
+        triNormal[triCount[face]] = normal;
+        triRef[triCount[face]] = r;
+        triCount[face]++;
+      },
+      std::placeholders::_1);
+  // set triangles in parallel
+  for_each(autoPolicy(faceEdge.size(), 1e4), countAt(0_uz),
+           countAt(faceEdge.size() - 1), processFace2);
+#else
+  triVerts.reserve(faceEdge.size());
+  triNormal.reserve(faceEdge.size());
+  triRef.reserve(faceEdge.size());
+  auto processFace2 = std::bind(
+      processFace, generalTriangulation,
+      [&](size_t _face, ivec3 tri, vec3 normal, TriRef r) {
+        triVerts.push_back(tri);
+        triNormal.push_back(normal);
+        triRef.push_back(r);
+      },
+      std::placeholders::_1);
+  for (size_t face = 0; face < faceEdge.size() - 1; ++face) {
+    processFace2(face);
+  }
+#endif
+
+  faceNormal_ = std::move(triNormal);
+  CreateHalfedges(triVerts);
+}
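A minimal standalone sketch of the CSR-style faceEdge layout described in the comment above: faceEdge[f] indexes face f's first halfedge, faceEdge[f + 1] is one past its last, and a face with n edges triangulates into n - 2 triangles.

// Sketch only: walking a faceEdge offset array (three faces: triangle, quad, pentagon).
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> faceEdge = {0, 3, 7, 12};  // length = number of faces + 1
  for (size_t face = 0; face + 1 < faceEdge.size(); ++face) {
    const int first = faceEdge[face];
    const int last = faceEdge[face + 1];
    std::printf("face %zu: edges [%d, %d) -> %d triangles\n", face, first, last,
                last - first - 2);
  }
}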
+
+/**
+ * Returns a set of 2D polygons formed by the input projection of the vertices
+ * of the list of Halfedges, which must be an even-manifold, meaning each vert
+ * must be referenced the same number of times as a startVert and endVert.
+ */
+PolygonsIdx Manifold::Impl::Face2Polygons(VecView<Halfedge>::IterC start,
+                                          VecView<Halfedge>::IterC end,
+                                          mat2x3 projection) const {
+  std::multimap<int, int> vert_edge;
+  for (auto edge = start; edge != end; ++edge) {
+    vert_edge.emplace(
+        std::make_pair(edge->startVert, static_cast<int>(edge - start)));
+  }
+
+  PolygonsIdx polys;
+  int startEdge = 0;
+  int thisEdge = startEdge;
+  while (1) {
+    if (thisEdge == startEdge) {
+      if (vert_edge.empty()) break;
+      startEdge = vert_edge.begin()->second;
+      thisEdge = startEdge;
+      polys.push_back({});
+    }
+    int vert = (start + thisEdge)->startVert;
+    polys.back().push_back({projection * vertPos_[vert], vert});
+    const auto result = vert_edge.find((start + thisEdge)->endVert);
+    DEBUG_ASSERT(result != vert_edge.end(), topologyErr, "non-manifold edge");
+    thisEdge = result->second;
+    vert_edge.erase(result);
+  }
+  return polys;
+}
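A minimal standalone sketch of the loop-stitching idea in Face2Polygons: map each startVert to its edge, then repeatedly follow endVert to the next unused edge until the loop closes. Plain (start, end) pairs stand in for Halfedge, and the input is assumed even-manifold so the lookup always succeeds.

// Sketch only: stitch directed edges into closed vertex loops.
#include <cstdio>
#include <map>
#include <vector>

struct DirEdge {
  int start, end;
};

std::vector<std::vector<int>> StitchLoops(const std::vector<DirEdge>& edges) {
  std::multimap<int, int> vertToEdge;  // startVert -> edge index
  for (int i = 0; i < static_cast<int>(edges.size()); ++i)
    vertToEdge.emplace(edges[i].start, i);

  std::vector<std::vector<int>> loops;
  while (!vertToEdge.empty()) {
    int e = vertToEdge.begin()->second;
    vertToEdge.erase(vertToEdge.begin());
    loops.push_back({edges[e].start});
    // Follow the chain until it returns to the loop's first vertex.
    while (edges[e].end != loops.back().front()) {
      loops.back().push_back(edges[e].end);
      auto it = vertToEdge.find(edges[e].end);  // assumes even-manifold input
      e = it->second;
      vertToEdge.erase(it);
    }
  }
  return loops;
}

int main() {
  // One triangle loop and one separate square loop.
  const std::vector<DirEdge> edges = {{0, 1}, {1, 2}, {2, 0},
                                      {4, 5}, {5, 6}, {6, 7}, {7, 4}};
  const auto loops = StitchLoops(edges);
  std::printf("%zu loops of size %zu and %zu\n", loops.size(), loops[0].size(),
              loops[1].size());  // 2 loops of size 3 and 4
}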
+
+Polygons Manifold::Impl::Slice(double height) const {
+  Box plane = bBox_;
+  plane.min.z = plane.max.z = height;
+  Vec<Box> query;
+  query.push_back(plane);
+  const SparseIndices collisions =
+      collider_.Collisions<false, false>(query.cview());
+
+  std::unordered_set<int> tris;
+  for (size_t i = 0; i < collisions.size(); ++i) {
+    const int tri = collisions.Get(i, 1);
+    double min = std::numeric_limits<double>::infinity();
+    double max = -std::numeric_limits<double>::infinity();
+    for (const int j : {0, 1, 2}) {
+      const double z = vertPos_[halfedge_[3 * tri + j].startVert].z;
+      min = std::min(min, z);
+      max = std::max(max, z);
+    }
+
+    if (min <= height && max > height) {
+      tris.insert(tri);
+    }
+  }
+
+  Polygons polys;
+  while (!tris.empty()) {
+    const int startTri = *tris.begin();
+    SimplePolygon poly;
+
+    int k = 0;
+    for (const int j : {0, 1, 2}) {
+      if (vertPos_[halfedge_[3 * startTri + j].startVert].z > height &&
+          vertPos_[halfedge_[3 * startTri + Next3(j)].startVert].z <= height) {
+        k = Next3(j);
+        break;
+      }
+    }
+
+    int tri = startTri;
+    do {
+      tris.erase(tris.find(tri));
+      if (vertPos_[halfedge_[3 * tri + k].endVert].z <= height) {
+        k = Next3(k);
+      }
+
+      Halfedge up = halfedge_[3 * tri + k];
+      const vec3 below = vertPos_[up.startVert];
+      const vec3 above = vertPos_[up.endVert];
+      const double a = (height - below.z) / (above.z - below.z);
+      poly.push_back(vec2(la::lerp(below, above, a)));
+
+      const int pair = up.pairedHalfedge;
+      tri = pair / 3;
+      k = Next3(pair % 3);
+    } while (tri != startTri);
+
+    polys.push_back(poly);
+  }
+
+  return polys;
+}
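A minimal worked example of the edge/plane intersection Slice() computes: for an edge crossing z == height, a = (height - below.z) / (above.z - below.z) and the hit point is lerp(below, above, a).

// Sketch only: intersecting one edge with the plane z == height.
#include <cstdio>

struct Vec3 {
  double x, y, z;
};

Vec3 Lerp(const Vec3& p, const Vec3& q, double a) {
  return {p.x + a * (q.x - p.x), p.y + a * (q.y - p.y), p.z + a * (q.z - p.z)};
}

int main() {
  const Vec3 below{0, 0, 1};
  const Vec3 above{3, 0, 4};
  const double height = 2;
  const double a = (height - below.z) / (above.z - below.z);  // (2 - 1) / (4 - 1) = 1/3
  const Vec3 hit = Lerp(below, above, a);
  std::printf("a = %.3f, hit = (%g, %g, %g)\n", a, hit.x, hit.y, hit.z);  // 0.333, (1, 0, 2)
}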
+
+Polygons Manifold::Impl::Project() const {
+  const mat2x3 projection = GetAxisAlignedProjection({0, 0, 1});
+  Vec<Halfedge> cusps(NumEdge());
+  cusps.resize(
+      copy_if(
+          halfedge_.cbegin(), halfedge_.cend(), cusps.begin(),
+          [&](Halfedge edge) {
+            return faceNormal_[halfedge_[edge.pairedHalfedge].pairedHalfedge /
+                               3]
+                           .z >= 0 &&
+                   faceNormal_[edge.pairedHalfedge / 3].z < 0;
+          }) -
+      cusps.begin());
+
+  PolygonsIdx polysIndexed =
+      Face2Polygons(cusps.cbegin(), cusps.cend(), projection);
+
+  Polygons polys;
+  for (const auto& poly : polysIndexed) {
+    SimplePolygon simple;
+    for (const PolyVert& polyVert : poly) {
+      simple.push_back(polyVert.pos);
+    }
+    polys.push_back(simple);
+  }
+
+  return polys;
+}
+}  // namespace manifold

+ 168 - 0
thirdparty/manifold/src/hashtable.h

@@ -0,0 +1,168 @@
+// Copyright 2022 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <stdint.h>
+
+#include <atomic>
+
+#include "./utils.h"
+#include "./vec.h"
+
+namespace {
+typedef unsigned long long int Uint64;
+typedef Uint64 (*hash_fun_t)(Uint64);
+inline constexpr Uint64 kOpen = std::numeric_limits<Uint64>::max();
+
+template <typename T>
+T AtomicCAS(T& target, T compare, T val) {
+  std::atomic<T>& tar = reinterpret_cast<std::atomic<T>&>(target);
+  tar.compare_exchange_strong(compare, val, std::memory_order_acq_rel);
+  return compare;
+}
+
+template <typename T>
+void AtomicStore(T& target, T val) {
+  std::atomic<T>& tar = reinterpret_cast<std::atomic<T>&>(target);
+  // release is good enough, although not really something general
+  tar.store(val, std::memory_order_release);
+}
+
+template <typename T>
+T AtomicLoad(const T& target) {
+  const std::atomic<T>& tar = reinterpret_cast<const std::atomic<T>&>(target);
+  // acquire is good enough, although not general
+  return tar.load(std::memory_order_acquire);
+}
+
+// https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key
+inline Uint64 hash64bit(Uint64 x) {
+  x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ull;
+  x = (x ^ (x >> 27)) * 0x94d049bb133111ebull;
+  x = x ^ (x >> 31);
+  return x;
+}
+}  // namespace
+
+namespace manifold {
+
+template <typename V, hash_fun_t H = hash64bit>
+class HashTableD {
+ public:
+  HashTableD(Vec<Uint64>& keys, Vec<V>& values, std::atomic<size_t>& used,
+             uint32_t step = 1)
+      : step_{step}, keys_{keys}, values_{values}, used_{used} {}
+
+  int Size() const { return keys_.size(); }
+
+  bool Full() const {
+    return used_.load(std::memory_order_relaxed) * 2 >
+           static_cast<size_t>(Size());
+  }
+
+  void Insert(Uint64 key, const V& val) {
+    uint32_t idx = H(key) & (Size() - 1);
+    while (1) {
+      if (Full()) return;
+      Uint64& k = keys_[idx];
+      const Uint64 found = AtomicCAS(k, kOpen, key);
+      if (found == kOpen) {
+        used_.fetch_add(1, std::memory_order_relaxed);
+        values_[idx] = val;
+        return;
+      }
+      if (found == key) return;
+      idx = (idx + step_) & (Size() - 1);
+    }
+  }
+
+  V& operator[](Uint64 key) {
+    uint32_t idx = H(key) & (Size() - 1);
+    while (1) {
+      const Uint64 k = AtomicLoad(keys_[idx]);
+      if (k == key || k == kOpen) {
+        return values_[idx];
+      }
+      idx = (idx + step_) & (Size() - 1);
+    }
+  }
+
+  const V& operator[](Uint64 key) const {
+    uint32_t idx = H(key) & (Size() - 1);
+    while (1) {
+      const Uint64 k = AtomicLoad(keys_[idx]);
+      if (k == key || k == kOpen) {
+        return values_[idx];
+      }
+      idx = (idx + step_) & (Size() - 1);
+    }
+  }
+
+  Uint64 KeyAt(int idx) const { return AtomicLoad(keys_[idx]); }
+  V& At(int idx) { return values_[idx]; }
+  const V& At(int idx) const { return values_[idx]; }
+
+ private:
+  uint32_t step_;
+  VecView<Uint64> keys_;
+  VecView<V> values_;
+  std::atomic<size_t>& used_;
+};
+
+template <typename V, hash_fun_t H = hash64bit>
+class HashTable {
+ public:
+  HashTable(size_t size, uint32_t step = 1)
+      : keys_{size == 0 ? 0 : 1_uz << (int)ceil(log2(size)), kOpen},
+        values_{size == 0 ? 0 : 1_uz << (int)ceil(log2(size)), {}},
+        step_(step) {}
+
+  HashTable(const HashTable& other)
+      : keys_(other.keys_), values_(other.values_), step_(other.step_) {
+    used_.store(other.used_.load());
+  }
+
+  HashTable& operator=(const HashTable& other) {
+    if (this == &other) return *this;
+    keys_ = other.keys_;
+    values_ = other.values_;
+    used_.store(other.used_.load());
+    step_ = other.step_;
+    return *this;
+  }
+
+  HashTableD<V, H> D() { return {keys_, values_, used_, step_}; }
+
+  int Entries() const { return used_.load(std::memory_order_relaxed); }
+
+  size_t Size() const { return keys_.size(); }
+
+  bool Full() const {
+    return used_.load(std::memory_order_relaxed) * 2 > Size();
+  }
+
+  double FilledFraction() const {
+    return static_cast<double>(used_.load(std::memory_order_relaxed)) / Size();
+  }
+
+  Vec<V>& GetValueStore() { return values_; }
+
+  static Uint64 Open() { return kOpen; }
+
+ private:
+  Vec<Uint64> keys_;
+  Vec<V> values_;
+  std::atomic<size_t> used_ = 0;
+  uint32_t step_;
+};
+}  // namespace manifold
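
For orientation (editorial note, not part of the patch): HashTable allocates a power-of-two key/value store up front, and HashTableD is the non-owning view handed to parallel loops; Insert claims a slot with a single compare-and-swap on the key, probes by a fixed step on collision, keeps the first value written for a duplicate key, and silently refuses new keys once the table is more than half full. A minimal usage sketch, assuming the vendored headers compile standalone with the include path shown:

    #include <cstdint>
    #include <cstdio>

    #include "thirdparty/manifold/src/hashtable.h"  // assumed include path

    int main() {
      using namespace manifold;
      HashTable<uint32_t> table(64);       // capacity rounded up to a power of two
      HashTableD<uint32_t> d = table.D();  // view usable from worker threads
      d.Insert(42, 7u);                    // CAS the key into an open slot
      d.Insert(42, 9u);                    // duplicate key: the first value wins
      std::printf("entries=%d value=%u\n", table.Entries(), d[42]);
      return 0;
    }
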

+ 686 - 0
thirdparty/manifold/src/impl.cpp

@@ -0,0 +1,686 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./impl.h"
+
+#include <algorithm>
+#include <atomic>
+#include <map>
+
+#include "./hashtable.h"
+#include "./mesh_fixes.h"
+#include "./parallel.h"
+#include "./svd.h"
+
+namespace {
+using namespace manifold;
+
+constexpr uint64_t kRemove = std::numeric_limits<uint64_t>::max();
+
+void AtomicAddVec3(vec3& target, const vec3& add) {
+  for (int i : {0, 1, 2}) {
+    std::atomic<double>& tar =
+        reinterpret_cast<std::atomic<double>&>(target[i]);
+    double old_val = tar.load(std::memory_order_relaxed);
+    while (!tar.compare_exchange_weak(old_val, old_val + add[i],
+                                      std::memory_order_relaxed)) {
+    }
+  }
+}
+
+struct Transform4x3 {
+  const mat3x4 transform;
+
+  vec3 operator()(vec3 position) { return transform * vec4(position, 1.0); }
+};
+
+template <bool calculateTriNormal>
+struct AssignNormals {
+  VecView<vec3> faceNormal;
+  VecView<vec3> vertNormal;
+  VecView<const vec3> vertPos;
+  VecView<const Halfedge> halfedges;
+
+  void operator()(const int face) {
+    vec3& triNormal = faceNormal[face];
+
+    ivec3 triVerts;
+    for (int i : {0, 1, 2}) triVerts[i] = halfedges[3 * face + i].startVert;
+
+    vec3 edge[3];
+    for (int i : {0, 1, 2}) {
+      const int j = (i + 1) % 3;
+      edge[i] = la::normalize(vertPos[triVerts[j]] - vertPos[triVerts[i]]);
+    }
+
+    if (calculateTriNormal) {
+      triNormal = la::normalize(la::cross(edge[0], edge[1]));
+      if (std::isnan(triNormal.x)) triNormal = vec3(0, 0, 1);
+    }
+
+    // corner angles
+    vec3 phi;
+    double dot = -la::dot(edge[2], edge[0]);
+    phi[0] = dot >= 1 ? 0 : (dot <= -1 ? kPi : std::acos(dot));
+    dot = -la::dot(edge[0], edge[1]);
+    phi[1] = dot >= 1 ? 0 : (dot <= -1 ? kPi : std::acos(dot));
+    phi[2] = kPi - phi[0] - phi[1];
+
+    // assign weighted sum
+    for (int i : {0, 1, 2}) {
+      AtomicAddVec3(vertNormal[triVerts[i]], phi[i] * triNormal);
+    }
+  }
+};
+
+struct UpdateMeshID {
+  const HashTableD<uint32_t> meshIDold2new;
+
+  void operator()(TriRef& ref) { ref.meshID = meshIDold2new[ref.meshID]; }
+};
+
+struct CoplanarEdge {
+  VecView<std::pair<int, int>> face2face;
+  VecView<double> triArea;
+  VecView<const Halfedge> halfedge;
+  VecView<const vec3> vertPos;
+  VecView<const TriRef> triRef;
+  VecView<const ivec3> triProp;
+  const int numProp;
+  const double epsilon;
+  const double tolerance;
+
+  void operator()(const int edgeIdx) {
+    const Halfedge edge = halfedge[edgeIdx];
+    const Halfedge pair = halfedge[edge.pairedHalfedge];
+    const int edgeFace = edgeIdx / 3;
+    const int pairFace = edge.pairedHalfedge / 3;
+
+    if (triRef[edgeFace].meshID != triRef[pairFace].meshID) return;
+
+    const vec3 base = vertPos[edge.startVert];
+    const int baseNum = edgeIdx - 3 * edgeFace;
+    const int jointNum = edge.pairedHalfedge - 3 * pairFace;
+
+    if (numProp > 0) {
+      if (triProp[edgeFace][baseNum] != triProp[pairFace][Next3(jointNum)] ||
+          triProp[edgeFace][Next3(baseNum)] != triProp[pairFace][jointNum])
+        return;
+    }
+
+    if (!edge.IsForward()) return;
+
+    const int edgeNum = baseNum == 0 ? 2 : baseNum - 1;
+    const int pairNum = jointNum == 0 ? 2 : jointNum - 1;
+    const vec3 jointVec = vertPos[pair.startVert] - base;
+    const vec3 edgeVec =
+        vertPos[halfedge[3 * edgeFace + edgeNum].startVert] - base;
+    const vec3 pairVec =
+        vertPos[halfedge[3 * pairFace + pairNum].startVert] - base;
+
+    const double length = std::max(la::length(jointVec), la::length(edgeVec));
+    const double lengthPair =
+        std::max(la::length(jointVec), la::length(pairVec));
+    vec3 normal = la::cross(jointVec, edgeVec);
+    const double area = la::length(normal);
+    const double areaPair = la::length(la::cross(pairVec, jointVec));
+
+    // make sure we only write this once
+    if (edgeIdx % 3 == 0) triArea[edgeFace] = area;
+    // Don't link degenerate triangles
+    if (area < length * epsilon || areaPair < lengthPair * epsilon) return;
+
+    const double volume = std::abs(la::dot(normal, pairVec));
+    // Only operate on coplanar triangles
+    if (volume > std::max(area, areaPair) * tolerance) return;
+
+    face2face[edgeIdx] = std::make_pair(edgeFace, pairFace);
+  }
+};
+
+struct CheckCoplanarity {
+  VecView<int> comp2tri;
+  VecView<const Halfedge> halfedge;
+  VecView<const vec3> vertPos;
+  std::vector<int>* components;
+  const double tolerance;
+
+  void operator()(int tri) {
+    const int component = (*components)[tri];
+    const int referenceTri =
+        reinterpret_cast<std::atomic<int>*>(&comp2tri[component])
+            ->load(std::memory_order_relaxed);
+    if (referenceTri < 0 || referenceTri == tri) return;
+
+    const vec3 origin = vertPos[halfedge[3 * referenceTri].startVert];
+    const vec3 normal = la::normalize(
+        la::cross(vertPos[halfedge[3 * referenceTri + 1].startVert] - origin,
+                  vertPos[halfedge[3 * referenceTri + 2].startVert] - origin));
+
+    for (const int i : {0, 1, 2}) {
+      const vec3 vert = vertPos[halfedge[3 * tri + i].startVert];
+      // If any component vertex is not coplanar with the component's reference
+      // triangle, unmark the entire component so that none of its triangles are
+      // marked coplanar.
+      if (std::abs(la::dot(normal, vert - origin)) > tolerance) {
+        reinterpret_cast<std::atomic<int>*>(&comp2tri[component])
+            ->store(-1, std::memory_order_relaxed);
+        break;
+      }
+    }
+  }
+};
+
+int GetLabels(std::vector<int>& components,
+              const Vec<std::pair<int, int>>& edges, int numNodes) {
+  UnionFind<> uf(numNodes);
+  for (auto edge : edges) {
+    if (edge.first == -1 || edge.second == -1) continue;
+    uf.unionXY(edge.first, edge.second);
+  }
+
+  return uf.connectedComponents(components);
+}
+
+void DedupePropVerts(manifold::Vec<ivec3>& triProp,
+                     const Vec<std::pair<int, int>>& vert2vert) {
+  ZoneScoped;
+  std::vector<int> vertLabels;
+  const int numLabels = GetLabels(vertLabels, vert2vert, vert2vert.size());
+
+  std::vector<int> label2vert(numLabels);
+  for (size_t v = 0; v < vert2vert.size(); ++v) label2vert[vertLabels[v]] = v;
+  for (auto& prop : triProp)
+    for (int i : {0, 1, 2}) prop[i] = label2vert[vertLabels[prop[i]]];
+}
+}  // namespace
+
+namespace manifold {
+
+std::atomic<uint32_t> Manifold::Impl::meshIDCounter_(1);
+
+uint32_t Manifold::Impl::ReserveIDs(uint32_t n) {
+  return Manifold::Impl::meshIDCounter_.fetch_add(n, std::memory_order_relaxed);
+}
+
+/**
+ * Create either a unit tetrahedron, cube or octahedron. The cube is in the
+ * first octant, while the others are symmetric about the origin.
+ */
+Manifold::Impl::Impl(Shape shape, const mat3x4 m) {
+  std::vector<vec3> vertPos;
+  std::vector<ivec3> triVerts;
+  switch (shape) {
+    case Shape::Tetrahedron:
+      vertPos = {{-1.0, -1.0, 1.0},
+                 {-1.0, 1.0, -1.0},
+                 {1.0, -1.0, -1.0},
+                 {1.0, 1.0, 1.0}};
+      triVerts = {{2, 0, 1}, {0, 3, 1}, {2, 3, 0}, {3, 2, 1}};
+      break;
+    case Shape::Cube:
+      vertPos = {{0.0, 0.0, 0.0},  //
+                 {0.0, 0.0, 1.0},  //
+                 {0.0, 1.0, 0.0},  //
+                 {0.0, 1.0, 1.0},  //
+                 {1.0, 0.0, 0.0},  //
+                 {1.0, 0.0, 1.0},  //
+                 {1.0, 1.0, 0.0},  //
+                 {1.0, 1.0, 1.0}};
+      triVerts = {{1, 0, 4}, {2, 4, 0},  //
+                  {1, 3, 0}, {3, 1, 5},  //
+                  {3, 2, 0}, {3, 7, 2},  //
+                  {5, 4, 6}, {5, 1, 4},  //
+                  {6, 4, 2}, {7, 6, 2},  //
+                  {7, 3, 5}, {7, 5, 6}};
+      break;
+    case Shape::Octahedron:
+      vertPos = {{1.0, 0.0, 0.0},   //
+                 {-1.0, 0.0, 0.0},  //
+                 {0.0, 1.0, 0.0},   //
+                 {0.0, -1.0, 0.0},  //
+                 {0.0, 0.0, 1.0},   //
+                 {0.0, 0.0, -1.0}};
+      triVerts = {{0, 2, 4}, {1, 5, 3},  //
+                  {2, 1, 4}, {3, 5, 0},  //
+                  {1, 3, 4}, {0, 5, 2},  //
+                  {3, 0, 4}, {2, 5, 1}};
+      break;
+  }
+  vertPos_ = vertPos;
+  for (auto& v : vertPos_) v = m * vec4(v, 1.0);
+  CreateHalfedges(triVerts);
+  Finish();
+  InitializeOriginal();
+  CreateFaces();
+}
+
+void Manifold::Impl::RemoveUnreferencedVerts() {
+  ZoneScoped;
+  Vec<int> vertOld2New(NumVert(), 0);
+  auto policy = autoPolicy(NumVert(), 1e5);
+  for_each(policy, halfedge_.cbegin(), halfedge_.cend(),
+           [&vertOld2New](Halfedge h) {
+             reinterpret_cast<std::atomic<int>*>(&vertOld2New[h.startVert])
+                 ->store(1, std::memory_order_relaxed);
+           });
+
+  const Vec<vec3> oldVertPos = vertPos_;
+
+  Vec<size_t> tmpBuffer(oldVertPos.size());
+  auto vertIdIter = TransformIterator(countAt(0_uz), [&vertOld2New](size_t i) {
+    if (vertOld2New[i] > 0) return i;
+    return std::numeric_limits<size_t>::max();
+  });
+
+  auto next =
+      copy_if(vertIdIter, vertIdIter + tmpBuffer.size(), tmpBuffer.begin(),
+              [](size_t v) { return v != std::numeric_limits<size_t>::max(); });
+  if (next == tmpBuffer.end()) return;
+
+  gather(tmpBuffer.begin(), next, oldVertPos.begin(), vertPos_.begin());
+
+  vertPos_.resize(std::distance(tmpBuffer.begin(), next));
+
+  exclusive_scan(vertOld2New.begin(), vertOld2New.end(), vertOld2New.begin());
+
+  for_each(policy, halfedge_.begin(), halfedge_.end(),
+           [&vertOld2New](Halfedge& h) {
+             h.startVert = vertOld2New[h.startVert];
+             h.endVert = vertOld2New[h.endVert];
+           });
+}
+
+void Manifold::Impl::InitializeOriginal(bool keepFaceID) {
+  const int meshID = ReserveIDs(1);
+  meshRelation_.originalID = meshID;
+  auto& triRef = meshRelation_.triRef;
+  triRef.resize(NumTri());
+  for_each_n(autoPolicy(NumTri(), 1e5), countAt(0), NumTri(),
+             [meshID, keepFaceID, &triRef](const int tri) {
+               triRef[tri] = {meshID, meshID, tri,
+                              keepFaceID ? triRef[tri].faceID : tri};
+             });
+  meshRelation_.meshIDtransform.clear();
+  meshRelation_.meshIDtransform[meshID] = {meshID};
+}
+
+void Manifold::Impl::CreateFaces() {
+  ZoneScoped;
+  Vec<std::pair<int, int>> face2face(halfedge_.size(), {-1, -1});
+  Vec<std::pair<int, int>> vert2vert(halfedge_.size(), {-1, -1});
+  Vec<double> triArea(NumTri());
+
+  const size_t numProp = NumProp();
+  if (numProp > 0) {
+    for_each_n(
+        autoPolicy(halfedge_.size(), 1e4), countAt(0), halfedge_.size(),
+        [&vert2vert, numProp, this](const int edgeIdx) {
+          const Halfedge edge = halfedge_[edgeIdx];
+          const Halfedge pair = halfedge_[edge.pairedHalfedge];
+          const int edgeFace = edgeIdx / 3;
+          const int pairFace = edge.pairedHalfedge / 3;
+
+          if (meshRelation_.triRef[edgeFace].meshID !=
+              meshRelation_.triRef[pairFace].meshID)
+            return;
+
+          const int baseNum = edgeIdx - 3 * edgeFace;
+          const int jointNum = edge.pairedHalfedge - 3 * pairFace;
+
+          const int prop0 = meshRelation_.triProperties[edgeFace][baseNum];
+          const int prop1 =
+              meshRelation_
+                  .triProperties[pairFace][jointNum == 2 ? 0 : jointNum + 1];
+          bool propEqual = true;
+          for (size_t p = 0; p < numProp; ++p) {
+            if (meshRelation_.properties[numProp * prop0 + p] !=
+                meshRelation_.properties[numProp * prop1 + p]) {
+              propEqual = false;
+              break;
+            }
+          }
+          if (propEqual) {
+            vert2vert[edgeIdx] = std::make_pair(prop0, prop1);
+          }
+        });
+    DedupePropVerts(meshRelation_.triProperties, vert2vert);
+  }
+
+  for_each_n(autoPolicy(halfedge_.size(), 1e4), countAt(0), halfedge_.size(),
+             CoplanarEdge({face2face, triArea, halfedge_, vertPos_,
+                           meshRelation_.triRef, meshRelation_.triProperties,
+                           meshRelation_.numProp, epsilon_, tolerance_}));
+
+  std::vector<int> components;
+  const int numComponent = GetLabels(components, face2face, NumTri());
+
+  Vec<int> comp2tri(numComponent, -1);
+  for (size_t tri = 0; tri < NumTri(); ++tri) {
+    const int comp = components[tri];
+    const int current = comp2tri[comp];
+    if (current < 0 || triArea[tri] > triArea[current]) {
+      comp2tri[comp] = tri;
+      triArea[comp] = triArea[tri];
+    }
+  }
+
+  for_each_n(autoPolicy(halfedge_.size(), 1e4), countAt(0), NumTri(),
+             CheckCoplanarity(
+                 {comp2tri, halfedge_, vertPos_, &components, tolerance_}));
+
+  Vec<TriRef>& triRef = meshRelation_.triRef;
+  for (size_t tri = 0; tri < NumTri(); ++tri) {
+    const int referenceTri = comp2tri[components[tri]];
+    if (referenceTri >= 0) {
+      triRef[tri].faceID = referenceTri;
+    }
+  }
+}
+
+/**
+ * Create the halfedge_ data structure from an input triVerts array like Mesh.
+ */
+void Manifold::Impl::CreateHalfedges(const Vec<ivec3>& triVerts) {
+  ZoneScoped;
+  const size_t numTri = triVerts.size();
+  const int numHalfedge = 3 * numTri;
+  // drop the old value first to avoid copy
+  halfedge_.resize(0);
+  halfedge_.resize(numHalfedge);
+  Vec<uint64_t> edge(numHalfedge);
+  Vec<int> ids(numHalfedge);
+  auto policy = autoPolicy(numTri, 1e5);
+  sequence(ids.begin(), ids.end());
+  for_each_n(policy, countAt(0), numTri,
+             [this, &edge, &triVerts](const int tri) {
+               const ivec3& verts = triVerts[tri];
+               for (const int i : {0, 1, 2}) {
+                 const int j = (i + 1) % 3;
+                 const int e = 3 * tri + i;
+                 halfedge_[e] = {verts[i], verts[j], -1};
+                 // Sort the forward halfedges in front of the backward ones
+                 // by setting the highest-order bit.
+                 edge[e] = uint64_t(verts[i] < verts[j] ? 1 : 0) << 63 |
+                           ((uint64_t)std::min(verts[i], verts[j])) << 32 |
+                           std::max(verts[i], verts[j]);
+               }
+             });
+  // Stable sort is required here so that halfedges from the same face are
+  // paired together (the triangles were created in face order). In some
+  // degenerate situations the triangulator can add the same internal edge in
+  // two different faces, causing this edge to not be 2-manifold. These are
+  // fixed by duplicating verts in SimplifyTopology.
+  stable_sort(ids.begin(), ids.end(), [&edge](const int& a, const int& b) {
+    return edge[a] < edge[b];
+  });
+
+  // Mark opposed triangles for removal
+  const int numEdge = numHalfedge / 2;
+  for (int i = 0; i < numEdge; ++i) {
+    const int pair0 = ids[i];
+    Halfedge h0 = halfedge_[pair0];
+    int k = i + numEdge;
+    while (1) {
+      const int pair1 = ids[k];
+      Halfedge h1 = halfedge_[pair1];
+      if (h0.startVert != h1.endVert || h0.endVert != h1.startVert) break;
+      if (halfedge_[NextHalfedge(pair0)].endVert ==
+          halfedge_[NextHalfedge(pair1)].endVert) {
+        // Reorder so that remaining edges pair up
+        if (k != i + numEdge) std::swap(ids[i + numEdge], ids[k]);
+        break;
+      }
+      ++k;
+      if (k >= numHalfedge) break;
+    }
+  }
+
+  // Once sorted, the first half of the range is the forward halfedges, which
+  // correspond to their backward pair at the same offset in the second half
+  // of the range.
+  for_each_n(policy, countAt(0), numEdge, [this, &ids, numEdge](int i) {
+    const int pair0 = ids[i];
+    const int pair1 = ids[i + numEdge];
+    halfedge_[pair0].pairedHalfedge = pair1;
+    halfedge_[pair1].pairedHalfedge = pair0;
+  });
+
+  // When opposed triangles are removed, they may strand unreferenced verts.
+  RemoveUnreferencedVerts();
+}
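
For reference (editorial note, not part of the patch): the sort key above packs each halfedge's undirected edge into 64 bits, with the smaller vertex index shifted into the upper word, the larger in the low 32 bits, and an "is forward" flag (startVert < endVert) in the top bit. Opposite halfedges therefore share the low 63 bits and differ only in that flag, so after sorting, each forward halfedge lines up with its backward pair at the same offset in the opposite half of the range. A standalone sketch of the packing:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Same packing as CreateHalfedges above: forward flag in bit 63,
    // min vertex shifted into the upper word, max vertex in the low bits.
    uint64_t EdgeKey(int start, int end) {
      return (uint64_t(start < end ? 1 : 0) << 63) |
             (uint64_t(std::min(start, end)) << 32) |
             uint64_t(std::max(start, end));
    }

    int main() {
      // A halfedge 2 -> 5 and its opposite 5 -> 2 differ only in the top bit.
      std::printf("%016llx\n%016llx\n",
                  (unsigned long long)EdgeKey(2, 5),
                  (unsigned long long)EdgeKey(5, 2));
      return 0;
    }
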
+
+/**
+ * Does a full recalculation of the face bounding boxes, including updating
+ * the collider, but does not resort the faces.
+ */
+void Manifold::Impl::Update() {
+  CalculateBBox();
+  Vec<Box> faceBox;
+  Vec<uint32_t> faceMorton;
+  GetFaceBoxMorton(faceBox, faceMorton);
+  collider_.UpdateBoxes(faceBox);
+}
+
+void Manifold::Impl::MarkFailure(Error status) {
+  bBox_ = Box();
+  vertPos_.resize(0);
+  halfedge_.resize(0);
+  vertNormal_.resize(0);
+  faceNormal_.resize(0);
+  halfedgeTangent_.resize(0);
+  meshRelation_ = MeshRelationD();
+  status_ = status;
+}
+
+void Manifold::Impl::Warp(std::function<void(vec3&)> warpFunc) {
+  WarpBatch([&warpFunc](VecView<vec3> vecs) {
+    for_each(ExecutionPolicy::Seq, vecs.begin(), vecs.end(), warpFunc);
+  });
+}
+
+void Manifold::Impl::WarpBatch(std::function<void(VecView<vec3>)> warpFunc) {
+  warpFunc(vertPos_.view());
+  CalculateBBox();
+  if (!IsFinite()) {
+    MarkFailure(Error::NonFiniteVertex);
+    return;
+  }
+  Update();
+  faceNormal_.resize(0);  // force recalculation of triNormal
+  CalculateNormals();
+  SetEpsilon();
+  Finish();
+  CreateFaces();
+  meshRelation_.originalID = -1;
+}
+
+Manifold::Impl Manifold::Impl::Transform(const mat3x4& transform_) const {
+  ZoneScoped;
+  if (transform_ == mat3x4(la::identity)) return *this;
+  auto policy = autoPolicy(NumVert());
+  Impl result;
+  if (status_ != Manifold::Error::NoError) {
+    result.status_ = status_;
+    return result;
+  }
+  if (!all(la::isfinite(transform_))) {
+    result.MarkFailure(Error::NonFiniteVertex);
+    return result;
+  }
+  result.collider_ = collider_;
+  result.meshRelation_ = meshRelation_;
+  result.epsilon_ = epsilon_;
+  result.tolerance_ = tolerance_;
+  result.bBox_ = bBox_;
+  result.halfedge_ = halfedge_;
+  result.halfedgeTangent_.resize(halfedgeTangent_.size());
+
+  result.meshRelation_.originalID = -1;
+  for (auto& m : result.meshRelation_.meshIDtransform) {
+    m.second.transform = transform_ * Mat4(m.second.transform);
+  }
+
+  result.vertPos_.resize(NumVert());
+  result.faceNormal_.resize(faceNormal_.size());
+  result.vertNormal_.resize(vertNormal_.size());
+  transform(vertPos_.begin(), vertPos_.end(), result.vertPos_.begin(),
+            Transform4x3({transform_}));
+
+  mat3 normalTransform = NormalTransform(transform_);
+  transform(faceNormal_.begin(), faceNormal_.end(), result.faceNormal_.begin(),
+            TransformNormals({normalTransform}));
+  transform(vertNormal_.begin(), vertNormal_.end(), result.vertNormal_.begin(),
+            TransformNormals({normalTransform}));
+
+  const bool invert = la::determinant(mat3(transform_)) < 0;
+
+  if (halfedgeTangent_.size() > 0) {
+    for_each_n(policy, countAt(0), halfedgeTangent_.size(),
+               TransformTangents({result.halfedgeTangent_, 0, mat3(transform_),
+                                  invert, halfedgeTangent_, halfedge_}));
+  }
+
+  if (invert) {
+    for_each_n(policy, countAt(0), result.NumTri(),
+               FlipTris({result.halfedge_}));
+  }
+
+  // This optimization does a cheap collider update if the transform is
+  // axis-aligned.
+  if (!result.collider_.Transform(transform_)) result.Update();
+
+  result.CalculateBBox();
+  // Scale epsilon by the norm of the 3x3 portion of the transform.
+  result.epsilon_ *= SpectralNorm(mat3(transform_));
+  // Maximum of inherited epsilon loss and translational epsilon loss.
+  result.SetEpsilon(result.epsilon_);
+  return result;
+}
+
+/**
+ * Sets epsilon based on the bounding box, and limits its minimum value
+ * by the optional input.
+ */
+void Manifold::Impl::SetEpsilon(double minEpsilon, bool useSingle) {
+  epsilon_ = MaxEpsilon(minEpsilon, bBox_);
+  double minTol = epsilon_;
+  if (useSingle)
+    minTol =
+        std::max(minTol, std::numeric_limits<float>::epsilon() * bBox_.Scale());
+  tolerance_ = std::max(tolerance_, minTol);
+}
+
+/**
+ * If face normals are already present, this function uses them to compute
+ * vertex normals (angle-weighted pseudo-normals); otherwise it also computes
+ * the face normals. Face normals are only calculated when needed because
+ * nearly degenerate faces will accrue rounding error, while the Boolean can
+ * retain their original normal, which is more accurate and can help with
+ * merging coplanar faces.
+ *
+ * If the face normals have been invalidated by an operation like Warp(),
+ * ensure you do faceNormal_.resize(0) before calling this function to force
+ * recalculation.
+ */
+void Manifold::Impl::CalculateNormals() {
+  ZoneScoped;
+  vertNormal_.resize(NumVert());
+  auto policy = autoPolicy(NumTri(), 1e4);
+  fill(vertNormal_.begin(), vertNormal_.end(), vec3(0.0));
+  bool calculateTriNormal = false;
+  if (faceNormal_.size() != NumTri()) {
+    faceNormal_.resize(NumTri());
+    calculateTriNormal = true;
+  }
+  if (calculateTriNormal)
+    for_each_n(
+        policy, countAt(0), NumTri(),
+        AssignNormals<true>({faceNormal_, vertNormal_, vertPos_, halfedge_}));
+  else
+    for_each_n(
+        policy, countAt(0), NumTri(),
+        AssignNormals<false>({faceNormal_, vertNormal_, vertPos_, halfedge_}));
+  for_each(policy, vertNormal_.begin(), vertNormal_.end(),
+           [](vec3& v) { v = SafeNormalize(v); });
+}
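
For reference (editorial note, not part of the patch): the vertex normals produced here are the angle-weighted pseudo-normals mentioned in the comment above, i.e.

    n_v = normalize( Σ_t θ_{t,v} · n_t ),

where the sum runs over the triangles t incident to vertex v, n_t is the face normal, and θ_{t,v} is the interior angle of t at v (the phi values accumulated by AssignNormals).
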
+
+/**
+ * Remaps all the contained meshIDs to new unique values to represent new
+ * instances of these meshes.
+ */
+void Manifold::Impl::IncrementMeshIDs() {
+  HashTable<uint32_t> meshIDold2new(meshRelation_.meshIDtransform.size() * 2);
+  // Update keys of the transform map
+  std::map<int, Relation> oldTransforms;
+  std::swap(meshRelation_.meshIDtransform, oldTransforms);
+  const int numMeshIDs = oldTransforms.size();
+  int nextMeshID = ReserveIDs(numMeshIDs);
+  for (const auto& pair : oldTransforms) {
+    meshIDold2new.D().Insert(pair.first, nextMeshID);
+    meshRelation_.meshIDtransform[nextMeshID++] = pair.second;
+  }
+
+  const size_t numTri = NumTri();
+  for_each_n(autoPolicy(numTri, 1e5), meshRelation_.triRef.begin(), numTri,
+             UpdateMeshID({meshIDold2new.D()}));
+}
+
+/**
+ * Returns a sparse array of the bounding box overlaps between the edges of
+ * the input manifold, Q, and the faces of this manifold. Returned indices only
+ * point to forward halfedges.
+ */
+SparseIndices Manifold::Impl::EdgeCollisions(const Impl& Q,
+                                             bool inverted) const {
+  ZoneScoped;
+  Vec<TmpEdge> edges = CreateTmpEdges(Q.halfedge_);
+  const size_t numEdge = edges.size();
+  Vec<Box> QedgeBB(numEdge);
+  const auto& vertPos = Q.vertPos_;
+  auto policy = autoPolicy(numEdge, 1e5);
+  for_each_n(
+      policy, countAt(0), numEdge, [&QedgeBB, &edges, &vertPos](const int e) {
+        QedgeBB[e] = Box(vertPos[edges[e].first], vertPos[edges[e].second]);
+      });
+
+  SparseIndices q1p2(0);
+  if (inverted)
+    q1p2 = collider_.Collisions<false, true>(QedgeBB.cview());
+  else
+    q1p2 = collider_.Collisions<false, false>(QedgeBB.cview());
+
+  if (inverted)
+    for_each(policy, countAt(0_uz), countAt(q1p2.size()),
+             ReindexEdge<true>({edges, q1p2}));
+  else
+    for_each(policy, countAt(0_uz), countAt(q1p2.size()),
+             ReindexEdge<false>({edges, q1p2}));
+  return q1p2;
+}
+
+/**
+ * Returns a sparse array of the input vertices that project inside the XY
+ * bounding boxes of the faces of this manifold.
+ */
+SparseIndices Manifold::Impl::VertexCollisionsZ(VecView<const vec3> vertsIn,
+                                                bool inverted) const {
+  ZoneScoped;
+  if (inverted)
+    return collider_.Collisions<false, true>(vertsIn);
+  else
+    return collider_.Collisions<false, false>(vertsIn);
+}
+
+}  // namespace manifold

+ 352 - 0
thirdparty/manifold/src/impl.h

@@ -0,0 +1,352 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+
+#include "./collider.h"
+#include "./shared.h"
+#include "./sparse.h"
+#include "./vec.h"
+#include "manifold/manifold.h"
+#include "manifold/polygon.h"
+
+namespace manifold {
+
+/** @ingroup Private */
+struct Manifold::Impl {
+  struct Relation {
+    int originalID = -1;
+    mat3x4 transform = la::identity;
+    bool backSide = false;
+  };
+  struct MeshRelationD {
+    /// The originalID of this Manifold if it is an original; -1 otherwise.
+    int originalID = -1;
+    int numProp = 0;
+    Vec<double> properties;
+    std::map<int, Relation> meshIDtransform;
+    Vec<TriRef> triRef;
+    Vec<ivec3> triProperties;
+  };
+  struct BaryIndices {
+    int tri, start4, end4;
+  };
+
+  Box bBox_;
+  double epsilon_ = -1;
+  double tolerance_ = -1;
+  Error status_ = Error::NoError;
+  Vec<vec3> vertPos_;
+  Vec<Halfedge> halfedge_;
+  Vec<vec3> vertNormal_;
+  Vec<vec3> faceNormal_;
+  Vec<vec4> halfedgeTangent_;
+  MeshRelationD meshRelation_;
+  Collider collider_;
+
+  static std::atomic<uint32_t> meshIDCounter_;
+  static uint32_t ReserveIDs(uint32_t);
+
+  Impl() {}
+  enum class Shape { Tetrahedron, Cube, Octahedron };
+  Impl(Shape, const mat3x4 = la::identity);
+
+  template <typename Precision, typename I>
+  Impl(const MeshGLP<Precision, I>& meshGL) {
+    const uint32_t numVert = meshGL.NumVert();
+    const uint32_t numTri = meshGL.NumTri();
+
+    if (meshGL.numProp < 3) {
+      MarkFailure(Error::MissingPositionProperties);
+      return;
+    }
+
+    if (meshGL.mergeFromVert.size() != meshGL.mergeToVert.size()) {
+      MarkFailure(Error::MergeVectorsDifferentLengths);
+      return;
+    }
+
+    if (!meshGL.runTransform.empty() &&
+        12 * meshGL.runOriginalID.size() != meshGL.runTransform.size()) {
+      MarkFailure(Error::TransformWrongLength);
+      return;
+    }
+
+    if (!meshGL.runOriginalID.empty() && !meshGL.runIndex.empty() &&
+        meshGL.runOriginalID.size() + 1 != meshGL.runIndex.size() &&
+        meshGL.runOriginalID.size() != meshGL.runIndex.size()) {
+      MarkFailure(Error::RunIndexWrongLength);
+      return;
+    }
+
+    if (!meshGL.faceID.empty() && meshGL.faceID.size() != meshGL.NumTri()) {
+      MarkFailure(Error::FaceIDWrongLength);
+      return;
+    }
+
+    std::vector<int> prop2vert(numVert);
+    std::iota(prop2vert.begin(), prop2vert.end(), 0);
+    for (size_t i = 0; i < meshGL.mergeFromVert.size(); ++i) {
+      const uint32_t from = meshGL.mergeFromVert[i];
+      const uint32_t to = meshGL.mergeToVert[i];
+      if (from >= numVert || to >= numVert) {
+        MarkFailure(Error::MergeIndexOutOfBounds);
+        return;
+      }
+      prop2vert[from] = to;
+    }
+
+    const auto numProp = meshGL.numProp - 3;
+    meshRelation_.numProp = numProp;
+    meshRelation_.properties.resize(meshGL.NumVert() * numProp);
+    tolerance_ = meshGL.tolerance;
+    // This will have unreferenced duplicate positions that will be removed by
+    // Impl::RemoveUnreferencedVerts().
+    vertPos_.resize(meshGL.NumVert());
+
+    for (size_t i = 0; i < meshGL.NumVert(); ++i) {
+      for (const int j : {0, 1, 2})
+        vertPos_[i][j] = meshGL.vertProperties[meshGL.numProp * i + j];
+      for (size_t j = 0; j < numProp; ++j)
+        meshRelation_.properties[i * numProp + j] =
+            meshGL.vertProperties[meshGL.numProp * i + 3 + j];
+    }
+
+    halfedgeTangent_.resize(meshGL.halfedgeTangent.size() / 4);
+    for (size_t i = 0; i < halfedgeTangent_.size(); ++i) {
+      for (const int j : {0, 1, 2, 3})
+        halfedgeTangent_[i][j] = meshGL.halfedgeTangent[4 * i + j];
+    }
+
+    Vec<TriRef> triRef;
+    if (!meshGL.runOriginalID.empty()) {
+      auto runIndex = meshGL.runIndex;
+      const auto runEnd = meshGL.triVerts.size();
+      if (runIndex.empty()) {
+        runIndex = {0, static_cast<I>(runEnd)};
+      } else if (runIndex.size() == meshGL.runOriginalID.size()) {
+        runIndex.push_back(runEnd);
+      }
+      triRef.resize(meshGL.NumTri());
+      const auto startID = Impl::ReserveIDs(meshGL.runOriginalID.size());
+      for (size_t i = 0; i < meshGL.runOriginalID.size(); ++i) {
+        const int meshID = startID + i;
+        const int originalID = meshGL.runOriginalID[i];
+        for (size_t tri = runIndex[i] / 3; tri < runIndex[i + 1] / 3; ++tri) {
+          TriRef& ref = triRef[tri];
+          ref.meshID = meshID;
+          ref.originalID = originalID;
+          ref.tri = meshGL.faceID.empty() ? tri : meshGL.faceID[tri];
+          ref.faceID = tri;
+        }
+
+        if (meshGL.runTransform.empty()) {
+          meshRelation_.meshIDtransform[meshID] = {originalID};
+        } else {
+          const Precision* m = meshGL.runTransform.data() + 12 * i;
+          meshRelation_.meshIDtransform[meshID] = {originalID,
+                                                   {{m[0], m[1], m[2]},
+                                                    {m[3], m[4], m[5]},
+                                                    {m[6], m[7], m[8]},
+                                                    {m[9], m[10], m[11]}}};
+        }
+      }
+    }
+
+    Vec<ivec3> triVerts;
+    triVerts.reserve(numTri);
+    for (size_t i = 0; i < numTri; ++i) {
+      ivec3 tri;
+      for (const size_t j : {0, 1, 2}) {
+        uint32_t vert = (uint32_t)meshGL.triVerts[3 * i + j];
+        if (vert >= numVert) {
+          MarkFailure(Error::VertexOutOfBounds);
+          return;
+        }
+        tri[j] = prop2vert[vert];
+      }
+      if (tri[0] != tri[1] && tri[1] != tri[2] && tri[2] != tri[0]) {
+        triVerts.push_back(tri);
+        if (triRef.size() > 0) {
+          meshRelation_.triRef.push_back(triRef[i]);
+        }
+        if (numProp > 0) {
+          meshRelation_.triProperties.push_back(
+              ivec3(static_cast<uint32_t>(meshGL.triVerts[3 * i]),
+                    static_cast<uint32_t>(meshGL.triVerts[3 * i + 1]),
+                    static_cast<uint32_t>(meshGL.triVerts[3 * i + 2])));
+        }
+      }
+    }
+
+    CreateHalfedges(triVerts);
+    if (!IsManifold()) {
+      MarkFailure(Error::NotManifold);
+      return;
+    }
+
+    CalculateBBox();
+    if (!IsFinite()) {
+      MarkFailure(Error::NonFiniteVertex);
+      return;
+    }
+    SetEpsilon(-1, std::is_same<Precision, float>::value);
+
+    SplitPinchedVerts();
+
+    CalculateNormals();
+
+    if (meshGL.runOriginalID.empty()) {
+      InitializeOriginal();
+    }
+
+    CreateFaces();
+
+    SimplifyTopology();
+    Finish();
+
+    // A Manifold created from an input mesh is never an original - the input is
+    // the original.
+    meshRelation_.originalID = -1;
+  }
+
+  inline void ForVert(int halfedge, std::function<void(int halfedge)> func) {
+    int current = halfedge;
+    do {
+      current = NextHalfedge(halfedge_[current].pairedHalfedge);
+      func(current);
+    } while (current != halfedge);
+  }
+
+  template <typename T>
+  void ForVert(
+      int halfedge, std::function<T(int halfedge)> transform,
+      std::function<void(int halfedge, const T& here, T& next)> binaryOp) {
+    T here = transform(halfedge);
+    int current = halfedge;
+    do {
+      const int nextHalfedge = NextHalfedge(halfedge_[current].pairedHalfedge);
+      T next = transform(nextHalfedge);
+      binaryOp(current, here, next);
+      here = next;
+      current = nextHalfedge;
+    } while (current != halfedge);
+  }
+
+  void CreateFaces();
+  void RemoveUnreferencedVerts();
+  void InitializeOriginal(bool keepFaceID = false);
+  void CreateHalfedges(const Vec<ivec3>& triVerts);
+  void CalculateNormals();
+  void IncrementMeshIDs();
+
+  void Update();
+  void MarkFailure(Error status);
+  void Warp(std::function<void(vec3&)> warpFunc);
+  void WarpBatch(std::function<void(VecView<vec3>)> warpFunc);
+  Impl Transform(const mat3x4& transform) const;
+  SparseIndices EdgeCollisions(const Impl& B, bool inverted = false) const;
+  SparseIndices VertexCollisionsZ(VecView<const vec3> vertsIn,
+                                  bool inverted = false) const;
+
+  bool IsEmpty() const { return NumTri() == 0; }
+  size_t NumVert() const { return vertPos_.size(); }
+  size_t NumEdge() const { return halfedge_.size() / 2; }
+  size_t NumTri() const { return halfedge_.size() / 3; }
+  size_t NumProp() const { return meshRelation_.numProp; }
+  size_t NumPropVert() const {
+    return NumProp() == 0 ? NumVert()
+                          : meshRelation_.properties.size() / NumProp();
+  }
+
+  // properties.cu
+  enum class Property { Volume, SurfaceArea };
+  double GetProperty(Property prop) const;
+  void CalculateCurvature(int gaussianIdx, int meanIdx);
+  void CalculateBBox();
+  bool IsFinite() const;
+  bool IsIndexInBounds(VecView<const ivec3> triVerts) const;
+  void SetEpsilon(double minEpsilon = -1, bool useSingle = false);
+  bool IsManifold() const;
+  bool Is2Manifold() const;
+  bool MatchesTriNormals() const;
+  int NumDegenerateTris() const;
+  double MinGap(const Impl& other, double searchLength) const;
+
+  // sort.cu
+  void Finish();
+  void SortVerts();
+  void ReindexVerts(const Vec<int>& vertNew2Old, size_t numOldVert);
+  void CompactProps();
+  void GetFaceBoxMorton(Vec<Box>& faceBox, Vec<uint32_t>& faceMorton) const;
+  void SortFaces(Vec<Box>& faceBox, Vec<uint32_t>& faceMorton);
+  void GatherFaces(const Vec<int>& faceNew2Old);
+  void GatherFaces(const Impl& old, const Vec<int>& faceNew2Old);
+
+  // face_op.cu
+  void Face2Tri(const Vec<int>& faceEdge, const Vec<TriRef>& halfedgeRef);
+  PolygonsIdx Face2Polygons(VecView<Halfedge>::IterC start,
+                            VecView<Halfedge>::IterC end,
+                            mat2x3 projection) const;
+  Polygons Slice(double height) const;
+  Polygons Project() const;
+
+  // edge_op.cu
+  void CleanupTopology();
+  void SimplifyTopology();
+  void DedupeEdge(int edge);
+  void CollapseEdge(int edge, std::vector<int>& edges);
+  void RecursiveEdgeSwap(int edge, int& tag, std::vector<int>& visited,
+                         std::vector<int>& edgeSwapStack,
+                         std::vector<int>& edges);
+  void RemoveIfFolded(int edge);
+  void PairUp(int edge0, int edge1);
+  void UpdateVert(int vert, int startEdge, int endEdge);
+  void FormLoop(int current, int end);
+  void CollapseTri(const ivec3& triEdge);
+  void SplitPinchedVerts();
+
+  // subdivision.cpp
+  int GetNeighbor(int tri) const;
+  ivec4 GetHalfedges(int tri) const;
+  BaryIndices GetIndices(int halfedge) const;
+  void FillRetainedVerts(Vec<Barycentric>& vertBary) const;
+  Vec<Barycentric> Subdivide(std::function<int(vec3, vec4, vec4)>,
+                             bool = false);
+
+  // smoothing.cpp
+  bool IsInsideQuad(int halfedge) const;
+  bool IsMarkedInsideQuad(int halfedge) const;
+  vec3 GetNormal(int halfedge, int normalIdx) const;
+  vec4 TangentFromNormal(const vec3& normal, int halfedge) const;
+  std::vector<Smoothness> UpdateSharpenedEdges(
+      const std::vector<Smoothness>&) const;
+  Vec<bool> FlatFaces() const;
+  Vec<int> VertFlatFace(const Vec<bool>&) const;
+  Vec<int> VertHalfedge() const;
+  std::vector<Smoothness> SharpenEdges(double minSharpAngle,
+                                       double minSmoothness) const;
+  void SharpenTangent(int halfedge, double smoothness);
+  void SetNormals(int normalIdx, double minSharpAngle);
+  void LinearizeFlatTangents();
+  void DistributeTangents(const Vec<bool>& fixedHalfedges);
+  void CreateTangents(int normalIdx);
+  void CreateTangents(std::vector<Smoothness>);
+  void Refine(std::function<int(vec3, vec4, vec4)>, bool = false);
+
+  // quickhull.cpp
+  void Hull(VecView<vec3> vertPos);
+};
+}  // namespace manifold

+ 1038 - 0
thirdparty/manifold/src/manifold.cpp

@@ -0,0 +1,1038 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <map>
+#include <numeric>
+
+#include "./boolean3.h"
+#include "./csg_tree.h"
+#include "./impl.h"
+#include "./parallel.h"
+
+namespace {
+using namespace manifold;
+
+ExecutionParams manifoldParams;
+
+struct UpdateProperties {
+  double* properties;
+  const int numProp;
+  const double* oldProperties;
+  const int numOldProp;
+  const vec3* vertPos;
+  const ivec3* triProperties;
+  const Halfedge* halfedges;
+  std::function<void(double*, vec3, const double*)> propFunc;
+
+  void operator()(int tri) {
+    for (int i : {0, 1, 2}) {
+      const int vert = halfedges[3 * tri + i].startVert;
+      const int propVert = triProperties[tri][i];
+      propFunc(properties + numProp * propVert, vertPos[vert],
+               oldProperties + numOldProp * propVert);
+    }
+  }
+};
+
+Manifold Halfspace(Box bBox, vec3 normal, double originOffset) {
+  normal = la::normalize(normal);
+  Manifold cutter = Manifold::Cube(vec3(2.0), true).Translate({1.0, 0.0, 0.0});
+  double size = la::length(bBox.Center() - normal * originOffset) +
+                0.5 * la::length(bBox.Size());
+  cutter = cutter.Scale(vec3(size)).Translate({originOffset, 0.0, 0.0});
+  double yDeg = degrees(-std::asin(normal.z));
+  double zDeg = degrees(std::atan2(normal.y, normal.x));
+  return cutter.Rotate(0.0, yDeg, zDeg);
+}
+
+template <typename Precision, typename I>
+MeshGLP<Precision, I> GetMeshGLImpl(const manifold::Manifold::Impl& impl,
+                                    int normalIdx) {
+  ZoneScoped;
+  const int numProp = impl.NumProp();
+  const int numVert = impl.NumPropVert();
+  const int numTri = impl.NumTri();
+
+  const bool isOriginal = impl.meshRelation_.originalID >= 0;
+  const bool updateNormals = !isOriginal && normalIdx >= 0;
+
+  MeshGLP<Precision, I> out;
+  out.numProp = 3 + numProp;
+  out.tolerance = impl.tolerance_;
+  if (std::is_same<Precision, float>::value)
+    out.tolerance =
+        std::max(out.tolerance,
+                 static_cast<Precision>(std::numeric_limits<float>::epsilon() *
+                                        impl.bBox_.Scale()));
+  out.triVerts.resize(3 * numTri);
+
+  const int numHalfedge = impl.halfedgeTangent_.size();
+  out.halfedgeTangent.resize(4 * numHalfedge);
+  for (int i = 0; i < numHalfedge; ++i) {
+    const vec4 t = impl.halfedgeTangent_[i];
+    out.halfedgeTangent[4 * i] = t.x;
+    out.halfedgeTangent[4 * i + 1] = t.y;
+    out.halfedgeTangent[4 * i + 2] = t.z;
+    out.halfedgeTangent[4 * i + 3] = t.w;
+  }
+  // Sort the triangles into runs
+  out.faceID.resize(numTri);
+  std::vector<int> triNew2Old(numTri);
+  std::iota(triNew2Old.begin(), triNew2Old.end(), 0);
+  VecView<const TriRef> triRef = impl.meshRelation_.triRef;
+  // Don't sort originals - keep them in order
+  if (!isOriginal) {
+    std::sort(triNew2Old.begin(), triNew2Old.end(), [triRef](int a, int b) {
+      return triRef[a].originalID == triRef[b].originalID
+                 ? triRef[a].meshID < triRef[b].meshID
+                 : triRef[a].originalID < triRef[b].originalID;
+    });
+  }
+
+  std::vector<mat3> runNormalTransform;
+  auto addRun = [updateNormals, isOriginal](
+                    MeshGLP<Precision, I>& out,
+                    std::vector<mat3>& runNormalTransform, int tri,
+                    const manifold::Manifold::Impl::Relation& rel) {
+    out.runIndex.push_back(3 * tri);
+    out.runOriginalID.push_back(rel.originalID);
+    if (updateNormals) {
+      runNormalTransform.push_back(NormalTransform(rel.transform) *
+                                   (rel.backSide ? -1.0 : 1.0));
+    }
+    if (!isOriginal) {
+      for (const int col : {0, 1, 2, 3}) {
+        for (const int row : {0, 1, 2}) {
+          out.runTransform.push_back(rel.transform[col][row]);
+        }
+      }
+    }
+  };
+
+  auto meshIDtransform = impl.meshRelation_.meshIDtransform;
+  int lastID = -1;
+  for (int tri = 0; tri < numTri; ++tri) {
+    const int oldTri = triNew2Old[tri];
+    const auto ref = triRef[oldTri];
+    const int meshID = ref.meshID;
+
+    out.faceID[tri] = ref.tri;
+    for (const int i : {0, 1, 2})
+      out.triVerts[3 * tri + i] = impl.halfedge_[3 * oldTri + i].startVert;
+
+    if (meshID != lastID) {
+      manifold::Manifold::Impl::Relation rel;
+      auto it = meshIDtransform.find(meshID);
+      if (it != meshIDtransform.end()) rel = it->second;
+      addRun(out, runNormalTransform, tri, rel);
+      meshIDtransform.erase(meshID);
+      lastID = meshID;
+    }
+  }
+  // Add runs for originals that did not contribute any faces to the output
+  for (const auto& pair : meshIDtransform) {
+    addRun(out, runNormalTransform, numTri, pair.second);
+  }
+  out.runIndex.push_back(3 * numTri);
+
+  // Early return for no props
+  if (numProp == 0) {
+    out.vertProperties.resize(3 * numVert);
+    for (int i = 0; i < numVert; ++i) {
+      const vec3 v = impl.vertPos_[i];
+      out.vertProperties[3 * i] = v.x;
+      out.vertProperties[3 * i + 1] = v.y;
+      out.vertProperties[3 * i + 2] = v.z;
+    }
+    return out;
+  }
+  // Duplicate verts with different props
+  std::vector<int> vert2idx(impl.NumVert(), -1);
+  std::vector<std::vector<ivec2>> vertPropPair(impl.NumVert());
+  out.vertProperties.reserve(numVert * static_cast<size_t>(out.numProp));
+
+  for (size_t run = 0; run < out.runOriginalID.size(); ++run) {
+    for (size_t tri = out.runIndex[run] / 3; tri < out.runIndex[run + 1] / 3;
+         ++tri) {
+      const ivec3 triProp = impl.meshRelation_.triProperties[triNew2Old[tri]];
+      for (const int i : {0, 1, 2}) {
+        const int prop = triProp[i];
+        const int vert = out.triVerts[3 * tri + i];
+
+        auto& bin = vertPropPair[vert];
+        bool bFound = false;
+        for (const auto& b : bin) {
+          if (b.x == prop) {
+            bFound = true;
+            out.triVerts[3 * tri + i] = b.y;
+            break;
+          }
+        }
+        if (bFound) continue;
+        const int idx = out.vertProperties.size() / out.numProp;
+        out.triVerts[3 * tri + i] = idx;
+        bin.push_back({prop, idx});
+
+        for (int p : {0, 1, 2}) {
+          out.vertProperties.push_back(impl.vertPos_[vert][p]);
+        }
+        for (int p = 0; p < numProp; ++p) {
+          out.vertProperties.push_back(
+              impl.meshRelation_.properties[prop * numProp + p]);
+        }
+
+        if (updateNormals) {
+          vec3 normal;
+          const int start = out.vertProperties.size() - out.numProp;
+          for (int i : {0, 1, 2}) {
+            normal[i] = out.vertProperties[start + 3 + normalIdx + i];
+          }
+          normal = la::normalize(runNormalTransform[run] * normal);
+          for (int i : {0, 1, 2}) {
+            out.vertProperties[start + 3 + normalIdx + i] = normal[i];
+          }
+        }
+
+        if (vert2idx[vert] == -1) {
+          vert2idx[vert] = idx;
+        } else {
+          out.mergeFromVert.push_back(idx);
+          out.mergeToVert.push_back(vert2idx[vert]);
+        }
+      }
+    }
+  }
+  return out;
+}
+}  // namespace
+
+namespace manifold {
+
+/**
+ * Construct an empty Manifold.
+ *
+ */
+Manifold::Manifold() : pNode_{std::make_shared<CsgLeafNode>()} {}
+Manifold::~Manifold() = default;
+Manifold::Manifold(Manifold&&) noexcept = default;
+Manifold& Manifold::operator=(Manifold&&) noexcept = default;
+
+Manifold::Manifold(const Manifold& other) : pNode_(other.pNode_) {}
+
+Manifold::Manifold(std::shared_ptr<CsgNode> pNode) : pNode_(pNode) {}
+
+Manifold::Manifold(std::shared_ptr<Impl> pImpl_)
+    : pNode_(std::make_shared<CsgLeafNode>(pImpl_)) {}
+
+Manifold Manifold::Invalid() {
+  auto pImpl_ = std::make_shared<Impl>();
+  pImpl_->status_ = Error::InvalidConstruction;
+  return Manifold(pImpl_);
+}
+
+Manifold& Manifold::operator=(const Manifold& other) {
+  if (this != &other) {
+    pNode_ = other.pNode_;
+  }
+  return *this;
+}
+
+CsgLeafNode& Manifold::GetCsgLeafNode() const {
+  if (pNode_->GetNodeType() != CsgNodeType::Leaf) {
+    pNode_ = pNode_->ToLeafNode();
+  }
+  return *std::static_pointer_cast<CsgLeafNode>(pNode_);
+}
+
+/**
+ * Convert a MeshGL into a Manifold, retaining its properties and merging only
+ * the positions according to the merge vectors. Will return an empty Manifold
+ * and set an Error Status if the result is not an oriented 2-manifold. Will
+ * collapse degenerate triangles and unnecessary vertices.
+ *
+ * All fields are read, making this structure suitable for a lossless round-trip
+ * of data from GetMeshGL. For multi-material input, use ReserveIDs to set a
+ * unique originalID for each material, and sort the materials into triangle
+ * runs.
+ *
+ * @param meshGL The input MeshGL.
+ */
+Manifold::Manifold(const MeshGL& meshGL)
+    : pNode_(std::make_shared<CsgLeafNode>(std::make_shared<Impl>(meshGL))) {}
+
+/**
+ * Convert a MeshGL into a Manifold, retaining its properties and merging only
+ * the positions according to the merge vectors. Will return an empty Manifold
+ * and set an Error Status if the result is not an oriented 2-manifold. Will
+ * collapse degenerate triangles and unnecessary vertices.
+ *
+ * All fields are read, making this structure suitable for a lossless round-trip
+ * of data from GetMeshGL. For multi-material input, use ReserveIDs to set a
+ * unique originalID for each material, and sort the materials into triangle
+ * runs.
+ *
+ * @param meshGL64 The input MeshGL64.
+ */
+Manifold::Manifold(const MeshGL64& meshGL64)
+    : pNode_(std::make_shared<CsgLeafNode>(std::make_shared<Impl>(meshGL64))) {}
+
+/**
+ * The most complete output of this library, returning a MeshGL that is designed
+ * to easily push into a renderer, including all interleaved vertex properties
+ * that may have been input. It also includes relations to all the input meshes
+ * that form a part of this result and the transforms applied to each.
+ *
+ * @param normalIdx If the original MeshGL inputs that formed this manifold had
+ * properties corresponding to normal vectors, you can specify the first of the
+ * three consecutive property channels forming the (x, y, z) normals, which will
+ * cause this output MeshGL to automatically update these normals according to
+ * the applied transforms and front/back side. normalIdx + 3 must be <=
+ * numProp, and all original MeshGLs must use the same channels for their
+ * normals.
+ */
+MeshGL Manifold::GetMeshGL(int normalIdx) const {
+  const Impl& impl = *GetCsgLeafNode().GetImpl();
+  return GetMeshGLImpl<float, uint32_t>(impl, normalIdx);
+}
+
+/**
+ * The most complete output of this library, returning a MeshGL that is designed
+ * to easily push into a renderer, including all interleaved vertex properties
+ * that may have been input. It also includes relations to all the input meshes
+ * that form a part of this result and the transforms applied to each.
+ *
+ * @param normalIdx If the original MeshGL inputs that formed this manifold had
+ * properties corresponding to normal vectors, you can specify the first of the
+ * three consecutive property channels forming the (x, y, z) normals, which will
+ * cause this output MeshGL to automatically update these normals according to
+ * the applied transforms and front/back side. normalIdx + 3 must be <=
+ * numProp, and all original MeshGLs must use the same channels for their
+ * normals.
+ */
+MeshGL64 Manifold::GetMeshGL64(int normalIdx) const {
+  const Impl& impl = *GetCsgLeafNode().GetImpl();
+  return GetMeshGLImpl<double, uint64_t>(impl, normalIdx);
+}
+
+/**
+ * Does the Manifold have any triangles?
+ */
+bool Manifold::IsEmpty() const { return GetCsgLeafNode().GetImpl()->IsEmpty(); }
+/**
+ * Returns the reason for an input Mesh producing an empty Manifold. This Status
+ * only applies to Manifolds newly-created from an input Mesh - once they are
+ * combined into a new Manifold via operations, the status reverts to NoError,
+ * simply processing the problem mesh as empty. Likewise, empty meshes may still
+ * show NoError, for instance if they are small enough relative to their
+ * tolerance to be collapsed to nothing.
+ */
+Manifold::Error Manifold::Status() const {
+  return GetCsgLeafNode().GetImpl()->status_;
+}
+/**
+ * The number of vertices in the Manifold.
+ */
+size_t Manifold::NumVert() const {
+  return GetCsgLeafNode().GetImpl()->NumVert();
+}
+/**
+ * The number of edges in the Manifold.
+ */
+size_t Manifold::NumEdge() const {
+  return GetCsgLeafNode().GetImpl()->NumEdge();
+}
+/**
+ * The number of triangles in the Manifold.
+ */
+size_t Manifold::NumTri() const { return GetCsgLeafNode().GetImpl()->NumTri(); }
+/**
+ * The number of properties per vertex in the Manifold.
+ */
+size_t Manifold::NumProp() const {
+  return GetCsgLeafNode().GetImpl()->NumProp();
+}
+/**
+ * The number of property vertices in the Manifold. This will always be >=
+ * NumVert, as some physical vertices may be duplicated to account for different
+ * properties on different neighboring triangles.
+ */
+size_t Manifold::NumPropVert() const {
+  return GetCsgLeafNode().GetImpl()->NumPropVert();
+}
+
+/**
+ * Returns the axis-aligned bounding box of all the Manifold's vertices.
+ */
+Box Manifold::BoundingBox() const { return GetCsgLeafNode().GetImpl()->bBox_; }
+
+/**
+ * Returns the epsilon value of this Manifold's vertices, which tracks the
+ * approximate rounding error over all the transforms and operations that have
+ * led to this state. This is the value of &epsilon; defining
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid).
+ */
+double Manifold::GetEpsilon() const {
+  return GetCsgLeafNode().GetImpl()->epsilon_;
+}
+
+/**
+ * Returns the tolerance value of this Manifold. Triangles that are coplanar
+ * within tolerance tend to be merged and edges shorter than tolerance tend to
+ * be collapsed.
+ */
+double Manifold::GetTolerance() const {
+  return GetCsgLeafNode().GetImpl()->tolerance_;
+}
+
+/**
+ * Return a copy of the manifold with the given tolerance value.
+ * This performs mesh simplification when the tolerance value is increased.
+ */
+Manifold Manifold::SetTolerance(double tolerance) const {
+  auto impl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  if (tolerance > impl->tolerance_) {
+    impl->tolerance_ = tolerance;
+    impl->CreateFaces();
+    impl->SimplifyTopology();
+    impl->Finish();
+  } else {
+    // for reducing tolerance, we need to make sure it is still at least
+    // equal to epsilon.
+    impl->tolerance_ = std::max(impl->epsilon_, tolerance);
+  }
+  return Manifold(impl);
+}
+
+/**
+ * The genus is a topological property of the manifold, representing the number
+ * of "handles". A sphere is 0, torus 1, etc. It is only meaningful for a single
+ * mesh, so it is best to call Decompose() first.
+ */
+int Manifold::Genus() const {
+  int chi = NumVert() - NumEdge() + NumTri();
+  return 1 - chi / 2;
+}
+
+/**
+ * Returns the surface area of the manifold.
+ */
+double Manifold::SurfaceArea() const {
+  return GetCsgLeafNode().GetImpl()->GetProperty(Impl::Property::SurfaceArea);
+}
+
+/**
+ * Returns the volume of the manifold.
+ */
+double Manifold::Volume() const {
+  return GetCsgLeafNode().GetImpl()->GetProperty(Impl::Property::Volume);
+}
+
+/**
+ * If this mesh is an original, this returns its meshID that can be referenced
+ * by product manifolds' MeshRelation. If this manifold is a product, this
+ * returns -1.
+ */
+int Manifold::OriginalID() const {
+  return GetCsgLeafNode().GetImpl()->meshRelation_.originalID;
+}
+
+/**
+ * This removes all relations (originalID, faceID, transform) to ancestor meshes
+ * and this new Manifold is marked an original. It also collapses colinear edges
+ * - these don't get collapsed at boundaries where originalID changes, so the
+ * reset may allow flat faces to be further simplified.
+ */
+Manifold Manifold::AsOriginal() const {
+  auto oldImpl = GetCsgLeafNode().GetImpl();
+  if (oldImpl->status_ != Error::NoError) {
+    auto newImpl = std::make_shared<Impl>();
+    newImpl->status_ = oldImpl->status_;
+    return Manifold(std::make_shared<CsgLeafNode>(newImpl));
+  }
+  auto newImpl = std::make_shared<Impl>(*oldImpl);
+  newImpl->InitializeOriginal();
+  newImpl->CreateFaces();
+  newImpl->SimplifyTopology();
+  newImpl->Finish();
+  newImpl->InitializeOriginal(true);
+  return Manifold(std::make_shared<CsgLeafNode>(newImpl));
+}
+
+/**
+ * Returns the first of n sequential new unique mesh IDs for marking sets of
+ * triangles that can be looked up after further operations. Assign to
+ * MeshGL.runOriginalID vector.
+ */
+uint32_t Manifold::ReserveIDs(uint32_t n) {
+  return Manifold::Impl::ReserveIDs(n);
+}
+
+/**
+ * The triangle normal vectors are saved over the course of operations rather
+ * than recalculated to avoid rounding error. This checks that triangles still
+ * match their normal vectors within Precision().
+ */
+bool Manifold::MatchesTriNormals() const {
+  return GetCsgLeafNode().GetImpl()->MatchesTriNormals();
+}
+
+/**
+ * The number of triangles that are colinear within Precision(). This library
+ * attempts to remove all of these, but it cannot always remove all of them
+ * without changing the mesh by too much.
+ */
+size_t Manifold::NumDegenerateTris() const {
+  return GetCsgLeafNode().GetImpl()->NumDegenerateTris();
+}
+
+/**
+ * This is a checksum-style verification of the collider, simply returning the
+ * total number of edge-face bounding box overlaps between this and other.
+ *
+ * @param other A Manifold to overlap with.
+ */
+size_t Manifold::NumOverlaps(const Manifold& other) const {
+  SparseIndices overlaps = GetCsgLeafNode().GetImpl()->EdgeCollisions(
+      *other.GetCsgLeafNode().GetImpl());
+  int num_overlaps = overlaps.size();
+
+  overlaps = other.GetCsgLeafNode().GetImpl()->EdgeCollisions(
+      *GetCsgLeafNode().GetImpl());
+  return num_overlaps + overlaps.size();
+}
+
+/**
+ * Move this Manifold in space. This operation can be chained. Transforms are
+ * combined and applied lazily.
+ *
+ * @param v The vector to add to every vertex.
+ */
+Manifold Manifold::Translate(vec3 v) const {
+  return Manifold(pNode_->Translate(v));
+}
+
+/**
+ * Scale this Manifold in space. This operation can be chained. Transforms are
+ * combined and applied lazily.
+ *
+ * @param v The vector to multiply every vertex by per component.
+ */
+Manifold Manifold::Scale(vec3 v) const { return Manifold(pNode_->Scale(v)); }
+
+/**
+ * Applies an Euler angle rotation to the manifold, first about the X axis, then
+ * Y, then Z, in degrees. We use degrees so that we can minimize rounding error,
+ * and eliminate it completely for any multiples of 90 degrees. Additionally,
+ * more efficient code paths are used to update the manifold when the transforms
+ * only rotate by multiples of 90 degrees. This operation can be chained.
+ * Transforms are combined and applied lazily.
+ *
+ * @param xDegrees First rotation, degrees about the X-axis.
+ * @param yDegrees Second rotation, degrees about the Y-axis.
+ * @param zDegrees Third rotation, degrees about the Z-axis.
+ */
+Manifold Manifold::Rotate(double xDegrees, double yDegrees,
+                          double zDegrees) const {
+  return Manifold(pNode_->Rotate(xDegrees, yDegrees, zDegrees));
+}
+
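+// Example (illustrative sketch): chained rotations are merged into a single
+// lazy transform; rotation is applied about X, then Y, then Z.
+//   Manifold turned = m.Rotate(90, 0, 0).Rotate(0, 0, 45);
+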
+/**
+ * Transform this Manifold in space. The first three columns form a 3x3 matrix
+ * transform and the last is a translation vector. This operation can be
+ * chained. Transforms are combined and applied lazily.
+ *
+ * @param m The affine transform matrix to apply to all the vertices.
+ */
+Manifold Manifold::Transform(const mat3x4& m) const {
+  return Manifold(pNode_->Transform(m));
+}
+
+/**
+ * Mirror this Manifold over the plane described by the unit form of the given
+ * normal vector. If the length of the normal is zero, an empty Manifold is
+ * returned. This operation can be chained. Transforms are combined and applied
+ * lazily.
+ *
+ * @param normal The normal vector of the plane to be mirrored over
+ */
+Manifold Manifold::Mirror(vec3 normal) const {
+  if (la::length(normal) == 0.) {
+    return Manifold();
+  }
+  auto n = la::normalize(normal);
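+  // Householder reflection: I - 2 * n * n^T reflects points across the plane
+  // through the origin with unit normal n.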
+  auto m = mat3x4(mat3(la::identity) - 2.0 * la::outerprod(n, n), vec3());
+  return Manifold(pNode_->Transform(m));
+}
+
+/**
+ * This function does not change the topology, but allows the vertices to be
+ * moved according to any arbitrary input function. It is easy to create a
+ * function that warps a geometrically valid object into one which overlaps, but
+ * that is not checked here, so it is up to the user to choose their function
+ * with discretion.
+ *
+ * @param warpFunc A function that modifies a given vertex position.
+ */
+Manifold Manifold::Warp(std::function<void(vec3&)> warpFunc) const {
+  auto oldImpl = GetCsgLeafNode().GetImpl();
+  if (oldImpl->status_ != Error::NoError) {
+    auto pImpl = std::make_shared<Impl>();
+    pImpl->status_ = oldImpl->status_;
+    return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+  }
+  auto pImpl = std::make_shared<Impl>(*oldImpl);
+  pImpl->Warp(warpFunc);
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
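+// Example (illustrative sketch): displace each vertex with an arbitrary
+// function of its position, here a simple parabolic bend along Z.
+//   Manifold bent = m.Warp([](vec3& v) { v.z += 0.25 * v.x * v.x; });
+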
+/**
+ * Same as Manifold::Warp, but calls warpFunc with a VecView (roughly
+ * equivalent to a std::span) pointing to all vec3 elements to be modified
+ * in-place.
+ *
+ * @param warpFunc A function that modifies multiple vertex positions.
+ */
+Manifold Manifold::WarpBatch(
+    std::function<void(VecView<vec3>)> warpFunc) const {
+  auto oldImpl = GetCsgLeafNode().GetImpl();
+  if (oldImpl->status_ != Error::NoError) {
+    auto pImpl = std::make_shared<Impl>();
+    pImpl->status_ = oldImpl->status_;
+    return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+  }
+  auto pImpl = std::make_shared<Impl>(*oldImpl);
+  pImpl->WarpBatch(warpFunc);
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * Create a new copy of this manifold with updated vertex properties by
+ * supplying a function that takes the existing position and properties as
+ * input. You may specify any number of output properties, allowing creation and
+ * removal of channels. Note: undefined behavior will result if you read past
+ * the number of input properties or write past the number of output properties.
+ *
+ * If propFunc is a nullptr, this function will just set the channel to zeroes.
+ *
+ * @param numProp The new number of properties per vertex.
+ * @param propFunc A function that modifies the properties of a given vertex.
+ */
+Manifold Manifold::SetProperties(
+    int numProp,
+    std::function<void(double* newProp, vec3 position, const double* oldProp)>
+        propFunc) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  const int oldNumProp = NumProp();
+  const Vec<double> oldProperties = pImpl->meshRelation_.properties;
+
+  auto& triProperties = pImpl->meshRelation_.triProperties;
+  if (numProp == 0) {
+    triProperties.resize(0);
+    pImpl->meshRelation_.properties.resize(0);
+  } else {
+    if (triProperties.size() == 0) {
+      const int numTri = NumTri();
+      triProperties.resize(numTri);
+      for (int i = 0; i < numTri; ++i) {
+        for (const int j : {0, 1, 2}) {
+          triProperties[i][j] = pImpl->halfedge_[3 * i + j].startVert;
+        }
+      }
+      pImpl->meshRelation_.properties = Vec<double>(numProp * NumVert(), 0);
+    } else {
+      pImpl->meshRelation_.properties = Vec<double>(numProp * NumPropVert(), 0);
+    }
+    for_each_n(
+        propFunc == nullptr ? ExecutionPolicy::Par : ExecutionPolicy::Seq,
+        countAt(0), NumTri(),
+        UpdateProperties(
+            {pImpl->meshRelation_.properties.data(), numProp,
+             oldProperties.data(), oldNumProp, pImpl->vertPos_.data(),
+             triProperties.data(), pImpl->halfedge_.data(),
+             propFunc == nullptr ? [](double* newProp, vec3 position,
+                                      const double* oldProp) { *newProp = 0; }
+                                 : propFunc}));
+  }
+
+  pImpl->meshRelation_.numProp = numProp;
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
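+// Example (illustrative sketch): create three property channels and fill them
+// with the vertex position, matching the propFunc signature documented above.
+//   Manifold withPos = m.SetProperties(
+//       3, [](double* newProp, vec3 pos, const double* /*oldProp*/) {
+//         newProp[0] = pos.x;
+//         newProp[1] = pos.y;
+//         newProp[2] = pos.z;
+//       });
+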
+/**
+ * Curvature is the inverse of the radius of curvature, and signed such that
+ * positive is convex and negative is concave. There are two orthogonal
+ * principal curvatures at any point on a manifold, with one maximum and the
+ * other minimum. Gaussian curvature is their product, while mean
+ * curvature is their sum. This approximates them for every vertex and assigns
+ * them as vertex properties on the given channels.
+ *
+ * @param gaussianIdx The property channel index in which to store the Gaussian
+ * curvature. An index < 0 will be ignored (stores nothing). The property set
+ * will be automatically expanded to include the channel index specified.
+ *
+ * @param meanIdx The property channel index in which to store the mean
+ * curvature. An index < 0 will be ignored (stores nothing). The property set
+ * will be automatically expanded to include the channel index specified.
+ */
+Manifold Manifold::CalculateCurvature(int gaussianIdx, int meanIdx) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  pImpl->CalculateCurvature(gaussianIdx, meanIdx);
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * Fills in vertex properties for normal vectors, calculated from the mesh
+ * geometry. Flat faces composed of three or more triangles will remain flat.
+ *
+ * @param normalIdx The property channel in which to store the X
+ * values of the normals. The X, Y, and Z channels will be sequential. The
+ * property set will be automatically expanded such that NumProp will be at
+ * least normalIdx + 3.
+ *
+ * @param minSharpAngle Any edges with angles greater than this value will
+ * remain sharp, getting different normal vector properties on each side of the
+ * edge. By default, no edges are sharp and all normals are shared. With a value
+ * of zero, the model is faceted and all normals match their triangle normals,
+ * but in this case it would be better not to calculate normals at all.
+ */
+Manifold Manifold::CalculateNormals(int normalIdx, double minSharpAngle) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  pImpl->SetNormals(normalIdx, minSharpAngle);
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
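+// Example (illustrative sketch): store smooth vertex normals in property
+// channels 0-2, keeping edges sharper than 60 degrees faceted.
+//   Manifold withNormals = m.CalculateNormals(0, 60);
+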
+/**
+ * Smooths out the Manifold by filling in the halfedgeTangent vectors. The
+ * geometry will remain unchanged until Refine or RefineToLength is called to
+ * interpolate the surface. This version uses the supplied vertex normal
+ * properties to define the tangent vectors. Faces of two coplanar triangles
+ * will be marked as quads, while faces with three or more will be flat.
+ *
+ * @param normalIdx The first property channel of the normals. NumProp must be
+ * at least normalIdx + 3. Any vertex where multiple normals exist and don't
+ * agree will result in a sharp edge.
+ */
+Manifold Manifold::SmoothByNormals(int normalIdx) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  if (!IsEmpty()) {
+    pImpl->CreateTangents(normalIdx);
+  }
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * Smooths out the Manifold by filling in the halfedgeTangent vectors. The
+ * geometry will remain unchanged until Refine or RefineToLength is called to
+ * interpolate the surface. This version uses the geometry of the triangles and
+ * pseudo-normals to define the tangent vectors. Faces of two coplanar triangles
+ * will be marked as quads.
+ *
+ * @param minSharpAngle degrees, default 60. Any edges with angles greater than
+ * this value will remain sharp. The rest will be smoothed to G1 continuity,
+ * with the caveat that flat faces of three or more triangles will always remain
+ * flat. With a value of zero, the model is faceted, but in this case there is
+ * no point in smoothing.
+ *
+ * @param minSmoothness range: 0 - 1, default 0. The smoothness applied to sharp
+ * angles. The default gives a hard edge, while values > 0 will give a small
+ * fillet on these sharp edges. A value of 1 is equivalent to a minSharpAngle of
+ * 180 - all edges will be smooth.
+ */
+Manifold Manifold::SmoothOut(double minSharpAngle, double minSmoothness) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  if (!IsEmpty()) {
+    if (minSmoothness == 0) {
+      const int numProp = pImpl->meshRelation_.numProp;
+      Vec<double> properties = pImpl->meshRelation_.properties;
+      Vec<ivec3> triProperties = pImpl->meshRelation_.triProperties;
+      pImpl->SetNormals(0, minSharpAngle);
+      pImpl->CreateTangents(0);
+      pImpl->meshRelation_.numProp = numProp;
+      pImpl->meshRelation_.properties.swap(properties);
+      pImpl->meshRelation_.triProperties.swap(triProperties);
+    } else {
+      pImpl->CreateTangents(pImpl->SharpenEdges(minSharpAngle, minSmoothness));
+    }
+  }
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * Increase the density of the mesh by splitting every edge into n pieces. For
+ * instance, with n = 2, each triangle will be split into 4 triangles. Quads
+ * will ignore their interior triangle bisector. These will all be coplanar (and
+ * will not be immediately collapsed) unless the Mesh/Manifold has
+ * halfedgeTangents specified (e.g. from the Smooth() constructor), in which
+ * case the new vertices will be moved to the interpolated surface according to
+ * their barycentric coordinates.
+ *
+ * @param n The number of pieces to split every edge into. Must be > 1.
+ */
+Manifold Manifold::Refine(int n) const {
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  if (n > 1) {
+    pImpl->Refine(
+        [n](vec3 edge, vec4 tangentStart, vec4 tangentEnd) { return n - 1; });
+  }
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
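+// Note (illustrative): splitting every edge into n pieces turns each triangle
+// into n * n triangles, so Refine(3) yields 9 triangles per input triangle.
+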
+/**
+ * Increase the density of the mesh by splitting each edge into pieces of
+ * roughly the input length. Interior verts are added to keep the rest of the
+ * triangulation edges also of roughly the same length. If halfedgeTangents are
+ * present (e.g. from the Smooth() constructor), the new vertices will be moved
+ * to the interpolated surface according to their barycentric coordinates. Quads
+ * will ignore their interior triangle bisector.
+ *
+ * @param length The length that edges will be broken down to.
+ */
+Manifold Manifold::RefineToLength(double length) const {
+  length = std::abs(length);
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  pImpl->Refine([length](vec3 edge, vec4 tangentStart, vec4 tangentEnd) {
+    return static_cast<int>(la::length(edge) / length);
+  });
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * Increase the density of the mesh by splitting each edge into pieces such that
+ * any point on the resulting triangles is roughly within tolerance of the
+ * smoothly curved surface defined by the tangent vectors. This means tightly
+ * curving regions will be divided more finely than smoother regions. If
+ * halfedgeTangents are not present, the result will simply be a copy of the
+ * original. Quads will ignore their interior triangle bisector.
+ *
+ * @param tolerance The desired maximum distance between the faceted mesh
+ * produced and the exact smoothly curving surface. All vertices are exactly on
+ * the surface, within rounding error.
+ */
+Manifold Manifold::RefineToTolerance(double tolerance) const {
+  tolerance = std::abs(tolerance);
+  auto pImpl = std::make_shared<Impl>(*GetCsgLeafNode().GetImpl());
+  if (!pImpl->halfedgeTangent_.empty()) {
+    pImpl->Refine(
+        [tolerance](vec3 edge, vec4 tangentStart, vec4 tangentEnd) {
+          const vec3 edgeNorm = la::normalize(edge);
+          // Weight heuristic
+          const vec3 tStart = vec3(tangentStart);
+          const vec3 tEnd = vec3(tangentEnd);
+          // Perpendicular to edge
+          const vec3 start = tStart - edgeNorm * la::dot(edgeNorm, tStart);
+          const vec3 end = tEnd - edgeNorm * la::dot(edgeNorm, tEnd);
+          // Circular arc result plus heuristic term for non-circular curves
+          const double d = 0.5 * (la::length(start) + la::length(end)) +
+                           la::length(start - end);
+          return static_cast<int>(std::sqrt(3 * d / (4 * tolerance)));
+        },
+        true);
+  }
+  return Manifold(std::make_shared<CsgLeafNode>(pImpl));
+}
+
+/**
+ * The central operation of this library: the Boolean combines two manifolds
+ * into another by calculating their intersections and removing the unused
+ * portions.
+ * [&epsilon;-valid](https://github.com/elalish/manifold/wiki/Manifold-Library#definition-of-%CE%B5-valid)
+ * inputs will produce &epsilon;-valid output. &epsilon;-invalid input may fail
+ * triangulation.
+ *
+ * These operations are optimized to produce nearly-instant results if either
+ * input is empty or their bounding boxes do not overlap.
+ *
+ * @param second The other Manifold.
+ * @param op The type of operation to perform.
+ */
+Manifold Manifold::Boolean(const Manifold& second, OpType op) const {
+  return Manifold(pNode_->Boolean(second.pNode_, op));
+}
+
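+// Example (illustrative sketch, assuming the Cube()/Sphere() constructors
+// declared in manifold.h):
+//   Manifold box = Manifold::Cube(vec3(1.0), true);
+//   Manifold ball = Manifold::Sphere(0.6);
+//   Manifold carved = box.Boolean(ball, OpType::Subtract);  // same as box - ball
+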
+/**
+ * Perform the given boolean operation on a list of Manifolds. In case of
+ * Subtract, all Manifolds in the tail are differenced from the head.
+ */
+Manifold Manifold::BatchBoolean(const std::vector<Manifold>& manifolds,
+                                OpType op) {
+  if (manifolds.size() == 0)
+    return Manifold();
+  else if (manifolds.size() == 1)
+    return manifolds[0];
+  std::vector<std::shared_ptr<CsgNode>> children;
+  children.reserve(manifolds.size());
+  for (const auto& m : manifolds) children.push_back(m.pNode_);
+  return Manifold(std::make_shared<CsgOpNode>(children, op));
+}
+
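+// Example (illustrative sketch): with OpType::Subtract, every manifold after
+// the first is removed from the first.
+//   Manifold d = Manifold::BatchBoolean({a, b, c}, OpType::Subtract);  // a - b - c
+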
+/**
+ * Shorthand for Boolean Union.
+ */
+Manifold Manifold::operator+(const Manifold& Q) const {
+  return Boolean(Q, OpType::Add);
+}
+
+/**
+ * Shorthand for Boolean Union assignment.
+ */
+Manifold& Manifold::operator+=(const Manifold& Q) {
+  *this = *this + Q;
+  return *this;
+}
+
+/**
+ * Shorthand for Boolean Difference.
+ */
+Manifold Manifold::operator-(const Manifold& Q) const {
+  return Boolean(Q, OpType::Subtract);
+}
+
+/**
+ * Shorthand for Boolean Difference assignment.
+ */
+Manifold& Manifold::operator-=(const Manifold& Q) {
+  *this = *this - Q;
+  return *this;
+}
+
+/**
+ * Shorthand for Boolean Intersection.
+ */
+Manifold Manifold::operator^(const Manifold& Q) const {
+  return Boolean(Q, OpType::Intersect);
+}
+
+/**
+ * Shorthand for Boolean Intersection assignment.
+ */
+Manifold& Manifold::operator^=(const Manifold& Q) {
+  *this = *this ^ Q;
+  return *this;
+}
+
+/**
+ * Split cuts this manifold in two using the cutter manifold. The first result
+ * is the intersection, second is the difference. This is more efficient than
+ * doing them separately.
+ *
+ * @param cutter
+ */
+std::pair<Manifold, Manifold> Manifold::Split(const Manifold& cutter) const {
+  auto impl1 = GetCsgLeafNode().GetImpl();
+  auto impl2 = cutter.GetCsgLeafNode().GetImpl();
+
+  Boolean3 boolean(*impl1, *impl2, OpType::Subtract);
+  auto result1 = std::make_shared<CsgLeafNode>(
+      std::make_unique<Impl>(boolean.Result(OpType::Intersect)));
+  auto result2 = std::make_shared<CsgLeafNode>(
+      std::make_unique<Impl>(boolean.Result(OpType::Subtract)));
+  return std::make_pair(Manifold(result1), Manifold(result2));
+}
+
+/**
+ * Convenient version of Split() for a half-space.
+ *
+ * @param normal This vector is normal to the cutting plane and its length does
+ * not matter. The first result is in the direction of this vector, the second
+ * result is on the opposite side.
+ * @param originOffset The distance of the plane from the origin in the
+ * direction of the normal vector.
+ */
+std::pair<Manifold, Manifold> Manifold::SplitByPlane(
+    vec3 normal, double originOffset) const {
+  return Split(Halfspace(BoundingBox(), normal, originOffset));
+}
+
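+// Example (illustrative sketch): cut along the plane z = 0.5; 'top' keeps the
+// part on the side the normal points toward, 'bottom' the rest.
+//   auto [top, bottom] = m.SplitByPlane(vec3(0, 0, 1), 0.5);
+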
+/**
+ * Identical to SplitByPlane(), but calculating and returning only the first
+ * result.
+ *
+ * @param normal This vector is normal to the cutting plane and its length does
+ * not matter. The result is in the direction of this vector from the plane.
+ * @param originOffset The distance of the plane from the origin in the
+ * direction of the normal vector.
+ */
+Manifold Manifold::TrimByPlane(vec3 normal, double originOffset) const {
+  return *this ^ Halfspace(BoundingBox(), normal, originOffset);
+}
+
+/**
+ * Returns the cross section of this object parallel to the X-Y plane at the
+ * specified Z height, defaulting to zero. Using a height equal to the bottom of
+ * the bounding box will return the bottom faces, while using a height equal to
+ * the top of the bounding box will return empty.
+ */
+Polygons Manifold::Slice(double height) const {
+  return GetCsgLeafNode().GetImpl()->Slice(height);
+}
+
+/**
+ * Returns polygons representing the projected outline of this object
+ * onto the X-Y plane. These polygons will often self-intersect, so it is
+ * recommended to run them through the positive fill rule of CrossSection to get
+ * a sensible result before using them.
+ */
+Polygons Manifold::Project() const {
+  return GetCsgLeafNode().GetImpl()->Project();
+}
+
+ExecutionParams& ManifoldParams() { return manifoldParams; }
+
+/**
+ * Compute the convex hull of a set of points. If the given points are fewer
+ * than 4, or they are all coplanar, an empty Manifold will be returned.
+ *
+ * @param pts A vector of 3-dimensional points over which to compute a convex
+ * hull.
+ */
+Manifold Manifold::Hull(const std::vector<vec3>& pts) {
+  std::shared_ptr<Impl> impl = std::make_shared<Impl>();
+  impl->Hull(Vec<vec3>(pts));
+  return Manifold(std::make_shared<CsgLeafNode>(impl));
+}
+
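+// Example (illustrative sketch): the hull of the eight corners of a unit cube
+// is the cube itself.
+//   std::vector<vec3> corners = {{0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {1, 1, 0},
+//                                {0, 0, 1}, {1, 0, 1}, {0, 1, 1}, {1, 1, 1}};
+//   Manifold cube = Manifold::Hull(corners);
+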
+/**
+ * Compute the convex hull of this manifold.
+ */
+Manifold Manifold::Hull() const {
+  std::shared_ptr<Impl> impl = std::make_shared<Impl>();
+  impl->Hull(GetCsgLeafNode().GetImpl()->vertPos_);
+  return Manifold(std::make_shared<CsgLeafNode>(impl));
+}
+
+/**
+ * Compute the convex hull enveloping a set of manifolds.
+ *
+ * @param manifolds A vector of manifolds over which to compute a convex hull.
+ */
+Manifold Manifold::Hull(const std::vector<Manifold>& manifolds) {
+  return Compose(manifolds).Hull();
+}
+
+/**
+ * Returns the minimum gap between two manifolds. Returns a double between
+ * 0 and searchLength.
+ *
+ * @param other The other manifold to compute the minimum gap to.
+ * @param searchLength The maximum distance to search for a minimum gap.
+ */
+double Manifold::MinGap(const Manifold& other, double searchLength) const {
+  auto intersect = *this ^ other;
+  if (!intersect.IsEmpty()) return 0.0;
+
+  return GetCsgLeafNode().GetImpl()->MinGap(*other.GetCsgLeafNode().GetImpl(),
+                                            searchLength);
+}
+}  // namespace manifold

+ 65 - 0
thirdparty/manifold/src/mesh_fixes.h

@@ -0,0 +1,65 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "./shared.h"
+
+namespace {
+using namespace manifold;
+
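+// Maps a halfedge index to its slot when the owning triangle's winding is
+// reversed (see FlipTris below): within each triangle, local edge 0 swaps with
+// 2 while 1 stays in place.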
+inline int FlipHalfedge(int halfedge) {
+  const int tri = halfedge / 3;
+  const int vert = 2 - (halfedge - 3 * tri);
+  return 3 * tri + vert;
+}
+
+struct TransformNormals {
+  mat3 transform;
+
+  vec3 operator()(vec3 normal) const {
+    normal = la::normalize(transform * normal);
+    if (std::isnan(normal.x)) normal = vec3(0.0);
+    return normal;
+  }
+};
+
+struct TransformTangents {
+  VecView<vec4> tangent;
+  const int edgeOffset;
+  const mat3 transform;
+  const bool invert;
+  VecView<const vec4> oldTangents;
+  VecView<const Halfedge> halfedge;
+
+  void operator()(const int edgeOut) {
+    const int edgeIn =
+        invert ? halfedge[FlipHalfedge(edgeOut)].pairedHalfedge : edgeOut;
+    tangent[edgeOut + edgeOffset] =
+        vec4(transform * vec3(oldTangents[edgeIn]), oldTangents[edgeIn].w);
+  }
+};
+
+struct FlipTris {
+  VecView<Halfedge> halfedge;
+
+  void operator()(const int tri) {
+    std::swap(halfedge[3 * tri], halfedge[3 * tri + 2]);
+
+    for (const int i : {0, 1, 2}) {
+      std::swap(halfedge[3 * tri + i].startVert, halfedge[3 * tri + i].endVert);
+      halfedge[3 * tri + i].pairedHalfedge =
+          FlipHalfedge(halfedge[3 * tri + i].pairedHalfedge);
+    }
+  }
+};
+}  // namespace

+ 1125 - 0
thirdparty/manifold/src/parallel.h

@@ -0,0 +1,1125 @@
+// Copyright 2022 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Simple implementation of selected functions in PSTL.
+// Iterators must be RandomAccessIterator.
+
+#pragma once
+
+#if (MANIFOLD_PAR == 1)
+#include <tbb/combinable.h>
+#include <tbb/parallel_for.h>
+#include <tbb/parallel_invoke.h>
+#include <tbb/parallel_reduce.h>
+#include <tbb/parallel_scan.h>
+#endif
+#include <algorithm>
+#include <numeric>
+
+#include "manifold/iters.h"
+namespace manifold {
+
+enum class ExecutionPolicy {
+  Par,
+  Seq,
+};
+
+constexpr size_t kSeqThreshold = 1e4;
+// Choose an ExecutionPolicy automatically:
+// - Sequential for small workloads,
+// - Parallel (CPU) for larger workloads.
+inline constexpr ExecutionPolicy autoPolicy(size_t size,
+                                            size_t threshold = kSeqThreshold) {
+  if (size <= threshold) {
+    return ExecutionPolicy::Seq;
+  }
+  return ExecutionPolicy::Par;
+}
+
+template <typename Iter,
+          typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>
+inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,
+                                            size_t threshold = kSeqThreshold) {
+  if (static_cast<size_t>(std::distance(first, last)) <= threshold) {
+    return ExecutionPolicy::Seq;
+  }
+  return ExecutionPolicy::Par;
+}
+
+template <typename InputIter, typename OutputIter>
+void copy(ExecutionPolicy policy, InputIter first, InputIter last,
+          OutputIter d_first);
+template <typename InputIter, typename OutputIter>
+void copy(InputIter first, InputIter last, OutputIter d_first);
+
+#if (MANIFOLD_PAR == 1)
+namespace details {
+using manifold::kSeqThreshold;
+// implementation from
+// https://duvanenko.tech.blog/2018/01/14/parallel-merge/
+// https://github.com/DragonSpit/ParallelAlgorithms
+// note that the ranges are now [p, r) to fit our convention.
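+//
+// mergeRec splits the longer of the two sorted runs at its midpoint, finds the
+// matching split point in the other run by binary search, places that pivot,
+// and then merges the two resulting halves of the problem in parallel.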
+template <typename SrcIter, typename DestIter, typename Comp>
+void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2,
+              size_t r2, size_t p3, Comp comp) {
+  size_t length1 = r1 - p1;
+  size_t length2 = r2 - p2;
+  if (length1 < length2) {
+    std::swap(p1, p2);
+    std::swap(r1, r2);
+    std::swap(length1, length2);
+  }
+  if (length1 == 0) return;
+  if (length1 + length2 <= kSeqThreshold) {
+    std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp);
+  } else {
+    size_t q1 = p1 + length1 / 2;
+    size_t q2 =
+        std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp));
+    size_t q3 = p3 + (q1 - p1) + (q2 - p2);
+    dest[q3] = src[q1];
+    tbb::parallel_invoke(
+        [=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); },
+        [=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); });
+  }
+}
+
+template <typename SrcIter, typename DestIter, typename Comp>
+void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end,
+                  Comp comp) {
+  size_t numElements = end - begin;
+  if (numElements <= kSeqThreshold) {
+    std::copy(src + begin, src + end, dest + begin);
+    std::stable_sort(dest + begin, dest + end, comp);
+  } else {
+    size_t middle = begin + numElements / 2;
+    tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); },
+                         [=] { mergeSortRec(dest, src, middle, end, comp); });
+    mergeRec(src, dest, begin, middle, middle, end, begin, comp);
+  }
+}
+
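+// ScanBody adapts a binary operation to tbb::parallel_scan: TBB may invoke it
+// in a pre-scan pass that only accumulates partial sums, and in a final pass
+// (is_final_scan) that also writes the exclusive prefix into output.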
+template <typename T, typename InputIter, typename OutputIter, typename BinOp>
+struct ScanBody {
+  T sum;
+  T identity;
+  BinOp &f;
+  InputIter input;
+  OutputIter output;
+
+  ScanBody(T sum, T identity, BinOp &f, InputIter input, OutputIter output)
+      : sum(sum), identity(identity), f(f), input(input), output(output) {}
+  ScanBody(ScanBody &b, tbb::split)
+      : sum(b.identity),
+        identity(b.identity),
+        f(b.f),
+        input(b.input),
+        output(b.output) {}
+  template <typename Tag>
+  void operator()(const tbb::blocked_range<size_t> &r, Tag) {
+    T temp = sum;
+    for (size_t i = r.begin(); i < r.end(); ++i) {
+      T inputTmp = input[i];
+      if (Tag::is_final_scan()) output[i] = temp;
+      temp = f(temp, inputTmp);
+    }
+    sum = temp;
+  }
+  T get_sum() const { return sum; }
+  void reverse_join(ScanBody &a) { sum = f(a.sum, sum); }
+  void assign(ScanBody &b) { sum = b.sum; }
+};
+
+template <typename InputIter, typename OutputIter, typename P>
+struct CopyIfScanBody {
+  size_t sum;
+  P &pred;
+  InputIter input;
+  OutputIter output;
+
+  CopyIfScanBody(P &pred, InputIter input, OutputIter output)
+      : sum(0), pred(pred), input(input), output(output) {}
+  CopyIfScanBody(CopyIfScanBody &b, tbb::split)
+      : sum(0), pred(b.pred), input(b.input), output(b.output) {}
+  template <typename Tag>
+  void operator()(const tbb::blocked_range<size_t> &r, Tag) {
+    size_t temp = sum;
+    for (size_t i = r.begin(); i < r.end(); ++i) {
+      if (pred(i)) {
+        temp += 1;
+        if (Tag::is_final_scan()) output[temp - 1] = input[i];
+      }
+    }
+    sum = temp;
+  }
+  size_t get_sum() const { return sum; }
+  void reverse_join(CopyIfScanBody &a) { sum = a.sum + sum; }
+  void assign(CopyIfScanBody &b) { sum = b.sum; }
+};
+
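+// The radix sort below processes integer keys one byte at a time: histogram()
+// counts byte values per digit, Hist::prefixSum() turns those counts into
+// output offsets (and flags digits where all keys share the same byte so that
+// pass can be skipped), and shuffle() scatters keys into their buckets.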
+template <typename N, const int K>
+struct Hist {
+  using SizeType = N;
+  static constexpr int k = K;
+  N hist[k][256] = {{0}};
+  void merge(const Hist<N, K> &other) {
+    for (int i = 0; i < k; ++i)
+      for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j];
+  }
+  void prefixSum(N total, bool *canSkip) {
+    for (int i = 0; i < k; ++i) {
+      size_t count = 0;
+      for (int j = 0; j < 256; ++j) {
+        N tmp = hist[i][j];
+        hist[i][j] = count;
+        count += tmp;
+        if (tmp == total) canSkip[i] = true;
+      }
+    }
+  }
+};
+
+template <typename T, typename H>
+void histogram(T *ptr, typename H::SizeType n, H &hist) {
+  auto worker = [](T *ptr, typename H::SizeType n, H &hist) {
+    for (typename H::SizeType i = 0; i < n; ++i)
+      for (int k = 0; k < hist.k; ++k)
+        ++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF];
+  };
+  if (n < kSeqThreshold) {
+    worker(ptr, n, hist);
+  } else {
+    tbb::combinable<H> store;
+    tbb::parallel_for(
+        tbb::blocked_range<typename H::SizeType>(0, n, kSeqThreshold),
+        [&worker, &store, ptr](const auto &r) {
+          worker(ptr + r.begin(), r.end() - r.begin(), store.local());
+        });
+    store.combine_each([&hist](const H &h) { hist.merge(h); });
+  }
+}
+
+template <typename T, typename H>
+void shuffle(T *src, T *target, typename H::SizeType n, H &hist, int k) {
+  for (typename H::SizeType i = 0; i < n; ++i)
+    target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i];
+}
+
+template <typename T, typename SizeType>
+bool LSB_radix_sort(T *input, T *tmp, SizeType n) {
+  Hist<SizeType, sizeof(T) / sizeof(char)> hist;
+  if (std::is_sorted(input, input + n)) return false;
+  histogram(input, n, hist);
+  bool canSkip[hist.k] = {0};
+  hist.prefixSum(n, canSkip);
+  T *a = input, *b = tmp;
+  for (int k = 0; k < hist.k; ++k) {
+    if (!canSkip[k]) {
+      shuffle(a, b, n, hist, k);
+      std::swap(a, b);
+    }
+  }
+  return a == tmp;
+}
+
+// LSB radix sort with merge
+template <typename T, typename SizeType>
+struct SortedRange {
+  T *input, *tmp;
+  SizeType offset = 0, length = 0;
+  bool inTmp = false;
+
+  SortedRange(T *input, T *tmp, SizeType offset = 0, SizeType length = 0)
+      : input(input), tmp(tmp), offset(offset), length(length) {}
+  SortedRange(SortedRange<T, SizeType> &r, tbb::split)
+      : input(r.input), tmp(r.tmp) {}
+  // FIXME: no idea why thread sanitizer reports data race here
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+  __attribute__((no_sanitize("thread")))
+#endif
+#endif
+  void
+  operator()(const tbb::blocked_range<SizeType> &range) {
+    SortedRange<T, SizeType> rhs(input, tmp, range.begin(),
+                                 range.end() - range.begin());
+    rhs.inTmp =
+        LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length);
+    if (length == 0)
+      *this = rhs;
+    else
+      join(rhs);
+  }
+  bool swapBuffer() const {
+    T *src = input, *target = tmp;
+    if (inTmp) std::swap(src, target);
+    copy(src + offset, src + offset + length, target + offset);
+    return !inTmp;
+  }
+  void join(const SortedRange<T, SizeType> &rhs) {
+    if (inTmp != rhs.inTmp) {
+      if (length < rhs.length)
+        inTmp = swapBuffer();
+      else
+        rhs.swapBuffer();
+    }
+    T *src = input, *target = tmp;
+    if (inTmp) std::swap(src, target);
+    if (src[offset + length - 1] > src[rhs.offset]) {
+      mergeRec(src, target, offset, offset + length, rhs.offset,
+               rhs.offset + rhs.length, offset, std::less<T>());
+      inTmp = !inTmp;
+    }
+    length += rhs.length;
+  }
+};
+
+template <typename T, typename SizeTy>
+void radix_sort(T *input, SizeTy n) {
+  T *aux = new T[n];
+  SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4,
+                              static_cast<SizeTy>(kSeqThreshold / sizeof(T)));
+  SortedRange<T, SizeTy> result(input, aux);
+  tbb::parallel_reduce(tbb::blocked_range<SizeTy>(0, n, blockSize), result);
+  if (result.inTmp) copy(aux, aux + n, input);
+  delete[] aux;
+}
+
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type,
+          typename Comp = decltype(std::less<T>())>
+void mergeSort(ExecutionPolicy policy, Iterator first, Iterator last,
+               Comp comp) {
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    // tbb::this_task_arena::isolate keeps this thread from picking up
+    // unrelated outer tasks while it waits on the nested parallel work.
+    tbb::this_task_arena::isolate([&] {
+      size_t length = std::distance(first, last);
+      T *tmp = new T[length];
+      copy(policy, first, last, tmp);
+      details::mergeSortRec(tmp, first, 0, length, comp);
+      delete[] tmp;
+    });
+    return;
+  }
+#endif
+  std::stable_sort(first, last, comp);
+}
+
+// stable_sort using merge sort.
+//
+// For simpler implementation, we do not support types that are not trivially
+// destructible.
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type,
+          typename Dummy = void>
+struct SortFunctor {
+  void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
+    static_assert(
+        std::is_convertible_v<
+            typename std::iterator_traits<Iterator>::iterator_category,
+            std::random_access_iterator_tag>,
+        "You can only parallelize RandomAccessIterator.");
+    static_assert(std::is_trivially_destructible_v<T>,
+                  "Our simple implementation does not support types that are "
+                  "not trivially destructable.");
+    return mergeSort(policy, first, last, std::less<T>());
+  }
+};
+
+// stable_sort specialized with radix sort for integral types.
+// Typically faster than merge sort.
+template <typename Iterator, typename T>
+struct SortFunctor<
+    Iterator, T,
+    std::enable_if_t<
+        std::is_integral_v<T> &&
+        std::is_pointer_v<typename std::iterator_traits<Iterator>::pointer>>> {
+  void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
+    static_assert(
+        std::is_convertible_v<
+            typename std::iterator_traits<Iterator>::iterator_category,
+            std::random_access_iterator_tag>,
+        "You can only parallelize RandomAccessIterator.");
+    static_assert(std::is_trivially_destructible_v<T>,
+                  "Our simple implementation does not support types that are "
+                  "not trivially destructable.");
+#if (MANIFOLD_PAR == 1)
+    if (policy == ExecutionPolicy::Par) {
+      radix_sort(&*first, static_cast<size_t>(std::distance(first, last)));
+      return;
+    }
+#endif
+    stable_sort(policy, first, last, std::less<T>());
+  }
+};
+
+}  // namespace details
+
+#endif
+
+// Applies the function `f` to each element in the range `[first, last)`
+template <typename Iter, typename F>
+void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<Iter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    tbb::parallel_for(tbb::blocked_range<Iter>(first, last),
+                      [&f](const tbb::blocked_range<Iter> &range) {
+                        for (Iter i = range.begin(); i != range.end(); i++)
+                          f(*i);
+                      });
+    return;
+  }
+#endif
+  std::for_each(first, last, f);
+}
+
+// Applies the function `f` to each element in the range `[first, last)`
+template <typename Iter, typename F>
+void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<Iter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  for_each(policy, first, first + n, f);
+}
+
+// Reduce the range `[first, last)` using a binary operation `f` with an initial
+// value `init`.
+//
+// The binary operation should be commutative and associative. Otherwise, the
+// result is non-deterministic.
+template <typename InputIter, typename BinaryOp,
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init,
+         BinaryOp f) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    // should we use deterministic reduce here?
+    return tbb::parallel_reduce(
+        tbb::blocked_range<InputIter>(first, last, details::kSeqThreshold),
+        init,
+        [&f](const tbb::blocked_range<InputIter> &range, T value) {
+          return std::reduce(range.begin(), range.end(), value, f);
+        },
+        f);
+  }
+#endif
+  return std::reduce(first, last, init, f);
+}
+
+// Reduce the range `[first, last)` using a binary operation `f` with an initial
+// value `init`.
+//
+// The binary operation should be commutative and associative. Otherwise, the
+// result is non-deterministic.
+template <typename InputIter, typename BinaryOp,
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+T reduce(InputIter first, InputIter last, T init, BinaryOp f) {
+  return reduce(autoPolicy(first, last, 1e5), first, last, init, f);
+}
+
+// Transform and reduce the range `[first, last)` by first applying a unary
+// function `g`, and then combining the results using a binary operation `f`
+// with an initial value `init`.
+//
+// The binary operation should be commutative and associative. Otherwise, the
+// result is non-deterministic.
+template <typename InputIter, typename BinaryOp, typename UnaryOp,
+          typename T = std::invoke_result_t<
+              UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
+T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last,
+                   T init, BinaryOp f, UnaryOp g) {
+  return reduce(policy, TransformIterator(first, g), TransformIterator(last, g),
+                init, f);
+}
+
+// Transform and reduce the range `[first, last)` by first applying a unary
+// function `g`, and then combining the results using a binary operation `f`
+// with an initial value `init`.
+//
+// The binary operation should be commutative and associative. Otherwise, the
+// result is non-deterministic.
+template <typename InputIter, typename BinaryOp, typename UnaryOp,
+          typename T = std::invoke_result_t<
+              UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
+T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f,
+                   UnaryOp g) {
+  return manifold::reduce(TransformIterator(first, g),
+                          TransformIterator(last, g), init, f);
+}
+
+// Compute the inclusive prefix sum for the range `[first, last)`
+// using the summation operator, and store the result in the range
+// starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
+template <typename InputIter, typename OutputIter,
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
+                    OutputIter d_first) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    tbb::parallel_scan(
+        tbb::blocked_range<size_t>(0, std::distance(first, last)),
+        static_cast<T>(0),
+        [&](const tbb::blocked_range<size_t> &range, T sum,
+            bool is_final_scan) {
+          T temp = sum;
+          for (size_t i = range.begin(); i < range.end(); ++i) {
+            temp = temp + first[i];
+            if (is_final_scan) d_first[i] = temp;
+          }
+          return temp;
+        },
+        std::plus<T>());
+    return;
+  }
+#endif
+  std::inclusive_scan(first, last, d_first);
+}
+
+// Compute the inclusive prefix sum for the range `[first, last)` using the
+// summation operator, and store the result in the range
+// starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
+template <typename InputIter, typename OutputIter,
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) {
+  return inclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first);
+}
+
+// Compute the inclusive prefix sum for the range `[first, last)` using the
+// binary operator `f`, with initial value `init` and
+// identity element `identity`, and store the result in the range
+// starting from `d_first`.
+//
+// This is different from `exclusive_scan` in the sequential algorithm by
+// requiring an identity element. This is needed so that each block can be
+// scanned in parallel and combined later.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
+template <typename InputIter, typename OutputIter,
+          typename BinOp = decltype(std::plus<typename std::iterator_traits<
+                                        InputIter>::value_type>()),
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
+                    OutputIter d_first, T init = static_cast<T>(0),
+                    BinOp f = std::plus<T>(), T identity = static_cast<T>(0)) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    details::ScanBody<T, InputIter, OutputIter, BinOp> body(init, identity, f,
+                                                            first, d_first);
+    tbb::parallel_scan(
+        tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
+    return;
+  }
+#endif
+  std::exclusive_scan(first, last, d_first, init, f);
+}
+
+// Compute the inclusive prefix sum for the range `[first, last)` using the
+// binary operator `f`, with initial value `init` and
+// identity element `identity`, and store the result in the range
+// starting from `d_first`.
+//
+// This is different from `exclusive_scan` in the sequential algorithm by
+// requiring an identity element. This is needed so that each block can be
+// scanned in parallel and combined later.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
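+//
+// Example (illustrative sketch): turning per-element counts into start offsets.
+//   std::vector<int> counts{2, 3, 1}, offsets(3);
+//   exclusive_scan(counts.begin(), counts.end(), offsets.begin());
+//   // offsets is now {0, 2, 5}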
+template <typename InputIter, typename OutputIter,
+          typename BinOp = decltype(std::plus<typename std::iterator_traits<
+                                        InputIter>::value_type>()),
+          typename T = typename std::iterator_traits<InputIter>::value_type>
+void exclusive_scan(InputIter first, InputIter last, OutputIter d_first,
+                    T init = static_cast<T>(0), BinOp f = std::plus<T>(),
+                    T identity = static_cast<T>(0)) {
+  exclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first, init, f,
+                 identity);
+}
+
+// Apply function `f` on the input range `[first, last)` and store the result in
+// the range starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
+template <typename InputIter, typename OutputIter, typename F>
+void transform(ExecutionPolicy policy, InputIter first, InputIter last,
+               OutputIter d_first, F f) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    tbb::parallel_for(tbb::blocked_range<size_t>(
+                          0, static_cast<size_t>(std::distance(first, last))),
+                      [&](const tbb::blocked_range<size_t> &range) {
+                        std::transform(first + range.begin(),
+                                       first + range.end(),
+                                       d_first + range.begin(), f);
+                      });
+    return;
+  }
+#endif
+  std::transform(first, last, d_first, f);
+}
+
+// Apply function `f` on the input range `[first, last)` and store the result in
+// the range starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must be equal or non-overlapping.
+template <typename InputIter, typename OutputIter, typename F>
+void transform(InputIter first, InputIter last, OutputIter d_first, F f) {
+  transform(autoPolicy(first, last, 1e5), first, last, d_first, f);
+}
+
+// Copy the input range `[first, last)` to the output range
+// starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must not overlap.
+template <typename InputIter, typename OutputIter>
+void copy(ExecutionPolicy policy, InputIter first, InputIter last,
+          OutputIter d_first) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    tbb::parallel_for(tbb::blocked_range<size_t>(
+                          0, static_cast<size_t>(std::distance(first, last)),
+                          details::kSeqThreshold),
+                      [&](const tbb::blocked_range<size_t> &range) {
+                        std::copy(first + range.begin(), first + range.end(),
+                                  d_first + range.begin());
+                      });
+    return;
+  }
+#endif
+  std::copy(first, last, d_first);
+}
+
+// Copy the input range `[first, last)` to the output range
+// starting from `d_first`.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must not overlap.
+template <typename InputIter, typename OutputIter>
+void copy(InputIter first, InputIter last, OutputIter d_first) {
+  copy(autoPolicy(first, last, 1e6), first, last, d_first);
+}
+
+// Copy the input range `[first, first + n)` to the output range
+// starting from `d_first`.
+//
+// The input range `[first, first + n)` and
+// the output range `[d_first, d_first + n)`
+// must not overlap.
+template <typename InputIter, typename OutputIter>
+void copy_n(ExecutionPolicy policy, InputIter first, size_t n,
+            OutputIter d_first) {
+  copy(policy, first, first + n, d_first);
+}
+
+// Copy the input range `[first, first + n)` to the output range
+// starting from `d_first`.
+//
+// The input range `[first, first + n)` and
+// the output range `[d_first, d_first + n)`
+// must not overlap.
+template <typename InputIter, typename OutputIter>
+void copy_n(InputIter first, size_t n, OutputIter d_first) {
+  copy(autoPolicy(n, 1e6), first, first + n, d_first);
+}
+
+// Fill the range `[first, last)` with `value`.
+template <typename OutputIter, typename T>
+void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) {
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    tbb::parallel_for(tbb::blocked_range<OutputIter>(first, last),
+                      [&](const tbb::blocked_range<OutputIter> &range) {
+                        std::fill(range.begin(), range.end(), value);
+                      });
+    return;
+  }
+#endif
+  std::fill(first, last, value);
+}
+
+// Fill the range `[first, last)` with `value`.
+template <typename OutputIter, typename T>
+void fill(OutputIter first, OutputIter last, T value) {
+  fill(autoPolicy(first, last, 5e5), first, last, value);
+}
+
+// Count the number of elements in the input range `[first, last)` satisfying
+// predicate `pred`, i.e. `pred(x) == true`.
+template <typename InputIter, typename P>
+size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last,
+                P pred) {
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    return reduce(policy, TransformIterator(first, pred),
+                  TransformIterator(last, pred), 0, std::plus<size_t>());
+  }
+#endif
+  return std::count_if(first, last, pred);
+}
+
+// Count the number of elements in the input range `[first, last)` satisfying
+// predicate `pred`, i.e. `pred(x) == true`.
+template <typename InputIter, typename P>
+size_t count_if(InputIter first, InputIter last, P pred) {
+  return count_if(autoPolicy(first, last, 1e4), first, last, pred);
+}
+
+// Check if all elements in the input range `[first, last)` satisfy
+// predicate `pred`, i.e. `pred(x) == true`.
+template <typename InputIter, typename P>
+bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    // should we use deterministic reduce here?
+    return tbb::parallel_reduce(
+        tbb::blocked_range<InputIter>(first, last), true,
+        [&](const tbb::blocked_range<InputIter> &range, bool value) {
+          if (!value) return false;
+          for (InputIter i = range.begin(); i != range.end(); i++)
+            if (!pred(*i)) return false;
+          return true;
+        },
+        [](bool a, bool b) { return a && b; });
+  }
+#endif
+  return std::all_of(first, last, pred);
+}
+
+// Check if all elements in the input range `[first, last)` satisfy
+// predicate `pred`, i.e. `pred(x) == true`.
+template <typename InputIter, typename P>
+bool all_of(InputIter first, InputIter last, P pred) {
+  return all_of(autoPolicy(first, last, 1e5), first, last, pred);
+}
+
+// Copy values in the input range `[first, last)` to the output range
+// starting from `d_first` that satisfies the predicate `pred`,
+// i.e. `pred(x) == true`, and returns `d_first + n` where `n` is the number of
+// times the predicate is evaluated to true.
+//
+// This function is stable, meaning that the relative order of elements in the
+// output range remains unchanged.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must not overlap.
+template <typename InputIter, typename OutputIter, typename P>
+OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last,
+                   OutputIter d_first, P pred) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<InputIter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(
+      std::is_convertible_v<
+          typename std::iterator_traits<OutputIter>::iterator_category,
+          std::random_access_iterator_tag>,
+      "You can only parallelize RandomAccessIterator.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    auto pred2 = [&](size_t i) { return pred(first[i]); };
+    details::CopyIfScanBody body(pred2, first, d_first);
+    tbb::parallel_scan(
+        tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
+    return d_first + body.get_sum();
+  }
+#endif
+  return std::copy_if(first, last, d_first, pred);
+}
+
+// Copy values in the input range `[first, last)` to the output range
+// starting from `d_first` that satisfies the predicate `pred`, i.e. `pred(x) ==
+// true`, and returns `d_first + n` where `n` is the number of times the
+// predicate is evaluated to true.
+//
+// This function is stable, meaning that the relative order of elements in the
+// output range remains unchanged.
+//
+// The input range `[first, last)` and
+// the output range `[d_first, d_first + last - first)`
+// must not overlap.
+template <typename InputIter, typename OutputIter, typename P>
+OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first,
+                   P pred) {
+  return copy_if(autoPolicy(first, last, 1e5), first, last, d_first, pred);
+}
+
+// Remove values in the input range `[first, last)` that satisfies
+// the predicate `pred`, i.e. `pred(x) == true`, and returns `first + n`
+// where `n` is the number of times the predicate is evaluated to false.
+//
+// This function is stable, meaning that the relative order of elements that
+// remained are unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter, typename P,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<Iter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(std::is_trivially_destructible_v<T>,
+                "Our simple implementation does not support types that are "
+                "not trivially destructable.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    T *tmp = new T[std::distance(first, last)];
+    auto back =
+        copy_if(policy, first, last, tmp, [&](T v) { return !pred(v); });
+    copy(policy, tmp, back, first);
+    auto d = std::distance(tmp, back);
+    delete[] tmp;
+    return first + d;
+  }
+#endif
+  return std::remove_if(first, last, pred);
+}
+
+// Remove values in the input range `[first, last)` that satisfies
+// the predicate `pred`, i.e. `pred(x) == true`, and
+// returns `first + n` where `n` is the number of times the predicate is
+// evaluated to false.
+//
+// This function is stable, meaning that the relative order of elements that
+// remained are unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter, typename P,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter remove_if(Iter first, Iter last, P pred) {
+  return remove_if(autoPolicy(first, last, 1e4), first, last, pred);
+}
+
+// Remove values in the input range `[first, last)` that are equal to `value`.
+// Returns `first + n` where `n` is the number of values
+// that are not equal to `value`.
+//
+// This function is stable, meaning that the relative order of the elements
+// that remain is unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<Iter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(std::is_trivially_destructible_v<T>,
+                "Our simple implementation does not support types that are "
+                "not trivially destructible.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par) {
+    T *tmp = new T[std::distance(first, last)];
+    auto back =
+        copy_if(policy, first, last, tmp, [&](T v) { return v != value; });
+    copy(policy, tmp, back, first);
+    auto d = std::distance(tmp, back);
+    delete[] tmp;
+    return first + d;
+  }
+#endif
+  return std::remove(first, last, value);
+}
+
+// Remove values in the input range `[first, last)` that are equal to `value`.
+// Returns `first + n` where `n` is the number of values
+// that are not equal to `value`.
+//
+// This function is stable, meaning that the relative order of the elements
+// that remain is unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter remove(Iter first, Iter last, T value) {
+  return remove(autoPolicy(first, last, 1e4), first, last, value);
+}
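+
+// Example (illustrative only, not part of the library): drop even values, then
+// drop a specific value, erasing the tail as with the std:: equivalents.
+//
+//   std::vector<int> v{1, 2, 3, 4, 5};
+//   auto newEnd = manifold::remove_if(v.begin(), v.end(),
+//                                     [](int x) { return x % 2 == 0; });
+//   v.erase(newEnd, v.end());                                   // v == {1, 3, 5}
+//   v.erase(manifold::remove(v.begin(), v.end(), 3), v.end());  // v == {1, 5}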
+
+// For each group of consecutive elements in the range `[first, last)` with the
+// same value, unique removes all but the first element of the group. The return
+// value is an iterator `new_last` such that no two consecutive elements in the
+// range `[first, new_last)` are equal.
+//
+// This function is stable, meaning that the relative order of the elements
+// that remain is unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter unique(ExecutionPolicy policy, Iter first, Iter last) {
+  static_assert(std::is_convertible_v<
+                    typename std::iterator_traits<Iter>::iterator_category,
+                    std::random_access_iterator_tag>,
+                "You can only parallelize RandomAccessIterator.");
+  static_assert(std::is_trivially_destructible_v<T>,
+                "Our simple implementation does not support types that are "
+                "not trivially destructible.");
+#if (MANIFOLD_PAR == 1)
+  if (policy == ExecutionPolicy::Par && first != last) {
+    Iter newSrcStart = first;
+    // Cap the maximum buffer size; this proved beneficial for unique on huge
+    // arrays.
+    constexpr size_t MAX_BUFFER_SIZE = 1 << 16;
+    T *tmp = new T[std::min(MAX_BUFFER_SIZE,
+                            static_cast<size_t>(std::distance(first, last)))];
+    auto pred = [&](size_t i) { return tmp[i] != tmp[i + 1]; };
+    do {
+      size_t length =
+          std::min(MAX_BUFFER_SIZE,
+                   static_cast<size_t>(std::distance(newSrcStart, last)));
+      copy(policy, newSrcStart, newSrcStart + length, tmp);
+      *first = *newSrcStart;
+      // this is not a typo, the index i is offset by 1, so to compare an
+      // element with its predecessor we need to compare i and i + 1.
+      details::CopyIfScanBody body(pred, tmp + 1, first + 1);
+      tbb::parallel_scan(tbb::blocked_range<size_t>(0, length - 1), body);
+      first += body.get_sum() + 1;
+      newSrcStart += length;
+    } while (newSrcStart != last);
+    delete[] tmp;
+    return first;
+  }
+#endif
+  return std::unique(first, last);
+}
+
+// For each group of consecutive elements in the range `[first, last)` with the
+// same value, unique removes all but the first element of the group. The return
+// value is an iterator `new_last` such that no two consecutive elements in the
+// range `[first, new_last)` are equal.
+//
+// This function is stable, meaning that the relative order of the elements
+// that remain is unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iter,
+          typename T = typename std::iterator_traits<Iter>::value_type>
+Iter unique(Iter first, Iter last) {
+  return unique(autoPolicy(first, last, 1e4), first, last);
+}
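+
+// Example (illustrative only, not part of the library): collapse runs of
+// consecutive duplicates; non-adjacent duplicates are kept, as with std::unique.
+//
+//   std::vector<int> v{1, 1, 2, 2, 2, 3, 1};
+//   v.erase(manifold::unique(v.begin(), v.end()), v.end());  // v == {1, 2, 3, 1}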
+
+// Sort the input range `[first, last)` in ascending order.
+//
+// This function is stable, meaning that the relative order of elements that are
+// incomparable remains unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type>
+void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) {
+#if (MANIFOLD_PAR == 1)
+  details::SortFunctor<Iterator, T>()(policy, first, last);
+#else
+  std::stable_sort(first, last);
+#endif
+}
+
+// Sort the input range `[first, last)` in ascending order.
+//
+// This function is stable, meaning that the relative order of elements that are
+// incomparable remains unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type>
+void stable_sort(Iterator first, Iterator last) {
+  stable_sort(autoPolicy(first, last, 1e4), first, last);
+}
+
+// Sort the input range `[first, last)` in ascending order using the comparison
+// function `comp`.
+//
+// This function is stable, meaning that the relative order of elements that are
+// incomparable remains unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type,
+          typename Comp = decltype(std::less<T>())>
+void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last,
+                 Comp comp) {
+#if (MANIFOLD_PAR == 1)
+  details::mergeSort(policy, first, last, comp);
+#else
+  std::stable_sort(first, last, comp);
+#endif
+}
+
+// Sort the input range `[first, last)` in ascending order using the comparison
+// function `comp`.
+//
+// This function is stable, meaning that the relative order of elements that are
+// incomparable remains unchanged.
+//
+// Only trivially destructible types are supported.
+template <typename Iterator,
+          typename T = typename std::iterator_traits<Iterator>::value_type,
+          typename Comp = decltype(std::less<T>())>
+void stable_sort(Iterator first, Iterator last, Comp comp) {
+  stable_sort(autoPolicy(first, last, 1e4), first, last, comp);
+}
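+
+// Example (illustrative only, not part of the library): sort descending with a
+// custom comparator while preserving the relative order of equivalent elements.
+//
+//   std::vector<int> v{3, 1, 2, 1};
+//   manifold::stable_sort(v.begin(), v.end(), std::greater<int>());
+//   // v == {3, 2, 1, 1}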
+
+// `scatter` copies elements from a source range into an output array according
+// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
+// is assigned to `outputFirst[mapFirst[i - first]]`.  If the same index appears
+// more than once in the range `[mapFirst, mapFirst + (last - first))`, the
+// result is undefined.
+//
+// The map range, input range and the output range must not overlap.
+template <typename InputIterator1, typename InputIterator2,
+          typename OutputIterator>
+void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last,
+             InputIterator2 mapFirst, OutputIterator outputFirst) {
+  for_each(policy, countAt(0),
+           countAt(static_cast<size_t>(std::distance(first, last))),
+           [first, mapFirst, outputFirst](size_t i) {
+             outputFirst[mapFirst[i]] = first[i];
+           });
+}
+
+// `scatter` copies elements from a source range into an output array according
+// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
+// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears
+// more than once in the range `[mapFirst, mapFirst + (last - first))`,
+// the result is undefined.
+//
+// The map range, input range and the output range must not overlap.
+template <typename InputIterator1, typename InputIterator2,
+          typename OutputIterator>
+void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst,
+             OutputIterator outputFirst) {
+  scatter(autoPolicy(first, last, 1e5), first, last, mapFirst, outputFirst);
+}
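+
+// Example (illustrative only, not part of the library): permute `src` into
+// `dst` so that dst[map[i]] = src[i].
+//
+//   std::vector<int> src{10, 20, 30};
+//   std::vector<int> map{2, 0, 1};
+//   std::vector<int> dst(3);
+//   manifold::scatter(src.begin(), src.end(), map.begin(), dst.begin());
+//   // dst == {20, 30, 10}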
+
+// `gather` copies elements from a source array into a destination range
+// according to a map. For each input iterator `i`
+// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
+// is assigned to `outputFirst[i - mapFirst]`.
+//
+// The map range, input range and the output range must not overlap.
+template <typename InputIterator, typename RandomAccessIterator,
+          typename OutputIterator>
+void gather(ExecutionPolicy policy, InputIterator mapFirst,
+            InputIterator mapLast, RandomAccessIterator inputFirst,
+            OutputIterator outputFirst) {
+  for_each(policy, countAt(0),
+           countAt(static_cast<size_t>(std::distance(mapFirst, mapLast))),
+           [mapFirst, inputFirst, outputFirst](size_t i) {
+             outputFirst[i] = inputFirst[mapFirst[i]];
+           });
+}
+
+// `gather` copies elements from a source array into a destination range
+// according to a map. For each input iterator `i`
+// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
+// is assigned to `outputFirst[i - mapFirst]`.
+//
+// The map range, input range and the output range must not overlap.
+template <typename InputIterator, typename RandomAccessIterator,
+          typename OutputIterator>
+void gather(InputIterator mapFirst, InputIterator mapLast,
+            RandomAccessIterator inputFirst, OutputIterator outputFirst) {
+  gather(autoPolicy(std::distance(mapFirst, mapLast), 1e5), mapFirst, mapLast,
+         inputFirst, outputFirst);
+}
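+
+// Example (illustrative only, not part of the library): the inverse access
+// pattern of scatter - out[i] = input[map[i]].
+//
+//   std::vector<int> input{10, 20, 30};
+//   std::vector<int> map{2, 0, 1};
+//   std::vector<int> out(3);
+//   manifold::gather(map.begin(), map.end(), input.begin(), out.begin());
+//   // out == {30, 10, 20}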
+
+// Write `[0, last - first)` to the range `[first, last)`.
+template <typename Iterator>
+void sequence(ExecutionPolicy policy, Iterator first, Iterator last) {
+  for_each(policy, countAt(0),
+           countAt(static_cast<size_t>(std::distance(first, last))),
+           [first](size_t i) { first[i] = i; });
+}
+
+// Write `[0, last - first)` to the range `[first, last)`.
+template <typename Iterator>
+void sequence(Iterator first, Iterator last) {
+  sequence(autoPolicy(first, last, 1e5), first, last);
+}
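+
+// Example (illustrative only, not part of the library):
+//
+//   std::vector<size_t> idx(5);
+//   manifold::sequence(idx.begin(), idx.end());  // idx == {0, 1, 2, 3, 4}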
+
+}  // namespace manifold

+ 1010 - 0
thirdparty/manifold/src/polygon.cpp

@@ -0,0 +1,1010 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "manifold/polygon.h"
+
+#include <functional>
+#include <map>
+#include <set>
+
+#include "./collider.h"
+#include "./parallel.h"
+#include "./utils.h"
+#include "manifold/optional_assert.h"
+
+namespace {
+using namespace manifold;
+
+static ExecutionParams params;
+
+constexpr double kBest = -std::numeric_limits<double>::infinity();
+
+// it seems that MSVC cannot optimize la::determinant(mat2(a, b))
+constexpr double determinant2x2(vec2 a, vec2 b) {
+  return a.x * b.y - a.y * b.x;
+}
+
+#ifdef MANIFOLD_DEBUG
+struct PolyEdge {
+  int startVert, endVert;
+};
+
+std::vector<PolyEdge> Polygons2Edges(const PolygonsIdx &polys) {
+  std::vector<PolyEdge> halfedges;
+  for (const auto &poly : polys) {
+    for (size_t i = 1; i < poly.size(); ++i) {
+      halfedges.push_back({poly[i - 1].idx, poly[i].idx});
+    }
+    halfedges.push_back({poly.back().idx, poly[0].idx});
+  }
+  return halfedges;
+}
+
+std::vector<PolyEdge> Triangles2Edges(const std::vector<ivec3> &triangles) {
+  std::vector<PolyEdge> halfedges;
+  halfedges.reserve(triangles.size() * 3);
+  for (const ivec3 &tri : triangles) {
+    halfedges.push_back({tri[0], tri[1]});
+    halfedges.push_back({tri[1], tri[2]});
+    halfedges.push_back({tri[2], tri[0]});
+  }
+  return halfedges;
+}
+
+void CheckTopology(const std::vector<PolyEdge> &halfedges) {
+  DEBUG_ASSERT(halfedges.size() % 2 == 0, topologyErr,
+               "Odd number of halfedges.");
+  size_t n_edges = halfedges.size() / 2;
+  std::vector<PolyEdge> forward(halfedges.size()), backward(halfedges.size());
+
+  auto end = std::copy_if(halfedges.begin(), halfedges.end(), forward.begin(),
+                          [](PolyEdge e) { return e.endVert > e.startVert; });
+  DEBUG_ASSERT(
+      static_cast<size_t>(std::distance(forward.begin(), end)) == n_edges,
+      topologyErr, "Half of halfedges should be forward.");
+  forward.resize(n_edges);
+
+  end = std::copy_if(halfedges.begin(), halfedges.end(), backward.begin(),
+                     [](PolyEdge e) { return e.endVert < e.startVert; });
+  DEBUG_ASSERT(
+      static_cast<size_t>(std::distance(backward.begin(), end)) == n_edges,
+      topologyErr, "Half of halfedges should be backward.");
+  backward.resize(n_edges);
+
+  std::for_each(backward.begin(), backward.end(),
+                [](PolyEdge &e) { std::swap(e.startVert, e.endVert); });
+  auto cmp = [](const PolyEdge &a, const PolyEdge &b) {
+    return a.startVert < b.startVert ||
+           (a.startVert == b.startVert && a.endVert < b.endVert);
+  };
+  std::stable_sort(forward.begin(), forward.end(), cmp);
+  std::stable_sort(backward.begin(), backward.end(), cmp);
+  for (size_t i = 0; i < n_edges; ++i) {
+    DEBUG_ASSERT(forward[i].startVert == backward[i].startVert &&
+                     forward[i].endVert == backward[i].endVert,
+                 topologyErr, "Not manifold.");
+  }
+}
+
+void CheckTopology(const std::vector<ivec3> &triangles,
+                   const PolygonsIdx &polys) {
+  std::vector<PolyEdge> halfedges = Triangles2Edges(triangles);
+  std::vector<PolyEdge> openEdges = Polygons2Edges(polys);
+  for (PolyEdge e : openEdges) {
+    halfedges.push_back({e.endVert, e.startVert});
+  }
+  CheckTopology(halfedges);
+}
+
+void CheckGeometry(const std::vector<ivec3> &triangles,
+                   const PolygonsIdx &polys, double epsilon) {
+  std::unordered_map<int, vec2> vertPos;
+  for (const auto &poly : polys) {
+    for (size_t i = 0; i < poly.size(); ++i) {
+      vertPos[poly[i].idx] = poly[i].pos;
+    }
+  }
+  DEBUG_ASSERT(std::all_of(triangles.begin(), triangles.end(),
+                           [&vertPos, epsilon](const ivec3 &tri) {
+                             return CCW(vertPos[tri[0]], vertPos[tri[1]],
+                                        vertPos[tri[2]], epsilon) >= 0;
+                           }),
+               geometryErr, "triangulation is not entirely CCW!");
+}
+
+void Dump(const PolygonsIdx &polys, double epsilon) {
+  std::cout << "Polygon 0 " << epsilon << " " << polys.size() << std::endl;
+  for (auto poly : polys) {
+    std::cout << poly.size() << std::endl;
+    for (auto v : poly) {
+      std::cout << v.pos.x << " " << v.pos.y << std::endl;
+    }
+  }
+  std::cout << "# ... " << std::endl;
+  for (auto poly : polys) {
+    std::cout << "show(array([" << std::endl;
+    for (auto v : poly) {
+      std::cout << "  [" << v.pos.x << ", " << v.pos.y << "]," << std::endl;
+    }
+    std::cout << "]))" << std::endl;
+  }
+}
+
+void PrintFailure(const std::exception &e, const PolygonsIdx &polys,
+                  std::vector<ivec3> &triangles, double epsilon) {
+  std::cout << "-----------------------------------" << std::endl;
+  std::cout << "Triangulation failed! Precision = " << epsilon << std::endl;
+  std::cout << e.what() << std::endl;
+  if (triangles.size() > 1000 && !PolygonParams().verbose) {
+    std::cout << "Output truncated due to producing " << triangles.size()
+              << " triangles." << std::endl;
+    return;
+  }
+  Dump(polys, epsilon);
+  std::cout << "produced this triangulation:" << std::endl;
+  for (size_t j = 0; j < triangles.size(); ++j) {
+    std::cout << triangles[j][0] << ", " << triangles[j][1] << ", "
+              << triangles[j][2] << std::endl;
+  }
+}
+
+#define PRINT(msg) \
+  if (params.verbose) std::cout << msg << std::endl;
+#else
+#define PRINT(msg)
+#endif
+
+/**
+ * Tests if the input polygons are convex by searching for any reflex vertices.
+ * Exactly colinear edges and zero-length edges are treated conservatively as
+ * reflex. Does not check for overlaps.
+ */
+bool IsConvex(const PolygonsIdx &polys, double epsilon) {
+  for (const SimplePolygonIdx &poly : polys) {
+    const vec2 firstEdge = poly[0].pos - poly[poly.size() - 1].pos;
+    // Zero-length edges come out as NaN, which won't trip the early return,
+    // but that's okay because the zero-length edge will also get tested
+    // non-normalized and will trip det == 0.
+    vec2 lastEdge = la::normalize(firstEdge);
+    for (size_t v = 0; v < poly.size(); ++v) {
+      const vec2 edge =
+          v + 1 < poly.size() ? poly[v + 1].pos - poly[v].pos : firstEdge;
+      const double det = determinant2x2(lastEdge, edge);
+      if (det <= 0 || (std::abs(det) < epsilon && la::dot(lastEdge, edge) < 0))
+        return false;
+      lastEdge = la::normalize(edge);
+    }
+  }
+  return true;
+}
+
+/**
+ * Triangulates a set of convex polygons by alternating instead of a fan, to
+ * avoid creating high-degree vertices.
+ */
+std::vector<ivec3> TriangulateConvex(const PolygonsIdx &polys) {
+  const size_t numTri = manifold::transform_reduce(
+      polys.begin(), polys.end(), 0_uz,
+      [](size_t a, size_t b) { return a + b; },
+      [](const SimplePolygonIdx &poly) { return poly.size() - 2; });
+  std::vector<ivec3> triangles;
+  triangles.reserve(numTri);
+  for (const SimplePolygonIdx &poly : polys) {
+    size_t i = 0;
+    size_t k = poly.size() - 1;
+    bool right = true;
+    while (i + 1 < k) {
+      const size_t j = right ? i + 1 : k - 1;
+      triangles.push_back({poly[i].idx, poly[j].idx, poly[k].idx});
+      if (right) {
+        i = j;
+      } else {
+        k = j;
+      }
+      right = !right;
+    }
+  }
+  return triangles;
+}
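+
+// Illustrative trace (not part of the library): for a single convex polygon
+// with vertex indices 0..5, the alternating loop above emits
+// (0, 1, 5), (1, 4, 5), (1, 2, 4), (2, 3, 4) - four triangles that spread the
+// vertex degree, whereas a fan from vertex 0 would use it in every triangle.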
+
+/**
+ * Ear-clipping triangulator based on David Eberly's approach from Geometric
+ * Tools, but adjusted to handle epsilon-valid polygons, and including a
+ * fallback that ensures a manifold triangulation even for overlapping polygons.
+ * This is an O(n^2) algorithm, but hopefully this is not a big problem as the
+ * number of edges in a given polygon is generally much less than the number of
+ * triangles in a mesh, and relatively few faces even need triangulation.
+ *
+ * The main adjustments for robustness involve clipping the sharpest ears first
+ * (a known technique to get higher triangle quality), and doing an exhaustive
+ * search to determine ear convexity exactly if the first geometric result is
+ * within epsilon.
+ */
+
+class EarClip {
+ public:
+  EarClip(const PolygonsIdx &polys, double epsilon) : epsilon_(epsilon) {
+    ZoneScoped;
+
+    size_t numVert = 0;
+    for (const SimplePolygonIdx &poly : polys) {
+      numVert += poly.size();
+    }
+    polygon_.reserve(numVert + 2 * polys.size());
+
+    std::vector<VertItr> starts = Initialize(polys);
+
+    for (VertItr v = polygon_.begin(); v != polygon_.end(); ++v) {
+      ClipIfDegenerate(v);
+    }
+
+    for (const VertItr first : starts) {
+      FindStart(first);
+    }
+  }
+
+  std::vector<ivec3> Triangulate() {
+    ZoneScoped;
+
+    for (const VertItr start : holes_) {
+      CutKeyhole(start);
+    }
+
+    for (const VertItr start : simples_) {
+      TriangulatePoly(start);
+    }
+
+    return triangles_;
+  }
+
+  double GetPrecision() const { return epsilon_; }
+
+ private:
+  struct Vert;
+  typedef std::vector<Vert>::iterator VertItr;
+  typedef std::vector<Vert>::const_iterator VertItrC;
+  struct MaxX {
+    bool operator()(const VertItr &a, const VertItr &b) const {
+      return a->pos.x > b->pos.x;
+    }
+  };
+  struct MinCost {
+    bool operator()(const VertItr &a, const VertItr &b) const {
+      return a->cost < b->cost;
+    }
+  };
+  typedef std::set<VertItr, MinCost>::iterator qItr;
+
+  // The flat list where all the Verts are stored. Not used much for traversal.
+  std::vector<Vert> polygon_;
+  // The set of right-most starting points, one for each negative-area contour.
+  std::multiset<VertItr, MaxX> holes_;
+  // The set of starting points, one for each positive-area contour.
+  std::vector<VertItr> outers_;
+  // The set of starting points, one for each simple polygon.
+  std::vector<VertItr> simples_;
+  // Maps each hole (by way of starting point) to its bounding box.
+  std::map<VertItr, Rect> hole2BBox_;
+  // A priority queue of valid ears - the multiset allows them to be updated.
+  std::multiset<VertItr, MinCost> earsQueue_;
+  // The output triangulation.
+  std::vector<ivec3> triangles_;
+  // Bounding box of the entire set of polygons
+  Rect bBox_;
+  // Working epsilon: max of float error and input value.
+  double epsilon_;
+
+  struct IdxCollider {
+    Collider collider;
+    std::vector<VertItr> itr;
+    SparseIndices ind;
+  };
+
+  // A circularly-linked list representing the polygon(s) that still need to be
+  // triangulated. This gets smaller as ears are clipped until it degenerates to
+  // two points and terminates.
+  struct Vert {
+    int mesh_idx;
+    double cost;
+    qItr ear;
+    vec2 pos, rightDir;
+    VertItr left, right;
+
+    // Shorter than half of epsilon, to be conservative so that it doesn't
+    // cause CW triangles that exceed epsilon due to rounding error.
+    bool IsShort(double epsilon) const {
+      const vec2 edge = right->pos - pos;
+      return la::dot(edge, edge) * 4 < epsilon * epsilon;
+    }
+
+    // Like CCW, returns 1 if v is on the inside of the angle formed at this
+    // vert, -1 on the outside, and 0 if it's within epsilon of the boundary.
+    // Ensure v is more than epsilon away from pos; if it is within epsilon,
+    // this simply returns 0.
+    int Interior(vec2 v, double epsilon) const {
+      const vec2 diff = v - pos;
+      if (la::dot(diff, diff) < epsilon * epsilon) {
+        return 0;
+      }
+      return CCW(pos, left->pos, right->pos, epsilon) +
+             CCW(pos, right->pos, v, epsilon) + CCW(pos, v, left->pos, epsilon);
+    }
+
+    // Returns true if Vert is on the inside of the edge that goes from tail to
+    // tail->right. This will walk the edges if necessary until a clear answer
+    // is found (beyond epsilon). If toLeft is true, this Vert will walk its
+    // edges to the left. This should be chosen so that the edges walk in the
+    // same general direction - tail always walks to the right.
+    bool InsideEdge(VertItr tail, double epsilon, bool toLeft) const {
+      const double p2 = epsilon * epsilon;
+      VertItr nextL = left->right;
+      VertItr nextR = tail->right;
+      VertItr center = tail;
+      VertItr last = center;
+
+      while (nextL != nextR && tail != nextR &&
+             nextL != (toLeft ? right : left)) {
+        const vec2 edgeL = nextL->pos - center->pos;
+        const double l2 = la::dot(edgeL, edgeL);
+        if (l2 <= p2) {
+          nextL = toLeft ? nextL->left : nextL->right;
+          continue;
+        }
+
+        const vec2 edgeR = nextR->pos - center->pos;
+        const double r2 = la::dot(edgeR, edgeR);
+        if (r2 <= p2) {
+          nextR = nextR->right;
+          continue;
+        }
+
+        const vec2 vecLR = nextR->pos - nextL->pos;
+        const double lr2 = la::dot(vecLR, vecLR);
+        if (lr2 <= p2) {
+          last = center;
+          center = nextL;
+          nextL = toLeft ? nextL->left : nextL->right;
+          if (nextL == nextR) break;
+          nextR = nextR->right;
+          continue;
+        }
+
+        int convexity = CCW(nextL->pos, center->pos, nextR->pos, epsilon);
+        if (center != last) {
+          convexity += CCW(last->pos, center->pos, nextL->pos, epsilon) +
+                       CCW(nextR->pos, center->pos, last->pos, epsilon);
+        }
+        if (convexity != 0) return convexity > 0;
+
+        if (l2 < r2) {
+          center = nextL;
+          nextL = toLeft ? nextL->left : nextL->right;
+        } else {
+          center = nextR;
+          nextR = nextR->right;
+        }
+        last = center;
+      }
+      // The whole polygon is degenerate - consider this to be convex.
+      return true;
+    }
+
+    // A major key to robustness is to only clip convex ears, but this is
+    // difficult to determine when an edge is folded back on itself. This
+    // function walks down the kinks in a degenerate portion of a polygon until
+    // it finds a clear geometric result. In the vast majority of cases the loop
+    // will only need one or two iterations.
+    bool IsConvex(double epsilon) const {
+      const int convexity = CCW(left->pos, pos, right->pos, epsilon);
+      if (convexity != 0) {
+        return convexity > 0;
+      }
+      if (la::dot(left->pos - pos, right->pos - pos) <= 0) {
+        return true;
+      }
+      return left->InsideEdge(left->right, epsilon, true);
+    }
+
+    // Subtly different from !IsConvex because IsConvex will return true for
+    // colinear non-folded verts, while IsReflex will always check until actual
+    // certainty is determined.
+    bool IsReflex(double epsilon) const {
+      return !left->InsideEdge(left->right, epsilon, true);
+    }
+
+    // Returns the x-value on this edge corresponding to the start.y value,
+    // returning NAN if the edge does not cross the value from below to above,
+    // right of start - all within an epsilon tolerance. If onTop != 0, this
+    // restricts which end is allowed to terminate within the epsilon band.
+    double InterpY2X(vec2 start, int onTop, double epsilon) const {
+      if (la::abs(pos.y - start.y) <= epsilon) {
+        if (right->pos.y <= start.y + epsilon || onTop == 1) {
+          return NAN;
+        } else {
+          return pos.x;
+        }
+      } else if (pos.y < start.y - epsilon) {
+        if (right->pos.y > start.y + epsilon) {
+          return pos.x + (start.y - pos.y) * (right->pos.x - pos.x) /
+                             (right->pos.y - pos.y);
+        } else if (right->pos.y < start.y - epsilon || onTop == -1) {
+          return NAN;
+        } else {
+          return right->pos.x;
+        }
+      } else {
+        return NAN;
+      }
+    }
+
+    // This finds the cost of this vert relative to one of the two closed sides
+    // of the ear. Points are valid even when they touch, so long as their edge
+    // goes to the outside. No need to check the other side, since all verts are
+    // processed in the EarCost loop.
+    double SignedDist(VertItr v, vec2 unit, double epsilon) const {
+      double d = determinant2x2(unit, v->pos - pos);
+      if (std::abs(d) < epsilon) {
+        double dR = determinant2x2(unit, v->right->pos - pos);
+        if (std::abs(dR) > epsilon) return dR;
+        double dL = determinant2x2(unit, v->left->pos - pos);
+        if (std::abs(dL) > epsilon) return dL;
+      }
+      return d;
+    }
+
+    // Find the cost of Vert v within this ear, where openSide is the unit
+    // vector from Verts right to left - passed in for reuse.
+    double Cost(VertItr v, vec2 openSide, double epsilon) const {
+      double cost = std::min(SignedDist(v, rightDir, epsilon),
+                             SignedDist(v, left->rightDir, epsilon));
+
+      const double openCost = determinant2x2(openSide, v->pos - right->pos);
+      return std::min(cost, openCost);
+    }
+
+    // For verts outside the ear, apply a cost based on the Delaunay condition
+    // to aid in prioritization and produce cleaner triangulations. This doesn't
+    // affect robustness, but may be adjusted to improve output.
+    static double DelaunayCost(vec2 diff, double scale, double epsilon) {
+      return -epsilon - scale * la::dot(diff, diff);
+    }
+
+    // This is the expensive part of the algorithm, checking this ear against
+    // every Vert to ensure none are inside. The Collider brings the total
+    // triangulator cost down from O(n^2) to O(nlogn) for most large polygons.
+    //
+    // Think of a cost as vaguely a distance metric - 0 is right on the edge of
+    // being invalid. cost > epsilon is definitely invalid. Cost < -epsilon
+    // is definitely valid, so all improvement costs are designed to always give
+    // values < -epsilon so they will never affect validity. The first
+    // totalCost is designed to give priority to sharper angles. Any cost < (-1
+    // - epsilon) has satisfied the Delaunay condition.
+    double EarCost(double epsilon, IdxCollider &collider) const {
+      vec2 openSide = left->pos - right->pos;
+      const vec2 center = 0.5 * (left->pos + right->pos);
+      const double scale = 4 / la::dot(openSide, openSide);
+      const double radius = la::length(openSide) / 2;
+      openSide = la::normalize(openSide);
+
+      double totalCost = la::dot(left->rightDir, rightDir) - 1 - epsilon;
+      if (CCW(pos, left->pos, right->pos, epsilon) == 0) {
+        // Clip folded ears first
+        return totalCost;
+      }
+
+      Box earBox = Box{vec3(center.x - radius, center.y - radius, 0),
+                       vec3(center.x + radius, center.y + radius, 0)};
+      earBox.Union(vec3(pos, 0));
+      collider.collider.Collisions(VecView<const Box>(&earBox, 1),
+                                   collider.ind);
+
+      const int lid = left->mesh_idx;
+      const int rid = right->mesh_idx;
+
+      totalCost = transform_reduce(
+          countAt(0), countAt(collider.ind.size()), totalCost,
+          [](double a, double b) { return std::max(a, b); },
+          [&](size_t i) {
+            const VertItr test = collider.itr[collider.ind.Get(i, true)];
+            if (!Clipped(test) && test->mesh_idx != mesh_idx &&
+                test->mesh_idx != lid &&
+                test->mesh_idx != rid) {  // Skip duplicated verts
+              double cost = Cost(test, openSide, epsilon);
+              if (cost < -epsilon) {
+                cost = DelaunayCost(test->pos - center, scale, epsilon);
+              }
+              return cost;
+            }
+            return std::numeric_limits<double>::lowest();
+          });
+      collider.ind.Clear();
+      return totalCost;
+    }
+
+    void PrintVert() const {
+#ifdef MANIFOLD_DEBUG
+      if (!params.verbose) return;
+      std::cout << "vert: " << mesh_idx << ", left: " << left->mesh_idx
+                << ", right: " << right->mesh_idx << ", cost: " << cost
+                << std::endl;
+#endif
+    }
+  };
+
+  static vec2 SafeNormalize(vec2 v) {
+    vec2 n = la::normalize(v);
+    return std::isfinite(n.x) ? n : vec2(0, 0);
+  }
+
+  // This function and JoinPolygons are the only functions that affect the
+  // circular list data structure. This helps ensure it remains circular.
+  static void Link(VertItr left, VertItr right) {
+    left->right = right;
+    right->left = left;
+    left->rightDir = SafeNormalize(right->pos - left->pos);
+  }
+
+  // When an ear vert is clipped, its neighbors get linked, so they get unlinked
+  // from it, but it is still linked to them.
+  static bool Clipped(VertItr v) { return v->right->left != v; }
+
+  // Apply func to each un-clipped vert in a polygon and return an un-clipped
+  // vert.
+  VertItrC Loop(VertItr first, std::function<void(VertItr)> func) const {
+    VertItr v = first;
+    do {
+      if (Clipped(v)) {
+        // Update first to an un-clipped vert so we will return to it instead
+        // of infinite-looping.
+        first = v->right->left;
+        if (!Clipped(first)) {
+          v = first;
+          if (v->right == v->left) {
+            return polygon_.end();
+          }
+          func(v);
+        }
+      } else {
+        if (v->right == v->left) {
+          return polygon_.end();
+        }
+        func(v);
+      }
+      v = v->right;
+    } while (v != first);
+    return v;
+  }
+
+  // Remove this vert from the circular list and output a corresponding
+  // triangle.
+  void ClipEar(VertItrC ear) {
+    Link(ear->left, ear->right);
+    if (ear->left->mesh_idx != ear->mesh_idx &&
+        ear->mesh_idx != ear->right->mesh_idx &&
+        ear->right->mesh_idx != ear->left->mesh_idx) {
+      // Filter out topological degenerates, which can form in bad
+      // triangulations of polygons with holes, due to vert duplication.
+      triangles_.push_back(
+          {ear->left->mesh_idx, ear->mesh_idx, ear->right->mesh_idx});
+    } else {
+      PRINT("Topological degenerate!");
+    }
+  }
+
+  // If an ear will make a degenerate triangle, clip it early to avoid
+  // difficulty in key-holing. This function is recursive, as the process of
+  // clipping may cause the neighbors to degenerate. Reflex degenerates *must
+  // not* be clipped, unless they have a short edge.
+  void ClipIfDegenerate(VertItr ear) {
+    if (Clipped(ear)) {
+      return;
+    }
+    if (ear->left == ear->right) {
+      return;
+    }
+    if (ear->IsShort(epsilon_) ||
+        (CCW(ear->left->pos, ear->pos, ear->right->pos, epsilon_) == 0 &&
+         la::dot(ear->left->pos - ear->pos, ear->right->pos - ear->pos) > 0 &&
+         ear->IsConvex(epsilon_))) {
+      ClipEar(ear);
+      ClipIfDegenerate(ear->left);
+      ClipIfDegenerate(ear->right);
+    }
+  }
+
+  // Build the circular list polygon structures.
+  std::vector<VertItr> Initialize(const PolygonsIdx &polys) {
+    std::vector<VertItr> starts;
+    for (const SimplePolygonIdx &poly : polys) {
+      auto vert = poly.begin();
+      polygon_.push_back({vert->idx, 0.0, earsQueue_.end(), vert->pos});
+      const VertItr first = std::prev(polygon_.end());
+
+      bBox_.Union(first->pos);
+      VertItr last = first;
+      // This is not the real rightmost start, but just an arbitrary vert for
+      // now to identify each polygon.
+      starts.push_back(first);
+
+      for (++vert; vert != poly.end(); ++vert) {
+        bBox_.Union(vert->pos);
+
+        polygon_.push_back({vert->idx, 0.0, earsQueue_.end(), vert->pos});
+        VertItr next = std::prev(polygon_.end());
+
+        Link(last, next);
+        last = next;
+      }
+      Link(last, first);
+    }
+
+    if (epsilon_ < 0) epsilon_ = bBox_.Scale() * kPrecision;
+
+    // Slightly more than enough, since each hole can cause two extra triangles.
+    triangles_.reserve(polygon_.size() + 2 * starts.size());
+    return starts;
+  }
+
+  // Find the actual rightmost starts after degenerate removal. Also calculate
+  // the polygon bounding boxes.
+  void FindStart(VertItr first) {
+    const vec2 origin = first->pos;
+
+    VertItr start = first;
+    double maxX = -std::numeric_limits<double>::infinity();
+    Rect bBox;
+    // Kahan summation
+    double area = 0;
+    double areaCompensation = 0;
+
+    auto AddPoint = [&](VertItr v) {
+      bBox.Union(v->pos);
+      const double area1 =
+          determinant2x2(v->pos - origin, v->right->pos - origin);
+      const double t1 = area + area1;
+      areaCompensation += (area - t1) + area1;
+      area = t1;
+
+      if (v->pos.x > maxX) {
+        maxX = v->pos.x;
+        start = v;
+      }
+    };
+
+    if (Loop(first, AddPoint) == polygon_.end()) {
+      // No polygon left if all ears were degenerate and already clipped.
+      return;
+    }
+
+    area += areaCompensation;
+    const vec2 size = bBox.Size();
+    const double minArea = epsilon_ * std::max(size.x, size.y);
+
+    if (std::isfinite(maxX) && area < -minArea) {
+      holes_.insert(start);
+      hole2BBox_.insert({start, bBox});
+    } else {
+      simples_.push_back(start);
+      if (area > minArea) {
+        outers_.push_back(start);
+      }
+    }
+  }
+
+  // All holes must be key-holed (attached to an outer polygon) before ear
+  // clipping can commence. Instead of relying on sorting, which may be
+  // incorrect due to epsilon, we check for polygon edges both ahead and
+  // behind to ensure all valid options are found.
+  void CutKeyhole(const VertItr start) {
+    const Rect bBox = hole2BBox_[start];
+    const int onTop = start->pos.y >= bBox.max.y - epsilon_   ? 1
+                      : start->pos.y <= bBox.min.y + epsilon_ ? -1
+                                                              : 0;
+    VertItr connector = polygon_.end();
+
+    auto CheckEdge = [&](VertItr edge) {
+      const double x = edge->InterpY2X(start->pos, onTop, epsilon_);
+      if (std::isfinite(x) && start->InsideEdge(edge, epsilon_, true) &&
+          (connector == polygon_.end() ||
+           CCW({x, start->pos.y}, connector->pos, connector->right->pos,
+               epsilon_) == 1 ||
+           (connector->pos.y < edge->pos.y
+                ? edge->InsideEdge(connector, epsilon_, false)
+                : !connector->InsideEdge(edge, epsilon_, false)))) {
+        connector = edge;
+      }
+    };
+
+    for (const VertItr first : outers_) {
+      Loop(first, CheckEdge);
+    }
+
+    if (connector == polygon_.end()) {
+      PRINT("hole did not find an outer contour!");
+      simples_.push_back(start);
+      return;
+    }
+
+    connector = FindCloserBridge(start, connector);
+
+    JoinPolygons(start, connector);
+
+#ifdef MANIFOLD_DEBUG
+    if (params.verbose) {
+      std::cout << "connected " << start->mesh_idx << " to "
+                << connector->mesh_idx << std::endl;
+    }
+#endif
+  }
+
+  // This converts the initial guess for the keyhole location into the final one
+  // and returns it. It does so by finding any reflex verts inside the triangle
+  // containing the best connection and the initial horizontal line.
+  VertItr FindCloserBridge(VertItr start, VertItr edge) {
+    VertItr connector =
+        edge->pos.x < start->pos.x          ? edge->right
+        : edge->right->pos.x < start->pos.x ? edge
+        : edge->right->pos.y - start->pos.y > start->pos.y - edge->pos.y
+            ? edge
+            : edge->right;
+    if (la::abs(connector->pos.y - start->pos.y) <= epsilon_) {
+      return connector;
+    }
+    const double above = connector->pos.y > start->pos.y ? 1 : -1;
+
+    auto CheckVert = [&](VertItr vert) {
+      const double inside =
+          above * CCW(start->pos, vert->pos, connector->pos, epsilon_);
+      if (vert->pos.x > start->pos.x - epsilon_ &&
+          vert->pos.y * above > start->pos.y * above - epsilon_ &&
+          (inside > 0 || (inside == 0 && vert->pos.x < connector->pos.x)) &&
+          vert->InsideEdge(edge, epsilon_, true) && vert->IsReflex(epsilon_)) {
+        connector = vert;
+      }
+    };
+
+    for (const VertItr first : outers_) {
+      Loop(first, CheckVert);
+    }
+
+    return connector;
+  }
+
+  // Creates a keyhole between the start vert of a hole and the connector vert
+  // of an outer polygon. To do this, both verts are duplicated and reattached.
+  // This process may create degenerate ears, so these are clipped if necessary
+  // to keep from confusing subsequent key-holing operations.
+  void JoinPolygons(VertItr start, VertItr connector) {
+    polygon_.push_back(*start);
+    const VertItr newStart = std::prev(polygon_.end());
+    polygon_.push_back(*connector);
+    const VertItr newConnector = std::prev(polygon_.end());
+
+    start->right->left = newStart;
+    connector->left->right = newConnector;
+    Link(start, connector);
+    Link(newConnector, newStart);
+
+    ClipIfDegenerate(start);
+    ClipIfDegenerate(newStart);
+    ClipIfDegenerate(connector);
+    ClipIfDegenerate(newConnector);
+  }
+
+  // Recalculate the cost of the Vert v ear, updating it in the queue by
+  // removing and reinserting it.
+  void ProcessEar(VertItr v, IdxCollider &collider) {
+    if (v->ear != earsQueue_.end()) {
+      earsQueue_.erase(v->ear);
+      v->ear = earsQueue_.end();
+    }
+    if (v->IsShort(epsilon_)) {
+      v->cost = kBest;
+      v->ear = earsQueue_.insert(v);
+    } else if (v->IsConvex(2 * epsilon_)) {
+      v->cost = v->EarCost(epsilon_, collider);
+      v->ear = earsQueue_.insert(v);
+    } else {
+      v->cost = 1;  // not used, but marks reflex verts for debug
+    }
+  }
+
+  // Create a collider of all vertices in this polygon, each expanded by
+  // epsilon_. Each ear uses this BVH to quickly find a subset of vertices to
+  // check for cost.
+  IdxCollider VertCollider(VertItr start) const {
+    Vec<Box> vertBox;
+    Vec<uint32_t> vertMorton;
+    std::vector<VertItr> itr;
+    const Box box(vec3(bBox_.min, 0), vec3(bBox_.max, 0));
+
+    Loop(start, [&vertBox, &vertMorton, &itr, &box, this](VertItr v) {
+      itr.push_back(v);
+      const vec3 pos(v->pos, 0);
+      vertBox.push_back({pos - epsilon_, pos + epsilon_});
+      vertMorton.push_back(Collider::MortonCode(pos, box));
+    });
+
+    if (itr.empty()) {
+      return {Collider(), itr};
+    }
+
+    const int numVert = itr.size();
+    Vec<int> vertNew2Old(numVert);
+    sequence(vertNew2Old.begin(), vertNew2Old.end());
+
+    stable_sort(vertNew2Old.begin(), vertNew2Old.end(),
+                [&vertMorton](const int a, const int b) {
+                  return vertMorton[a] < vertMorton[b];
+                });
+    Permute(vertMorton, vertNew2Old);
+    Permute(vertBox, vertNew2Old);
+    Permute(itr, vertNew2Old);
+
+    return {Collider(vertBox, vertMorton), itr};
+  }
+
+  // The main ear-clipping loop. This is called once for each simple polygon -
+  // all holes have already been key-holed and joined to an outer polygon.
+  void TriangulatePoly(VertItr start) {
+    ZoneScoped;
+
+    IdxCollider vertCollider = VertCollider(start);
+
+    if (vertCollider.itr.empty()) {
+      PRINT("Empty poly");
+      return;
+    }
+
+    // A simple polygon always creates two fewer triangles than it has verts.
+    int numTri = -2;
+    earsQueue_.clear();
+
+    auto QueueVert = [&](VertItr v) {
+      ProcessEar(v, vertCollider);
+      ++numTri;
+      v->PrintVert();
+    };
+
+    VertItrC v = Loop(start, QueueVert);
+    if (v == polygon_.end()) return;
+    Dump(v);
+
+    while (numTri > 0) {
+      const qItr ear = earsQueue_.begin();
+      if (ear != earsQueue_.end()) {
+        v = *ear;
+        // Cost should always be negative, generally < -epsilon.
+        v->PrintVert();
+        earsQueue_.erase(ear);
+      } else {
+        PRINT("No ear found!");
+      }
+
+      ClipEar(v);
+      --numTri;
+
+      ProcessEar(v->left, vertCollider);
+      ProcessEar(v->right, vertCollider);
+      // This is a backup vert that is used if the queue is empty (geometrically
+      // invalid polygon), to ensure manifoldness.
+      v = v->right;
+    }
+
+    DEBUG_ASSERT(v->right == v->left, logicErr, "Triangulator error!");
+    PRINT("Finished poly");
+  }
+
+  void Dump(VertItrC start) const {
+#ifdef MANIFOLD_DEBUG
+    if (!params.verbose) return;
+    VertItrC v = start;
+    std::cout << "show(array([" << std::setprecision(15) << std::endl;
+    do {
+      std::cout << "  [" << v->pos.x << ", " << v->pos.y << "],# "
+                << v->mesh_idx << ", cost: " << v->cost << std::endl;
+      v = v->right;
+    } while (v != start);
+    std::cout << "  [" << v->pos.x << ", " << v->pos.y << "],# " << v->mesh_idx
+              << std::endl;
+    std::cout << "]))" << std::endl;
+
+    v = start;
+    std::cout << "polys.push_back({" << std::setprecision(15) << std::endl;
+    do {
+      std::cout << "    {" << v->pos.x << ", " << v->pos.y << "},  //"
+                << std::endl;
+      v = v->right;
+    } while (v != start);
+    std::cout << "});" << std::endl;
+#endif
+  }
+};
+}  // namespace
+
+namespace manifold {
+
+/**
+ * @brief Triangulates a set of &epsilon;-valid polygons. If the input is not
+ * &epsilon;-valid, the triangulation may overlap, but will always return a
+ * manifold result that matches the input edge directions.
+ *
+ * @param polys The set of polygons, wound CCW and representing multiple
+ * polygons and/or holes. These have 2D-projected positions as well as
+ * references back to the original vertices.
+ * @param epsilon The value of &epsilon;, bounding the uncertainty of the
+ * input.
+ * @return std::vector<ivec3> The triangles, referencing the original
+ * vertex indices.
+ */
+std::vector<ivec3> TriangulateIdx(const PolygonsIdx &polys, double epsilon) {
+  std::vector<ivec3> triangles;
+  double updatedEpsilon = epsilon;
+#ifdef MANIFOLD_DEBUG
+  try {
+#endif
+    if (IsConvex(polys, epsilon)) {  // fast path
+      triangles = TriangulateConvex(polys);
+    } else {
+      EarClip triangulator(polys, epsilon);
+      triangles = triangulator.Triangulate();
+      updatedEpsilon = triangulator.GetPrecision();
+    }
+#ifdef MANIFOLD_DEBUG
+    if (params.intermediateChecks) {
+      CheckTopology(triangles, polys);
+      if (!params.processOverlaps) {
+        CheckGeometry(triangles, polys, 2 * updatedEpsilon);
+      }
+    }
+  } catch (const geometryErr &e) {
+    if (!params.suppressErrors) {
+      PrintFailure(e, polys, triangles, updatedEpsilon);
+    }
+    throw;
+  } catch (const std::exception &e) {
+    PrintFailure(e, polys, triangles, updatedEpsilon);
+    throw;
+  }
+#endif
+  return triangles;
+}
+
+/**
+ * @brief Triangulates a set of &epsilon;-valid polygons. If the input is not
+ * &epsilon;-valid, the triangulation may overlap, but will always return a
+ * manifold result that matches the input edge directions.
+ *
+ * @param polygons The set of polygons, wound CCW and representing multiple
+ * polygons and/or holes.
+ * @param epsilon The value of &epsilon;, bounding the uncertainty of the
+ * input.
+ * @return std::vector<ivec3> The triangles, referencing the original
+ * polygon points in order.
+ */
+std::vector<ivec3> Triangulate(const Polygons &polygons, double epsilon) {
+  int idx = 0;
+  PolygonsIdx polygonsIndexed;
+  for (const auto &poly : polygons) {
+    SimplePolygonIdx simpleIndexed;
+    for (const vec2 &polyVert : poly) {
+      simpleIndexed.push_back({polyVert, idx++});
+    }
+    polygonsIndexed.push_back(simpleIndexed);
+  }
+  return TriangulateIdx(polygonsIndexed, epsilon);
+}
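+
+// Example usage (illustrative only, not part of the library): triangulate a
+// unit square with a square hole. Outer contours are wound CCW and holes CW
+// (negative area); passing a negative epsilon lets the triangulator derive a
+// tolerance from the bounding box (see EarClip::Initialize above).
+//
+//   manifold::Polygons polys;
+//   polys.push_back({{0, 0}, {1, 0}, {1, 1}, {0, 1}});
+//   polys.push_back({{0.25, 0.25}, {0.25, 0.75}, {0.75, 0.75}, {0.75, 0.25}});
+//   std::vector<manifold::ivec3> tris = manifold::Triangulate(polys, -1);
+//   // Each ivec3 indexes the 8 input points in order of appearance.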
+
+ExecutionParams &PolygonParams() { return params; }
+
+}  // namespace manifold

+ 387 - 0
thirdparty/manifold/src/properties.cpp

@@ -0,0 +1,387 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+
+#include "./impl.h"
+#include "./parallel.h"
+#include "./tri_dist.h"
+
+namespace {
+using namespace manifold;
+
+struct CurvatureAngles {
+  VecView<double> meanCurvature;
+  VecView<double> gaussianCurvature;
+  VecView<double> area;
+  VecView<double> degree;
+  VecView<const Halfedge> halfedge;
+  VecView<const vec3> vertPos;
+  VecView<const vec3> triNormal;
+
+  void operator()(size_t tri) {
+    vec3 edge[3];
+    vec3 edgeLength(0.0);
+    for (int i : {0, 1, 2}) {
+      const int startVert = halfedge[3 * tri + i].startVert;
+      const int endVert = halfedge[3 * tri + i].endVert;
+      edge[i] = vertPos[endVert] - vertPos[startVert];
+      edgeLength[i] = la::length(edge[i]);
+      edge[i] /= edgeLength[i];
+      const int neighborTri = halfedge[3 * tri + i].pairedHalfedge / 3;
+      const double dihedral =
+          0.25 * edgeLength[i] *
+          std::asin(la::dot(la::cross(triNormal[tri], triNormal[neighborTri]),
+                            edge[i]));
+      AtomicAdd(meanCurvature[startVert], dihedral);
+      AtomicAdd(meanCurvature[endVert], dihedral);
+      AtomicAdd(degree[startVert], 1.0);
+    }
+
+    vec3 phi;
+    phi[0] = std::acos(-la::dot(edge[2], edge[0]));
+    phi[1] = std::acos(-la::dot(edge[0], edge[1]));
+    phi[2] = kPi - phi[0] - phi[1];
+    const double area3 = edgeLength[0] * edgeLength[1] *
+                         la::length(la::cross(edge[0], edge[1])) / 6;
+
+    for (int i : {0, 1, 2}) {
+      const int vert = halfedge[3 * tri + i].startVert;
+      AtomicAdd(gaussianCurvature[vert], -phi[i]);
+      AtomicAdd(area[vert], area3);
+    }
+  }
+};
+
+struct UpdateProperties {
+  VecView<ivec3> triProp;
+  VecView<double> properties;
+  VecView<uint8_t> counters;
+
+  VecView<const double> oldProperties;
+  VecView<const Halfedge> halfedge;
+  VecView<const double> meanCurvature;
+  VecView<const double> gaussianCurvature;
+  const int oldNumProp;
+  const int numProp;
+  const int gaussianIdx;
+  const int meanIdx;
+
+  void operator()(const size_t tri) {
+    for (const int i : {0, 1, 2}) {
+      const int vert = halfedge[3 * tri + i].startVert;
+      if (oldNumProp == 0) {
+        triProp[tri][i] = vert;
+      }
+      const int propVert = triProp[tri][i];
+
+      auto old = std::atomic_exchange(
+          reinterpret_cast<std::atomic<uint8_t>*>(&counters[propVert]),
+          static_cast<uint8_t>(1));
+      if (old == 1) continue;
+
+      for (int p = 0; p < oldNumProp; ++p) {
+        properties[numProp * propVert + p] =
+            oldProperties[oldNumProp * propVert + p];
+      }
+
+      if (gaussianIdx >= 0) {
+        properties[numProp * propVert + gaussianIdx] = gaussianCurvature[vert];
+      }
+      if (meanIdx >= 0) {
+        properties[numProp * propVert + meanIdx] = meanCurvature[vert];
+      }
+    }
+  }
+};
+
+struct CheckHalfedges {
+  VecView<const Halfedge> halfedges;
+
+  bool operator()(size_t edge) const {
+    const Halfedge halfedge = halfedges[edge];
+    if (halfedge.startVert == -1 || halfedge.endVert == -1) return true;
+    if (halfedge.pairedHalfedge == -1) return false;
+
+    const Halfedge paired = halfedges[halfedge.pairedHalfedge];
+    bool good = true;
+    good &= paired.pairedHalfedge == static_cast<int>(edge);
+    good &= halfedge.startVert != halfedge.endVert;
+    good &= halfedge.startVert == paired.endVert;
+    good &= halfedge.endVert == paired.startVert;
+    return good;
+  }
+};
+
+struct CheckCCW {
+  VecView<const Halfedge> halfedges;
+  VecView<const vec3> vertPos;
+  VecView<const vec3> triNormal;
+  const double tol;
+
+  bool operator()(size_t face) const {
+    if (halfedges[3 * face].pairedHalfedge < 0) return true;
+
+    const mat2x3 projection = GetAxisAlignedProjection(triNormal[face]);
+    vec2 v[3];
+    for (int i : {0, 1, 2})
+      v[i] = projection * vertPos[halfedges[3 * face + i].startVert];
+
+    int ccw = CCW(v[0], v[1], v[2], std::abs(tol));
+    bool check = tol > 0 ? ccw >= 0 : ccw == 0;
+
+#ifdef MANIFOLD_DEBUG
+    if (tol > 0 && !check) {
+      vec2 v1 = v[1] - v[0];
+      vec2 v2 = v[2] - v[0];
+      double area = v1.x * v2.y - v1.y * v2.x;
+      double base2 = std::max(la::dot(v1, v1), la::dot(v2, v2));
+      double base = std::sqrt(base2);
+      vec3 V0 = vertPos[halfedges[3 * face].startVert];
+      vec3 V1 = vertPos[halfedges[3 * face + 1].startVert];
+      vec3 V2 = vertPos[halfedges[3 * face + 2].startVert];
+      vec3 norm = la::cross(V1 - V0, V2 - V0);
+      printf(
+          "Tri %ld does not match normal, approx height = %g, base = %g\n"
+          "tol = %g, area2 = %g, base2*tol2 = %g\n"
+          "normal = %g, %g, %g\n"
+          "norm = %g, %g, %g\nverts: %d, %d, %d\n",
+          face, area / base, base, tol, area * area, base2 * tol * tol,
+          triNormal[face].x, triNormal[face].y, triNormal[face].z, norm.x,
+          norm.y, norm.z, halfedges[3 * face].startVert,
+          halfedges[3 * face + 1].startVert, halfedges[3 * face + 2].startVert);
+    }
+#endif
+    return check;
+  }
+};
+}  // namespace
+
+namespace manifold {
+
+/**
+ * Returns true if this manifold is in fact an oriented even manifold and all of
+ * the data structures are consistent.
+ */
+bool Manifold::Impl::IsManifold() const {
+  if (halfedge_.size() == 0) return true;
+  return all_of(countAt(0_uz), countAt(halfedge_.size()),
+                CheckHalfedges({halfedge_}));
+}
+
+/**
+ * Returns true if this manifold is in fact an oriented 2-manifold and all of
+ * the data structures are consistent.
+ */
+bool Manifold::Impl::Is2Manifold() const {
+  if (halfedge_.size() == 0) return true;
+  if (!IsManifold()) return false;
+
+  Vec<Halfedge> halfedge(halfedge_);
+  stable_sort(halfedge.begin(), halfedge.end());
+
+  return all_of(
+      countAt(0_uz), countAt(2 * NumEdge() - 1), [&halfedge](size_t edge) {
+        const Halfedge h = halfedge[edge];
+        if (h.startVert == -1 && h.endVert == -1 && h.pairedHalfedge == -1)
+          return true;
+        return h.startVert != halfedge[edge + 1].startVert ||
+               h.endVert != halfedge[edge + 1].endVert;
+      });
+}
+
+/**
+ * Returns true if all triangles are CCW relative to their triNormals_.
+ */
+bool Manifold::Impl::MatchesTriNormals() const {
+  if (halfedge_.size() == 0 || faceNormal_.size() != NumTri()) return true;
+  return all_of(countAt(0_uz), countAt(NumTri()),
+                CheckCCW({halfedge_, vertPos_, faceNormal_, 2 * epsilon_}));
+}
+
+/**
+ * Returns the number of triangles that are colinear within epsilon_.
+ */
+int Manifold::Impl::NumDegenerateTris() const {
+  if (halfedge_.size() == 0 || faceNormal_.size() != NumTri()) return 0;
+  return count_if(
+      countAt(0_uz), countAt(NumTri()),
+      CheckCCW({halfedge_, vertPos_, faceNormal_, -1 * epsilon_ / 2}));
+}
+
+double Manifold::Impl::GetProperty(Property prop) const {
+  ZoneScoped;
+  if (IsEmpty()) return 0;
+
+  auto Volume = [this](size_t tri) {
+    const vec3 v = vertPos_[halfedge_[3 * tri].startVert];
+    vec3 crossP = la::cross(vertPos_[halfedge_[3 * tri + 1].startVert] - v,
+                            vertPos_[halfedge_[3 * tri + 2].startVert] - v);
+    return la::dot(crossP, v) / 6.0;
+  };
+
+  auto Area = [this](size_t tri) {
+    const vec3 v = vertPos_[halfedge_[3 * tri].startVert];
+    return la::length(
+               la::cross(vertPos_[halfedge_[3 * tri + 1].startVert] - v,
+                         vertPos_[halfedge_[3 * tri + 2].startVert] - v)) /
+           2.0;
+  };
+
+  // Kahan summation
+  double value = 0;
+  double valueCompensation = 0;
+  for (size_t i = 0; i < NumTri(); ++i) {
+    const double value1 = prop == Property::SurfaceArea ? Area(i) : Volume(i);
+    const double t = value + value1;
+    valueCompensation += (value - t) + value1;
+    value = t;
+  }
+  value += valueCompensation;
+  return value;
+}
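+
+// Illustrative note (not part of the library): Kahan (compensated) summation,
+// as used above and in EarClip::FindStart, keeps a running correction term so
+// that adding many small per-triangle contributions to a large accumulator
+// does not silently lose low-order bits. A minimal sketch of the same pattern:
+//
+//   double sum = 0, c = 0;
+//   for (double x : values) {
+//     const double t = sum + x;
+//     c += (sum - t) + x;  // the part lost to rounding in sum + x
+//     sum = t;
+//   }
+//   sum += c;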
+
+void Manifold::Impl::CalculateCurvature(int gaussianIdx, int meanIdx) {
+  ZoneScoped;
+  if (IsEmpty()) return;
+  if (gaussianIdx < 0 && meanIdx < 0) return;
+  Vec<double> vertMeanCurvature(NumVert(), 0);
+  Vec<double> vertGaussianCurvature(NumVert(), kTwoPi);
+  Vec<double> vertArea(NumVert(), 0);
+  Vec<double> degree(NumVert(), 0);
+  auto policy = autoPolicy(NumTri(), 1e4);
+  for_each(policy, countAt(0_uz), countAt(NumTri()),
+           CurvatureAngles({vertMeanCurvature, vertGaussianCurvature, vertArea,
+                            degree, halfedge_, vertPos_, faceNormal_}));
+  for_each_n(policy, countAt(0), NumVert(),
+             [&vertMeanCurvature, &vertGaussianCurvature, &vertArea,
+              &degree](const int vert) {
+               const double factor = degree[vert] / (6 * vertArea[vert]);
+               vertMeanCurvature[vert] *= factor;
+               vertGaussianCurvature[vert] *= factor;
+             });
+
+  const int oldNumProp = NumProp();
+  const int numProp = std::max(oldNumProp, std::max(gaussianIdx, meanIdx) + 1);
+  const Vec<double> oldProperties = meshRelation_.properties;
+  meshRelation_.properties = Vec<double>(numProp * NumPropVert(), 0);
+  meshRelation_.numProp = numProp;
+  if (meshRelation_.triProperties.size() == 0) {
+    meshRelation_.triProperties.resize(NumTri());
+  }
+
+  const Vec<uint8_t> counters(NumPropVert(), 0);
+  for_each_n(
+      policy, countAt(0_uz), NumTri(),
+      UpdateProperties({meshRelation_.triProperties, meshRelation_.properties,
+                        counters, oldProperties, halfedge_, vertMeanCurvature,
+                        vertGaussianCurvature, oldNumProp, numProp, gaussianIdx,
+                        meanIdx}));
+}
+
+/**
+ * Calculates the bounding box of the entire manifold, which is stored
+ * internally to short-cut Boolean operations. Ignores NaNs.
+ */
+void Manifold::Impl::CalculateBBox() {
+  bBox_.min =
+      reduce(vertPos_.begin(), vertPos_.end(),
+             vec3(std::numeric_limits<double>::infinity()), [](auto a, auto b) {
+               if (std::isnan(a.x)) return b;
+               if (std::isnan(b.x)) return a;
+               return la::min(a, b);
+             });
+  bBox_.max = reduce(vertPos_.begin(), vertPos_.end(),
+                     vec3(-std::numeric_limits<double>::infinity()),
+                     [](auto a, auto b) {
+                       if (std::isnan(a.x)) return b;
+                       if (std::isnan(b.x)) return a;
+                       return la::max(a, b);
+                     });
+}
+
+/**
+ * Determines if all verts are finite. Checking just the bounding box dimensions
+ * is insufficient as it ignores NaNs.
+ */
+bool Manifold::Impl::IsFinite() const {
+  return transform_reduce(
+      vertPos_.begin(), vertPos_.end(), true,
+      [](bool a, bool b) { return a && b; },
+      [](auto v) { return la::all(la::isfinite(v)); });
+}
+
+/**
+ * Checks that the input triVerts array has all indices inside bounds of the
+ * vertPos_ array.
+ */
+bool Manifold::Impl::IsIndexInBounds(VecView<const ivec3> triVerts) const {
+  ivec2 minmax = transform_reduce(
+      triVerts.begin(), triVerts.end(),
+      ivec2(std::numeric_limits<int>::max(), std::numeric_limits<int>::min()),
+      [](auto a, auto b) {
+        a[0] = std::min(a[0], b[0]);
+        a[1] = std::max(a[1], b[1]);
+        return a;
+      },
+      [](auto tri) {
+        return ivec2(std::min(tri[0], std::min(tri[1], tri[2])),
+                     std::max(tri[0], std::max(tri[1], tri[2])));
+      });
+
+  return minmax[0] >= 0 && minmax[1] < static_cast<int>(NumVert());
+}
+
+/*
+ * Returns the minimum gap between two manifolds. Returns a double between
+ * 0 and searchLength.
+ */
+double Manifold::Impl::MinGap(const Manifold::Impl& other,
+                              double searchLength) const {
+  ZoneScoped;
+  Vec<Box> faceBoxOther;
+  Vec<uint32_t> faceMortonOther;
+
+  other.GetFaceBoxMorton(faceBoxOther, faceMortonOther);
+
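+  // Inflate each face box of `other` by searchLength so the broad phase only
+  // reports face pairs that could lie within searchLength of each other.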
+  transform(faceBoxOther.begin(), faceBoxOther.end(), faceBoxOther.begin(),
+            [searchLength](const Box& box) {
+              return Box(box.min - vec3(searchLength),
+                         box.max + vec3(searchLength));
+            });
+
+  SparseIndices collisions = collider_.Collisions(faceBoxOther.cview());
+
+  double minDistanceSquared = transform_reduce(
+      countAt(0_uz), countAt(collisions.size()), searchLength * searchLength,
+      [](double a, double b) { return std::min(a, b); },
+      [&collisions, this, &other](int i) {
+        const int tri = collisions.Get(i, 1);
+        const int triOther = collisions.Get(i, 0);
+
+        std::array<vec3, 3> p;
+        std::array<vec3, 3> q;
+
+        for (const int j : {0, 1, 2}) {
+          p[j] = vertPos_[halfedge_[3 * tri + j].startVert];
+          q[j] = other.vertPos_[other.halfedge_[3 * triOther + j].startVert];
+        }
+
+        return DistanceTriangleTriangleSquared(p, q);
+      });
+
+  return sqrt(minDistanceSquared);
+}
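+
+// A hedged usage sketch through the public API, assuming the Manifold::MinGap
+// wrapper declared in manifold/manifold.h forwards to this implementation:
+//
+//   Manifold a = Manifold::Sphere(1.0);
+//   Manifold b = Manifold::Sphere(1.0).Translate({3.0, 0.0, 0.0});
+//   double gap = a.MinGap(b, 5.0);  // ~1.0: the sphere surfaces are 1 apart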
+
+}  // namespace manifold

+ 860 - 0
thirdparty/manifold/src/quickhull.cpp

@@ -0,0 +1,860 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Derived from the public domain work of Antti Kuukka at
+// https://github.com/akuukka/quickhull
+
+#include "quickhull.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "./impl.h"
+
+namespace manifold {
+
+double defaultEps() { return 0.0000001; }
+
+inline double getSquaredDistanceBetweenPointAndRay(const vec3& p,
+                                                   const Ray& r) {
+  const vec3 s = p - r.S;
+  double t = la::dot(s, r.V);
+  return la::dot(s, s) - t * t * r.VInvLengthSquared;
+}
+
+inline double getSquaredDistance(const vec3& p1, const vec3& p2) {
+  return la::dot(p1 - p2, p1 - p2);
+}
+// Note that the unit of distance returned is relative to the length of the
+// plane's normal (divide by the length of N to get the "real" distance).
+inline double getSignedDistanceToPlane(const vec3& v, const Plane& p) {
+  return la::dot(p.N, v) + p.D;
+}
+
+inline vec3 getTriangleNormal(const vec3& a, const vec3& b, const vec3& c) {
+  // We want to get (a-c).crossProduct(b-c) without constructing temp vectors
+  double x = a.x - c.x;
+  double y = a.y - c.y;
+  double z = a.z - c.z;
+  double rhsx = b.x - c.x;
+  double rhsy = b.y - c.y;
+  double rhsz = b.z - c.z;
+  double px = y * rhsz - z * rhsy;
+  double py = z * rhsx - x * rhsz;
+  double pz = x * rhsy - y * rhsx;
+  return la::normalize(vec3(px, py, pz));
+}
+
+size_t MeshBuilder::addFace() {
+  if (disabledFaces.size()) {
+    size_t index = disabledFaces.back();
+    auto& f = faces[index];
+    DEBUG_ASSERT(f.isDisabled(), logicErr, "f should be disabled");
+    DEBUG_ASSERT(!f.pointsOnPositiveSide, logicErr,
+                 "f should not be on the positive side");
+    f.mostDistantPointDist = 0;
+    disabledFaces.pop_back();
+    return index;
+  }
+  faces.emplace_back();
+  return faces.size() - 1;
+}
+
+size_t MeshBuilder::addHalfedge() {
+  if (disabledHalfedges.size()) {
+    const size_t index = disabledHalfedges.back();
+    disabledHalfedges.pop_back();
+    return index;
+  }
+  halfedges.push_back({});
+  halfedgeToFace.push_back(0);
+  halfedgeNext.push_back(0);
+  return halfedges.size() - 1;
+}
+
+void MeshBuilder::setup(int a, int b, int c, int d) {
+  faces.clear();
+  halfedges.clear();
+  halfedgeToFace.clear();
+  halfedgeNext.clear();
+  disabledFaces.clear();
+  disabledHalfedges.clear();
+
+  faces.reserve(4);
+  halfedges.reserve(12);
+
+  // Create halfedges
+  // AB
+  halfedges.push_back({0, b, 6});
+  halfedgeToFace.push_back(0);
+  halfedgeNext.push_back(1);
+  // BC
+  halfedges.push_back({0, c, 9});
+  halfedgeToFace.push_back(0);
+  halfedgeNext.push_back(2);
+  // CA
+  halfedges.push_back({0, a, 3});
+  halfedgeToFace.push_back(0);
+  halfedgeNext.push_back(0);
+  // AC
+  halfedges.push_back({0, c, 2});
+  halfedgeToFace.push_back(1);
+  halfedgeNext.push_back(4);
+  // CD
+  halfedges.push_back({0, d, 11});
+  halfedgeToFace.push_back(1);
+  halfedgeNext.push_back(5);
+  // DA
+  halfedges.push_back({0, a, 7});
+  halfedgeToFace.push_back(1);
+  halfedgeNext.push_back(3);
+  // BA
+  halfedges.push_back({0, a, 0});
+  halfedgeToFace.push_back(2);
+  halfedgeNext.push_back(7);
+  // AD
+  halfedges.push_back({0, d, 5});
+  halfedgeToFace.push_back(2);
+  halfedgeNext.push_back(8);
+  // DB
+  halfedges.push_back({0, b, 10});
+  halfedgeToFace.push_back(2);
+  halfedgeNext.push_back(6);
+  // CB
+  halfedges.push_back({0, b, 1});
+  halfedgeToFace.push_back(3);
+  halfedgeNext.push_back(10);
+  // BD
+  halfedges.push_back({0, d, 8});
+  halfedgeToFace.push_back(3);
+  halfedgeNext.push_back(11);
+  // DC
+  halfedges.push_back({0, c, 4});
+  halfedgeToFace.push_back(3);
+  halfedgeNext.push_back(9);
+
+  // Create faces
+  faces.emplace_back(0);
+  faces.emplace_back(3);
+  faces.emplace_back(6);
+  faces.emplace_back(9);
+}
+
+std::array<int, 3> MeshBuilder::getVertexIndicesOfFace(const Face& f) const {
+  std::array<int, 3> v;
+  size_t index = f.he;
+  auto* he = &halfedges[index];
+  v[0] = he->endVert;
+
+  index = halfedgeNext[index];
+  he = &halfedges[index];
+  v[1] = he->endVert;
+
+  index = halfedgeNext[index];
+  he = &halfedges[index];
+  v[2] = he->endVert;
+  return v;
+}
+
+HalfEdgeMesh::HalfEdgeMesh(const MeshBuilder& builderObject,
+                           const VecView<vec3>& vertexData) {
+  std::unordered_map<size_t, size_t> faceMapping;
+  std::unordered_map<size_t, size_t> halfEdgeMapping;
+  std::unordered_map<size_t, size_t> vertexMapping;
+
+  size_t i = 0;
+  for (const auto& face : builderObject.faces) {
+    if (!face.isDisabled()) {
+      halfEdgeIndexFaces.emplace_back(static_cast<size_t>(face.he));
+      faceMapping[i] = halfEdgeIndexFaces.size() - 1;
+
+      const auto heIndices = builderObject.getHalfEdgeIndicesOfFace(face);
+      for (const auto heIndex : heIndices) {
+        const auto vertexIndex = builderObject.halfedges[heIndex].endVert;
+        if (vertexMapping.count(vertexIndex) == 0) {
+          vertices.push_back(vertexData[vertexIndex]);
+          vertexMapping[vertexIndex] = vertices.size() - 1;
+        }
+      }
+    }
+    i++;
+  }
+
+  i = 0;
+  for (const auto& halfEdge : builderObject.halfedges) {
+    if (halfEdge.pairedHalfedge != -1) {
+      halfedges.push_back({halfEdge.endVert, halfEdge.pairedHalfedge,
+                           builderObject.halfedgeToFace[i]});
+      halfedgeToFace.push_back(builderObject.halfedgeToFace[i]);
+      halfedgeNext.push_back(builderObject.halfedgeNext[i]);
+      halfEdgeMapping[i] = halfedges.size() - 1;
+    }
+    i++;
+  }
+
+  for (auto& halfEdgeIndexFace : halfEdgeIndexFaces) {
+    DEBUG_ASSERT(halfEdgeMapping.count(halfEdgeIndexFace) == 1, logicErr,
+                 "invalid halfedge mapping");
+    halfEdgeIndexFace = halfEdgeMapping[halfEdgeIndexFace];
+  }
+
+  for (size_t i = 0; i < halfedges.size(); i++) {
+    auto& he = halfedges[i];
+    halfedgeToFace[i] = faceMapping[halfedgeToFace[i]];
+    he.pairedHalfedge = halfEdgeMapping[he.pairedHalfedge];
+    halfedgeNext[i] = halfEdgeMapping[halfedgeNext[i]];
+    he.endVert = vertexMapping[he.endVert];
+  }
+}
+
+/*
+ * Implementation of the algorithm
+ */
+std::pair<Vec<Halfedge>, Vec<vec3>> QuickHull::buildMesh(double epsilon) {
+  if (originalVertexData.size() == 0) {
+    return {Vec<Halfedge>(), Vec<vec3>()};
+  }
+
+  // Very first: find extreme values and use them to compute the scale of the
+  // point cloud.
+  extremeValues = getExtremeValues();
+  scale = getScale(extremeValues);
+
+  // Epsilon we use depends on the scale
+  m_epsilon = epsilon * scale;
+  epsilonSquared = m_epsilon * m_epsilon;
+
+  // The planar case happens when all the points appear to lie on a two
+  // dimensional subspace of R^3.
+  planar = false;
+  createConvexHalfedgeMesh();
+  if (planar) {
+    const int extraPointIndex = planarPointCloudTemp.size() - 1;
+    for (auto& he : mesh.halfedges) {
+      if (he.endVert == extraPointIndex) {
+        he.endVert = 0;
+      }
+    }
+    planarPointCloudTemp.clear();
+  }
+
+  // reorder halfedges
+  Vec<Halfedge> halfedges(mesh.halfedges.size());
+  Vec<int> halfedgeToFace(mesh.halfedges.size());
+  Vec<int> counts(mesh.halfedges.size(), 0);
+  Vec<int> mapping(mesh.halfedges.size());
+  Vec<int> faceMap(mesh.faces.size());
+
+  // Some faces are disabled and should not go into the halfedge vector; we can
+  // update the face indices of the halfedges at the end using index/3.
+  int j = 0;
+  for_each(
+      autoPolicy(mesh.halfedges.size()), countAt(0_uz),
+      countAt(mesh.halfedges.size()), [&](size_t i) {
+        if (mesh.halfedges[i].pairedHalfedge < 0) return;
+        if (mesh.faces[mesh.halfedgeToFace[i]].isDisabled()) return;
+        if (AtomicAdd(counts[mesh.halfedgeToFace[i]], 1) > 0) return;
+        int currIndex = AtomicAdd(j, 3);
+        mapping[i] = currIndex;
+        halfedges[currIndex + 0] = mesh.halfedges[i];
+        halfedgeToFace[currIndex + 0] = mesh.halfedgeToFace[i];
+
+        size_t k = mesh.halfedgeNext[i];
+        mapping[k] = currIndex + 1;
+        halfedges[currIndex + 1] = mesh.halfedges[k];
+        halfedgeToFace[currIndex + 1] = mesh.halfedgeToFace[k];
+
+        k = mesh.halfedgeNext[k];
+        mapping[k] = currIndex + 2;
+        halfedges[currIndex + 2] = mesh.halfedges[k];
+        halfedgeToFace[currIndex + 2] = mesh.halfedgeToFace[k];
+        halfedges[currIndex + 0].startVert = halfedges[currIndex + 2].endVert;
+        halfedges[currIndex + 1].startVert = halfedges[currIndex + 0].endVert;
+        halfedges[currIndex + 2].startVert = halfedges[currIndex + 1].endVert;
+      });
+  halfedges.resize(j);
+  halfedgeToFace.resize(j);
+  // fix pairedHalfedge id
+  for_each(
+      autoPolicy(halfedges.size()), halfedges.begin(), halfedges.end(),
+      [&](Halfedge& he) { he.pairedHalfedge = mapping[he.pairedHalfedge]; });
+  counts.resize(originalVertexData.size() + 1);
+  fill(counts.begin(), counts.end(), 0);
+
+  // remove unused vertices
+  for_each(autoPolicy(halfedges.size() / 3), countAt(0_uz),
+           countAt(halfedges.size() / 3), [&](size_t i) {
+             AtomicAdd(counts[halfedges[3 * i].startVert], 1);
+             AtomicAdd(counts[halfedges[3 * i + 1].startVert], 1);
+             AtomicAdd(counts[halfedges[3 * i + 2].startVert], 1);
+           });
+  auto saturate = [](int c) { return c > 0 ? 1 : 0; };
+  exclusive_scan(TransformIterator(counts.begin(), saturate),
+                 TransformIterator(counts.end(), saturate), counts.begin(), 0);
+  Vec<vec3> vertices(counts.back());
+  for_each(autoPolicy(originalVertexData.size()), countAt(0_uz),
+           countAt(originalVertexData.size()), [&](size_t i) {
+             if (counts[i + 1] - counts[i] > 0) {
+               vertices[counts[i]] = originalVertexData[i];
+             }
+           });
+  for_each(autoPolicy(halfedges.size()), halfedges.begin(), halfedges.end(),
+           [&](Halfedge& he) {
+             he.startVert = counts[he.startVert];
+             he.endVert = counts[he.endVert];
+           });
+  return {std::move(halfedges), std::move(vertices)};
+}
+
+void QuickHull::createConvexHalfedgeMesh() {
+  visibleFaces.clear();
+  horizonEdgesData.clear();
+  possiblyVisibleFaces.clear();
+
+  // Compute base tetrahedron
+  setupInitialTetrahedron();
+  DEBUG_ASSERT(mesh.faces.size() == 4, logicErr, "not a tetrahedron");
+
+  // Init face stack with those faces that have points assigned to them
+  faceList.clear();
+  for (size_t i = 0; i < 4; i++) {
+    auto& f = mesh.faces[i];
+    if (f.pointsOnPositiveSide && f.pointsOnPositiveSide->size() > 0) {
+      faceList.push_back(i);
+      f.inFaceStack = 1;
+    }
+  }
+
+  // Process faces until the face list is empty.
+  size_t iter = 0;
+  while (!faceList.empty()) {
+    iter++;
+    if (iter == std::numeric_limits<size_t>::max()) {
+      // Visible face traversal marks visited faces with iteration counter (to
+      // mark that the face has been visited on this iteration) and the max
+      // value represents unvisited faces. At this point we have to reset
+      // iteration counter. This shouldn't be an issue on 64 bit machines.
+      iter = 0;
+    }
+
+    const auto topFaceIndex = faceList.front();
+    faceList.pop_front();
+
+    auto& tf = mesh.faces[topFaceIndex];
+    tf.inFaceStack = 0;
+
+    DEBUG_ASSERT(
+        !tf.pointsOnPositiveSide || tf.pointsOnPositiveSide->size() > 0,
+        logicErr, "there should be points on the positive side");
+    if (!tf.pointsOnPositiveSide || tf.isDisabled()) {
+      continue;
+    }
+
+    // Pick the most distant point to this triangle plane as the point to which
+    // we extrude
+    const vec3& activePoint = originalVertexData[tf.mostDistantPoint];
+    const size_t activePointIndex = tf.mostDistantPoint;
+
+    // Find out the faces that have our active point on their positive side
+    // (these are the "visible faces"). The face on top of the stack of course
+    // is one of them. At the same time, we create a list of horizon edges.
+    horizonEdgesData.clear();
+    possiblyVisibleFaces.clear();
+    visibleFaces.clear();
+    possiblyVisibleFaces.push_back({topFaceIndex, -1});
+    while (possiblyVisibleFaces.size()) {
+      const auto faceData = possiblyVisibleFaces.back();
+      possiblyVisibleFaces.pop_back();
+      auto& pvf = mesh.faces[faceData.faceIndex];
+      DEBUG_ASSERT(!pvf.isDisabled(), logicErr, "pvf should not be disabled");
+
+      if (pvf.visibilityCheckedOnIteration == iter) {
+        if (pvf.isVisibleFaceOnCurrentIteration) {
+          continue;
+        }
+      } else {
+        const Plane& P = pvf.P;
+        pvf.visibilityCheckedOnIteration = iter;
+        const double d = la::dot(P.N, activePoint) + P.D;
+        if (d > 0) {
+          pvf.isVisibleFaceOnCurrentIteration = 1;
+          pvf.horizonEdgesOnCurrentIteration = 0;
+          visibleFaces.push_back(faceData.faceIndex);
+          for (auto heIndex : mesh.getHalfEdgeIndicesOfFace(pvf)) {
+            if (mesh.halfedges[heIndex].pairedHalfedge !=
+                faceData.enteredFromHalfedge) {
+              possiblyVisibleFaces.push_back(
+                  {mesh.halfedgeToFace[mesh.halfedges[heIndex].pairedHalfedge],
+                   heIndex});
+            }
+          }
+          continue;
+        }
+        DEBUG_ASSERT(faceData.faceIndex != topFaceIndex, logicErr,
+                     "face index invalid");
+      }
+
+      // The face is not visible. Therefore, the halfedge we came from is part
+      // of the horizon edge.
+      pvf.isVisibleFaceOnCurrentIteration = 0;
+      horizonEdgesData.push_back(faceData.enteredFromHalfedge);
+      // Store which half edge is the horizon edge. The other half edges of the
+      // face will not be part of the final mesh, so their data slots can be
+      // recycled.
+      const auto halfEdgesMesh = mesh.getHalfEdgeIndicesOfFace(
+          mesh.faces[mesh.halfedgeToFace[faceData.enteredFromHalfedge]]);
+      const std::int8_t ind =
+          (halfEdgesMesh[0] == faceData.enteredFromHalfedge)
+              ? 0
+              : (halfEdgesMesh[1] == faceData.enteredFromHalfedge ? 1 : 2);
+      mesh.faces[mesh.halfedgeToFace[faceData.enteredFromHalfedge]]
+          .horizonEdgesOnCurrentIteration |= (1 << ind);
+    }
+    const size_t horizonEdgeCount = horizonEdgesData.size();
+
+    // Order horizon edges so that they form a loop. This may fail due to
+    // numerical instability in which case we give up trying to solve horizon
+    // edge for this point and accept a minor degeneration in the convex hull.
+    if (!reorderHorizonEdges(horizonEdgesData)) {
+      failedHorizonEdges++;
+      int change_flag = 0;
+      for (size_t index = 0; index < tf.pointsOnPositiveSide->size(); index++) {
+        if ((*tf.pointsOnPositiveSide)[index] == activePointIndex) {
+          change_flag = 1;
+        } else if (change_flag == 1) {
+          change_flag = 2;
+          (*tf.pointsOnPositiveSide)[index - 1] =
+              (*tf.pointsOnPositiveSide)[index];
+        }
+      }
+      if (change_flag == 1)
+        tf.pointsOnPositiveSide->resize(tf.pointsOnPositiveSide->size() - 1);
+
+      if (tf.pointsOnPositiveSide->size() == 0) {
+        reclaimToIndexVectorPool(tf.pointsOnPositiveSide);
+      }
+      continue;
+    }
+
+    // Except for the horizon edges, all half edges of the visible faces can be
+    // marked as disabled. Their data slots will be reused. The faces will be
+    // disabled as well, but we need to remember the points that were on the
+    // positive side of them - therefore we save pointers to them.
+    newFaceIndices.clear();
+    newHalfedgeIndices.clear();
+    disabledFacePointVectors.clear();
+    size_t disableCounter = 0;
+    for (auto faceIndex : visibleFaces) {
+      auto& disabledFace = mesh.faces[faceIndex];
+      auto halfEdgesMesh = mesh.getHalfEdgeIndicesOfFace(disabledFace);
+      for (size_t j = 0; j < 3; j++) {
+        if ((disabledFace.horizonEdgesOnCurrentIteration & (1 << j)) == 0) {
+          if (disableCounter < horizonEdgeCount * 2) {
+            // Use on this iteration
+            newHalfedgeIndices.push_back(halfEdgesMesh[j]);
+            disableCounter++;
+          } else {
+            // Mark for reuse on a later iteration step
+            mesh.disableHalfedge(halfEdgesMesh[j]);
+          }
+        }
+      }
+      // Disable the face, but retain pointer to the points that were on the
+      // positive side of it. We need to assign those points to the new faces we
+      // create shortly.
+      auto t = mesh.disableFace(faceIndex);
+      if (t) {
+        // Because we should not assign point vectors to faces unless needed...
+        DEBUG_ASSERT(t->size(), logicErr, "t should not be empty");
+        disabledFacePointVectors.push_back(std::move(t));
+      }
+    }
+    if (disableCounter < horizonEdgeCount * 2) {
+      const size_t newHalfEdgesNeeded = horizonEdgeCount * 2 - disableCounter;
+      for (size_t i = 0; i < newHalfEdgesNeeded; i++) {
+        newHalfedgeIndices.push_back(mesh.addHalfedge());
+      }
+    }
+
+    // Create new faces using the edgeloop
+    for (size_t i = 0; i < horizonEdgeCount; i++) {
+      const size_t AB = horizonEdgesData[i];
+
+      auto horizonEdgeVertexIndices =
+          mesh.getVertexIndicesOfHalfEdge(mesh.halfedges[AB]);
+      size_t A, B, C;
+      A = horizonEdgeVertexIndices[0];
+      B = horizonEdgeVertexIndices[1];
+      C = activePointIndex;
+
+      const size_t newFaceIndex = mesh.addFace();
+      newFaceIndices.push_back(newFaceIndex);
+
+      const size_t CA = newHalfedgeIndices[2 * i + 0];
+      const size_t BC = newHalfedgeIndices[2 * i + 1];
+
+      mesh.halfedgeNext[AB] = BC;
+      mesh.halfedgeNext[BC] = CA;
+      mesh.halfedgeNext[CA] = AB;
+
+      mesh.halfedgeToFace[BC] = newFaceIndex;
+      mesh.halfedgeToFace[CA] = newFaceIndex;
+      mesh.halfedgeToFace[AB] = newFaceIndex;
+
+      mesh.halfedges[CA].endVert = A;
+      mesh.halfedges[BC].endVert = C;
+
+      auto& newFace = mesh.faces[newFaceIndex];
+
+      const vec3 planeNormal = getTriangleNormal(
+          originalVertexData[A], originalVertexData[B], activePoint);
+      newFace.P = Plane(planeNormal, activePoint);
+      newFace.he = AB;
+
+      mesh.halfedges[CA].pairedHalfedge =
+          newHalfedgeIndices[i > 0 ? i * 2 - 1 : 2 * horizonEdgeCount - 1];
+      mesh.halfedges[BC].pairedHalfedge =
+          newHalfedgeIndices[((i + 1) * 2) % (horizonEdgeCount * 2)];
+    }
+
+    // Assign points that were on the positive side of the disabled faces to the
+    // new faces.
+    for (auto& disabledPoints : disabledFacePointVectors) {
+      DEBUG_ASSERT(disabledPoints != nullptr, logicErr,
+                   "disabledPoints should not be null");
+      for (const auto& point : *(disabledPoints)) {
+        if (point == activePointIndex) {
+          continue;
+        }
+        for (size_t j = 0; j < horizonEdgeCount; j++) {
+          if (addPointToFace(mesh.faces[newFaceIndices[j]], point)) {
+            break;
+          }
+        }
+      }
+      // The points are no longer needed: we can move them to the vector pool
+      // for reuse.
+      reclaimToIndexVectorPool(disabledPoints);
+    }
+
+    // Increase face stack size if needed
+    for (const auto newFaceIndex : newFaceIndices) {
+      auto& newFace = mesh.faces[newFaceIndex];
+      if (newFace.pointsOnPositiveSide) {
+        DEBUG_ASSERT(newFace.pointsOnPositiveSide->size() > 0, logicErr,
+                     "there should be points on the positive side");
+        if (!newFace.inFaceStack) {
+          faceList.push_back(newFaceIndex);
+          newFace.inFaceStack = 1;
+        }
+      }
+    }
+  }
+
+  // Cleanup
+  indexVectorPool.clear();
+}
+
+/*
+ * Private helper functions
+ */
+
+std::array<size_t, 6> QuickHull::getExtremeValues() {
+  std::array<size_t, 6> outIndices{0, 0, 0, 0, 0, 0};
+  double extremeVals[6] = {originalVertexData[0].x, originalVertexData[0].x,
+                           originalVertexData[0].y, originalVertexData[0].y,
+                           originalVertexData[0].z, originalVertexData[0].z};
+  const size_t vCount = originalVertexData.size();
+  for (size_t i = 1; i < vCount; i++) {
+    const vec3& pos = originalVertexData[i];
+    if (pos.x > extremeVals[0]) {
+      extremeVals[0] = pos.x;
+      outIndices[0] = i;
+    } else if (pos.x < extremeVals[1]) {
+      extremeVals[1] = pos.x;
+      outIndices[1] = i;
+    }
+    if (pos.y > extremeVals[2]) {
+      extremeVals[2] = pos.y;
+      outIndices[2] = i;
+    } else if (pos.y < extremeVals[3]) {
+      extremeVals[3] = pos.y;
+      outIndices[3] = i;
+    }
+    if (pos.z > extremeVals[4]) {
+      extremeVals[4] = pos.z;
+      outIndices[4] = i;
+    } else if (pos.z < extremeVals[5]) {
+      extremeVals[5] = pos.z;
+      outIndices[5] = i;
+    }
+  }
+  return outIndices;
+}
+
+bool QuickHull::reorderHorizonEdges(VecView<size_t>& horizonEdges) {
+  const size_t horizonEdgeCount = horizonEdges.size();
+  for (size_t i = 0; i + 1 < horizonEdgeCount; i++) {
+    const size_t endVertexCheck = mesh.halfedges[horizonEdges[i]].endVert;
+    bool foundNext = false;
+    for (size_t j = i + 1; j < horizonEdgeCount; j++) {
+      const size_t beginVertex =
+          mesh.halfedges[mesh.halfedges[horizonEdges[j]].pairedHalfedge]
+              .endVert;
+      if (beginVertex == endVertexCheck) {
+        std::swap(horizonEdges[i + 1], horizonEdges[j]);
+        foundNext = true;
+        break;
+      }
+    }
+    if (!foundNext) {
+      return false;
+    }
+  }
+  DEBUG_ASSERT(
+      mesh.halfedges[horizonEdges[horizonEdges.size() - 1]].endVert ==
+          mesh.halfedges[mesh.halfedges[horizonEdges[0]].pairedHalfedge]
+              .endVert,
+      logicErr, "invalid halfedge");
+  return true;
+}
+
+double QuickHull::getScale(const std::array<size_t, 6>& extremeValuesInput) {
+  double s = 0;
+  for (size_t i = 0; i < 6; i++) {
+    const double* v =
+        (const double*)(&originalVertexData[extremeValuesInput[i]]);
+    v += i / 2;
+    auto a = std::abs(*v);
+    if (a > s) {
+      s = a;
+    }
+  }
+  return s;
+}
+
+void QuickHull::setupInitialTetrahedron() {
+  const size_t vertexCount = originalVertexData.size();
+
+  // If we have at most 4 points, just return a degenerate tetrahedron:
+  if (vertexCount <= 4) {
+    size_t v[4] = {0, std::min((size_t)1, vertexCount - 1),
+                   std::min((size_t)2, vertexCount - 1),
+                   std::min((size_t)3, vertexCount - 1)};
+    const vec3 N =
+        getTriangleNormal(originalVertexData[v[0]], originalVertexData[v[1]],
+                          originalVertexData[v[2]]);
+    const Plane trianglePlane(N, originalVertexData[v[0]]);
+    if (trianglePlane.isPointOnPositiveSide(originalVertexData[v[3]])) {
+      std::swap(v[0], v[1]);
+    }
+    return mesh.setup(v[0], v[1], v[2], v[3]);
+  }
+
+  // Find two most distant extreme points.
+  double maxD = epsilonSquared;
+  std::pair<size_t, size_t> selectedPoints;
+  for (size_t i = 0; i < 6; i++) {
+    for (size_t j = i + 1; j < 6; j++) {
+      // A library squaredDistance helper exists, but it could not be included
+      // here, so the local getSquaredDistance is used instead.
+      const double d = getSquaredDistance(originalVertexData[extremeValues[i]],
+                                          originalVertexData[extremeValues[j]]);
+      if (d > maxD) {
+        maxD = d;
+        selectedPoints = {extremeValues[i], extremeValues[j]};
+      }
+    }
+  }
+  if (maxD == epsilonSquared) {
+    // A degenerate case: the point cloud seems to consist of a single point
+    return mesh.setup(0, std::min((size_t)1, vertexCount - 1),
+                      std::min((size_t)2, vertexCount - 1),
+                      std::min((size_t)3, vertexCount - 1));
+  }
+  DEBUG_ASSERT(selectedPoints.first != selectedPoints.second, logicErr,
+               "degenerate selectedPoints");
+
+  // Find the most distant point to the line between the two chosen extreme
+  // points.
+  const Ray r(originalVertexData[selectedPoints.first],
+              (originalVertexData[selectedPoints.second] -
+               originalVertexData[selectedPoints.first]));
+  maxD = epsilonSquared;
+  size_t maxI = std::numeric_limits<size_t>::max();
+  const size_t vCount = originalVertexData.size();
+  for (size_t i = 0; i < vCount; i++) {
+    const double distToRay =
+        getSquaredDistanceBetweenPointAndRay(originalVertexData[i], r);
+    if (distToRay > maxD) {
+      maxD = distToRay;
+      maxI = i;
+    }
+  }
+  if (maxD == epsilonSquared) {
+    // It appears that the point cloud belongs to a 1-dimensional subspace of
+    // R^3: the convex hull has no volume, so return a thin triangle. Pick any
+    // point other than selectedPoints.first and selectedPoints.second as the
+    // third point of the triangle.
+    auto it =
+        std::find_if(originalVertexData.begin(), originalVertexData.end(),
+                     [&](const vec3& ve) {
+                       return ve != originalVertexData[selectedPoints.first] &&
+                              ve != originalVertexData[selectedPoints.second];
+                     });
+    const size_t thirdPoint =
+        (it == originalVertexData.end())
+            ? selectedPoints.first
+            : std::distance(originalVertexData.begin(), it);
+    it =
+        std::find_if(originalVertexData.begin(), originalVertexData.end(),
+                     [&](const vec3& ve) {
+                       return ve != originalVertexData[selectedPoints.first] &&
+                              ve != originalVertexData[selectedPoints.second] &&
+                              ve != originalVertexData[thirdPoint];
+                     });
+    const size_t fourthPoint =
+        (it == originalVertexData.end())
+            ? selectedPoints.first
+            : std::distance(originalVertexData.begin(), it);
+    return mesh.setup(selectedPoints.first, selectedPoints.second, thirdPoint,
+                      fourthPoint);
+  }
+
+  // These three points form the base triangle for our tetrahedron.
+  DEBUG_ASSERT(selectedPoints.first != maxI && selectedPoints.second != maxI,
+               logicErr, "degenerate selectedPoints");
+  std::array<size_t, 3> baseTriangle{selectedPoints.first,
+                                     selectedPoints.second, maxI};
+  const vec3 baseTriangleVertices[] = {originalVertexData[baseTriangle[0]],
+                                       originalVertexData[baseTriangle[1]],
+                                       originalVertexData[baseTriangle[2]]};
+
+  // Next step is to find the 4th vertex of the tetrahedron. We naturally choose
+  // the point farthest away from the triangle plane.
+  maxD = m_epsilon;
+  maxI = 0;
+  const vec3 N =
+      getTriangleNormal(baseTriangleVertices[0], baseTriangleVertices[1],
+                        baseTriangleVertices[2]);
+  Plane trianglePlane(N, baseTriangleVertices[0]);
+  for (size_t i = 0; i < vCount; i++) {
+    const double d = std::abs(
+        getSignedDistanceToPlane(originalVertexData[i], trianglePlane));
+    if (d > maxD) {
+      maxD = d;
+      maxI = i;
+    }
+  }
+  if (maxD == m_epsilon) {
+    // All the points seem to lie on a 2D subspace of R^3. How to handle this?
+    // Well, let's add one extra point to the point cloud so that the convex
+    // hull will have volume.
+    planar = true;
+    const vec3 N1 =
+        getTriangleNormal(baseTriangleVertices[1], baseTriangleVertices[2],
+                          baseTriangleVertices[0]);
+    planarPointCloudTemp = Vec<vec3>(originalVertexData);
+    const vec3 extraPoint = N1 + originalVertexData[0];
+    planarPointCloudTemp.push_back(extraPoint);
+    maxI = planarPointCloudTemp.size() - 1;
+    originalVertexData = planarPointCloudTemp;
+  }
+
+  // Enforce CCW orientation (if user prefers clockwise orientation, swap two
+  // vertices in each triangle when final mesh is created)
+  const Plane triPlane(N, baseTriangleVertices[0]);
+  if (triPlane.isPointOnPositiveSide(originalVertexData[maxI])) {
+    std::swap(baseTriangle[0], baseTriangle[1]);
+  }
+
+  // Create a tetrahedron half edge mesh and compute planes defined by each
+  // triangle
+  mesh.setup(baseTriangle[0], baseTriangle[1], baseTriangle[2], maxI);
+  for (auto& f : mesh.faces) {
+    auto v = mesh.getVertexIndicesOfFace(f);
+    const vec3 N1 =
+        getTriangleNormal(originalVertexData[v[0]], originalVertexData[v[1]],
+                          originalVertexData[v[2]]);
+    const Plane plane(N1, originalVertexData[v[0]]);
+    f.P = plane;
+  }
+
+  // Finally we assign a face for each vertex outside the tetrahedron (vertices
+  // inside the tetrahedron have no role anymore)
+  for (size_t i = 0; i < vCount; i++) {
+    for (auto& face : mesh.faces) {
+      if (addPointToFace(face, i)) {
+        break;
+      }
+    }
+  }
+}
+
+std::unique_ptr<Vec<size_t>> QuickHull::getIndexVectorFromPool() {
+  auto r = indexVectorPool.get();
+  r->resize(0);
+  return r;
+}
+
+void QuickHull::reclaimToIndexVectorPool(std::unique_ptr<Vec<size_t>>& ptr) {
+  const size_t oldSize = ptr->size();
+  if ((oldSize + 1) * 128 < ptr->capacity()) {
+    // Reduce memory usage! Huge vectors are needed at the beginning of
+    // iteration when faces have many points on their positive side. Later on,
+    // smaller vectors will suffice.
+    ptr.reset(nullptr);
+    return;
+  }
+  indexVectorPool.reclaim(ptr);
+}
+
+bool QuickHull::addPointToFace(typename MeshBuilder::Face& f,
+                               size_t pointIndex) {
+  const double D =
+      getSignedDistanceToPlane(originalVertexData[pointIndex], f.P);
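+  // D is scaled by the length of the unnormalized plane normal, so comparing
+  // D * D against epsilonSquared * sqrNLength tests whether the true signed
+  // distance exceeds m_epsilon.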
+  if (D > 0 && D * D > epsilonSquared * f.P.sqrNLength) {
+    if (!f.pointsOnPositiveSide) {
+      f.pointsOnPositiveSide = getIndexVectorFromPool();
+    }
+    f.pointsOnPositiveSide->push_back(pointIndex);
+    if (D > f.mostDistantPointDist) {
+      f.mostDistantPointDist = D;
+      f.mostDistantPoint = pointIndex;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Wrapper to call the QuickHull algorithm with the given vertex data to build
+// the Impl
+void Manifold::Impl::Hull(VecView<vec3> vertPos) {
+  size_t numVert = vertPos.size();
+  if (numVert < 4) {
+    status_ = Error::InvalidConstruction;
+    return;
+  }
+
+  QuickHull qh(vertPos);
+  std::tie(halfedge_, vertPos_) = qh.buildMesh();
+  CalculateBBox();
+  SetEpsilon();
+  CalculateNormals();
+  InitializeOriginal();
+  Finish();
+  CreateFaces();
+}
+
+}  // namespace manifold

+ 288 - 0
thirdparty/manifold/src/quickhull.h

@@ -0,0 +1,288 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Derived from the public domain work of Antti Kuukka at
+// https://github.com/akuukka/quickhull
+
+/*
+ * INPUT:  a list of points in 3D space (for example, vertices of a 3D mesh)
+ *
+ * OUTPUT: a ConvexHull object which provides vertex and index buffers of the
+ * generated convex hull as a triangle mesh.
+ *
+ * The implementation is thread-safe if each thread is using its own QuickHull
+ * object.
+ *
+ * A minimal usage sketch appears after the QuickHull class at the end of this
+ * header.
+ *
+ * SUMMARY OF THE ALGORITHM:
+ *   - Create the initial simplex (tetrahedron) using extreme points. We now
+ *     have four faces, and they form a convex mesh M.
+ *   - Assign each point to the first face on whose positive side it lies (so
+ *     each point is assigned to at most one face). Points inside the initial
+ *     tetrahedron are left behind now and no longer affect the calculations.
+ *   - Add all faces that have points assigned to them to the Face Stack.
+ *   - Iterate until the Face Stack is empty:
+ *     - Pop the topmost face F from the stack.
+ *     - From the points assigned to F, pick the point P that is farthest away
+ *       from the plane defined by F.
+ *     - Find all faces of M that have P on their positive side. Let us call
+ *       these the "visible faces".
+ *     - Because of the way M is constructed, these faces are connected. Solve
+ *       their horizon edge loop.
+ *     - "Extrude to P": create new faces by connecting P with the points
+ *       belonging to the horizon edge. Add the new faces to M and remove the
+ *       visible faces from M.
+ *     - Each point that was assigned to the visible faces is now assigned to
+ *       at most one of the newly created faces.
+ *     - The new faces that have points assigned to them are added to the top
+ *       of the Face Stack.
+ *   - M is now the convex hull.
+ */
+#pragma once
+#include <array>
+#include <deque>
+#include <vector>
+
+#include "./shared.h"
+#include "./vec.h"
+
+namespace manifold {
+
+class Pool {
+  std::vector<std::unique_ptr<Vec<size_t>>> data;
+
+ public:
+  void clear() { data.clear(); }
+
+  void reclaim(std::unique_ptr<Vec<size_t>>& ptr) {
+    data.push_back(std::move(ptr));
+  }
+
+  std::unique_ptr<Vec<size_t>> get() {
+    if (data.size() == 0) {
+      return std::make_unique<Vec<size_t>>();
+    }
+    auto it = data.end() - 1;
+    std::unique_ptr<Vec<size_t>> r = std::move(*it);
+    data.erase(it);
+    return r;
+  }
+};
+
+class Plane {
+ public:
+  vec3 N;
+
+  // Signed distance (if normal is of length 1) to the plane from origin
+  double D;
+
+  // Normal length squared
+  double sqrNLength;
+
+  bool isPointOnPositiveSide(const vec3& Q) const {
+    double d = la::dot(N, Q) + D;
+    if (d >= 0) return true;
+    return false;
+  }
+
+  Plane() = default;
+
+  // Construct a plane using normal N and any point P on the plane
+  Plane(const vec3& N, const vec3& P)
+      : N(N), D(la::dot(-N, P)), sqrNLength(la::dot(N, N)) {}
+};
+
+struct Ray {
+  const vec3 S;
+  const vec3 V;
+  const double VInvLengthSquared;
+
+  Ray(const vec3& S, const vec3& V)
+      : S(S), V(V), VInvLengthSquared(1 / (la::dot(V, V))) {}
+};
+
+class MeshBuilder {
+ public:
+  struct Face {
+    int he;
+    Plane P{};
+    double mostDistantPointDist = 0.0;
+    size_t mostDistantPoint = 0;
+    size_t visibilityCheckedOnIteration = 0;
+    std::uint8_t isVisibleFaceOnCurrentIteration : 1;
+    std::uint8_t inFaceStack : 1;
+    // Bit for each half edge assigned to this face, each being 0 or 1 depending
+    // on whether the edge belongs to horizon edge
+    std::uint8_t horizonEdgesOnCurrentIteration : 3;
+    std::unique_ptr<Vec<size_t>> pointsOnPositiveSide;
+
+    Face(size_t he)
+        : he(he),
+          isVisibleFaceOnCurrentIteration(0),
+          inFaceStack(0),
+          horizonEdgesOnCurrentIteration(0) {}
+
+    Face()
+        : he(-1),
+          isVisibleFaceOnCurrentIteration(0),
+          inFaceStack(0),
+          horizonEdgesOnCurrentIteration(0) {}
+
+    void disable() { he = -1; }
+
+    bool isDisabled() const { return he == -1; }
+  };
+
+  // Mesh data
+  std::vector<Face> faces;
+  Vec<Halfedge> halfedges;
+  Vec<int> halfedgeToFace;
+  Vec<int> halfedgeNext;
+
+  // When the mesh is modified and faces and half edges are removed from it, we
+  // do not actually remove them from the container vectors. Instead, they are
+  // marked as disabled, which means that the indices can be reused when we need
+  // to add new faces and half edges to the mesh. We store the free indices in
+  // the following vectors.
+  Vec<size_t> disabledFaces, disabledHalfedges;
+
+  size_t addFace();
+
+  size_t addHalfedge();
+
+  // Mark a face as disabled and return a pointer to the points that were on the
+  // positive side of it.
+  std::unique_ptr<Vec<size_t>> disableFace(size_t faceIndex) {
+    auto& f = faces[faceIndex];
+    f.disable();
+    disabledFaces.push_back(faceIndex);
+    return std::move(f.pointsOnPositiveSide);
+  }
+
+  void disableHalfedge(size_t heIndex) {
+    auto& he = halfedges[heIndex];
+    he.pairedHalfedge = -1;
+    disabledHalfedges.push_back(heIndex);
+  }
+
+  MeshBuilder() = default;
+
+  // Create a mesh with initial tetrahedron ABCD. Dot product of AB with the
+  // normal of triangle ABC should be negative.
+  void setup(int a, int b, int c, int d);
+
+  std::array<int, 3> getVertexIndicesOfFace(const Face& f) const;
+
+  std::array<int, 2> getVertexIndicesOfHalfEdge(const Halfedge& he) const {
+    return {halfedges[he.pairedHalfedge].endVert, he.endVert};
+  }
+
+  std::array<int, 3> getHalfEdgeIndicesOfFace(const Face& f) const {
+    return {f.he, halfedgeNext[f.he], halfedgeNext[halfedgeNext[f.he]]};
+  }
+};
+
+class HalfEdgeMesh {
+ public:
+  Vec<vec3> vertices;
+  // Index of one of the half edges of the faces
+  std::vector<size_t> halfEdgeIndexFaces;
+  Vec<Halfedge> halfedges;
+  Vec<int> halfedgeToFace;
+  Vec<int> halfedgeNext;
+
+  HalfEdgeMesh(const MeshBuilder& builderObject,
+               const VecView<vec3>& vertexData);
+};
+
+double defaultEps();
+
+class QuickHull {
+  struct FaceData {
+    int faceIndex;
+    // If the face turns out not to be visible, this half edge will be marked as
+    // horizon edge
+    int enteredFromHalfedge;
+  };
+
+  double m_epsilon, epsilonSquared, scale;
+  bool planar;
+  Vec<vec3> planarPointCloudTemp;
+  VecView<vec3> originalVertexData;
+  MeshBuilder mesh;
+  std::array<size_t, 6> extremeValues;
+  size_t failedHorizonEdges = 0;
+
+  // Temporary variables used during iteration process
+  Vec<size_t> newFaceIndices;
+  Vec<size_t> newHalfedgeIndices;
+  Vec<size_t> visibleFaces;
+  Vec<size_t> horizonEdgesData;
+  Vec<FaceData> possiblyVisibleFaces;
+  std::vector<std::unique_ptr<Vec<size_t>>> disabledFacePointVectors;
+  std::deque<int> faceList;
+
+  // Create a half edge mesh representing the base tetrahedron from which the
+  // QuickHull iteration proceeds. extremeValues must be properly set up when
+  // this is called.
+  void setupInitialTetrahedron();
+
+  // Given a list of half edges, try to rearrange them so that they form a loop.
+  // Return true on success.
+  bool reorderHorizonEdges(VecView<size_t>& horizonEdges);
+
+  // Find indices of extreme values (max x, min x, max y, min y, max z, min z)
+  // for the given point cloud
+  std::array<size_t, 6> getExtremeValues();
+
+  // Compute scale of the vertex data.
+  double getScale(const std::array<size_t, 6>& extremeValuesInput);
+
+  // Each face contains a unique pointer to a vector of indices. However, many -
+  // often most - faces do not have any points on the positive side of them,
+  // especially at the end of the iteration. When a face is removed from the
+  // mesh, its associated point vector, if such exists, is moved to the index
+  // vector pool, and when we need to add new faces with points on the positive
+  // side to the mesh, we reuse these vectors. This reduces the amount of
+  // std::vectors we have to deal with, and impact on performance is remarkable.
+  Pool indexVectorPool;
+  inline std::unique_ptr<Vec<size_t>> getIndexVectorFromPool();
+  inline void reclaimToIndexVectorPool(std::unique_ptr<Vec<size_t>>& ptr);
+
+  // Associates a point with a face if the point resides on the positive side of
+  // the plane. Returns true if the points was on the positive side.
+  inline bool addPointToFace(typename MeshBuilder::Face& f, size_t pointIndex);
+
+  // Builds the convex half edge mesh into `mesh`, from which buildMesh()
+  // assembles the halfedge and vertex vectors that it returns.
+  void createConvexHalfedgeMesh();
+
+ public:
+  // This function assumes that the pointCloudVec data resides in memory in the
+  // following format: x_0,y_0,z_0,x_1,y_1,z_1,...
+  QuickHull(VecView<vec3> pointCloudVec)
+      : originalVertexData(VecView(pointCloudVec)) {}
+
+  // Computes the convex hull for a given point cloud.
+  // Params:
+  //   eps: minimum distance to a plane to consider a point to be on its
+  //        positive side (for a point cloud with scale 1).
+  // Returns: the convex hull of the point cloud as a halfedge vector and a
+  //          vertex vector.
+  std::pair<Vec<Halfedge>, Vec<vec3>> buildMesh(double eps = defaultEps());
+};
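+
+// A minimal, hedged usage sketch (mirroring Manifold::Impl::Hull in
+// quickhull.cpp); `points` stands for a hypothetical Vec<vec3> of input
+// positions:
+//
+//   QuickHull qh(points);
+//   Vec<Halfedge> hullHalfedges;
+//   Vec<vec3> hullVerts;
+//   std::tie(hullHalfedges, hullVerts) = qh.buildMesh();  // default epsilon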
+
+}  // namespace manifold

+ 533 - 0
thirdparty/manifold/src/sdf.cpp

@@ -0,0 +1,533 @@
+// Copyright 2023 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./hashtable.h"
+#include "./impl.h"
+#include "./parallel.h"
+#include "./utils.h"
+#include "./vec.h"
+#include "manifold/manifold.h"
+
+namespace {
+using namespace manifold;
+
+constexpr int kCrossing = -2;
+constexpr int kNone = -1;
+constexpr ivec4 kVoxelOffset(1, 1, 1, 0);
+// Maximum fraction of spacing that a vert can move.
+constexpr double kS = 0.25;
+// Corresponding approximate distance ratio bound.
+constexpr double kD = 1 / kS - 1;
+// Maximum number of opposed verts (of 7) to allow collapse.
+constexpr int kMaxOpposed = 3;
+
+ivec3 TetTri0(int i) {
+  constexpr ivec3 tetTri0[16] = {{-1, -1, -1},  //
+                                 {0, 3, 4},     //
+                                 {0, 1, 5},     //
+                                 {1, 5, 3},     //
+                                 {1, 4, 2},     //
+                                 {1, 0, 3},     //
+                                 {2, 5, 0},     //
+                                 {5, 3, 2},     //
+                                 {2, 3, 5},     //
+                                 {0, 5, 2},     //
+                                 {3, 0, 1},     //
+                                 {2, 4, 1},     //
+                                 {3, 5, 1},     //
+                                 {5, 1, 0},     //
+                                 {4, 3, 0},     //
+                                 {-1, -1, -1}};
+  return tetTri0[i];
+}
+
+ivec3 TetTri1(int i) {
+  constexpr ivec3 tetTri1[16] = {{-1, -1, -1},  //
+                                 {-1, -1, -1},  //
+                                 {-1, -1, -1},  //
+                                 {3, 4, 1},     //
+                                 {-1, -1, -1},  //
+                                 {3, 2, 1},     //
+                                 {0, 4, 2},     //
+                                 {-1, -1, -1},  //
+                                 {-1, -1, -1},  //
+                                 {2, 4, 0},     //
+                                 {1, 2, 3},     //
+                                 {-1, -1, -1},  //
+                                 {1, 4, 3},     //
+                                 {-1, -1, -1},  //
+                                 {-1, -1, -1},  //
+                                 {-1, -1, -1}};
+  return tetTri1[i];
+}
+
+ivec4 Neighbor(ivec4 base, int i) {
+  constexpr ivec4 neighbors[14] = {{0, 0, 0, 1},     //
+                                   {1, 0, 0, 0},     //
+                                   {0, 1, 0, 0},     //
+                                   {0, 0, 1, 0},     //
+                                   {-1, 0, 0, 1},    //
+                                   {0, -1, 0, 1},    //
+                                   {0, 0, -1, 1},    //
+                                   {-1, -1, -1, 1},  //
+                                   {-1, 0, 0, 0},    //
+                                   {0, -1, 0, 0},    //
+                                   {0, 0, -1, 0},    //
+                                   {0, -1, -1, 1},   //
+                                   {-1, 0, -1, 1},   //
+                                   {-1, -1, 0, 1}};
+  ivec4 neighborIndex = base + neighbors[i];
+  if (neighborIndex.w == 2) {
+    neighborIndex += 1;
+    neighborIndex.w = 0;
+  }
+  return neighborIndex;
+}
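+
+// A worked example of the wrap above: Neighbor({2, 3, 4, 1}, 7) first yields
+// {1, 2, 3, 2}; since w == 2, the index is advanced one cell in x, y, and z
+// and mapped back to the w = 0 lattice, giving {2, 3, 4, 0}.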
+
+Uint64 EncodeIndex(ivec4 gridPos, ivec3 gridPow) {
+  return static_cast<Uint64>(gridPos.w) | static_cast<Uint64>(gridPos.z) << 1 |
+         static_cast<Uint64>(gridPos.y) << (1 + gridPow.z) |
+         static_cast<Uint64>(gridPos.x) << (1 + gridPow.z + gridPow.y);
+}
+
+ivec4 DecodeIndex(Uint64 idx, ivec3 gridPow) {
+  ivec4 gridPos;
+  gridPos.w = idx & 1;
+  idx = idx >> 1;
+  gridPos.z = idx & ((1 << gridPow.z) - 1);
+  idx = idx >> gridPow.z;
+  gridPos.y = idx & ((1 << gridPow.y) - 1);
+  idx = idx >> gridPow.y;
+  gridPos.x = idx & ((1 << gridPow.x) - 1);
+  return gridPos;
+}
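+
+// A worked example of this packing, assuming gridPow = {3, 3, 3}:
+//   EncodeIndex({5, 2, 7, 1}, {3, 3, 3})
+//       = 1 | (7 << 1) | (2 << 4) | (5 << 7) = 1 + 14 + 32 + 640 = 687,
+// and DecodeIndex(687, {3, 3, 3}) recovers {5, 2, 7, 1}.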
+
+vec3 Position(ivec4 gridIndex, vec3 origin, vec3 spacing) {
+  return origin + spacing * (vec3(gridIndex) + (gridIndex.w == 1 ? 0.0 : -0.5));
+}
+
+vec3 Bound(vec3 pos, vec3 origin, vec3 spacing, ivec3 gridSize) {
+  return min(max(pos, origin), origin + spacing * (vec3(gridSize) - 1));
+}
+
+double BoundedSDF(ivec4 gridIndex, vec3 origin, vec3 spacing, ivec3 gridSize,
+                  double level, std::function<double(vec3)> sdf) {
+  const ivec3 xyz(gridIndex);
+  const int lowerBoundDist = minelem(xyz);
+  const int upperBoundDist = minelem(gridSize - xyz);
+  const int boundDist = std::min(lowerBoundDist, upperBoundDist - gridIndex.w);
+
+  if (boundDist < 0) {
+    return 0.0;
+  }
+  const double d = sdf(Position(gridIndex, origin, spacing)) - level;
+  return boundDist == 0 ? std::min(d, 0.0) : d;
+}
+
+// Simplified ITP root finding algorithm - same worst-case performance as
+// bisection, better average performance.
+inline vec3 FindSurface(vec3 pos0, double d0, vec3 pos1, double d1, double tol,
+                        double level, std::function<double(vec3)> sdf) {
+  if (d0 == 0) {
+    return pos0;
+  } else if (d1 == 0) {
+    return pos1;
+  }
+
+  // Sole tuning parameter, k: (0, 1) - smaller value gets better median
+  // performance, but also hits the worst case more often.
+  const double k = 0.1;
+  const double check = 2 * tol / la::length(pos0 - pos1);
+  double frac = 1;
+  double biFrac = 1;
+  while (frac > check) {
+    const double t = la::lerp(d0 / (d0 - d1), 0.5, k);
+    const double r = biFrac / frac - 0.5;
+    const double x = la::abs(t - 0.5) < r ? t : 0.5 - r * (t < 0.5 ? 1 : -1);
+
+    const vec3 mid = la::lerp(pos0, pos1, x);
+    const double d = sdf(mid) - level;
+
+    if ((d > 0) == (d0 > 0)) {
+      d0 = d;
+      pos0 = mid;
+      frac *= 1 - x;
+    } else {
+      d1 = d;
+      pos1 = mid;
+      frac *= x;
+    }
+    biFrac /= 2;
+  }
+
+  return la::lerp(pos0, pos1, d0 / (d0 - d1));
+}
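+
+// For comparison, plain bisection over the same sign-change bracket would look
+// like the hedged sketch below (not part of the library); it has the same
+// worst case but a slower average rate than the ITP scheme above:
+//
+//   vec3 Bisect(vec3 pos0, double d0, vec3 pos1, double d1, double tol,
+//               double level, std::function<double(vec3)> sdf) {
+//     while (la::length(pos1 - pos0) > 2 * tol) {
+//       const vec3 mid = la::lerp(pos0, pos1, 0.5);
+//       const double d = sdf(mid) - level;
+//       if ((d > 0) == (d0 > 0)) {
+//         d0 = d;
+//         pos0 = mid;
+//       } else {
+//         d1 = d;
+//         pos1 = mid;
+//       }
+//     }
+//     return la::lerp(pos0, pos1, d0 / (d0 - d1));
+//   }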
+
+/**
+ * Each GridVert is connected to 14 others, and in charge of 7 of these edges
+ * (see Neighbor() above). Each edge that changes sign contributes one vert,
+ * unless the GridVert is close enough to the surface, in which case it
+ * contributes only a single movedVert and all crossing edgeVerts refer to that.
+ */
+struct GridVert {
+  double distance = NAN;
+  int movedVert = kNone;
+  int edgeVerts[7] = {kNone, kNone, kNone, kNone, kNone, kNone, kNone};
+
+  inline bool HasMoved() const { return movedVert >= 0; }
+
+  inline bool SameSide(double dist) const {
+    return (dist > 0) == (distance > 0);
+  }
+
+  inline int Inside() const { return distance > 0 ? 1 : -1; }
+
+  inline int NeighborInside(int i) const {
+    return Inside() * (edgeVerts[i] == kNone ? 1 : -1);
+  }
+};
+
+struct NearSurface {
+  VecView<vec3> vertPos;
+  VecView<int> vertIndex;
+  HashTableD<GridVert> gridVerts;
+  VecView<const double> voxels;
+  const std::function<double(vec3)> sdf;
+  const vec3 origin;
+  const ivec3 gridSize;
+  const ivec3 gridPow;
+  const vec3 spacing;
+  const double level;
+  const double tol;
+
+  inline void operator()(Uint64 index) {
+    ZoneScoped;
+    if (gridVerts.Full()) return;
+
+    const ivec4 gridIndex = DecodeIndex(index, gridPow);
+
+    if (la::any(la::greater(ivec3(gridIndex), gridSize))) return;
+
+    GridVert gridVert;
+    gridVert.distance = voxels[EncodeIndex(gridIndex + kVoxelOffset, gridPow)];
+
+    bool keep = false;
+    double vMax = 0;
+    int closestNeighbor = -1;
+    int opposedVerts = 0;
+    for (int i = 0; i < 7; ++i) {
+      const double val =
+          voxels[EncodeIndex(Neighbor(gridIndex, i) + kVoxelOffset, gridPow)];
+      const double valOp = voxels[EncodeIndex(
+          Neighbor(gridIndex, i + 7) + kVoxelOffset, gridPow)];
+
+      if (!gridVert.SameSide(val)) {
+        gridVert.edgeVerts[i] = kCrossing;
+        keep = true;
+        if (!gridVert.SameSide(valOp)) {
+          ++opposedVerts;
+        }
+        // Approximate bound on vert movement.
+        if (la::abs(val) > kD * la::abs(gridVert.distance) &&
+            la::abs(val) > la::abs(vMax)) {
+          vMax = val;
+          closestNeighbor = i;
+        }
+      } else if (!gridVert.SameSide(valOp) &&
+                 la::abs(valOp) > kD * la::abs(gridVert.distance) &&
+                 la::abs(valOp) > la::abs(vMax)) {
+        vMax = valOp;
+        closestNeighbor = i + 7;
+      }
+    }
+
+    // This is where we collapse all the crossing edge verts into this GridVert,
+    // speeding up the algorithm and avoiding poor quality triangles. Without
+    // this step the result is guaranteed 2-manifold, but with this step it can
+    // become an even-manifold with kissing verts. These must be removed in a
+    // post-process: CleanupTopology().
+    if (closestNeighbor >= 0 && opposedVerts <= kMaxOpposed) {
+      const vec3 gridPos = Position(gridIndex, origin, spacing);
+      const ivec4 neighborIndex = Neighbor(gridIndex, closestNeighbor);
+      const vec3 pos = FindSurface(gridPos, gridVert.distance,
+                                   Position(neighborIndex, origin, spacing),
+                                   vMax, tol, level, sdf);
+      // Bound the delta of each vert to ensure the tetrahedron cannot invert.
+      if (la::all(la::less(la::abs(pos - gridPos), kS * spacing))) {
+        const int idx = AtomicAdd(vertIndex[0], 1);
+        vertPos[idx] = Bound(pos, origin, spacing, gridSize);
+        gridVert.movedVert = idx;
+        for (int j = 0; j < 7; ++j) {
+          if (gridVert.edgeVerts[j] == kCrossing) gridVert.edgeVerts[j] = idx;
+        }
+        keep = true;
+      }
+    } else {
+      for (int j = 0; j < 7; ++j) gridVert.edgeVerts[j] = kNone;
+    }
+
+    if (keep) gridVerts.Insert(index, gridVert);
+  }
+};
+
+struct ComputeVerts {
+  VecView<vec3> vertPos;
+  VecView<int> vertIndex;
+  HashTableD<GridVert> gridVerts;
+  VecView<const double> voxels;
+  const std::function<double(vec3)> sdf;
+  const vec3 origin;
+  const ivec3 gridSize;
+  const ivec3 gridPow;
+  const vec3 spacing;
+  const double level;
+  const double tol;
+
+  void operator()(int idx) {
+    ZoneScoped;
+    Uint64 baseKey = gridVerts.KeyAt(idx);
+    if (baseKey == kOpen) return;
+
+    GridVert& gridVert = gridVerts.At(idx);
+
+    if (gridVert.HasMoved()) return;
+
+    const ivec4 gridIndex = DecodeIndex(baseKey, gridPow);
+
+    const vec3 position = Position(gridIndex, origin, spacing);
+
+    // These seven edges are uniquely owned by this gridVert; any of them
+    // which intersect the surface create a vert.
+    for (int i = 0; i < 7; ++i) {
+      const ivec4 neighborIndex = Neighbor(gridIndex, i);
+      const GridVert& neighbor = gridVerts[EncodeIndex(neighborIndex, gridPow)];
+
+      const double val =
+          std::isfinite(neighbor.distance)
+              ? neighbor.distance
+              : voxels[EncodeIndex(neighborIndex + kVoxelOffset, gridPow)];
+      if (gridVert.SameSide(val)) continue;
+
+      if (neighbor.HasMoved()) {
+        gridVert.edgeVerts[i] = neighbor.movedVert;
+        continue;
+      }
+
+      const int idx = AtomicAdd(vertIndex[0], 1);
+      const vec3 pos = FindSurface(position, gridVert.distance,
+                                   Position(neighborIndex, origin, spacing),
+                                   val, tol, level, sdf);
+      vertPos[idx] = Bound(pos, origin, spacing, gridSize);
+      gridVert.edgeVerts[i] = idx;
+    }
+  }
+};
+
+struct BuildTris {
+  VecView<ivec3> triVerts;
+  VecView<int> triIndex;
+  const HashTableD<GridVert> gridVerts;
+  const ivec3 gridPow;
+
+  void CreateTri(const ivec3& tri, const int edges[6]) {
+    if (tri[0] < 0) return;
+    const ivec3 verts(edges[tri[0]], edges[tri[1]], edges[tri[2]]);
+    if (verts[0] == verts[1] || verts[1] == verts[2] || verts[2] == verts[0])
+      return;
+    int idx = AtomicAdd(triIndex[0], 1);
+    triVerts[idx] = verts;
+  }
+
+  void CreateTris(const ivec4& tet, const int edges[6]) {
+    const int i = (tet[0] > 0 ? 1 : 0) + (tet[1] > 0 ? 2 : 0) +
+                  (tet[2] > 0 ? 4 : 0) + (tet[3] > 0 ? 8 : 0);
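+    // i is a 4-bit mask of which tetrahedron corners are inside the surface;
+    // TetTri0/TetTri1 look up the zero, one, or two triangles this
+    // configuration contributes (marching-tetrahedra style), as indices into
+    // the six edge verts.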
+    CreateTri(TetTri0(i), edges);
+    CreateTri(TetTri1(i), edges);
+  }
+
+  void operator()(int idx) {
+    ZoneScoped;
+    Uint64 baseKey = gridVerts.KeyAt(idx);
+    if (baseKey == kOpen) return;
+
+    const GridVert& base = gridVerts.At(idx);
+    const ivec4 baseIndex = DecodeIndex(baseKey, gridPow);
+
+    ivec4 leadIndex = baseIndex;
+    if (leadIndex.w == 0)
+      leadIndex.w = 1;
+    else {
+      leadIndex += 1;
+      leadIndex.w = 0;
+    }
+
+    // This GridVert is in charge of the 6 tetrahedra surrounding its edge in
+    // the (1,1,1) direction (edge 0).
+    ivec4 tet(base.NeighborInside(0), base.Inside(), -2, -2);
+    ivec4 thisIndex = baseIndex;
+    thisIndex.x += 1;
+
+    GridVert thisVert = gridVerts[EncodeIndex(thisIndex, gridPow)];
+
+    tet[2] = base.NeighborInside(1);
+    for (const int i : {0, 1, 2}) {
+      thisIndex = leadIndex;
+      --thisIndex[Prev3(i)];
+      // Indices take unsigned input, so check for negatives, given the
+      // decrement. If negative, the vert is outside and only connected to other
+      // outside verts - no edgeVerts.
+      GridVert nextVert = thisIndex[Prev3(i)] < 0
+                              ? GridVert()
+                              : gridVerts[EncodeIndex(thisIndex, gridPow)];
+      tet[3] = base.NeighborInside(Prev3(i) + 4);
+
+      const int edges1[6] = {base.edgeVerts[0],
+                             base.edgeVerts[i + 1],
+                             nextVert.edgeVerts[Next3(i) + 4],
+                             nextVert.edgeVerts[Prev3(i) + 1],
+                             thisVert.edgeVerts[i + 4],
+                             base.edgeVerts[Prev3(i) + 4]};
+      thisVert = nextVert;
+      CreateTris(tet, edges1);
+
+      thisIndex = baseIndex;
+      ++thisIndex[Next3(i)];
+      nextVert = gridVerts[EncodeIndex(thisIndex, gridPow)];
+      tet[2] = tet[3];
+      tet[3] = base.NeighborInside(Next3(i) + 1);
+
+      const int edges2[6] = {base.edgeVerts[0],
+                             edges1[5],
+                             thisVert.edgeVerts[i + 4],
+                             nextVert.edgeVerts[Next3(i) + 4],
+                             edges1[3],
+                             base.edgeVerts[Next3(i) + 1]};
+      thisVert = nextVert;
+      CreateTris(tet, edges2);
+
+      tet[2] = tet[3];
+    }
+  }
+};
+}  // namespace
+
+namespace manifold {
+
+/**
+ * Constructs a level-set manifold from the input Signed-Distance Function
+ * (SDF). This uses a form of Marching Tetrahedra (akin to Marching
+ * Cubes, but better for manifoldness). Instead of using a cubic grid, it uses a
+ * body-centered cubic grid (two shifted cubic grids). These grid points are
+ * snapped to the surface where possible to keep short edges from forming.
+ *
+ * @param sdf The signed-distance functor, with the function signature:
+ * `double operator()(vec3 point)`, which returns the
+ * signed distance of a given point in R^3. Positive values are inside,
+ * negative outside. There is no requirement that the function be a true
+ * distance, or even continuous.
+ * @param bounds An axis-aligned box that defines the extent of the grid.
+ * @param edgeLength Approximate maximum edge length of the triangles in the
+ * final result. This affects grid spacing, and hence has a strong effect on
+ * performance.
+ * @param level Extract the surface at this value of your sdf; defaults to
+ * zero. You can inset your mesh by using a positive value, or outset it with a
+ * negative value.
+ * @param tolerance Ensure each vertex is within this distance of the true
+ * surface. Defaults to -1, which will return the interpolated
+ * crossing-point based on the two nearest grid points. Small positive values
+ * will require more sdf evaluations per output vertex.
+ * @param canParallel Whether to allow running the evaluation in parallel.
+ * Parallel execution can crash language runtimes whose locks do not expect to
+ * be called back from unregistered threads; passing false lets bindings use
+ * LevelSet even when the library is compiled with MANIFOLD_PAR active.
+ */
+Manifold Manifold::LevelSet(std::function<double(vec3)> sdf, Box bounds,
+                            double edgeLength, double level, double tolerance,
+                            bool canParallel) {
+  if (tolerance <= 0) {
+    tolerance = std::numeric_limits<double>::infinity();
+  }
+
+  auto pImpl_ = std::make_shared<Impl>();
+  auto& vertPos = pImpl_->vertPos_;
+
+  const vec3 dim = bounds.Size();
+  const ivec3 gridSize(dim / edgeLength + 1.0);
+  const vec3 spacing = dim / (vec3(gridSize - 1));
+
+  const ivec3 gridPow(la::log2(gridSize + 2) + 1);
+  const Uint64 maxIndex = EncodeIndex(ivec4(gridSize + 2, 1), gridPow);
+
+  // Parallel execution can crash language runtimes whose locks do not expect
+  // to be called back from unregistered threads. Forcing sequential execution
+  // here lets bindings use LevelSet even when the library is compiled with
+  // MANIFOLD_PAR active.
+  const auto pol = canParallel ? autoPolicy(maxIndex) : ExecutionPolicy::Seq;
+
+  const vec3 origin = bounds.min;
+  Vec<double> voxels(maxIndex);
+  for_each_n(
+      pol, countAt(0_uz), maxIndex,
+      [&voxels, sdf, level, origin, spacing, gridSize, gridPow](Uint64 idx) {
+        voxels[idx] = BoundedSDF(DecodeIndex(idx, gridPow) - kVoxelOffset,
+                                 origin, spacing, gridSize, level, sdf);
+      });
+
+  size_t tableSize = std::min(
+      2 * maxIndex, static_cast<Uint64>(10 * la::pow(maxIndex, 0.667)));
+  HashTable<GridVert> gridVerts(tableSize);
+  vertPos.resize(gridVerts.Size() * 7);
+
+  while (1) {
+    Vec<int> index(1, 0);
+    for_each_n(pol, countAt(0_uz), EncodeIndex(ivec4(gridSize, 1), gridPow),
+               NearSurface({vertPos, index, gridVerts.D(), voxels, sdf, origin,
+                            gridSize, gridPow, spacing, level, tolerance}));
+
+    if (gridVerts.Full()) {  // Resize HashTable
+      const vec3 lastVert = vertPos[index[0] - 1];
+      const Uint64 lastIndex =
+          EncodeIndex(ivec4(ivec3((lastVert - origin) / spacing), 1), gridPow);
+      const double ratio = static_cast<double>(maxIndex) / lastIndex;
+
+      if (ratio > 1000)  // do not trust the ratio if it is too large
+        tableSize *= 2;
+      else
+        tableSize *= ratio;
+      gridVerts = HashTable<GridVert>(tableSize);
+      vertPos = Vec<vec3>(gridVerts.Size() * 7);
+    } else {  // Success
+      for_each_n(
+          pol, countAt(0), gridVerts.Size(),
+          ComputeVerts({vertPos, index, gridVerts.D(), voxels, sdf, origin,
+                        gridSize, gridPow, spacing, level, tolerance}));
+      vertPos.resize(index[0]);
+      break;
+    }
+  }
+
+  Vec<ivec3> triVerts(gridVerts.Entries() * 12);  // worst case
+
+  Vec<int> index(1, 0);
+  for_each_n(pol, countAt(0), gridVerts.Size(),
+             BuildTris({triVerts, index, gridVerts.D(), gridPow}));
+  triVerts.resize(index[0]);
+
+  pImpl_->CreateHalfedges(triVerts);
+  pImpl_->CleanupTopology();
+  pImpl_->Finish();
+  pImpl_->InitializeOriginal();
+  return Manifold(pImpl_);
+}
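+
+// Editor's note: a minimal usage sketch (not part of the upstream sources)
+// showing how LevelSet might be called. The lambda, bounds, and edge length
+// are illustrative values only; it assumes Box's min/max constructor and the
+// default arguments for level, tolerance, and canParallel declared in the
+// header. Positive SDF values are inside.
+//
+//   auto sphereSDF = [](vec3 p) { return 1.0 - la::length(p); };
+//   Manifold sphere = Manifold::LevelSet(
+//       sphereSDF,
+//       Box(vec3(-1.1), vec3(1.1)),  // grid bounds
+//       0.05);                       // approximate triangle edge length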
+}  // namespace manifold

+ 219 - 0
thirdparty/manifold/src/shared.h

@@ -0,0 +1,219 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "./parallel.h"
+#include "./sparse.h"
+#include "./utils.h"
+#include "./vec.h"
+
+namespace manifold {
+
+inline vec3 SafeNormalize(vec3 v) {
+  v = la::normalize(v);
+  return std::isfinite(v.x) ? v : vec3(0.0);
+}
+
+inline double MaxEpsilon(double minEpsilon, const Box& bBox) {
+  double epsilon = std::max(minEpsilon, kPrecision * bBox.Scale());
+  return std::isfinite(epsilon) ? epsilon : -1;
+}
+
+inline int NextHalfedge(int current) {
+  ++current;
+  if (current % 3 == 0) current -= 3;
+  return current;
+}
+
+inline mat3 NormalTransform(const mat3x4& transform) {
+  return la::inverse(la::transpose(mat3(transform)));
+}
+
+/**
+ * By using the closest axis-aligned projection to the normal instead of a
+ * projection along the normal, we avoid introducing any rounding error.
+ */
+inline mat2x3 GetAxisAlignedProjection(vec3 normal) {
+  vec3 absNormal = la::abs(normal);
+  double xyzMax;
+  mat3x2 projection;
+  if (absNormal.z > absNormal.x && absNormal.z > absNormal.y) {
+    projection = mat3x2({1.0, 0.0, 0.0},  //
+                        {0.0, 1.0, 0.0});
+    xyzMax = normal.z;
+  } else if (absNormal.y > absNormal.x) {
+    projection = mat3x2({0.0, 0.0, 1.0},  //
+                        {1.0, 0.0, 0.0});
+    xyzMax = normal.y;
+  } else {
+    projection = mat3x2({0.0, 1.0, 0.0},  //
+                        {0.0, 0.0, 1.0});
+    xyzMax = normal.x;
+  }
+  if (xyzMax < 0) projection[0] *= -1.0;
+  return la::transpose(projection);
+}
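+
+// Editor's note (illustrative, not upstream code): for a normal such as
+// (0.1, -0.2, 0.97) the z-axis dominates, so the returned 2x3 matrix simply
+// keeps the x and y coordinates and drops z; when the dominant component is
+// negative, the first row is negated so the 2D winding of projected polygons
+// stays consistent with their 3D orientation about the normal.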
+
+inline vec3 GetBarycentric(const vec3& v, const mat3& triPos,
+                           double tolerance) {
+  const mat3 edges(triPos[2] - triPos[1], triPos[0] - triPos[2],
+                   triPos[1] - triPos[0]);
+  const vec3 d2(la::dot(edges[0], edges[0]), la::dot(edges[1], edges[1]),
+                la::dot(edges[2], edges[2]));
+  const int longSide = d2[0] > d2[1] && d2[0] > d2[2] ? 0
+                       : d2[1] > d2[2]                ? 1
+                                                      : 2;
+  const vec3 crossP = la::cross(edges[0], edges[1]);
+  const double area2 = la::dot(crossP, crossP);
+  const double tol2 = tolerance * tolerance;
+
+  vec3 uvw(0.0);
+  for (const int i : {0, 1, 2}) {
+    const vec3 dv = v - triPos[i];
+    if (la::dot(dv, dv) < tol2) {
+      // Return exactly equal if within tolerance of vert.
+      uvw[i] = 1;
+      return uvw;
+    }
+  }
+
+  if (d2[longSide] < tol2) {  // point
+    return vec3(1, 0, 0);
+  } else if (area2 > d2[longSide] * tol2) {  // triangle
+    for (const int i : {0, 1, 2}) {
+      const int j = Next3(i);
+      const vec3 crossPv = la::cross(edges[i], v - triPos[j]);
+      const double area2v = la::dot(crossPv, crossPv);
+      // Return exactly equal if within tolerance of edge.
+      uvw[i] = area2v < d2[i] * tol2 ? 0 : la::dot(crossPv, crossP);
+    }
+    uvw /= (uvw[0] + uvw[1] + uvw[2]);
+    return uvw;
+  } else {  // line
+    const int nextV = Next3(longSide);
+    const double alpha =
+        la::dot(v - triPos[nextV], edges[longSide]) / d2[longSide];
+    uvw[longSide] = 0;
+    uvw[nextV] = 1 - alpha;
+    const int lastV = Next3(nextV);
+    uvw[lastV] = alpha;
+    return uvw;
+  }
+}
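+
+// Editor's note (illustrative, not upstream code): for a query point well
+// inside a non-degenerate triangle the returned uvw are the usual barycentric
+// weights, e.g. the centroid yields approximately (1/3, 1/3, 1/3). A query
+// within `tolerance` of a corner returns exactly (1, 0, 0) (or a permutation),
+// and a query near an edge has the coordinate of the opposite vertex set
+// exactly to zero, so downstream code can compare these values exactly.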
+
+/**
+ * The fundamental component of the halfedge data structure used for storing and
+ * operating on the Manifold.
+ */
+struct Halfedge {
+  int startVert, endVert;
+  int pairedHalfedge;
+  bool IsForward() const { return startVert < endVert; }
+  bool operator<(const Halfedge& other) const {
+    return startVert == other.startVert ? endVert < other.endVert
+                                        : startVert < other.startVert;
+  }
+};
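+
+// Editor's note (illustrative, not upstream code): halfedges are stored three
+// per triangle, so halfedge 3 * tri + i runs along one edge of triangle tri
+// and NextHalfedge() above advances to the next edge of the same triangle.
+// The fan of halfedges leaving a vertex can be walked with only this
+// structure (assuming a Vec<Halfedge> named halfedges with valid pairings):
+//
+//   int current = first;  // any halfedge whose startVert is the vertex
+//   do {
+//     // ... visit halfedges[current] ...
+//     current = NextHalfedge(halfedges[current].pairedHalfedge);
+//   } while (current != first);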
+
+struct Barycentric {
+  int tri;
+  vec4 uvw;
+};
+
+struct TriRef {
+  /// The unique ID of the mesh instance of this triangle. If .meshID and .tri
+  /// match for two triangles, then they are coplanar and came from the same
+  /// face.
+  int meshID;
+  /// The OriginalID of the mesh this triangle came from. This ID is ideal for
+  /// reapplying properties like UV coordinates to the output mesh.
+  int originalID;
+  /// Typically the index of the original triangle this came from
+  /// (Mesh.triVerts[tri]); it is supplied as an input, so it is passed along
+  /// unchanged.
+  int tri;
+  /// Triangles with the same face ID are coplanar.
+  int faceID;
+
+  bool SameFace(const TriRef& other) const {
+    return meshID == other.meshID && faceID == other.faceID;
+  }
+};
+
+/**
+ * This is a temporary edge structure which only stores edges forward and
+ * references the halfedge it was created from.
+ */
+struct TmpEdge {
+  int first, second, halfedgeIdx;
+
+  TmpEdge() {}
+  TmpEdge(int start, int end, int idx) {
+    first = std::min(start, end);
+    second = std::max(start, end);
+    halfedgeIdx = idx;
+  }
+
+  bool operator<(const TmpEdge& other) const {
+    return first == other.first ? second < other.second : first < other.first;
+  }
+};
+
+Vec<TmpEdge> inline CreateTmpEdges(const Vec<Halfedge>& halfedge) {
+  Vec<TmpEdge> edges(halfedge.size());
+  for_each_n(autoPolicy(edges.size()), countAt(0), edges.size(),
+             [&edges, &halfedge](const int idx) {
+               const Halfedge& half = halfedge[idx];
+               edges[idx] = TmpEdge(half.startVert, half.endVert,
+                                    half.IsForward() ? idx : -1);
+             });
+
+  size_t numEdge =
+      remove_if(edges.begin(), edges.end(),
+                [](const TmpEdge& edge) { return edge.halfedgeIdx < 0; }) -
+      edges.begin();
+  DEBUG_ASSERT(numEdge == halfedge.size() / 2, topologyErr, "Not oriented!");
+  edges.resize(numEdge);
+  return edges;
+}
+
+template <const bool inverted>
+struct ReindexEdge {
+  VecView<const TmpEdge> edges;
+  SparseIndices& indices;
+
+  void operator()(size_t i) {
+    int& edge = indices.Get(i, inverted);
+    edge = edges[edge].halfedgeIdx;
+  }
+};
+
+#ifdef MANIFOLD_DEBUG
+inline std::ostream& operator<<(std::ostream& stream, const Halfedge& edge) {
+  return stream << "startVert = " << edge.startVert
+                << ", endVert = " << edge.endVert
+                << ", pairedHalfedge = " << edge.pairedHalfedge;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const Barycentric& bary) {
+  return stream << "tri = " << bary.tri << ", uvw = " << bary.uvw;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const TriRef& ref) {
+  return stream << "meshID: " << ref.meshID
+                << ", originalID: " << ref.originalID << ", tri: " << ref.tri
+                << ", faceID: " << ref.faceID;
+}
+#endif
+}  // namespace manifold

+ 1003 - 0
thirdparty/manifold/src/smoothing.cpp

@@ -0,0 +1,1003 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./impl.h"
+#include "./parallel.h"
+
+namespace {
+using namespace manifold;
+
+// Returns a normalized vector orthogonal to ref, in the plane of ref and in,
+// unless in and ref are colinear, in which case it falls back to the plane of
+// ref and altIn.
+vec3 OrthogonalTo(vec3 in, vec3 altIn, vec3 ref) {
+  vec3 out = in - la::dot(in, ref) * ref;
+  if (la::dot(out, out) < kPrecision * la::dot(in, in)) {
+    out = altIn - la::dot(altIn, ref) * ref;
+  }
+  return SafeNormalize(out);
+}
+
+double Wrap(double radians) {
+  return radians < -kPi  ? radians + kTwoPi
+         : radians > kPi ? radians - kTwoPi
+                         : radians;
+}
+
+// Get the angle between two unit-vectors.
+double AngleBetween(vec3 a, vec3 b) {
+  const double dot = la::dot(a, b);
+  return dot >= 1 ? 0 : (dot <= -1 ? kPi : la::acos(dot));
+}
+
+// Calculate a tangent vector in the form of a weighted cubic Bezier taking as
+// input the desired tangent direction (length doesn't matter) and the edge
+// vector to the neighboring vertex. In a symmetric situation where the tangents
+// at each end are mirror images of each other, this will result in a circular
+// arc.
+vec4 CircularTangent(const vec3& tangent, const vec3& edgeVec) {
+  const vec3 dir = SafeNormalize(tangent);
+
+  double weight = std::max(0.5, la::dot(dir, SafeNormalize(edgeVec)));
+  // Quadratic weighted bezier for circular interpolation
+  const vec4 bz2 = vec4(dir * 0.5 * la::length(edgeVec), weight);
+  // Equivalent cubic weighted bezier
+  const vec4 bz3 = la::lerp(vec4(0, 0, 0, 1), bz2, 2 / 3.0);
+  // Convert from homogeneous form to geometric form
+  return vec4(vec3(bz3) / bz3.w, bz3.w);
+}
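+
+// Editor's note: in the symmetric case described above, the weight equals the
+// cosine of the angle between the tangent direction and the chord, i.e.
+// cos(theta/2) for an arc spanning angle theta, which is the standard middle
+// weight of a rational quadratic Bezier reproducing a circular arc. The
+// max(0.5, ...) keeps the weight from collapsing toward zero for turning
+// angles of 120 degrees or more.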
+
+struct InterpTri {
+  VecView<vec3> vertPos;
+  VecView<const Barycentric> vertBary;
+  const Manifold::Impl* impl;
+
+  static vec4 Homogeneous(vec4 v) {
+    v.x *= v.w;
+    v.y *= v.w;
+    v.z *= v.w;
+    return v;
+  }
+
+  static vec4 Homogeneous(vec3 v) { return vec4(v, 1.0); }
+
+  static vec3 HNormalize(vec4 v) {
+    return v.w == 0 ? vec3(v) : (vec3(v) / v.w);
+  }
+
+  static vec4 Scale(vec4 v, double scale) { return vec4(scale * vec3(v), v.w); }
+
+  static vec4 Bezier(vec3 point, vec4 tangent) {
+    return Homogeneous(vec4(point, 0) + tangent);
+  }
+
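+  // Editor's note: the helper below performs all but the final step of
+  // de Casteljau evaluation of a homogeneous (rational) cubic Bezier at
+  // parameter x. It returns the two second-level control points, so
+  // lerp(out[0], out[1], x) gives the curve point (see BezierPoint) and their
+  // difference gives the tangent direction (see BezierTangent).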
+  static mat4x2 CubicBezier2Linear(vec4 p0, vec4 p1, vec4 p2, vec4 p3,
+                                   double x) {
+    mat4x2 out;
+    vec4 p12 = la::lerp(p1, p2, x);
+    out[0] = la::lerp(la::lerp(p0, p1, x), p12, x);
+    out[1] = la::lerp(p12, la::lerp(p2, p3, x), x);
+    return out;
+  }
+
+  static vec3 BezierPoint(mat4x2 points, double x) {
+    return HNormalize(la::lerp(points[0], points[1], x));
+  }
+
+  static vec3 BezierTangent(mat4x2 points) {
+    return SafeNormalize(HNormalize(points[1]) - HNormalize(points[0]));
+  }
+
+  static vec3 RotateFromTo(vec3 v, quat start, quat end) {
+    return la::qrot(end, la::qrot(la::qconj(start), v));
+  }
+
+  static quat Slerp(const quat& x, const quat& y, double a, bool longWay) {
+    quat z = y;
+    double cosTheta = la::dot(x, y);
+
+    // Take the long way around the sphere only when requested
+    if ((cosTheta < 0) != longWay) {
+      z = -y;
+      cosTheta = -cosTheta;
+    }
+
+    if (cosTheta > 1.0 - std::numeric_limits<double>::epsilon()) {
+      return la::lerp(x, z, a);  // for numerical stability
+    } else {
+      double angle = std::acos(cosTheta);
+      return (std::sin((1.0 - a) * angle) * x + std::sin(a * angle) * z) /
+             std::sin(angle);
+    }
+  }
+
+  static mat4x2 Bezier2Bezier(const mat3x2& corners, const mat4x2& tangentsX,
+                              const mat4x2& tangentsY, double x,
+                              const vec3& anchor) {
+    const mat4x2 bez = CubicBezier2Linear(
+        Homogeneous(corners[0]), Bezier(corners[0], tangentsX[0]),
+        Bezier(corners[1], tangentsX[1]), Homogeneous(corners[1]), x);
+    const vec3 end = BezierPoint(bez, x);
+    const vec3 tangent = BezierTangent(bez);
+
+    const mat3x2 nTangentsX(SafeNormalize(vec3(tangentsX[0])),
+                            -SafeNormalize(vec3(tangentsX[1])));
+    const mat3x2 biTangents = {
+        OrthogonalTo(vec3(tangentsY[0]), (anchor - corners[0]), nTangentsX[0]),
+        OrthogonalTo(vec3(tangentsY[1]), (anchor - corners[1]), nTangentsX[1])};
+
+    const quat q0 = la::rotation_quat(mat3(
+        nTangentsX[0], biTangents[0], la::cross(nTangentsX[0], biTangents[0])));
+    const quat q1 = la::rotation_quat(mat3(
+        nTangentsX[1], biTangents[1], la::cross(nTangentsX[1], biTangents[1])));
+    const vec3 edge = corners[1] - corners[0];
+    const bool longWay =
+        la::dot(nTangentsX[0], edge) + la::dot(nTangentsX[1], edge) < 0;
+    const quat qTmp = Slerp(q0, q1, x, longWay);
+    const quat q = la::qmul(la::rotation_quat(la::qxdir(qTmp), tangent), qTmp);
+
+    const vec3 delta = la::lerp(RotateFromTo(vec3(tangentsY[0]), q0, q),
+                                RotateFromTo(vec3(tangentsY[1]), q1, q), x);
+    const double deltaW = la::lerp(tangentsY[0].w, tangentsY[1].w, x);
+
+    return {Homogeneous(end), vec4(delta, deltaW)};
+  }
+
+  static vec3 Bezier2D(const mat3x4& corners, const mat4& tangentsX,
+                       const mat4& tangentsY, double x, double y,
+                       const vec3& centroid) {
+    mat4x2 bez0 =
+        Bezier2Bezier({corners[0], corners[1]}, {tangentsX[0], tangentsX[1]},
+                      {tangentsY[0], tangentsY[1]}, x, centroid);
+    mat4x2 bez1 =
+        Bezier2Bezier({corners[2], corners[3]}, {tangentsX[2], tangentsX[3]},
+                      {tangentsY[2], tangentsY[3]}, 1 - x, centroid);
+
+    const mat4x2 bez =
+        CubicBezier2Linear(bez0[0], Bezier(vec3(bez0[0]), bez0[1]),
+                           Bezier(vec3(bez1[0]), bez1[1]), bez1[0], y);
+    return BezierPoint(bez, y);
+  }
+
+  void operator()(const int vert) {
+    vec3& pos = vertPos[vert];
+    const int tri = vertBary[vert].tri;
+    const vec4 uvw = vertBary[vert].uvw;
+
+    const ivec4 halfedges = impl->GetHalfedges(tri);
+    const mat3x4 corners = {
+        impl->vertPos_[impl->halfedge_[halfedges[0]].startVert],
+        impl->vertPos_[impl->halfedge_[halfedges[1]].startVert],
+        impl->vertPos_[impl->halfedge_[halfedges[2]].startVert],
+        halfedges[3] < 0
+            ? vec3(0.0)
+            : impl->vertPos_[impl->halfedge_[halfedges[3]].startVert]};
+
+    for (const int i : {0, 1, 2, 3}) {
+      if (uvw[i] == 1) {
+        pos = corners[i];
+        return;
+      }
+    }
+
+    vec4 posH(0.0);
+
+    if (halfedges[3] < 0) {  // tri
+      const mat4x3 tangentR = {impl->halfedgeTangent_[halfedges[0]],
+                               impl->halfedgeTangent_[halfedges[1]],
+                               impl->halfedgeTangent_[halfedges[2]]};
+      const mat4x3 tangentL = {
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[2]].pairedHalfedge],
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[0]].pairedHalfedge],
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[1]].pairedHalfedge]};
+      const vec3 centroid = mat3(corners) * vec3(1.0 / 3);
+
+      for (const int i : {0, 1, 2}) {
+        const int j = Next3(i);
+        const int k = Prev3(i);
+        const double x = uvw[k] / (1 - uvw[i]);
+
+        const mat4x2 bez =
+            Bezier2Bezier({corners[j], corners[k]}, {tangentR[j], tangentL[k]},
+                          {tangentL[j], tangentR[k]}, x, centroid);
+
+        const mat4x2 bez1 = CubicBezier2Linear(
+            bez[0], Bezier(vec3(bez[0]), bez[1]),
+            Bezier(corners[i], la::lerp(tangentR[i], tangentL[i], x)),
+            Homogeneous(corners[i]), uvw[i]);
+        const vec3 p = BezierPoint(bez1, uvw[i]);
+        posH += Homogeneous(vec4(p, uvw[j] * uvw[k]));
+      }
+    } else {  // quad
+      const mat4 tangentsX = {
+          impl->halfedgeTangent_[halfedges[0]],
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[0]].pairedHalfedge],
+          impl->halfedgeTangent_[halfedges[2]],
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[2]].pairedHalfedge]};
+      const mat4 tangentsY = {
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[3]].pairedHalfedge],
+          impl->halfedgeTangent_[halfedges[1]],
+          impl->halfedgeTangent_[impl->halfedge_[halfedges[1]].pairedHalfedge],
+          impl->halfedgeTangent_[halfedges[3]]};
+      const vec3 centroid = corners * vec4(0.25);
+      const double x = uvw[1] + uvw[2];
+      const double y = uvw[2] + uvw[3];
+      const vec3 pX = Bezier2D(corners, tangentsX, tangentsY, x, y, centroid);
+      const vec3 pY =
+          Bezier2D({corners[1], corners[2], corners[3], corners[0]},
+                   {tangentsY[1], tangentsY[2], tangentsY[3], tangentsY[0]},
+                   {tangentsX[1], tangentsX[2], tangentsX[3], tangentsX[0]}, y,
+                   1 - x, centroid);
+      posH += Homogeneous(vec4(pX, x * (1 - x)));
+      posH += Homogeneous(vec4(pY, y * (1 - y)));
+    }
+    pos = HNormalize(posH);
+  }
+};
+}  // namespace
+
+namespace manifold {
+
+/**
 * Get the property normal associated with the startVert of this halfedge, where
 * normalIdx gives the offset at which the normal components begin within each
 * vertex's properties.
+ */
+vec3 Manifold::Impl::GetNormal(int halfedge, int normalIdx) const {
+  const int tri = halfedge / 3;
+  const int j = halfedge % 3;
+  const int prop = meshRelation_.triProperties[tri][j];
+  vec3 normal;
+  for (const int i : {0, 1, 2}) {
+    normal[i] =
+        meshRelation_.properties[prop * meshRelation_.numProp + normalIdx + i];
+  }
+  return normal;
+}
+
+/**
+ * Returns a circular tangent for the requested halfedge, orthogonal to the
+ * given normal vector, and avoiding folding.
+ */
+vec4 Manifold::Impl::TangentFromNormal(const vec3& normal, int halfedge) const {
+  const Halfedge edge = halfedge_[halfedge];
+  const vec3 edgeVec = vertPos_[edge.endVert] - vertPos_[edge.startVert];
+  const vec3 edgeNormal =
+      faceNormal_[halfedge / 3] + faceNormal_[edge.pairedHalfedge / 3];
+  vec3 dir = la::cross(la::cross(edgeNormal, edgeVec), normal);
+  return CircularTangent(dir, edgeVec);
+}
+
+/**
+ * Returns true if this halfedge should be marked as the interior of a quad, as
+ * defined by its two triangles referring to the same face, and those triangles
+ * having no further face neighbors beyond.
+ */
+bool Manifold::Impl::IsInsideQuad(int halfedge) const {
+  if (halfedgeTangent_.size() > 0) {
+    return halfedgeTangent_[halfedge].w < 0;
+  }
+  const int tri = halfedge / 3;
+  const TriRef ref = meshRelation_.triRef[tri];
+  const int pair = halfedge_[halfedge].pairedHalfedge;
+  const int pairTri = pair / 3;
+  const TriRef pairRef = meshRelation_.triRef[pairTri];
+  if (!ref.SameFace(pairRef)) return false;
+
+  auto SameFace = [this](int halfedge, const TriRef& ref) {
+    return ref.SameFace(
+        meshRelation_.triRef[halfedge_[halfedge].pairedHalfedge / 3]);
+  };
+
+  int neighbor = NextHalfedge(halfedge);
+  if (SameFace(neighbor, ref)) return false;
+  neighbor = NextHalfedge(neighbor);
+  if (SameFace(neighbor, ref)) return false;
+  neighbor = NextHalfedge(pair);
+  if (SameFace(neighbor, pairRef)) return false;
+  neighbor = NextHalfedge(neighbor);
+  if (SameFace(neighbor, pairRef)) return false;
+  return true;
+}
+
+/**
+ * Returns true if this halfedge is an interior of a quad, as defined by its
+ * halfedge tangent having negative weight.
+ */
+bool Manifold::Impl::IsMarkedInsideQuad(int halfedge) const {
+  return halfedgeTangent_.size() > 0 && halfedgeTangent_[halfedge].w < 0;
+}
+
+// sharpenedEdges are referenced to the input Mesh, but the triangles have
+// been sorted in creating the Manifold, so the indices are converted using
+// meshRelation_.
+std::vector<Smoothness> Manifold::Impl::UpdateSharpenedEdges(
+    const std::vector<Smoothness>& sharpenedEdges) const {
+  std::unordered_map<int, int> oldHalfedge2New;
+  for (size_t tri = 0; tri < NumTri(); ++tri) {
+    int oldTri = meshRelation_.triRef[tri].tri;
+    for (int i : {0, 1, 2}) oldHalfedge2New[3 * oldTri + i] = 3 * tri + i;
+  }
+  std::vector<Smoothness> newSharp = sharpenedEdges;
+  for (Smoothness& edge : newSharp) {
+    edge.halfedge = oldHalfedge2New[edge.halfedge];
+  }
+  return newSharp;
+}
+
+// Find faces containing at least 3 triangles; these will not have
+// interpolated normals, so all their vert normals must match their face normal.
+Vec<bool> Manifold::Impl::FlatFaces() const {
+  const int numTri = NumTri();
+  Vec<bool> triIsFlatFace(numTri, false);
+  for_each_n(autoPolicy(numTri, 1e5), countAt(0), numTri,
+             [this, &triIsFlatFace](const int tri) {
+               const TriRef& ref = meshRelation_.triRef[tri];
+               int faceNeighbors = 0;
+               ivec3 faceTris = {-1, -1, -1};
+               for (const int j : {0, 1, 2}) {
+                 const int neighborTri =
+                     halfedge_[3 * tri + j].pairedHalfedge / 3;
+                 const TriRef& jRef = meshRelation_.triRef[neighborTri];
+                 if (jRef.SameFace(ref)) {
+                   ++faceNeighbors;
+                   faceTris[j] = neighborTri;
+                 }
+               }
+               if (faceNeighbors > 1) {
+                 triIsFlatFace[tri] = true;
+                 for (const int j : {0, 1, 2}) {
+                   if (faceTris[j] >= 0) {
+                     triIsFlatFace[faceTris[j]] = true;
+                   }
+                 }
+               }
+             });
+  return triIsFlatFace;
+}
+
+// Returns a vector of length numVert where each entry is a tri belonging to
+// that vert's single neighboring flat face, if exactly one exists. Verts with
+// no neighboring flat face get -1, and verts with more than one get -2.
+Vec<int> Manifold::Impl::VertFlatFace(const Vec<bool>& flatFaces) const {
+  Vec<int> vertFlatFace(NumVert(), -1);
+  Vec<TriRef> vertRef(NumVert(), {-1, -1, -1});
+  for (size_t tri = 0; tri < NumTri(); ++tri) {
+    if (flatFaces[tri]) {
+      for (const int j : {0, 1, 2}) {
+        const int vert = halfedge_[3 * tri + j].startVert;
+        if (vertRef[vert].SameFace(meshRelation_.triRef[tri])) continue;
+        vertRef[vert] = meshRelation_.triRef[tri];
+        vertFlatFace[vert] = vertFlatFace[vert] == -1 ? tri : -2;
+      }
+    }
+  }
+  return vertFlatFace;
+}
+
+Vec<int> Manifold::Impl::VertHalfedge() const {
+  Vec<int> vertHalfedge(NumVert());
+  Vec<uint8_t> counters(NumVert(), 0);
+  for_each_n(autoPolicy(halfedge_.size(), 1e5), countAt(0), halfedge_.size(),
+             [&vertHalfedge, &counters, this](const int idx) {
+               auto old = std::atomic_exchange(
+                   reinterpret_cast<std::atomic<uint8_t>*>(
+                       &counters[halfedge_[idx].startVert]),
+                   static_cast<uint8_t>(1));
+               if (old == 1) return;
+               // arbitrary: the first thread to claim this vert wins.
+               vertHalfedge[halfedge_[idx].startVert] = idx;
+             });
+  return vertHalfedge;
+}
+
+std::vector<Smoothness> Manifold::Impl::SharpenEdges(
+    double minSharpAngle, double minSmoothness) const {
+  std::vector<Smoothness> sharpenedEdges;
+  const double minRadians = radians(minSharpAngle);
+  for (size_t e = 0; e < halfedge_.size(); ++e) {
+    if (!halfedge_[e].IsForward()) continue;
+    const size_t pair = halfedge_[e].pairedHalfedge;
+    const double dihedral =
+        std::acos(la::dot(faceNormal_[e / 3], faceNormal_[pair / 3]));
+    if (dihedral > minRadians) {
+      sharpenedEdges.push_back({e, minSmoothness});
+      sharpenedEdges.push_back({pair, minSmoothness});
+    }
+  }
+  return sharpenedEdges;
+}
+
+/**
+ * Sharpen tangents that intersect an edge to sharpen that edge. The weight is
+ * unchanged, as this has a squared effect on radius of curvature, except
+ * in the case of zero radius, which is marked with weight = 0.
+ */
+void Manifold::Impl::SharpenTangent(int halfedge, double smoothness) {
+  halfedgeTangent_[halfedge] =
+      vec4(smoothness * vec3(halfedgeTangent_[halfedge]),
+           smoothness == 0 ? 0 : halfedgeTangent_[halfedge].w);
+}
+
+/**
+ * Instead of calculating the internal shared normals like CalculateNormals
+ * does, this method fills in vertex properties, unshared across edges that
+ * are bent more than minSharpAngle.
+ */
+void Manifold::Impl::SetNormals(int normalIdx, double minSharpAngle) {
+  if (IsEmpty()) return;
+  if (normalIdx < 0) return;
+
+  const int oldNumProp = NumProp();
+  const int numTri = NumTri();
+
+  Vec<bool> triIsFlatFace = FlatFaces();
+  Vec<int> vertFlatFace = VertFlatFace(triIsFlatFace);
+  Vec<int> vertNumSharp(NumVert(), 0);
+  for (size_t e = 0; e < halfedge_.size(); ++e) {
+    if (!halfedge_[e].IsForward()) continue;
+    const int pair = halfedge_[e].pairedHalfedge;
+    const int tri1 = e / 3;
+    const int tri2 = pair / 3;
+    const double dihedral =
+        degrees(std::acos(la::dot(faceNormal_[tri1], faceNormal_[tri2])));
+    if (dihedral > minSharpAngle) {
+      ++vertNumSharp[halfedge_[e].startVert];
+      ++vertNumSharp[halfedge_[e].endVert];
+    } else {
+      const bool faceSplit =
+          triIsFlatFace[tri1] != triIsFlatFace[tri2] ||
+          (triIsFlatFace[tri1] && triIsFlatFace[tri2] &&
+           !meshRelation_.triRef[tri1].SameFace(meshRelation_.triRef[tri2]));
+      if (vertFlatFace[halfedge_[e].startVert] == -2 && faceSplit) {
+        ++vertNumSharp[halfedge_[e].startVert];
+      }
+      if (vertFlatFace[halfedge_[e].endVert] == -2 && faceSplit) {
+        ++vertNumSharp[halfedge_[e].endVert];
+      }
+    }
+  }
+
+  const int numProp = std::max(oldNumProp, normalIdx + 3);
+  Vec<double> oldProperties(numProp * NumPropVert(), 0);
+  meshRelation_.properties.swap(oldProperties);
+  meshRelation_.numProp = numProp;
+  if (meshRelation_.triProperties.size() == 0) {
+    meshRelation_.triProperties.resize(numTri);
+    for_each_n(autoPolicy(numTri, 1e5), countAt(0), numTri, [this](int tri) {
+      for (const int j : {0, 1, 2})
+        meshRelation_.triProperties[tri][j] = halfedge_[3 * tri + j].startVert;
+    });
+  }
+  Vec<ivec3> oldTriProp(numTri, {-1, -1, -1});
+  meshRelation_.triProperties.swap(oldTriProp);
+
+  for (int tri = 0; tri < numTri; ++tri) {
+    for (const int i : {0, 1, 2}) {
+      if (meshRelation_.triProperties[tri][i] >= 0) continue;
+      int startEdge = 3 * tri + i;
+      const int vert = halfedge_[startEdge].startVert;
+
+      if (vertNumSharp[vert] < 2) {  // vertex has single normal
+        const vec3 normal = vertFlatFace[vert] >= 0
+                                ? faceNormal_[vertFlatFace[vert]]
+                                : vertNormal_[vert];
+        int lastProp = -1;
+        ForVert(startEdge, [&](int current) {
+          const int thisTri = current / 3;
+          const int j = current - 3 * thisTri;
+          const int prop = oldTriProp[thisTri][j];
+          meshRelation_.triProperties[thisTri][j] = prop;
+          if (prop == lastProp) return;
+          lastProp = prop;
+          // update property vertex
+          auto start = oldProperties.begin() + prop * oldNumProp;
+          std::copy(start, start + oldNumProp,
+                    meshRelation_.properties.begin() + prop * numProp);
+          for (const int i : {0, 1, 2})
+            meshRelation_.properties[prop * numProp + normalIdx + i] =
+                normal[i];
+        });
+      } else {  // vertex has multiple normals
+        const vec3 centerPos = vertPos_[vert];
+        // Length degree
+        std::vector<int> group;
+        // Length number of normals
+        std::vector<vec3> normals;
+        int current = startEdge;
+        int prevFace = current / 3;
+
+        do {  // find a sharp edge to start on
+          int next = NextHalfedge(halfedge_[current].pairedHalfedge);
+          const int face = next / 3;
+
+          const double dihedral = degrees(
+              std::acos(la::dot(faceNormal_[face], faceNormal_[prevFace])));
+          if (dihedral > minSharpAngle ||
+              triIsFlatFace[face] != triIsFlatFace[prevFace] ||
+              (triIsFlatFace[face] && triIsFlatFace[prevFace] &&
+               !meshRelation_.triRef[face].SameFace(
+                   meshRelation_.triRef[prevFace]))) {
+            break;
+          }
+          current = next;
+          prevFace = face;
+        } while (current != startEdge);
+
+        const int endEdge = current;
+
+        struct FaceEdge {
+          int face;
+          vec3 edgeVec;
+        };
+
+        // calculate pseudo-normals between each sharp edge
+        ForVert<FaceEdge>(
+            endEdge,
+            [this, centerPos, &vertNumSharp, &vertFlatFace](int current) {
+              if (IsInsideQuad(current)) {
+                return FaceEdge({current / 3, vec3(NAN)});
+              }
+              const int vert = halfedge_[current].endVert;
+              vec3 pos = vertPos_[vert];
+              const vec3 edgeVec = centerPos - pos;
+              if (vertNumSharp[vert] < 2) {
+                // opposite vert has fixed normal
+                const vec3 normal = vertFlatFace[vert] >= 0
+                                        ? faceNormal_[vertFlatFace[vert]]
+                                        : vertNormal_[vert];
+                // Flare out the normal we're calculating to give the edge a
+                // more constant curvature to meet the opposite normal. Achieve
+                // this by pointing the tangent toward the opposite bezier
+                // control point instead of the vert itself.
+                pos += vec3(TangentFromNormal(
+                    normal, halfedge_[current].pairedHalfedge));
+              }
+              return FaceEdge({current / 3, SafeNormalize(pos - centerPos)});
+            },
+            [this, &triIsFlatFace, &normals, &group, minSharpAngle](
+                int current, const FaceEdge& here, FaceEdge& next) {
+              const double dihedral = degrees(std::acos(
+                  la::dot(faceNormal_[here.face], faceNormal_[next.face])));
+              if (dihedral > minSharpAngle ||
+                  triIsFlatFace[here.face] != triIsFlatFace[next.face] ||
+                  (triIsFlatFace[here.face] && triIsFlatFace[next.face] &&
+                   !meshRelation_.triRef[here.face].SameFace(
+                       meshRelation_.triRef[next.face]))) {
+                normals.push_back(vec3(0.0));
+              }
+              group.push_back(normals.size() - 1);
+              if (std::isfinite(next.edgeVec.x)) {
+                normals.back() +=
+                    SafeNormalize(la::cross(next.edgeVec, here.edgeVec)) *
+                    AngleBetween(here.edgeVec, next.edgeVec);
+              } else {
+                next.edgeVec = here.edgeVec;
+              }
+            });
+
+        for (auto& normal : normals) {
+          normal = SafeNormalize(normal);
+        }
+
+        int lastGroup = 0;
+        int lastProp = -1;
+        int newProp = -1;
+        int idx = 0;
+        ForVert(endEdge, [&](int current1) {
+          const int thisTri = current1 / 3;
+          const int j = current1 - 3 * thisTri;
+          const int prop = oldTriProp[thisTri][j];
+          auto start = oldProperties.begin() + prop * oldNumProp;
+
+          if (group[idx] != lastGroup && group[idx] != 0 && prop == lastProp) {
+            // split property vertex, duplicating but with an updated normal
+            lastGroup = group[idx];
+            newProp = NumPropVert();
+            meshRelation_.properties.resize(meshRelation_.properties.size() +
+                                            numProp);
+            std::copy(start, start + oldNumProp,
+                      meshRelation_.properties.begin() + newProp * numProp);
+            for (const int i : {0, 1, 2}) {
+              meshRelation_.properties[newProp * numProp + normalIdx + i] =
+                  normals[group[idx]][i];
+            }
+          } else if (prop != lastProp) {
+            // update property vertex
+            lastProp = prop;
+            newProp = prop;
+            std::copy(start, start + oldNumProp,
+                      meshRelation_.properties.begin() + prop * numProp);
+            for (const int i : {0, 1, 2})
+              meshRelation_.properties[prop * numProp + normalIdx + i] =
+                  normals[group[idx]][i];
+          }
+
+          // point to updated property vertex
+          meshRelation_.triProperties[thisTri][j] = newProp;
+          ++idx;
+        });
+      }
+    }
+  }
+}
+
+/**
+ * Tangents get flattened to create sharp edges by setting their weight to zero.
+ * This is the natural limit of reducing the weight to increase the sharpness
+ * smoothly. This limit gives a decent shape, but it causes the parameterization
+ * to be stretched overall and compressed near the edges, which is good for resolving
+ * tight curvature, but bad for property interpolation. This function fixes the
+ * parameter stretch at the limit for sharp edges, since there is no curvature
+ * to resolve. Note this also changes the overall shape - making it more evenly
+ * curved.
+ */
+void Manifold::Impl::LinearizeFlatTangents() {
+  const int n = halfedgeTangent_.size();
+  for_each_n(autoPolicy(n, 1e4), countAt(0), n, [this](const int halfedge) {
+    vec4& tangent = halfedgeTangent_[halfedge];
+    vec4& otherTangent = halfedgeTangent_[halfedge_[halfedge].pairedHalfedge];
+
+    const bool flat[2] = {tangent.w == 0, otherTangent.w == 0};
+    if (!halfedge_[halfedge].IsForward() || (!flat[0] && !flat[1])) {
+      return;
+    }
+
+    const vec3 edgeVec = vertPos_[halfedge_[halfedge].endVert] -
+                         vertPos_[halfedge_[halfedge].startVert];
+
+    if (flat[0] && flat[1]) {
+      tangent = vec4(edgeVec / 3.0, 1);
+      otherTangent = vec4(-edgeVec / 3.0, 1);
+    } else if (flat[0]) {
+      tangent = vec4((edgeVec + vec3(otherTangent)) / 2.0, 1);
+    } else {
+      otherTangent = vec4((-edgeVec + vec3(tangent)) / 2.0, 1);
+    }
+  });
+}
+
+/**
+ * Redistribute the tangents around each vertex so that the angles between them
+ * have the same ratios as the angles of the triangles between the corresponding
+ * edges. This avoids folding the output shape and gives smoother results. There
+ * must be at least one fixed halfedge on a vertex for that vertex to be
+ * operated on. If there is only one, then that halfedge is not treated as
+ * fixed, but the whole circle is turned to an average orientation.
+ */
+void Manifold::Impl::DistributeTangents(const Vec<bool>& fixedHalfedges) {
+  const int numHalfedge = fixedHalfedges.size();
+  for_each_n(
+      autoPolicy(numHalfedge, 1e4), countAt(0), numHalfedge,
+      [this, &fixedHalfedges](int halfedge) {
+        if (!fixedHalfedges[halfedge]) return;
+
+        if (IsMarkedInsideQuad(halfedge)) {
+          halfedge = NextHalfedge(halfedge_[halfedge].pairedHalfedge);
+        }
+
+        vec3 normal(0.0);
+        Vec<double> currentAngle;
+        Vec<double> desiredAngle;
+
+        const vec3 approxNormal = vertNormal_[halfedge_[halfedge].startVert];
+        const vec3 center = vertPos_[halfedge_[halfedge].startVert];
+        vec3 lastEdgeVec =
+            SafeNormalize(vertPos_[halfedge_[halfedge].endVert] - center);
+        const vec3 firstTangent =
+            SafeNormalize(vec3(halfedgeTangent_[halfedge]));
+        vec3 lastTangent = firstTangent;
+        int current = halfedge;
+        do {
+          current = NextHalfedge(halfedge_[current].pairedHalfedge);
+          if (IsMarkedInsideQuad(current)) continue;
+          const vec3 thisEdgeVec =
+              SafeNormalize(vertPos_[halfedge_[current].endVert] - center);
+          const vec3 thisTangent =
+              SafeNormalize(vec3(halfedgeTangent_[current]));
+          normal += la::cross(thisTangent, lastTangent);
+          // cumulative sum
+          desiredAngle.push_back(
+              AngleBetween(thisEdgeVec, lastEdgeVec) +
+              (desiredAngle.size() > 0 ? desiredAngle.back() : 0));
+          if (current == halfedge) {
+            currentAngle.push_back(kTwoPi);
+          } else {
+            currentAngle.push_back(AngleBetween(thisTangent, firstTangent));
+            if (la::dot(approxNormal, la::cross(thisTangent, firstTangent)) <
+                0) {
+              currentAngle.back() = kTwoPi - currentAngle.back();
+            }
+          }
+          lastEdgeVec = thisEdgeVec;
+          lastTangent = thisTangent;
+        } while (!fixedHalfedges[current]);
+
+        if (currentAngle.size() == 1 || la::dot(normal, normal) == 0) return;
+
+        const double scale = currentAngle.back() / desiredAngle.back();
+        double offset = 0;
+        if (current == halfedge) {  // only one - find average offset
+          for (size_t i = 0; i < currentAngle.size(); ++i) {
+            offset += Wrap(currentAngle[i] - scale * desiredAngle[i]);
+          }
+          offset /= currentAngle.size();
+        }
+
+        current = halfedge;
+        size_t i = 0;
+        do {
+          current = NextHalfedge(halfedge_[current].pairedHalfedge);
+          if (IsMarkedInsideQuad(current)) continue;
+          desiredAngle[i] *= scale;
+          const double lastAngle = i > 0 ? desiredAngle[i - 1] : 0;
+          // shrink obtuse angles
+          if (desiredAngle[i] - lastAngle > kPi) {
+            desiredAngle[i] = lastAngle + kPi;
+          } else if (i + 1 < desiredAngle.size() &&
+                     scale * desiredAngle[i + 1] - desiredAngle[i] > kPi) {
+            desiredAngle[i] = scale * desiredAngle[i + 1] - kPi;
+          }
+          const double angle = currentAngle[i] - desiredAngle[i] - offset;
+          vec3 tangent(halfedgeTangent_[current]);
+          const quat q = la::rotation_quat(la::normalize(normal), angle);
+          halfedgeTangent_[current] =
+              vec4(la::qrot(q, tangent), halfedgeTangent_[current].w);
+          ++i;
+        } while (!fixedHalfedges[current]);
+      });
+}
+
+/**
+ * Calculates halfedgeTangent_, allowing the manifold to be refined and
+ * smoothed. The tangents form weighted cubic Beziers along each edge. This
+ * function creates circular arcs where possible (minimizing maximum curvature),
+ * constrained to the indicated property normals. Across edges that form
+ * discontinuities in the normals, the tangent vectors are zero-length, allowing
+ * the shape to form a sharp corner with minimal oscillation.
+ */
+void Manifold::Impl::CreateTangents(int normalIdx) {
+  ZoneScoped;
+  const int numVert = NumVert();
+  const int numHalfedge = halfedge_.size();
+  halfedgeTangent_.resize(0);
+  Vec<vec4> tangent(numHalfedge);
+  Vec<bool> fixedHalfedge(numHalfedge, false);
+
+  Vec<int> vertHalfedge = VertHalfedge();
+  for_each_n(
+      autoPolicy(numVert, 1e4), vertHalfedge.begin(), numVert,
+      [this, &tangent, &fixedHalfedge, normalIdx](int e) {
+        struct FlatNormal {
+          bool isFlatFace;
+          vec3 normal;
+        };
+
+        ivec2 faceEdges(-1, -1);
+
+        ForVert<FlatNormal>(
+            e,
+            [normalIdx, this](int halfedge) {
+              const vec3 normal = GetNormal(halfedge, normalIdx);
+              const vec3 diff = faceNormal_[halfedge / 3] - normal;
+              return FlatNormal(
+                  {la::dot(diff, diff) < kPrecision * kPrecision, normal});
+            },
+            [&faceEdges, &tangent, &fixedHalfedge, this](
+                int halfedge, const FlatNormal& here, const FlatNormal& next) {
+              if (IsInsideQuad(halfedge)) {
+                tangent[halfedge] = {0, 0, 0, -1};
+                return;
+              }
+              // mark special edges
+              const vec3 diff = next.normal - here.normal;
+              const bool differentNormals =
+                  la::dot(diff, diff) > kPrecision * kPrecision;
+              if (differentNormals || here.isFlatFace != next.isFlatFace) {
+                fixedHalfedge[halfedge] = true;
+                if (faceEdges[0] == -1) {
+                  faceEdges[0] = halfedge;
+                } else if (faceEdges[1] == -1) {
+                  faceEdges[1] = halfedge;
+                } else {
+                  faceEdges[0] = -2;
+                }
+              }
+              // calculate tangents
+              if (differentNormals) {
+                const vec3 edgeVec = vertPos_[halfedge_[halfedge].endVert] -
+                                     vertPos_[halfedge_[halfedge].startVert];
+                const vec3 dir = la::cross(here.normal, next.normal);
+                tangent[halfedge] = CircularTangent(
+                    (la::dot(dir, edgeVec) < 0 ? -1.0 : 1.0) * dir, edgeVec);
+              } else {
+                tangent[halfedge] = TangentFromNormal(here.normal, halfedge);
+              }
+            });
+
+        if (faceEdges[0] >= 0 && faceEdges[1] >= 0) {
+          const vec3 edge0 = vertPos_[halfedge_[faceEdges[0]].endVert] -
+                             vertPos_[halfedge_[faceEdges[0]].startVert];
+          const vec3 edge1 = vertPos_[halfedge_[faceEdges[1]].endVert] -
+                             vertPos_[halfedge_[faceEdges[1]].startVert];
+          const vec3 newTangent = la::normalize(edge0) - la::normalize(edge1);
+          tangent[faceEdges[0]] = CircularTangent(newTangent, edge0);
+          tangent[faceEdges[1]] = CircularTangent(-newTangent, edge1);
+        } else if (faceEdges[0] == -1 && faceEdges[1] == -1) {
+          fixedHalfedge[e] = true;
+        }
+      });
+
+  halfedgeTangent_.swap(tangent);
+  DistributeTangents(fixedHalfedge);
+}
+
+/**
+ * Calculates halfedgeTangent_, allowing the manifold to be refined and
+ * smoothed. The tangents form weighted cubic Beziers along each edge. This
+ * function creates circular arcs where possible (minimizing maximum curvature),
+ * constrained to the vertex normals. Where sharpenedEdges are specified, the
+ * tangents that intersect the sharpened edge are shortened, concentrating the
+ * curvature there, while the tangents of the sharp edges themselves are aligned
+ * for continuity.
+ */
+void Manifold::Impl::CreateTangents(std::vector<Smoothness> sharpenedEdges) {
+  ZoneScoped;
+  const int numHalfedge = halfedge_.size();
+  halfedgeTangent_.resize(0);
+  Vec<vec4> tangent(numHalfedge);
+  Vec<bool> fixedHalfedge(numHalfedge, false);
+
+  Vec<int> vertHalfedge = VertHalfedge();
+  Vec<bool> triIsFlatFace = FlatFaces();
+  Vec<int> vertFlatFace = VertFlatFace(triIsFlatFace);
+  Vec<vec3> vertNormal = vertNormal_;
+  for (size_t v = 0; v < NumVert(); ++v) {
+    if (vertFlatFace[v] >= 0) {
+      vertNormal[v] = faceNormal_[vertFlatFace[v]];
+    }
+  }
+
+  for_each_n(autoPolicy(numHalfedge, 1e4), countAt(0), numHalfedge,
+             [&tangent, &vertNormal, this](const int edgeIdx) {
+               tangent[edgeIdx] =
+                   IsInsideQuad(edgeIdx)
+                       ? vec4(0, 0, 0, -1)
+                       : TangentFromNormal(
+                             vertNormal[halfedge_[edgeIdx].startVert], edgeIdx);
+             });
+
+  halfedgeTangent_.swap(tangent);
+
+  // Add sharpened edges around faces, just on the face side.
+  for (size_t tri = 0; tri < NumTri(); ++tri) {
+    if (!triIsFlatFace[tri]) continue;
+    for (const int j : {0, 1, 2}) {
+      const int tri2 = halfedge_[3 * tri + j].pairedHalfedge / 3;
+      if (!triIsFlatFace[tri2] ||
+          !meshRelation_.triRef[tri].SameFace(meshRelation_.triRef[tri2])) {
+        sharpenedEdges.push_back({3 * tri + j, 0});
+      }
+    }
+  }
+
+  using Pair = std::pair<Smoothness, Smoothness>;
+  // Fill in missing pairs with default smoothness = 1.
+  std::map<int, Pair> edges;
+  for (Smoothness edge : sharpenedEdges) {
+    if (edge.smoothness >= 1) continue;
+    const bool forward = halfedge_[edge.halfedge].IsForward();
+    const int pair = halfedge_[edge.halfedge].pairedHalfedge;
+    const int idx = forward ? edge.halfedge : pair;
+    if (edges.find(idx) == edges.end()) {
+      edges[idx] = {edge, {static_cast<size_t>(pair), 1}};
+      if (!forward) std::swap(edges[idx].first, edges[idx].second);
+    } else {
+      Smoothness& e = forward ? edges[idx].first : edges[idx].second;
+      e.smoothness = std::min(edge.smoothness, e.smoothness);
+    }
+  }
+
+  std::map<int, std::vector<Pair>> vertTangents;
+  for (const auto& value : edges) {
+    const Pair edge = value.second;
+    vertTangents[halfedge_[edge.first.halfedge].startVert].push_back(edge);
+    vertTangents[halfedge_[edge.second.halfedge].startVert].push_back(
+        {edge.second, edge.first});
+  }
+
+  const int numVert = NumVert();
+  for_each_n(
+      autoPolicy(numVert, 1e4), countAt(0), numVert,
+      [this, &vertTangents, &fixedHalfedge, &vertHalfedge,
+       &triIsFlatFace](int v) {
+        auto it = vertTangents.find(v);
+        if (it == vertTangents.end()) {
+          fixedHalfedge[vertHalfedge[v]] = true;
+          return;
+        }
+        const std::vector<Pair>& vert = it->second;
+        // Sharp edges that end are smooth at their terminal vert.
+        if (vert.size() == 1) return;
+        if (vert.size() == 2) {  // Make continuous edge
+          const int first = vert[0].first.halfedge;
+          const int second = vert[1].first.halfedge;
+          fixedHalfedge[first] = true;
+          fixedHalfedge[second] = true;
+          const vec3 newTangent = la::normalize(vec3(halfedgeTangent_[first]) -
+                                                vec3(halfedgeTangent_[second]));
+
+          const vec3 pos = vertPos_[halfedge_[first].startVert];
+          halfedgeTangent_[first] = CircularTangent(
+              newTangent, vertPos_[halfedge_[first].endVert] - pos);
+          halfedgeTangent_[second] = CircularTangent(
+              -newTangent, vertPos_[halfedge_[second].endVert] - pos);
+
+          double smoothness =
+              (vert[0].second.smoothness + vert[1].first.smoothness) / 2;
+          ForVert(first, [this, &smoothness, &vert, first,
+                          second](int current) {
+            if (current == second) {
+              smoothness =
+                  (vert[1].second.smoothness + vert[0].first.smoothness) / 2;
+            } else if (current != first && !IsMarkedInsideQuad(current)) {
+              SharpenTangent(current, smoothness);
+            }
+          });
+        } else {  // Sharpen vertex uniformly
+          double smoothness = 0;
+          double denom = 0;
+          for (const Pair& pair : vert) {
+            smoothness += pair.first.smoothness;
+            smoothness += pair.second.smoothness;
+            denom += pair.first.smoothness == 0 ? 0 : 1;
+            denom += pair.second.smoothness == 0 ? 0 : 1;
+          }
+          smoothness /= denom;
+
+          ForVert(vert[0].first.halfedge,
+                  [this, &triIsFlatFace, smoothness](int current) {
+                    if (!IsMarkedInsideQuad(current)) {
+                      const int pair = halfedge_[current].pairedHalfedge;
+                      SharpenTangent(current, triIsFlatFace[current / 3] ||
+                                                      triIsFlatFace[pair / 3]
+                                                  ? 0
+                                                  : smoothness);
+                    }
+                  });
+        }
+      });
+
+  LinearizeFlatTangents();
+  DistributeTangents(fixedHalfedge);
+}
+
+void Manifold::Impl::Refine(std::function<int(vec3, vec4, vec4)> edgeDivisions,
+                            bool keepInterior) {
+  if (IsEmpty()) return;
+  Manifold::Impl old = *this;
+  Vec<Barycentric> vertBary = Subdivide(edgeDivisions, keepInterior);
+  if (vertBary.size() == 0) return;
+
+  if (old.halfedgeTangent_.size() == old.halfedge_.size()) {
+    for_each_n(autoPolicy(NumTri(), 1e4), countAt(0), NumVert(),
+               InterpTri({vertPos_, vertBary, &old}));
+  }
+
+  halfedgeTangent_.resize(0);
+  Finish();
+  CreateFaces();
+  meshRelation_.originalID = -1;
+}
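+
+// Editor's note: an illustrative sketch (not upstream code) of how these
+// internal pieces fit together. Given an Impl whose vertex normals are stored
+// starting at property offset normalIdx, a caller inside this library could
+// smooth and then refine it with:
+//
+//   impl.CreateTangents(normalIdx);                  // fill halfedgeTangent_
+//   impl.Refine([](vec3, vec4, vec4) { return 4; },  // 4 divisions per edge
+//               /*keepInterior=*/true);   // InterpTri then moves new verts
+//                                         // onto the Bezier patches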
+
+}  // namespace manifold

+ 517 - 0
thirdparty/manifold/src/sort.cpp

@@ -0,0 +1,517 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <atomic>
+#include <set>
+
+#include "./impl.h"
+#include "./parallel.h"
+
+namespace {
+using namespace manifold;
+
+constexpr uint32_t kNoCode = 0xFFFFFFFFu;
+
+uint32_t MortonCode(vec3 position, Box bBox) {
+  // Unreferenced vertices are marked NaN, and this will sort them to the end
+  // (the Morton code only uses the first 30 of 32 bits).
+  if (std::isnan(position.x)) return kNoCode;
+
+  return Collider::MortonCode(position, bBox);
+}
+
+struct Reindex {
+  VecView<const int> indexInv;
+
+  void operator()(Halfedge& edge) {
+    if (edge.startVert < 0) return;
+    edge.startVert = indexInv[edge.startVert];
+    edge.endVert = indexInv[edge.endVert];
+  }
+};
+
+struct MarkProp {
+  VecView<int> keep;
+
+  void operator()(ivec3 triProp) {
+    for (const int i : {0, 1, 2}) {
+      reinterpret_cast<std::atomic<int>*>(&keep[triProp[i]])
+          ->store(1, std::memory_order_relaxed);
+    }
+  }
+};
+
+struct ReindexProps {
+  VecView<const int> old2new;
+
+  void operator()(ivec3& triProp) {
+    for (const int i : {0, 1, 2}) {
+      triProp[i] = old2new[triProp[i]];
+    }
+  }
+};
+
+struct ReindexFace {
+  VecView<Halfedge> halfedge;
+  VecView<vec4> halfedgeTangent;
+  VecView<const Halfedge> oldHalfedge;
+  VecView<const vec4> oldHalfedgeTangent;
+  VecView<const int> faceNew2Old;
+  VecView<const int> faceOld2New;
+
+  void operator()(int newFace) {
+    const int oldFace = faceNew2Old[newFace];
+    for (const int i : {0, 1, 2}) {
+      const int oldEdge = 3 * oldFace + i;
+      Halfedge edge = oldHalfedge[oldEdge];
+      const int pairedFace = edge.pairedHalfedge / 3;
+      const int offset = edge.pairedHalfedge - 3 * pairedFace;
+      edge.pairedHalfedge = 3 * faceOld2New[pairedFace] + offset;
+      const int newEdge = 3 * newFace + i;
+      halfedge[newEdge] = edge;
+      if (!oldHalfedgeTangent.empty()) {
+        halfedgeTangent[newEdge] = oldHalfedgeTangent[oldEdge];
+      }
+    }
+  }
+};
+
+template <typename Precision, typename I>
+bool MergeMeshGLP(MeshGLP<Precision, I>& mesh) {
+  ZoneScoped;
+  std::multiset<std::pair<int, int>> openEdges;
+
+  std::vector<int> merge(mesh.NumVert());
+  std::iota(merge.begin(), merge.end(), 0);
+  for (size_t i = 0; i < mesh.mergeFromVert.size(); ++i) {
+    merge[mesh.mergeFromVert[i]] = mesh.mergeToVert[i];
+  }
+
+  const auto numVert = mesh.NumVert();
+  const auto numTri = mesh.NumTri();
+  const int next[3] = {1, 2, 0};
+  for (size_t tri = 0; tri < numTri; ++tri) {
+    for (int i : {0, 1, 2}) {
+      auto edge = std::make_pair(merge[mesh.triVerts[3 * tri + next[i]]],
+                                 merge[mesh.triVerts[3 * tri + i]]);
+      auto it = openEdges.find(edge);
+      if (it == openEdges.end()) {
+        std::swap(edge.first, edge.second);
+        openEdges.insert(edge);
+      } else {
+        openEdges.erase(it);
+      }
+    }
+  }
+  if (openEdges.empty()) {
+    return false;
+  }
+
+  const auto numOpenVert = openEdges.size();
+  Vec<int> openVerts(numOpenVert);
+  int i = 0;
+  for (const auto& edge : openEdges) {
+    const int vert = edge.first;
+    openVerts[i++] = vert;
+  }
+
+  Vec<Precision> vertPropD(mesh.vertProperties);
+  Box bBox;
+  for (const int i : {0, 1, 2}) {
+    auto iPos =
+        StridedRange(vertPropD.begin() + i, vertPropD.end(), mesh.numProp);
+    auto minMax = manifold::transform_reduce(
+        iPos.begin(), iPos.end(),
+        std::make_pair(std::numeric_limits<double>::infinity(),
+                       -std::numeric_limits<double>::infinity()),
+        [](auto a, auto b) {
+          return std::make_pair(std::min(a.first, b.first),
+                                std::max(a.second, b.second));
+        },
+        [](double f) { return std::make_pair(f, f); });
+    bBox.min[i] = minMax.first;
+    bBox.max[i] = minMax.second;
+  }
+
+  const double tolerance = std::max(static_cast<double>(mesh.tolerance),
+                                    (std::is_same<Precision, float>::value
+                                         ? std::numeric_limits<float>::epsilon()
+                                         : kPrecision) *
+                                        bBox.Scale());
+
+  auto policy = autoPolicy(numOpenVert, 1e5);
+  Vec<Box> vertBox(numOpenVert);
+  Vec<uint32_t> vertMorton(numOpenVert);
+
+  for_each_n(policy, countAt(0), numOpenVert,
+             [&vertMorton, &vertBox, &openVerts, &bBox, &mesh,
+              tolerance](const int i) {
+               int vert = openVerts[i];
+
+               const vec3 center(mesh.vertProperties[mesh.numProp * vert],
+                                 mesh.vertProperties[mesh.numProp * vert + 1],
+                                 mesh.vertProperties[mesh.numProp * vert + 2]);
+
+               vertBox[i].min = center - tolerance / 2.0;
+               vertBox[i].max = center + tolerance / 2.0;
+
+               vertMorton[i] = MortonCode(center, bBox);
+             });
+
+  Vec<int> vertNew2Old(numOpenVert);
+  sequence(vertNew2Old.begin(), vertNew2Old.end());
+
+  stable_sort(vertNew2Old.begin(), vertNew2Old.end(),
+              [&vertMorton](const int& a, const int& b) {
+                return vertMorton[a] < vertMorton[b];
+              });
+
+  Permute(vertMorton, vertNew2Old);
+  Permute(vertBox, vertNew2Old);
+  Permute(openVerts, vertNew2Old);
+  Collider collider(vertBox, vertMorton);
+  SparseIndices toMerge = collider.Collisions<true>(vertBox.cview());
+
+  UnionFind<> uf(numVert);
+  for (size_t i = 0; i < mesh.mergeFromVert.size(); ++i) {
+    uf.unionXY(static_cast<int>(mesh.mergeFromVert[i]),
+               static_cast<int>(mesh.mergeToVert[i]));
+  }
+  for (size_t i = 0; i < toMerge.size(); ++i) {
+    uf.unionXY(openVerts[toMerge.Get(i, false)],
+               openVerts[toMerge.Get(i, true)]);
+  }
+
+  mesh.mergeToVert.clear();
+  mesh.mergeFromVert.clear();
+  for (size_t v = 0; v < numVert; ++v) {
+    const size_t mergeTo = uf.find(v);
+    if (mergeTo != v) {
+      mesh.mergeFromVert.push_back(v);
+      mesh.mergeToVert.push_back(mergeTo);
+    }
+  }
+
+  return true;
+}
+}  // namespace
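The open-edge scan in MergeMeshGLP above reduces to a cancellation trick: each directed edge is looked up reversed, and a hit means an oppositely wound neighbor already claimed it. A standalone sketch with plain std:: types (not part of the vendored sources), operating on already-merged vertex indices:

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

std::multiset<std::pair<int, int>> OpenEdges(const std::vector<int>& triVerts) {
  std::multiset<std::pair<int, int>> open;
  const int next[3] = {1, 2, 0};
  const std::size_t numTri = triVerts.size() / 3;
  for (std::size_t tri = 0; tri < numTri; ++tri) {
    for (int i : {0, 1, 2}) {
      // Look the edge up in reversed order: a consistently wound neighbor
      // stored it forward, so a hit means the edge is interior.
      auto edge = std::make_pair(triVerts[3 * tri + next[i]], triVerts[3 * tri + i]);
      auto it = open.find(edge);
      if (it == open.end()) {
        std::swap(edge.first, edge.second);
        open.insert(edge);  // store forward, waiting for its mate
      } else {
        open.erase(it);  // matched - interior edge, both halves cancel
      }
    }
  }
  return open;  // empty iff every edge is shared by exactly two triangles
}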
+
+namespace manifold {
+
+/**
+ * Once halfedge_ has been filled in, this function can be called to create the
+ * rest of the internal data structures. This function also removes the verts
+ * and halfedges flagged for removal (NaN verts and -1 halfedges).
+ */
+void Manifold::Impl::Finish() {
+  if (halfedge_.size() == 0) return;
+
+  CalculateBBox();
+  SetEpsilon(epsilon_);
+  if (!bBox_.IsFinite()) {
+    // Decimated out of existence - early out.
+    MarkFailure(Error::NoError);
+    return;
+  }
+
+  SortVerts();
+  Vec<Box> faceBox;
+  Vec<uint32_t> faceMorton;
+  GetFaceBoxMorton(faceBox, faceMorton);
+  SortFaces(faceBox, faceMorton);
+  if (halfedge_.size() == 0) return;
+  CompactProps();
+
+  DEBUG_ASSERT(halfedge_.size() % 6 == 0, topologyErr,
+               "Not an even number of faces after sorting faces!");
+
+#ifdef MANIFOLD_DEBUG
+  auto MaxOrMinus = [](int a, int b) {
+    return std::min(a, b) < 0 ? -1 : std::max(a, b);
+  };
+  int face = 0;
+  Halfedge extrema = {0, 0, 0};
+  for (size_t i = 0; i < halfedge_.size(); i++) {
+    Halfedge e = halfedge_[i];
+    if (!e.IsForward()) std::swap(e.startVert, e.endVert);
+    extrema.startVert = std::min(extrema.startVert, e.startVert);
+    extrema.endVert = std::min(extrema.endVert, e.endVert);
+    extrema.pairedHalfedge =
+        MaxOrMinus(extrema.pairedHalfedge, e.pairedHalfedge);
+    face = MaxOrMinus(face, i / 3);
+  }
+  DEBUG_ASSERT(extrema.startVert >= 0, topologyErr,
+               "Vertex index is negative!");
+  DEBUG_ASSERT(extrema.endVert < static_cast<int>(NumVert()), topologyErr,
+               "Vertex index exceeds number of verts!");
+  DEBUG_ASSERT(extrema.pairedHalfedge >= 0, topologyErr,
+               "Halfedge index is negative!");
+  DEBUG_ASSERT(extrema.pairedHalfedge < 2 * static_cast<int>(NumEdge()),
+               topologyErr, "Halfedge index exceeds number of halfedges!");
+  DEBUG_ASSERT(face >= 0, topologyErr, "Face index is negative!");
+  DEBUG_ASSERT(face < static_cast<int>(NumTri()), topologyErr,
+               "Face index exceeds number of faces!");
+#endif
+
+  DEBUG_ASSERT(meshRelation_.triRef.size() == NumTri() ||
+                   meshRelation_.triRef.size() == 0,
+               logicErr, "Mesh Relation doesn't fit!");
+  DEBUG_ASSERT(faceNormal_.size() == NumTri() || faceNormal_.size() == 0,
+               logicErr,
+               "faceNormal size = " + std::to_string(faceNormal_.size()) +
+                   ", NumTri = " + std::to_string(NumTri()));
+  CalculateNormals();
+  collider_ = Collider(faceBox, faceMorton);
+
+  DEBUG_ASSERT(Is2Manifold(), logicErr, "mesh is not 2-manifold!");
+}
+
+/**
+ * Sorts the vertices according to their Morton code.
+ */
+void Manifold::Impl::SortVerts() {
+  ZoneScoped;
+  const auto numVert = NumVert();
+  Vec<uint32_t> vertMorton(numVert);
+  auto policy = autoPolicy(numVert, 1e5);
+  for_each_n(policy, countAt(0), numVert, [this, &vertMorton](const int vert) {
+    vertMorton[vert] = MortonCode(vertPos_[vert], bBox_);
+  });
+
+  Vec<int> vertNew2Old(numVert);
+  sequence(vertNew2Old.begin(), vertNew2Old.end());
+
+  stable_sort(vertNew2Old.begin(), vertNew2Old.end(),
+              [&vertMorton](const int& a, const int& b) {
+                return vertMorton[a] < vertMorton[b];
+              });
+
+  ReindexVerts(vertNew2Old, numVert);
+
+  // Verts were flagged for removal with NaNs and assigned kNoCode to sort
+  // them to the end, which allows them to be removed.
+  const auto newNumVert = std::find_if(vertNew2Old.begin(), vertNew2Old.end(),
+                                       [&vertMorton](const int vert) {
+                                         return vertMorton[vert] == kNoCode;
+                                       }) -
+                          vertNew2Old.begin();
+
+  vertNew2Old.resize(newNumVert);
+  Permute(vertPos_, vertNew2Old);
+
+  if (vertNormal_.size() == numVert) {
+    Permute(vertNormal_, vertNew2Old);
+  }
+}
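SortVerts() above (and SortFaces() below) share one pattern: stable-sort an index permutation by Morton key, then truncate at the first kNoCode sentinel so flagged elements simply fall off the end. A serial sketch with std:: algorithms in place of the parallel wrappers (not part of the vendored sources):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

std::vector<int> SortedByMorton(const std::vector<uint32_t>& morton) {
  constexpr uint32_t kNoCode = 0xFFFFFFFFu;  // same sentinel as above
  std::vector<int> new2Old(morton.size());
  std::iota(new2Old.begin(), new2Old.end(), 0);
  std::stable_sort(new2Old.begin(), new2Old.end(),
                   [&](int a, int b) { return morton[a] < morton[b]; });
  // Flagged entries carry the maximum code, so they all sit at the back now.
  auto firstRemoved = std::find_if(new2Old.begin(), new2Old.end(),
                                   [&](int i) { return morton[i] == kNoCode; });
  new2Old.erase(firstRemoved, new2Old.end());
  return new2Old;
}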
+
+/**
+ * Updates the halfedges to point to new vert indices based on a mapping,
+ * vertNew2Old. This may be a subset, so the total number of original verts is
+ * also given.
+ */
+void Manifold::Impl::ReindexVerts(const Vec<int>& vertNew2Old,
+                                  size_t oldNumVert) {
+  ZoneScoped;
+  Vec<int> vertOld2New(oldNumVert);
+  scatter(countAt(0), countAt(static_cast<int>(NumVert())), vertNew2Old.begin(),
+          vertOld2New.begin());
+  for_each(autoPolicy(oldNumVert, 1e5), halfedge_.begin(), halfedge_.end(),
+           Reindex({vertOld2New}));
+}
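The scatter() call in ReindexVerts() (and again in GatherFaces() below) builds the inverse permutation: old2New[new2Old[i]] = i. A serial equivalent, not part of the vendored sources; the -1 fill for dropped entries is added here for clarity only (the original leaves them unwritten and never reads them back):

#include <cstddef>
#include <vector>

std::vector<int> InversePermutation(const std::vector<int>& new2Old,
                                    std::size_t oldSize) {
  std::vector<int> old2New(oldSize, -1);
  for (std::size_t i = 0; i < new2Old.size(); ++i) {
    old2New[new2Old[i]] = static_cast<int>(i);
  }
  return old2New;
}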
+
+/**
+ * Removes unreferenced property verts and reindexes triProperties.
+ */
+void Manifold::Impl::CompactProps() {
+  ZoneScoped;
+  if (meshRelation_.numProp == 0) return;
+
+  const auto numVerts = meshRelation_.properties.size() / meshRelation_.numProp;
+  Vec<int> keep(numVerts, 0);
+  auto policy = autoPolicy(numVerts, 1e5);
+
+  for_each(policy, meshRelation_.triProperties.cbegin(),
+           meshRelation_.triProperties.cend(), MarkProp({keep}));
+  Vec<int> propOld2New(numVerts + 1, 0);
+  inclusive_scan(keep.begin(), keep.end(), propOld2New.begin() + 1);
+
+  Vec<double> oldProp = meshRelation_.properties;
+  const int numVertsNew = propOld2New[numVerts];
+  const int numProp = meshRelation_.numProp;
+  auto& properties = meshRelation_.properties;
+  properties.resize(numProp * numVertsNew);
+  for_each_n(
+      policy, countAt(0), numVerts,
+      [&properties, &oldProp, &propOld2New, &keep, &numProp](const int oldIdx) {
+        if (keep[oldIdx] == 0) return;
+        for (int p = 0; p < numProp; ++p) {
+          properties[propOld2New[oldIdx] * numProp + p] =
+              oldProp[oldIdx * numProp + p];
+        }
+      });
+  for_each_n(policy, meshRelation_.triProperties.begin(), NumTri(),
+             ReindexProps({propOld2New}));
+}
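CompactProps() above uses the usual mark-and-scan compaction: a 0/1 keep flag is scanned into an old-to-new index map, and kept entries land at consecutive new indices. A sketch of just that mapping with plain std:: algorithms (not part of the vendored sources):

#include <numeric>
#include <vector>

std::vector<int> OldToNew(const std::vector<int>& keep) {
  // old2New[i] = number of kept entries strictly before i, matching the
  // inclusive_scan into propOld2New.begin() + 1 above; old2New.back() is the
  // compacted count.
  std::vector<int> old2New(keep.size() + 1, 0);
  std::partial_sum(keep.begin(), keep.end(), old2New.begin() + 1);
  return old2New;
}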
+
+/**
+ * Fills the faceBox and faceMorton input with the bounding boxes and Morton
+ * codes of the faces, respectively. The Morton code is based on the centroid
+ * of the face (the mean of its three vertex positions).
+ */
+void Manifold::Impl::GetFaceBoxMorton(Vec<Box>& faceBox,
+                                      Vec<uint32_t>& faceMorton) const {
+  ZoneScoped;
+  faceBox.resize(NumTri());
+  faceMorton.resize(NumTri());
+  for_each_n(autoPolicy(NumTri(), 1e5), countAt(0), NumTri(),
+             [this, &faceBox, &faceMorton](const int face) {
+               // Removed tris are marked by all halfedges having pairedHalfedge
+               // = -1, and this will sort them to the end (the Morton code only
+               // uses the first 30 of 32 bits).
+               if (halfedge_[3 * face].pairedHalfedge < 0) {
+                 faceMorton[face] = kNoCode;
+                 return;
+               }
+
+               vec3 center(0.0);
+
+               for (const int i : {0, 1, 2}) {
+                 const vec3 pos = vertPos_[halfedge_[3 * face + i].startVert];
+                 center += pos;
+                 faceBox[face].Union(pos);
+               }
+               center /= 3;
+
+               faceMorton[face] = MortonCode(center, bBox_);
+             });
+}
+
+/**
+ * Sorts the faces of this manifold according to their input Morton code. The
+ * bounding box and Morton code arrays are also sorted accordingly.
+ */
+void Manifold::Impl::SortFaces(Vec<Box>& faceBox, Vec<uint32_t>& faceMorton) {
+  ZoneScoped;
+  Vec<int> faceNew2Old(NumTri());
+  sequence(faceNew2Old.begin(), faceNew2Old.end());
+
+  stable_sort(faceNew2Old.begin(), faceNew2Old.end(),
+              [&faceMorton](const int& a, const int& b) {
+                return faceMorton[a] < faceMorton[b];
+              });
+
+  // Tris were flagged for removal with pairedHalfedge = -1 and assigned kNoCode
+  // to sort them to the end, which allows them to be removed.
+  const int newNumTri = std::find_if(faceNew2Old.begin(), faceNew2Old.end(),
+                                     [&faceMorton](const int face) {
+                                       return faceMorton[face] == kNoCode;
+                                     }) -
+                        faceNew2Old.begin();
+  faceNew2Old.resize(newNumTri);
+
+  Permute(faceMorton, faceNew2Old);
+  Permute(faceBox, faceNew2Old);
+  GatherFaces(faceNew2Old);
+}
+
+/**
+ * Creates the halfedge_ vector for this manifold by copying a set of faces from
+ * another manifold, given by oldHalfedge. Input faceNew2Old defines the old
+ * faces to gather into this.
+ */
+void Manifold::Impl::GatherFaces(const Vec<int>& faceNew2Old) {
+  ZoneScoped;
+  const auto numTri = faceNew2Old.size();
+  if (meshRelation_.triRef.size() == NumTri())
+    Permute(meshRelation_.triRef, faceNew2Old);
+  if (meshRelation_.triProperties.size() == NumTri())
+    Permute(meshRelation_.triProperties, faceNew2Old);
+  if (faceNormal_.size() == NumTri()) Permute(faceNormal_, faceNew2Old);
+
+  Vec<Halfedge> oldHalfedge(std::move(halfedge_));
+  Vec<vec4> oldHalfedgeTangent(std::move(halfedgeTangent_));
+  Vec<int> faceOld2New(oldHalfedge.size() / 3);
+  auto policy = autoPolicy(numTri, 1e5);
+  scatter(countAt(0_uz), countAt(numTri), faceNew2Old.begin(),
+          faceOld2New.begin());
+
+  halfedge_.resize(3 * numTri);
+  if (oldHalfedgeTangent.size() != 0) halfedgeTangent_.resize(3 * numTri);
+  for_each_n(policy, countAt(0), numTri,
+             ReindexFace({halfedge_, halfedgeTangent_, oldHalfedge,
+                          oldHalfedgeTangent, faceNew2Old, faceOld2New}));
+}
+
+void Manifold::Impl::GatherFaces(const Impl& old, const Vec<int>& faceNew2Old) {
+  ZoneScoped;
+  const auto numTri = faceNew2Old.size();
+
+  meshRelation_.triRef.resize(numTri);
+  gather(faceNew2Old.begin(), faceNew2Old.end(),
+         old.meshRelation_.triRef.begin(), meshRelation_.triRef.begin());
+
+  for (const auto& pair : old.meshRelation_.meshIDtransform) {
+    meshRelation_.meshIDtransform[pair.first] = pair.second;
+  }
+
+  if (old.meshRelation_.triProperties.size() > 0) {
+    meshRelation_.triProperties.resize(numTri);
+    gather(faceNew2Old.begin(), faceNew2Old.end(),
+           old.meshRelation_.triProperties.begin(),
+           meshRelation_.triProperties.begin());
+    meshRelation_.numProp = old.meshRelation_.numProp;
+    meshRelation_.properties = old.meshRelation_.properties;
+  }
+
+  if (old.faceNormal_.size() == old.NumTri()) {
+    faceNormal_.resize(numTri);
+    gather(faceNew2Old.begin(), faceNew2Old.end(), old.faceNormal_.begin(),
+           faceNormal_.begin());
+  }
+
+  Vec<int> faceOld2New(old.NumTri());
+  scatter(countAt(0_uz), countAt(numTri), faceNew2Old.begin(),
+          faceOld2New.begin());
+
+  halfedge_.resize(3 * numTri);
+  if (old.halfedgeTangent_.size() != 0) halfedgeTangent_.resize(3 * numTri);
+  for_each_n(autoPolicy(numTri, 1e5), countAt(0), numTri,
+             ReindexFace({halfedge_, halfedgeTangent_, old.halfedge_,
+                          old.halfedgeTangent_, faceNew2Old, faceOld2New}));
+}
+
+/**
+ * Updates the mergeFromVert and mergeToVert vectors in order to create a
+ * manifold solid. If the MeshGL is already manifold, no change will occur and
+ * the function will return false. Otherwise, this will merge verts along open
+ * edges within tolerance (the maximum of the MeshGL tolerance and the
+ * baseline bounding-box tolerance), keeping any from the existing merge
+ * vectors, and return true.
+ *
+ * There is no guarantee the result will be manifold - this is a best-effort
+ * helper function designed primarily to aid in the case where a manifold
+ * multi-material MeshGL was produced, but its merge vectors were lost due to
+ * a round-trip through a file format. Constructing a Manifold from the result
+ * will report an error status if it is not manifold.
+ */
+template <>
+bool MeshGL::Merge() {
+  return MergeMeshGLP(*this);
+}
+
+template <>
+bool MeshGL64::Merge() {
+  return MergeMeshGLP(*this);
+}
+}  // namespace manifold
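A usage sketch for MeshGL::Merge() as described in its comment above: recover merge vectors after a lossy round-trip, then construct a Manifold from the result. This is illustration only (not part of the vendored sources, and not how the Godot CSG module wires it up):

#include "manifold/manifold.h"

manifold::Manifold FromImportedMesh(manifold::MeshGL meshGL) {
  // Merge() returns false and changes nothing if the mesh is already manifold.
  meshGL.Merge();
  // The Manifold constructor records an error status if the result is still
  // not manifold; checking that status is left to the caller.
  return manifold::Manifold(meshGL);
}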

+ 225 - 0
thirdparty/manifold/src/sparse.h

@@ -0,0 +1,225 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "./parallel.h"
+#include "./utils.h"
+#include "./vec.h"
+#include "manifold/common.h"
+#include "manifold/optional_assert.h"
+
+namespace {
+template <typename T>
+inline bool FirstFinite(T v) {
+  return std::isfinite(v[0]);
+}
+
+template <>
+inline bool FirstFinite<double>(double v) {
+  return std::isfinite(v);
+}
+}  // namespace
+
+namespace manifold {
+
+/** @ingroup Private */
+class SparseIndices {
+  // sparse indices where {p1: q1, p2: q2, ...} are laid out as
+  // p1 q1 p2 q2 or q1 p1 q2 p2, depending on endianness
+  // such that the indices are sorted by (p << 32) | q
+ public:
+#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN ||                 \
+    defined(__BIG_ENDIAN__) || defined(__ARMEB__) || defined(__THUMBEB__) || \
+    defined(__AARCH64EB__) || defined(_MIBSEB) || defined(__MIBSEB) ||       \
+    defined(__MIBSEB__)
+  static constexpr size_t pOffset = 0;
+#elif defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN ||          \
+    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) ||                    \
+    defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) ||  \
+    defined(__MIPSEL) || defined(__MIPSEL__) || defined(__EMSCRIPTEN__) || \
+    defined(_WIN32)
+  static constexpr size_t pOffset = 1;
+#else
+#error "unknown architecture"
+#endif
+  static constexpr int64_t EncodePQ(int p, int q) {
+    return (int64_t(p) << 32) | q;
+  }
+
+  SparseIndices() = default;
+  SparseIndices(size_t size) { data_ = Vec<char>(size * sizeof(int64_t)); }
+
+  void Clear() { data_.clear(false); }
+
+  void FromIndices(const std::vector<SparseIndices>& indices) {
+    std::vector<size_t> sizes;
+    size_t total_size = 0;
+    for (const auto& ind : indices) {
+      sizes.push_back(total_size);
+      total_size += ind.data_.size();
+    }
+    data_ = Vec<char>(total_size);
+    for_each_n(ExecutionPolicy::Par, countAt(0), indices.size(), [&](size_t i) {
+      std::copy(indices[i].data_.begin(), indices[i].data_.end(),
+                data_.begin() + sizes[i]);
+    });
+  }
+
+  size_t size() const { return data_.size() / sizeof(int64_t); }
+
+  Vec<int> Copy(bool use_q) const {
+    Vec<int> out(size());
+    size_t offset = pOffset;
+    if (use_q) offset = 1 - offset;
+    const int* p = ptr();
+    for_each(autoPolicy(out.size()), countAt(0_uz), countAt(out.size()),
+             [&](size_t i) { out[i] = p[i * 2 + offset]; });
+    return out;
+  }
+
+  void Sort() {
+    VecView<int64_t> view = AsVec64();
+    stable_sort(view.begin(), view.end());
+  }
+
+  void Resize(size_t size) { data_.resize(size * sizeof(int64_t), -1); }
+
+  inline int& Get(size_t i, bool use_q) {
+    if (use_q)
+      return ptr()[2 * i + 1 - pOffset];
+    else
+      return ptr()[2 * i + pOffset];
+  }
+
+  inline int Get(size_t i, bool use_q) const {
+    if (use_q)
+      return ptr()[2 * i + 1 - pOffset];
+    else
+      return ptr()[2 * i + pOffset];
+  }
+
+  inline int64_t GetPQ(size_t i) const {
+    VecView<const int64_t> view = AsVec64();
+    return view[i];
+  }
+
+  inline void Set(size_t i, int p, int q) {
+    VecView<int64_t> view = AsVec64();
+    view[i] = EncodePQ(p, q);
+  }
+
+  inline void SetPQ(size_t i, int64_t pq) {
+    VecView<int64_t> view = AsVec64();
+    view[i] = pq;
+  }
+
+  VecView<int64_t> AsVec64() {
+    return VecView<int64_t>(reinterpret_cast<int64_t*>(data_.data()),
+                            data_.size() / sizeof(int64_t));
+  }
+
+  VecView<const int64_t> AsVec64() const {
+    return VecView<const int64_t>(
+        reinterpret_cast<const int64_t*>(data_.data()),
+        data_.size() / sizeof(int64_t));
+  }
+
+  VecView<int32_t> AsVec32() {
+    return VecView<int32_t>(reinterpret_cast<int32_t*>(data_.data()),
+                            data_.size() / sizeof(int32_t));
+  }
+
+  VecView<const int32_t> AsVec32() const {
+    return VecView<const int32_t>(
+        reinterpret_cast<const int32_t*>(data_.data()),
+        data_.size() / sizeof(int32_t));
+  }
+
+  inline void Add(int p, int q, bool seq = false) {
+    data_.extend(sizeof(int64_t), seq);
+    Set(size() - 1, p, q);
+  }
+
+  void Unique() {
+    Sort();
+    VecView<int64_t> view = AsVec64();
+    size_t newSize = unique(view.begin(), view.end()) - view.begin();
+    Resize(newSize);
+  }
+
+  size_t RemoveZeros(Vec<int>& S) {
+    DEBUG_ASSERT(S.size() == size(), userErr,
+                 "Different number of values than indicies!");
+
+    Vec<size_t> new2Old(S.size());
+    sequence(new2Old.begin(), new2Old.end());
+
+    size_t size = copy_if(countAt(0_uz), countAt(S.size()), new2Old.begin(),
+                          [&S](const size_t i) { return S[i] != 0; }) -
+                  new2Old.begin();
+    new2Old.resize(size);
+
+    Permute(S, new2Old);
+    Vec<char> tmp(std::move(data_));
+    Resize(size);
+    gather(new2Old.begin(), new2Old.end(),
+           reinterpret_cast<int64_t*>(tmp.data()),
+           reinterpret_cast<int64_t*>(data_.data()));
+
+    return size;
+  }
+
+  template <typename T>
+  size_t KeepFinite(Vec<T>& v, Vec<int>& x) {
+    DEBUG_ASSERT(x.size() == size(), userErr,
+                 "Different number of values than indicies!");
+
+    Vec<int> new2Old(v.size());
+    size_t size = copy_if(countAt(0_uz), countAt(v.size()), new2Old.begin(),
+                          [&v](size_t i) { return FirstFinite(v[i]); }) -
+                  new2Old.begin();
+    new2Old.resize(size);
+
+    Permute(v, new2Old);
+    Permute(x, new2Old);
+    Vec<char> tmp(std::move(data_));
+    Resize(size);
+    gather(new2Old.begin(), new2Old.end(),
+           reinterpret_cast<int64_t*>(tmp.data()),
+           reinterpret_cast<int64_t*>(data_.data()));
+
+    return size;
+  }
+
+#ifdef MANIFOLD_DEBUG
+  void Dump() const {
+    std::cout << "SparseIndices = " << std::endl;
+    const int* p = ptr();
+    for (size_t i = 0; i < size(); ++i) {
+      std::cout << i << ", p = " << Get(i, false) << ", q = " << Get(i, true)
+                << std::endl;
+    }
+    std::cout << std::endl;
+  }
+#endif
+
+ private:
+  Vec<char> data_;
+  inline int* ptr() { return reinterpret_cast<int32_t*>(data_.data()); }
+  inline const int* ptr() const {
+    return reinterpret_cast<const int32_t*>(data_.data());
+  }
+};
+
+}  // namespace manifold
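The layout SparseIndices depends on, shown standalone (not part of the vendored sources): for nonnegative p and q, one int64 per pair sorts lexicographically by (p, q), and on little-endian hardware the high half (p) lands in 32-bit slot 1, which is why pOffset flips with byte order above.

#include <cstddef>
#include <cstdint>
#include <cstring>

constexpr int64_t EncodePQ(int p, int q) { return (int64_t(p) << 32) | q; }

// Reads one 32-bit half of the packed value the same way ptr()/Get() do,
// but via memcpy instead of a reinterpret_cast over the whole buffer.
inline int32_t ReadHalf(int64_t pq, std::size_t offset) {
  int32_t halves[2];
  std::memcpy(halves, &pq, sizeof(pq));
  return halves[offset];  // offset == pOffset gives p, 1 - pOffset gives q
}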

+ 809 - 0
thirdparty/manifold/src/subdivision.cpp

@@ -0,0 +1,809 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./impl.h"
+#include "./parallel.h"
+
+template <>
+struct std::hash<manifold::ivec4> {
+  size_t operator()(const manifold::ivec4& p) const {
+    return std::hash<int>()(p.x) ^ std::hash<int>()(p.y) ^
+           std::hash<int>()(p.z) ^ std::hash<int>()(p.w);
+  }
+};
+
+namespace {
+using namespace manifold;
+
+class Partition {
+ public:
+  // The cached partitions don't have idx - it's added to the copy returned
+  // from GetPartition that contains the mapping of the input divisions into the
+  // sorted divisions that are uniquely cached.
+  ivec4 idx;
+  ivec4 sortedDivisions;
+  Vec<vec4> vertBary;
+  Vec<ivec3> triVert;
+
+  int InteriorOffset() const {
+    return sortedDivisions[0] + sortedDivisions[1] + sortedDivisions[2] +
+           sortedDivisions[3];
+  }
+
+  int NumInterior() const { return vertBary.size() - InteriorOffset(); }
+
+  static Partition GetPartition(ivec4 divisions) {
+    if (divisions[0] == 0) return Partition();  // skip wrong side of quad
+
+    ivec4 sortedDiv = divisions;
+    ivec4 triIdx = {0, 1, 2, 3};
+    if (divisions[3] == 0) {  // triangle
+      if (sortedDiv[2] > sortedDiv[1]) {
+        std::swap(sortedDiv[2], sortedDiv[1]);
+        std::swap(triIdx[2], triIdx[1]);
+      }
+      if (sortedDiv[1] > sortedDiv[0]) {
+        std::swap(sortedDiv[1], sortedDiv[0]);
+        std::swap(triIdx[1], triIdx[0]);
+        if (sortedDiv[2] > sortedDiv[1]) {
+          std::swap(sortedDiv[2], sortedDiv[1]);
+          std::swap(triIdx[2], triIdx[1]);
+        }
+      }
+    } else {  // quad
+      int minIdx = 0;
+      int min = divisions[minIdx];
+      int next = divisions[1];
+      for (const int i : {1, 2, 3}) {
+        const int n = divisions[(i + 1) % 4];
+        if (divisions[i] < min || (divisions[i] == min && n < next)) {
+          minIdx = i;
+          min = divisions[i];
+          next = n;
+        }
+      }
+      // Backwards (mirrored) quads get a separate cache key for now for
+      // simplicity, so there is no reversal necessary for quads when
+      // re-indexing.
+      ivec4 tmp = sortedDiv;
+      for (const int i : {0, 1, 2, 3}) {
+        triIdx[i] = (i + minIdx) % 4;
+        sortedDiv[i] = tmp[triIdx[i]];
+      }
+    }
+
+    Partition partition = GetCachedPartition(sortedDiv);
+    partition.idx = triIdx;
+
+    return partition;
+  }
+
+  Vec<ivec3> Reindex(ivec4 triVerts, ivec4 edgeOffsets, bvec4 edgeFwd,
+                     int interiorOffset) const {
+    Vec<int> newVerts;
+    newVerts.reserve(vertBary.size());
+    ivec4 triIdx = idx;
+    ivec4 outTri = {0, 1, 2, 3};
+    if (triVerts[3] < 0 && idx[1] != Next3(idx[0])) {
+      triIdx = {idx[2], idx[0], idx[1], idx[3]};
+      edgeFwd = !edgeFwd;
+      std::swap(outTri[0], outTri[1]);
+    }
+    for (const int i : {0, 1, 2, 3}) {
+      if (triVerts[triIdx[i]] >= 0) newVerts.push_back(triVerts[triIdx[i]]);
+    }
+    for (const int i : {0, 1, 2, 3}) {
+      const int n = sortedDivisions[i] - 1;
+      int offset = edgeOffsets[idx[i]] + (edgeFwd[idx[i]] ? 0 : n - 1);
+      for (int j = 0; j < n; ++j) {
+        newVerts.push_back(offset);
+        offset += edgeFwd[idx[i]] ? 1 : -1;
+      }
+    }
+    const int offset = interiorOffset - newVerts.size();
+    size_t old = newVerts.size();
+    newVerts.resize(vertBary.size());
+    std::iota(newVerts.begin() + old, newVerts.end(), old + offset);
+
+    const int numTri = triVert.size();
+    Vec<ivec3> newTriVert(numTri);
+    for_each_n(autoPolicy(numTri), countAt(0), numTri,
+               [&newTriVert, &outTri, &newVerts, this](const int tri) {
+                 for (const int j : {0, 1, 2}) {
+                   newTriVert[tri][outTri[j]] = newVerts[triVert[tri][j]];
+                 }
+               });
+    return newTriVert;
+  }
+
+ private:
+  static inline auto cacheLock = std::mutex();
+  static inline auto cache =
+      std::unordered_map<ivec4, std::unique_ptr<Partition>>();
+
+  // This triangulation is purely topological - it depends only on the number of
+  // divisions of the three sides of the triangle. This allows them to be cached
+  // and reused for similar triangles. The shape of the final surface is defined
+  // by the tangents and the barycentric coordinates of the new verts. For
+  // triangles, the input must be sorted: n[0] >= n[1] >= n[2] > 0.
+  static Partition GetCachedPartition(ivec4 n) {
+    {
+      auto lockGuard = std::lock_guard<std::mutex>(cacheLock);
+      auto cached = cache.find(n);
+      if (cached != cache.end()) {
+        return *cached->second;
+      }
+    }
+    Partition partition;
+    partition.sortedDivisions = n;
+    if (n[3] > 0) {  // quad
+      partition.vertBary.push_back({1, 0, 0, 0});
+      partition.vertBary.push_back({0, 1, 0, 0});
+      partition.vertBary.push_back({0, 0, 1, 0});
+      partition.vertBary.push_back({0, 0, 0, 1});
+      ivec4 edgeOffsets;
+      edgeOffsets[0] = 4;
+      for (const int i : {0, 1, 2, 3}) {
+        if (i > 0) {
+          edgeOffsets[i] = edgeOffsets[i - 1] + n[i - 1] - 1;
+        }
+        const vec4 nextBary = partition.vertBary[(i + 1) % 4];
+        for (int j = 1; j < n[i]; ++j) {
+          partition.vertBary.push_back(
+              la::lerp(partition.vertBary[i], nextBary, (double)j / n[i]));
+        }
+      }
+      PartitionQuad(partition.triVert, partition.vertBary, {0, 1, 2, 3},
+                    edgeOffsets, n - 1, {true, true, true, true});
+    } else {  // tri
+      partition.vertBary.push_back({1, 0, 0, 0});
+      partition.vertBary.push_back({0, 1, 0, 0});
+      partition.vertBary.push_back({0, 0, 1, 0});
+      for (const int i : {0, 1, 2}) {
+        const vec4 nextBary = partition.vertBary[(i + 1) % 3];
+        for (int j = 1; j < n[i]; ++j) {
+          partition.vertBary.push_back(
+              la::lerp(partition.vertBary[i], nextBary, (double)j / n[i]));
+        }
+      }
+      const ivec3 edgeOffsets = {3, 3 + n[0] - 1, 3 + n[0] - 1 + n[1] - 1};
+
+      const double f = n[2] * n[2] + n[0] * n[0];
+      if (n[1] == 1) {
+        if (n[0] == 1) {
+          partition.triVert.push_back({0, 1, 2});
+        } else {
+          PartitionFan(partition.triVert, {0, 1, 2}, n[0] - 1, edgeOffsets[0]);
+        }
+      } else if (n[1] * n[1] > f - std::sqrt(2.0) * n[0] * n[2]) {  // acute-ish
+        partition.triVert.push_back({edgeOffsets[1] - 1, 1, edgeOffsets[1]});
+        PartitionQuad(partition.triVert, partition.vertBary,
+                      {edgeOffsets[1] - 1, edgeOffsets[1], 2, 0},
+                      {-1, edgeOffsets[1] + 1, edgeOffsets[2], edgeOffsets[0]},
+                      {0, n[1] - 2, n[2] - 1, n[0] - 2},
+                      {true, true, true, true});
+      } else {  // obtuse -> split into two acute
+        // portion of n[0] under n[2]
+        const int ns =
+            std::min(n[0] - 2, (int)std::round((f - n[1] * n[1]) / (2 * n[0])));
+        // height from n[0]: nh <= n[2]
+        const int nh =
+            std::max(1., std::round(std::sqrt(n[2] * n[2] - ns * ns)));
+
+        const int hOffset = partition.vertBary.size();
+        const vec4 middleBary = partition.vertBary[edgeOffsets[0] + ns - 1];
+        for (int j = 1; j < nh; ++j) {
+          partition.vertBary.push_back(
+              la::lerp(partition.vertBary[2], middleBary, (double)j / nh));
+        }
+
+        partition.triVert.push_back({edgeOffsets[1] - 1, 1, edgeOffsets[1]});
+        PartitionQuad(
+            partition.triVert, partition.vertBary,
+            {edgeOffsets[1] - 1, edgeOffsets[1], 2, edgeOffsets[0] + ns - 1},
+            {-1, edgeOffsets[1] + 1, hOffset, edgeOffsets[0] + ns},
+            {0, n[1] - 2, nh - 1, n[0] - ns - 2}, {true, true, true, true});
+
+        if (n[2] == 1) {
+          PartitionFan(partition.triVert, {0, edgeOffsets[0] + ns - 1, 2},
+                       ns - 1, edgeOffsets[0]);
+        } else {
+          if (ns == 1) {
+            partition.triVert.push_back({hOffset, 2, edgeOffsets[2]});
+            PartitionQuad(partition.triVert, partition.vertBary,
+                          {hOffset, edgeOffsets[2], 0, edgeOffsets[0]},
+                          {-1, edgeOffsets[2] + 1, -1, hOffset + nh - 2},
+                          {0, n[2] - 2, ns - 1, nh - 2},
+                          {true, true, true, false});
+          } else {
+            partition.triVert.push_back({hOffset - 1, 0, edgeOffsets[0]});
+            PartitionQuad(
+                partition.triVert, partition.vertBary,
+                {hOffset - 1, edgeOffsets[0], edgeOffsets[0] + ns - 1, 2},
+                {-1, edgeOffsets[0] + 1, hOffset + nh - 2, edgeOffsets[2]},
+                {0, ns - 2, nh - 1, n[2] - 2}, {true, true, false, true});
+          }
+        }
+      }
+    }
+
+    auto lockGuard = std::lock_guard<std::mutex>(cacheLock);
+    cache.insert({n, std::make_unique<Partition>(partition)});
+    return partition;
+  }
+
+  // Side 0 has added edges while sides 1 and 2 do not. Fan spreads from vert 2.
+  static void PartitionFan(Vec<ivec3>& triVert, ivec3 cornerVerts, int added,
+                           int edgeOffset) {
+    int last = cornerVerts[0];
+    for (int i = 0; i < added; ++i) {
+      const int next = edgeOffset + i;
+      triVert.push_back({last, next, cornerVerts[2]});
+      last = next;
+    }
+    triVert.push_back({last, cornerVerts[1], cornerVerts[2]});
+  }
+
+  // Partitions are parallel to the first edge unless two consecutive edgeAdded
+  // are zero, in which case a terminal triangulation is performed.
+  static void PartitionQuad(Vec<ivec3>& triVert, Vec<vec4>& vertBary,
+                            ivec4 cornerVerts, ivec4 edgeOffsets,
+                            ivec4 edgeAdded, bvec4 edgeFwd) {
+    auto GetEdgeVert = [&](int edge, int idx) {
+      return edgeOffsets[edge] + (edgeFwd[edge] ? 1 : -1) * idx;
+    };
+
+    DEBUG_ASSERT(la::all(la::gequal(edgeAdded, ivec4(0))), logicErr,
+                 "negative divisions!");
+
+    int corner = -1;
+    int last = 3;
+    int maxEdge = -1;
+    for (const int i : {0, 1, 2, 3}) {
+      if (corner == -1 && edgeAdded[i] == 0 && edgeAdded[last] == 0) {
+        corner = i;
+      }
+      if (edgeAdded[i] > 0) {
+        maxEdge = maxEdge == -1 ? i : -2;
+      }
+      last = i;
+    }
+    if (corner >= 0) {  // terminate
+      if (maxEdge >= 0) {
+        ivec4 edge = (ivec4(0, 1, 2, 3) + maxEdge) % 4;
+        const int middle = edgeAdded[maxEdge] / 2;
+        triVert.push_back({cornerVerts[edge[2]], cornerVerts[edge[3]],
+                           GetEdgeVert(maxEdge, middle)});
+        int last = cornerVerts[edge[0]];
+        for (int i = 0; i <= middle; ++i) {
+          const int next = GetEdgeVert(maxEdge, i);
+          triVert.push_back({cornerVerts[edge[3]], last, next});
+          last = next;
+        }
+        last = cornerVerts[edge[1]];
+        for (int i = edgeAdded[maxEdge] - 1; i >= middle; --i) {
+          const int next = GetEdgeVert(maxEdge, i);
+          triVert.push_back({cornerVerts[edge[2]], next, last});
+          last = next;
+        }
+      } else {
+        int sideVert = cornerVerts[0];  // initial value is unused
+        for (const int j : {1, 2}) {
+          const int side = (corner + j) % 4;
+          if (j == 2 && edgeAdded[side] > 0) {
+            triVert.push_back(
+                {cornerVerts[side], GetEdgeVert(side, 0), sideVert});
+          } else {
+            sideVert = cornerVerts[side];
+          }
+          for (int i = 0; i < edgeAdded[side]; ++i) {
+            const int nextVert = GetEdgeVert(side, i);
+            triVert.push_back({cornerVerts[corner], sideVert, nextVert});
+            sideVert = nextVert;
+          }
+          if (j == 2 || edgeAdded[side] == 0) {
+            triVert.push_back({cornerVerts[corner], sideVert,
+                               cornerVerts[(corner + j + 1) % 4]});
+          }
+        }
+      }
+      return;
+    }
+    // recursively partition
+    const int partitions = 1 + std::min(edgeAdded[1], edgeAdded[3]);
+    ivec4 newCornerVerts = {cornerVerts[1], -1, -1, cornerVerts[0]};
+    ivec4 newEdgeOffsets = {edgeOffsets[1], -1,
+                            GetEdgeVert(3, edgeAdded[3] + 1), edgeOffsets[0]};
+    ivec4 newEdgeAdded = {0, -1, 0, edgeAdded[0]};
+    bvec4 newEdgeFwd = {edgeFwd[1], true, edgeFwd[3], edgeFwd[0]};
+
+    for (int i = 1; i < partitions; ++i) {
+      const int cornerOffset1 = (edgeAdded[1] * i) / partitions;
+      const int cornerOffset3 =
+          edgeAdded[3] - 1 - (edgeAdded[3] * i) / partitions;
+      const int nextOffset1 = GetEdgeVert(1, cornerOffset1 + 1);
+      const int nextOffset3 = GetEdgeVert(3, cornerOffset3 + 1);
+      const int added = std::round(la::lerp(
+          (double)edgeAdded[0], (double)edgeAdded[2], (double)i / partitions));
+
+      newCornerVerts[1] = GetEdgeVert(1, cornerOffset1);
+      newCornerVerts[2] = GetEdgeVert(3, cornerOffset3);
+      newEdgeAdded[0] = std::abs(nextOffset1 - newEdgeOffsets[0]) - 1;
+      newEdgeAdded[1] = added;
+      newEdgeAdded[2] = std::abs(nextOffset3 - newEdgeOffsets[2]) - 1;
+      newEdgeOffsets[1] = vertBary.size();
+      newEdgeOffsets[2] = nextOffset3;
+
+      for (int j = 0; j < added; ++j) {
+        vertBary.push_back(la::lerp(vertBary[newCornerVerts[1]],
+                                    vertBary[newCornerVerts[2]],
+                                    (j + 1.0) / (added + 1.0)));
+      }
+
+      PartitionQuad(triVert, vertBary, newCornerVerts, newEdgeOffsets,
+                    newEdgeAdded, newEdgeFwd);
+
+      newCornerVerts[0] = newCornerVerts[1];
+      newCornerVerts[3] = newCornerVerts[2];
+      newEdgeAdded[3] = newEdgeAdded[1];
+      newEdgeOffsets[0] = nextOffset1;
+      newEdgeOffsets[3] = newEdgeOffsets[1] + newEdgeAdded[1] - 1;
+      newEdgeFwd[3] = false;
+    }
+
+    newCornerVerts[1] = cornerVerts[2];
+    newCornerVerts[2] = cornerVerts[3];
+    newEdgeOffsets[1] = edgeOffsets[2];
+    newEdgeAdded[0] =
+        edgeAdded[1] - std::abs(newEdgeOffsets[0] - edgeOffsets[1]);
+    newEdgeAdded[1] = edgeAdded[2];
+    newEdgeAdded[2] = std::abs(newEdgeOffsets[2] - edgeOffsets[3]) - 1;
+    newEdgeOffsets[2] = edgeOffsets[3];
+    newEdgeFwd[1] = edgeFwd[2];
+
+    PartitionQuad(triVert, vertBary, newCornerVerts, newEdgeOffsets,
+                  newEdgeAdded, newEdgeFwd);
+  }
+};
+}  // namespace
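GetCachedPartition() above follows a check, compute-outside-the-lock, then insert pattern; if two threads race, both compute and the losing insert is a no-op. A generic sketch of that shape (not part of the vendored sources; the real cache stores unique_ptr values in an unordered_map keyed by ivec4):

#include <map>
#include <mutex>

template <typename K, typename V, typename F>
V GetOrCompute(std::map<K, V>& cache, std::mutex& mtx, const K& key, F compute) {
  {
    std::lock_guard<std::mutex> lock(mtx);
    auto it = cache.find(key);
    if (it != cache.end()) return it->second;
  }
  V value = compute(key);  // potentially expensive, done without the lock
  std::lock_guard<std::mutex> lock(mtx);
  cache.emplace(key, value);  // no-op if another thread inserted first
  return value;
}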
+
+namespace manifold {
+
+/**
+ * Returns the tri side index (0-2) connected to the other side of this quad if
+ * this tri is part of a quad, or -1 otherwise.
+ */
+int Manifold::Impl::GetNeighbor(int tri) const {
+  int neighbor = -1;
+  for (const int i : {0, 1, 2}) {
+    if (IsMarkedInsideQuad(3 * tri + i)) {
+      neighbor = neighbor == -1 ? i : -2;
+    }
+  }
+  return neighbor;
+}
+
+/**
+ * For the given triangle index, returns either the three halfedge indices of
+ * that triangle and halfedges[3] = -1, or if the triangle is part of a quad, it
+ * returns those four indices. If the triangle is part of a quad and is not the
+ * lower of the two triangle indices, it returns all -1s.
+ */
+ivec4 Manifold::Impl::GetHalfedges(int tri) const {
+  ivec4 halfedges(-1);
+  for (const int i : {0, 1, 2}) {
+    halfedges[i] = 3 * tri + i;
+  }
+  const int neighbor = GetNeighbor(tri);
+  if (neighbor >= 0) {  // quad
+    const int pair = halfedge_[3 * tri + neighbor].pairedHalfedge;
+    if (pair / 3 < tri) {
+      return ivec4(-1);  // only process lower tri index
+    }
+    // The order here matters to keep small quads split the way they started, or
+    // else it can create a 4-manifold edge.
+    halfedges[2] = NextHalfedge(halfedges[neighbor]);
+    halfedges[3] = NextHalfedge(halfedges[2]);
+    halfedges[0] = NextHalfedge(pair);
+    halfedges[1] = NextHalfedge(halfedges[0]);
+  }
+  return halfedges;
+}
+
+/**
+ * Returns the BaryIndices, which gives the tri and indices (0-3), such that
+ * GetHalfedges(val.tri)[val.start4] points back to this halfedge, and val.end4
+ * will point to the next one. This function handles this for both triangles and
+ * quads. Returns {-1, -1, -1} if the edge is the interior of a quad.
+ */
+Manifold::Impl::BaryIndices Manifold::Impl::GetIndices(int halfedge) const {
+  int tri = halfedge / 3;
+  int idx = halfedge % 3;
+  const int neighbor = GetNeighbor(tri);
+  if (idx == neighbor) {
+    return {-1, -1, -1};
+  }
+
+  if (neighbor < 0) {  // tri
+    return {tri, idx, Next3(idx)};
+  } else {  // quad
+    const int pair = halfedge_[3 * tri + neighbor].pairedHalfedge;
+    if (pair / 3 < tri) {
+      tri = pair / 3;
+      idx = Next3(neighbor) == idx ? 0 : 1;
+    } else {
+      idx = Next3(neighbor) == idx ? 2 : 3;
+    }
+    return {tri, idx, (idx + 1) % 4};
+  }
+}
+
+/**
+ * Retained verts are part of several triangles, and it doesn't matter which one
+ * the vertBary refers to. Here, whichever triangle is processed last wins; this
+ * is done on the CPU for simplicity for now. Using AtomicCAS on .tri should
+ * work for a GPU version if desired.
+ */
+void Manifold::Impl::FillRetainedVerts(Vec<Barycentric>& vertBary) const {
+  const int numTri = halfedge_.size() / 3;
+  for (int tri = 0; tri < numTri; ++tri) {
+    for (const int i : {0, 1, 2}) {
+      const BaryIndices indices = GetIndices(3 * tri + i);
+      if (indices.start4 < 0) continue;  // skip quad interiors
+      vec4 uvw(0.0);
+      uvw[indices.start4] = 1;
+      vertBary[halfedge_[3 * tri + i].startVert] = {indices.tri, uvw};
+    }
+  }
+}
+
+/**
+ * Split each edge into n pieces as defined by calling the edgeDivisions
+ * function, and sub-triangulate each triangle accordingly. This function
+ * doesn't run Finish(), as that is expensive and it'll need to be run after
+ * the new vertices have moved, which is a likely scenario after refinement
+ * (smoothing).
+ */
+Vec<Barycentric> Manifold::Impl::Subdivide(
+    std::function<int(vec3, vec4, vec4)> edgeDivisions, bool keepInterior) {
+  Vec<TmpEdge> edges = CreateTmpEdges(halfedge_);
+  const int numVert = NumVert();
+  const int numEdge = edges.size();
+  const int numTri = NumTri();
+  Vec<int> half2Edge(2 * numEdge);
+  auto policy = autoPolicy(numEdge, 1e4);
+  for_each_n(policy, countAt(0), numEdge,
+             [&half2Edge, &edges, this](const int edge) {
+               const int idx = edges[edge].halfedgeIdx;
+               half2Edge[idx] = edge;
+               half2Edge[halfedge_[idx].pairedHalfedge] = edge;
+             });
+
+  Vec<ivec4> faceHalfedges(numTri);
+  for_each_n(policy, countAt(0), numTri, [&faceHalfedges, this](const int tri) {
+    faceHalfedges[tri] = GetHalfedges(tri);
+  });
+
+  Vec<int> edgeAdded(numEdge);
+  for_each_n(policy, countAt(0), numEdge,
+             [&edgeAdded, &edges, edgeDivisions, this](const int i) {
+               const TmpEdge edge = edges[i];
+               const int hIdx = edge.halfedgeIdx;
+               if (IsMarkedInsideQuad(hIdx)) {
+                 edgeAdded[i] = 0;
+                 return;
+               }
+               const vec3 vec = vertPos_[edge.first] - vertPos_[edge.second];
+               const vec4 tangent0 = halfedgeTangent_.empty()
+                                         ? vec4(0.0)
+                                         : halfedgeTangent_[hIdx];
+               const vec4 tangent1 =
+                   halfedgeTangent_.empty()
+                       ? vec4(0.0)
+                       : halfedgeTangent_[halfedge_[hIdx].pairedHalfedge];
+               edgeAdded[i] = edgeDivisions(vec, tangent0, tangent1);
+             });
+
+  if (keepInterior) {
+    // Triangles where the greatest number of divisions exceeds the sum of the
+    // other two sides will be triangulated as a strip, since if the sub-edges
+    // were all equal length it would be degenerate. This leads to poor results
+    // with RefineToTolerance, so we avoid this case by adding some extra
+    // divisions to the short sides so that the triangulation has some thickness
+    // and creates more interior facets.
+    Vec<int> tmp(numEdge);
+    for_each_n(
+        policy, countAt(0), numEdge,
+        [&tmp, &edgeAdded, &edges, &half2Edge, this](const int i) {
+          tmp[i] = edgeAdded[i];
+          const TmpEdge edge = edges[i];
+          int hIdx = edge.halfedgeIdx;
+          if (IsMarkedInsideQuad(hIdx)) return;
+
+          const int thisAdded = tmp[i];
+          auto Added = [&edgeAdded, &half2Edge, thisAdded, this](int hIdx) {
+            int longest = 0;
+            int total = 0;
+            for (int j : {0, 1, 2}) {
+              const int added = edgeAdded[half2Edge[hIdx]];
+              longest = la::max(longest, added);
+              total += added;
+              hIdx = NextHalfedge(hIdx);
+              if (IsMarkedInsideQuad(hIdx)) {
+                // No extra on quads
+                longest = 0;
+                total = 1;
+                break;
+              }
+            }
+            const int minExtra = longest * 0.2 + 1;
+            const int extra = 2 * longest + minExtra - total;
+            return extra > 0 ? (extra * (longest - thisAdded)) / longest : 0;
+          };
+
+          tmp[i] += la::max(Added(hIdx), Added(halfedge_[hIdx].pairedHalfedge));
+        });
+    edgeAdded.swap(tmp);
+  }
+
+  Vec<int> edgeOffset(numEdge);
+  exclusive_scan(edgeAdded.begin(), edgeAdded.end(), edgeOffset.begin(),
+                 numVert);
+
+  Vec<Barycentric> vertBary(edgeOffset.back() + edgeAdded.back());
+  const int totalEdgeAdded = vertBary.size() - numVert;
+  FillRetainedVerts(vertBary);
+  for_each_n(policy, countAt(0), numEdge,
+             [&vertBary, &edges, &edgeAdded, &edgeOffset, this](const int i) {
+               const int n = edgeAdded[i];
+               const int offset = edgeOffset[i];
+
+               const BaryIndices indices = GetIndices(edges[i].halfedgeIdx);
+               if (indices.tri < 0) {
+                 return;  // inside quad
+               }
+               const double frac = 1.0 / (n + 1);
+
+               for (int i = 0; i < n; ++i) {
+                 vec4 uvw(0.0);
+                 uvw[indices.end4] = (i + 1) * frac;
+                 uvw[indices.start4] = 1 - uvw[indices.end4];
+                 vertBary[offset + i].uvw = uvw;
+                 vertBary[offset + i].tri = indices.tri;
+               }
+             });
+
+  std::vector<Partition> subTris(numTri);
+  for_each_n(policy, countAt(0), numTri,
+             [this, &subTris, &half2Edge, &edgeAdded, &faceHalfedges](int tri) {
+               const ivec4 halfedges = faceHalfedges[tri];
+               ivec4 divisions(0);
+               for (const int i : {0, 1, 2, 3}) {
+                 if (halfedges[i] >= 0) {
+                   divisions[i] = edgeAdded[half2Edge[halfedges[i]]] + 1;
+                 }
+               }
+               subTris[tri] = Partition::GetPartition(divisions);
+             });
+
+  Vec<int> triOffset(numTri);
+  auto numSubTris =
+      TransformIterator(subTris.begin(), [](const Partition& part) {
+        return static_cast<int>(part.triVert.size());
+      });
+  manifold::exclusive_scan(numSubTris, numSubTris + numTri, triOffset.begin(),
+                           0);
+
+  Vec<int> interiorOffset(numTri);
+  auto numInterior =
+      TransformIterator(subTris.begin(), [](const Partition& part) {
+        return static_cast<int>(part.NumInterior());
+      });
+  manifold::exclusive_scan(numInterior, numInterior + numTri,
+                           interiorOffset.begin(),
+                           static_cast<int>(vertBary.size()));
+
+  Vec<ivec3> triVerts(triOffset.back() + subTris.back().triVert.size());
+  vertBary.resize(interiorOffset.back() + subTris.back().NumInterior());
+  Vec<TriRef> triRef(triVerts.size());
+  for_each_n(
+      policy, countAt(0), numTri,
+      [this, &triVerts, &triRef, &vertBary, &subTris, &edgeOffset, &half2Edge,
+       &triOffset, &interiorOffset, &faceHalfedges](int tri) {
+        const ivec4 halfedges = faceHalfedges[tri];
+        if (halfedges[0] < 0) return;
+        ivec4 tri3;
+        ivec4 edgeOffsets;
+        bvec4 edgeFwd(false);
+        for (const int i : {0, 1, 2, 3}) {
+          if (halfedges[i] < 0) {
+            tri3[i] = -1;
+            continue;
+          }
+          const Halfedge& halfedge = halfedge_[halfedges[i]];
+          tri3[i] = halfedge.startVert;
+          edgeOffsets[i] = edgeOffset[half2Edge[halfedges[i]]];
+          edgeFwd[i] = halfedge.IsForward();
+        }
+
+        Vec<ivec3> newTris = subTris[tri].Reindex(tri3, edgeOffsets, edgeFwd,
+                                                  interiorOffset[tri]);
+        copy(newTris.begin(), newTris.end(), triVerts.begin() + triOffset[tri]);
+        auto start = triRef.begin() + triOffset[tri];
+        fill(start, start + newTris.size(), meshRelation_.triRef[tri]);
+
+        const ivec4 idx = subTris[tri].idx;
+        const ivec4 vIdx = halfedges[3] >= 0 || idx[1] == Next3(idx[0])
+                               ? idx
+                               : ivec4(idx[2], idx[0], idx[1], idx[3]);
+        ivec4 rIdx;
+        for (const int i : {0, 1, 2, 3}) {
+          rIdx[vIdx[i]] = i;
+        }
+
+        const auto& subBary = subTris[tri].vertBary;
+        transform(subBary.begin() + subTris[tri].InteriorOffset(),
+                  subBary.end(), vertBary.begin() + interiorOffset[tri],
+                  [tri, rIdx](vec4 bary) {
+                    return Barycentric({tri,
+                                        {bary[rIdx[0]], bary[rIdx[1]],
+                                         bary[rIdx[2]], bary[rIdx[3]]}});
+                  });
+      });
+  meshRelation_.triRef = triRef;
+
+  Vec<vec3> newVertPos(vertBary.size());
+  for_each_n(policy, countAt(0), vertBary.size(),
+             [&newVertPos, &vertBary, &faceHalfedges, this](const int vert) {
+               const Barycentric bary = vertBary[vert];
+               const ivec4 halfedges = faceHalfedges[bary.tri];
+               if (halfedges[3] < 0) {
+                 mat3 triPos;
+                 for (const int i : {0, 1, 2}) {
+                   triPos[i] = vertPos_[halfedge_[halfedges[i]].startVert];
+                 }
+                 newVertPos[vert] = triPos * vec3(bary.uvw);
+               } else {
+                 mat3x4 quadPos;
+                 for (const int i : {0, 1, 2, 3}) {
+                   quadPos[i] = vertPos_[halfedge_[halfedges[i]].startVert];
+                 }
+                 newVertPos[vert] = quadPos * bary.uvw;
+               }
+             });
+  vertPos_ = newVertPos;
+
+  faceNormal_.resize(0);
+
+  if (meshRelation_.numProp > 0) {
+    const int numPropVert = NumPropVert();
+    const int addedVerts = NumVert() - numVert;
+    const int propOffset = numPropVert - numVert;
+    Vec<double> prop(meshRelation_.numProp *
+                     (numPropVert + addedVerts + totalEdgeAdded));
+
+    // copy retained prop verts
+    copy(meshRelation_.properties.begin(), meshRelation_.properties.end(),
+         prop.begin());
+
+    // copy interior prop verts and forward edge prop verts
+    for_each_n(
+        policy, countAt(0), addedVerts,
+        [&prop, &vertBary, &faceHalfedges, numVert, numPropVert,
+         this](const int i) {
+          const int vert = numPropVert + i;
+          const Barycentric bary = vertBary[numVert + i];
+          const ivec4 halfedges = faceHalfedges[bary.tri];
+          auto& rel = meshRelation_;
+
+          for (int p = 0; p < rel.numProp; ++p) {
+            if (halfedges[3] < 0) {
+              vec3 triProp;
+              for (const int i : {0, 1, 2}) {
+                triProp[i] = rel.properties[rel.triProperties[bary.tri][i] *
+                                                rel.numProp +
+                                            p];
+              }
+              prop[vert * rel.numProp + p] = la::dot(triProp, vec3(bary.uvw));
+            } else {
+              vec4 quadProp;
+              for (const int i : {0, 1, 2, 3}) {
+                const int tri = halfedges[i] / 3;
+                const int j = halfedges[i] % 3;
+                quadProp[i] =
+                    rel.properties[rel.triProperties[tri][j] * rel.numProp + p];
+              }
+              prop[vert * rel.numProp + p] = la::dot(quadProp, bary.uvw);
+            }
+          }
+        });
+
+    // copy backward edge prop verts
+    for_each_n(policy, countAt(0), numEdge,
+               [this, &prop, &edges, &edgeAdded, &edgeOffset, propOffset,
+                addedVerts](const int i) {
+                 const int n = edgeAdded[i];
+                 const int offset = edgeOffset[i] + propOffset + addedVerts;
+                 auto& rel = meshRelation_;
+
+                 const double frac = 1.0 / (n + 1);
+                 const int halfedgeIdx =
+                     halfedge_[edges[i].halfedgeIdx].pairedHalfedge;
+                 const int v0 = halfedgeIdx % 3;
+                 const int tri = halfedgeIdx / 3;
+                 const int prop0 = rel.triProperties[tri][v0];
+                 const int prop1 = rel.triProperties[tri][Next3(v0)];
+                 for (int i = 0; i < n; ++i) {
+                   for (int p = 0; p < rel.numProp; ++p) {
+                     prop[(offset + i) * rel.numProp + p] =
+                         la::lerp(rel.properties[prop0 * rel.numProp + p],
+                                  rel.properties[prop1 * rel.numProp + p],
+                                  (i + 1) * frac);
+                   }
+                 }
+               });
+
+    Vec<ivec3> triProp(triVerts.size());
+    for_each_n(policy, countAt(0), numTri,
+               [this, &triProp, &subTris, &edgeOffset, &half2Edge, &triOffset,
+                &interiorOffset, &faceHalfedges, propOffset,
+                addedVerts](const int tri) {
+                 const ivec4 halfedges = faceHalfedges[tri];
+                 if (halfedges[0] < 0) return;
+
+                 auto& rel = meshRelation_;
+                 ivec4 tri3;
+                 ivec4 edgeOffsets;
+                 bvec4 edgeFwd(true);
+                 for (const int i : {0, 1, 2, 3}) {
+                   if (halfedges[i] < 0) {
+                     tri3[i] = -1;
+                     continue;
+                   }
+                   const int thisTri = halfedges[i] / 3;
+                   const int j = halfedges[i] % 3;
+                   const Halfedge& halfedge = halfedge_[halfedges[i]];
+                   tri3[i] = rel.triProperties[thisTri][j];
+                   edgeOffsets[i] = edgeOffset[half2Edge[halfedges[i]]];
+                   if (!halfedge.IsForward()) {
+                     const int pairTri = halfedge.pairedHalfedge / 3;
+                     const int k = halfedge.pairedHalfedge % 3;
+                     if (rel.triProperties[pairTri][k] !=
+                             rel.triProperties[thisTri][Next3(j)] ||
+                         rel.triProperties[pairTri][Next3(k)] !=
+                             rel.triProperties[thisTri][j]) {
+                       edgeOffsets[i] += addedVerts;
+                     } else {
+                       edgeFwd[i] = false;
+                     }
+                   }
+                 }
+
+                 Vec<ivec3> newTris = subTris[tri].Reindex(
+                     tri3, edgeOffsets + propOffset, edgeFwd,
+                     interiorOffset[tri] + propOffset);
+                 copy(newTris.begin(), newTris.end(),
+                      triProp.begin() + triOffset[tri]);
+               });
+
+    meshRelation_.properties = prop;
+    meshRelation_.triProperties = triProp;
+  }
+
+  CreateHalfedges(triVerts);
+
+  return vertBary;
+}
+
+}  // namespace manifold
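
The interpolation loops above weight each triangle corner's properties by the new vertex's barycentric coordinates (la::dot(triProp, vec3(bary.uvw))) and lerp along subdivided edges at fractions (i + 1) / (n + 1). A minimal standalone sketch of the barycentric step, using only standard C++; the names below are illustrative and not part of the library API:

#include <array>
#include <cstdio>

// Barycentric interpolation of one property channel over a triangle:
// prop(P) = u * prop(A) + v * prop(B) + w * prop(C), with u + v + w == 1.
static double InterpolateProperty(const std::array<double, 3>& cornerProp,
                                  const std::array<double, 3>& uvw) {
  return cornerProp[0] * uvw[0] + cornerProp[1] * uvw[1] +
         cornerProp[2] * uvw[2];
}

int main() {
  // e.g. one UV coordinate channel at the three corners
  const std::array<double, 3> u = {0.0, 1.0, 0.0};
  // midpoint of the edge between corner 0 and corner 1
  const std::array<double, 3> uvw = {0.5, 0.5, 0.0};
  std::printf("interpolated u = %f\n", InterpolateProperty(u, uvw));  // 0.5
  return 0;
}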

+ 308 - 0
thirdparty/manifold/src/svd.h

@@ -0,0 +1,308 @@
+// MIT License
+
+// Copyright (c) 2019 wi-re
+// Copyright 2023 The Manifold Authors.
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Modified from https://github.com/wi-re/tbtSVD, removing CUDA dependence and
+// approximate inverse square roots.
+
+#include <cmath>
+
+#include "manifold/common.h"
+
+namespace {
+using manifold::mat3;
+using manifold::vec3;
+using manifold::vec4;
+
+// Constants used for calculation of Givens quaternions
+inline constexpr double _gamma = 5.82842712474619;    // sqrt(8)+3;
+inline constexpr double _cStar = 0.9238795325112867;  // cos(pi/8)
+inline constexpr double _sStar = 0.3826834323650898;  // sin(pi/8)
+// Threshold value
+inline constexpr double _SVD_EPSILON = 1e-6;
+// Iteration counts for Jacobi Eigen Analysis, influences precision
+inline constexpr int JACOBI_STEPS = 12;
+
+// Helper function used to swap X with Y and Y with X if c == true
+inline void CondSwap(bool c, double& X, double& Y) {
+  double Z = X;
+  X = c ? Y : X;
+  Y = c ? Z : Y;
+}
+// Helper function used to swap X with Y and Y with -X if c == true
+inline void CondNegSwap(bool c, double& X, double& Y) {
+  double Z = -X;
+  X = c ? Y : X;
+  Y = c ? Z : Y;
+}
+// A simple symmetric 3x3 matrix class (contains no storage for (0, 1), (0, 2)
+// and (1, 2))
+struct Symmetric3x3 {
+  double m_00 = 1.0;
+  double m_10 = 0.0, m_11 = 1.0;
+  double m_20 = 0.0, m_21 = 0.0, m_22 = 1.0;
+
+  Symmetric3x3(double a11 = 1.0, double a21 = 0.0, double a22 = 1.0,
+               double a31 = 0.0, double a32 = 0.0, double a33 = 1.0)
+      : m_00(a11), m_10(a21), m_11(a22), m_20(a31), m_21(a32), m_22(a33) {}
+  Symmetric3x3(mat3 o)
+      : m_00(o[0][0]),
+        m_10(o[0][1]),
+        m_11(o[1][1]),
+        m_20(o[0][2]),
+        m_21(o[1][2]),
+        m_22(o[2][2]) {}
+};
+// Helper struct to store 2 doubles to avoid OUT parameters on functions
+struct Givens {
+  double ch = _cStar;
+  double sh = _sStar;
+};
+// Helper struct to store 2 Matrices to avoid OUT parameters on functions
+struct QR {
+  mat3 Q, R;
+};
+// Calculates the squared norm of the vector.
+inline double Dist2(vec3 v) { return la::dot(v, v); }
+// For an explanation of the math, see
+// http://pages.cs.wisc.edu/~sifakis/papers/SVD_TR1690.pdf ("Computing the
+// Singular Value Decomposition of 3 x 3 matrices with minimal branching and
+// elementary floating point operations"), Algorithm 2 in particular.
+// Given a matrix A, this function returns the Givens quaternion (x and w
+// components; y and z are 0).
+inline Givens ApproximateGivensQuaternion(Symmetric3x3& A) {
+  Givens g{2.0 * (A.m_00 - A.m_11), A.m_10};
+  bool b = _gamma * g.sh * g.sh < g.ch * g.ch;
+  double w = 1.0 / hypot(g.ch, g.sh);
+  if (!std::isfinite(w)) b = 0;
+  return Givens{b ? w * g.ch : _cStar, b ? w * g.sh : _sStar};
+}
+// Function used to apply a Givens rotation S. Calculates the weights and
+// updates the quaternion to contain the cumulative rotation
+inline void JacobiConjugation(const int32_t x, const int32_t y, const int32_t z,
+                              Symmetric3x3& S, vec4& q) {
+  auto g = ApproximateGivensQuaternion(S);
+  double scale = 1.0 / fma(g.ch, g.ch, g.sh * g.sh);
+  double a = fma(g.ch, g.ch, -g.sh * g.sh) * scale;
+  double b = 2.0 * g.sh * g.ch * scale;
+  Symmetric3x3 _S = S;
+  // perform conjugation S = Q'*S*Q
+  S.m_00 =
+      fma(a, fma(a, _S.m_00, b * _S.m_10), b * (fma(a, _S.m_10, b * _S.m_11)));
+  S.m_10 = fma(a, fma(-b, _S.m_00, a * _S.m_10),
+               b * (fma(-b, _S.m_10, a * _S.m_11)));
+  S.m_11 = fma(-b, fma(-b, _S.m_00, a * _S.m_10),
+               a * (fma(-b, _S.m_10, a * _S.m_11)));
+  S.m_20 = fma(a, _S.m_20, b * _S.m_21);
+  S.m_21 = fma(-b, _S.m_20, a * _S.m_21);
+  S.m_22 = _S.m_22;
+  // update cumulative rotation qV
+  vec3 tmp = g.sh * vec3(q);
+  g.sh *= q[3];
+  // (x,y,z) corresponds to ((0,1,2),(1,2,0),(2,0,1)) for (p,q) =
+  // ((0,1),(1,2),(0,2))
+  q[z] = fma(q[z], g.ch, g.sh);
+  q[3] = fma(q[3], g.ch, -tmp[z]);  // w
+  q[x] = fma(q[x], g.ch, tmp[y]);
+  q[y] = fma(q[y], g.ch, -tmp[x]);
+  // re-arrange matrix for next iteration
+  _S.m_00 = S.m_11;
+  _S.m_10 = S.m_21;
+  _S.m_11 = S.m_22;
+  _S.m_20 = S.m_10;
+  _S.m_21 = S.m_20;
+  _S.m_22 = S.m_00;
+  S.m_00 = _S.m_00;
+  S.m_10 = _S.m_10;
+  S.m_11 = _S.m_11;
+  S.m_20 = _S.m_20;
+  S.m_21 = _S.m_21;
+  S.m_22 = _S.m_22;
+}
+// Wraps the Givens permutations and the loop of Jacobi steps controlled by
+// JACOBI_STEPS. Returns the rotation matrix built from the cumulative
+// quaternion q, which is used to reconstruct S.
+inline mat3 JacobiEigenAnalysis(Symmetric3x3 S) {
+  vec4 q(0, 0, 0, 1);
+  for (int32_t i = 0; i < JACOBI_STEPS; i++) {
+    JacobiConjugation(0, 1, 2, S, q);
+    JacobiConjugation(1, 2, 0, S, q);
+    JacobiConjugation(2, 0, 1, S, q);
+  }
+  return mat3({1.0 - 2.0 * (fma(q.y, q.y, q.z * q.z)),  //
+               2.0 * fma(q.x, q.y, +q.w * q.z),         //
+               2.0 * fma(q.x, q.z, -q.w * q.y)},        //
+              {2 * fma(q.x, q.y, -q.w * q.z),           //
+               1 - 2 * fma(q.x, q.x, q.z * q.z),        //
+               2 * fma(q.y, q.z, q.w * q.x)},           //
+              {2 * fma(q.x, q.z, q.w * q.y),            //
+               2 * fma(q.y, q.z, -q.w * q.x),           //
+               1 - 2 * fma(q.x, q.x, q.y * q.y)});
+}
+// Implementation of Algorithm 3
+inline void SortSingularValues(mat3& B, mat3& V) {
+  double rho1 = Dist2(B[0]);
+  double rho2 = Dist2(B[1]);
+  double rho3 = Dist2(B[2]);
+  bool c;
+  c = rho1 < rho2;
+  CondNegSwap(c, B[0][0], B[1][0]);
+  CondNegSwap(c, V[0][0], V[1][0]);
+  CondNegSwap(c, B[0][1], B[1][1]);
+  CondNegSwap(c, V[0][1], V[1][1]);
+  CondNegSwap(c, B[0][2], B[1][2]);
+  CondNegSwap(c, V[0][2], V[1][2]);
+  CondSwap(c, rho1, rho2);
+  c = rho1 < rho3;
+  CondNegSwap(c, B[0][0], B[2][0]);
+  CondNegSwap(c, V[0][0], V[2][0]);
+  CondNegSwap(c, B[0][1], B[2][1]);
+  CondNegSwap(c, V[0][1], V[2][1]);
+  CondNegSwap(c, B[0][2], B[2][2]);
+  CondNegSwap(c, V[0][2], V[2][2]);
+  CondSwap(c, rho1, rho3);
+  c = rho2 < rho3;
+  CondNegSwap(c, B[1][0], B[2][0]);
+  CondNegSwap(c, V[1][0], V[2][0]);
+  CondNegSwap(c, B[1][1], B[2][1]);
+  CondNegSwap(c, V[1][1], V[2][1]);
+  CondNegSwap(c, B[1][2], B[2][2]);
+  CondNegSwap(c, V[1][2], V[2][2]);
+}
+// Implementation of Algorithm 4
+inline Givens QRGivensQuaternion(double a1, double a2) {
+  // a1 = pivot point on diagonal
+  // a2 = lower triangular entry we want to annihilate
+  double epsilon = _SVD_EPSILON;
+  double rho = hypot(a1, a2);
+  Givens g{fabs(a1) + fmax(rho, epsilon), rho > epsilon ? a2 : 0};
+  bool b = a1 < 0.0;
+  CondSwap(b, g.sh, g.ch);
+  double w = 1.0 / hypot(g.ch, g.sh);
+  g.ch *= w;
+  g.sh *= w;
+  return g;
+}
+// Implements a QR decomposition of a Matrix, see Sec 4.2
+inline QR QRDecomposition(mat3& B) {
+  mat3 Q, R;
+  // first Givens rotation (ch,0,0,sh)
+  auto g1 = QRGivensQuaternion(B[0][0], B[0][1]);
+  auto a = fma(-2.0, g1.sh * g1.sh, 1.0);
+  auto b = 2.0 * g1.ch * g1.sh;
+  // apply B = Q' * B
+  R[0][0] = fma(a, B[0][0], b * B[0][1]);
+  R[1][0] = fma(a, B[1][0], b * B[1][1]);
+  R[2][0] = fma(a, B[2][0], b * B[2][1]);
+  R[0][1] = fma(-b, B[0][0], a * B[0][1]);
+  R[1][1] = fma(-b, B[1][0], a * B[1][1]);
+  R[2][1] = fma(-b, B[2][0], a * B[2][1]);
+  R[0][2] = B[0][2];
+  R[1][2] = B[1][2];
+  R[2][2] = B[2][2];
+  // second Givens rotation (ch,0,-sh,0)
+  auto g2 = QRGivensQuaternion(R[0][0], R[0][2]);
+  a = fma(-2.0, g2.sh * g2.sh, 1.0);
+  b = 2.0 * g2.ch * g2.sh;
+  // apply B = Q' * B;
+  B[0][0] = fma(a, R[0][0], b * R[0][2]);
+  B[1][0] = fma(a, R[1][0], b * R[1][2]);
+  B[2][0] = fma(a, R[2][0], b * R[2][2]);
+  B[0][1] = R[0][1];
+  B[1][1] = R[1][1];
+  B[2][1] = R[2][1];
+  B[0][2] = fma(-b, R[0][0], a * R[0][2]);
+  B[1][2] = fma(-b, R[1][0], a * R[1][2]);
+  B[2][2] = fma(-b, R[2][0], a * R[2][2]);
+  // third Givens rotation (ch,sh,0,0)
+  auto g3 = QRGivensQuaternion(B[1][1], B[1][2]);
+  a = fma(-2.0, g3.sh * g3.sh, 1.0);
+  b = 2.0 * g3.ch * g3.sh;
+  // R is now set to desired value
+  R[0][0] = B[0][0];
+  R[1][0] = B[1][0];
+  R[2][0] = B[2][0];
+  R[0][1] = fma(a, B[0][1], b * B[0][2]);
+  R[1][1] = fma(a, B[1][1], b * B[1][2]);
+  R[2][1] = fma(a, B[2][1], b * B[2][2]);
+  R[0][2] = fma(-b, B[0][1], a * B[0][2]);
+  R[1][2] = fma(-b, B[1][1], a * B[1][2]);
+  R[2][2] = fma(-b, B[2][1], a * B[2][2]);
+  // construct the cumulative rotation Q=Q1 * Q2 * Q3
+  // the number of floating point operations for three quaternion
+  // multiplications is more or less comparable to the explicit form of the
+  // joined matrix. certainly more memory-efficient!
+  auto sh12 = 2.0 * fma(g1.sh, g1.sh, -0.5);
+  auto sh22 = 2.0 * fma(g2.sh, g2.sh, -0.5);
+  auto sh32 = 2.0 * fma(g3.sh, g3.sh, -0.5);
+  Q[0][0] = sh12 * sh22;
+  Q[1][0] = fma(4.0 * g2.ch * g3.ch, sh12 * g2.sh * g3.sh,
+                2.0 * g1.ch * g1.sh * sh32);
+  Q[2][0] = fma(4.0 * g1.ch * g3.ch, g1.sh * g3.sh,
+                -2.0 * g2.ch * sh12 * g2.sh * sh32);
+
+  Q[0][1] = -2.0 * g1.ch * g1.sh * sh22;
+  Q[1][1] =
+      fma(-8.0 * g1.ch * g2.ch * g3.ch, g1.sh * g2.sh * g3.sh, sh12 * sh32);
+  Q[2][1] = fma(
+      -2.0 * g3.ch, g3.sh,
+      4.0 * g1.sh * fma(g3.ch * g1.sh, g3.sh, g1.ch * g2.ch * g2.sh * sh32));
+
+  Q[0][2] = 2.0 * g2.ch * g2.sh;
+  Q[1][2] = -2.0 * g3.ch * sh22 * g3.sh;
+  Q[2][2] = sh22 * sh32;
+  return QR{Q, R};
+}
+}  // namespace
+
+namespace manifold {
+
+/**
+ * The three matrices of a Singular Value Decomposition.
+ */
+struct SVDSet {
+  mat3 U, S, V;
+};
+
+/**
+ * Returns the Singular Value Decomposition of A: A = U * S * la::transpose(V).
+ *
+ * @param A The matrix to decompose.
+ */
+inline SVDSet SVD(mat3 A) {
+  mat3 V = JacobiEigenAnalysis(la::transpose(A) * A);
+  auto B = A * V;
+  SortSingularValues(B, V);
+  QR qr = QRDecomposition(B);
+  return SVDSet{qr.Q, qr.R, V};
+}
+
+/**
+ * Returns the largest singular value of A.
+ *
+ * @param A The matrix to measure.
+ */
+inline double SpectralNorm(mat3 A) {
+  SVDSet usv = SVD(A);
+  return usv.S[0][0];
+}
+}  // namespace manifold
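
A minimal usage sketch for the SVD helpers above: per the documented contract, SVD(A) gives A ~= U * S * la::transpose(V), and SpectralNorm(A) returns S[0][0]. The include path is an assumption; it presumes the vendored thirdparty/manifold include and src directories are on the compiler's search path:

#include <cstdio>

#include "svd.h"  // assumed path; adjust to wherever the vendored header lives

int main() {
  using namespace manifold;
  // Constructor arguments are the matrix columns.
  const mat3 A({2.0, 0.0, 0.0}, {1.0, 3.0, 0.0}, {0.0, 0.0, 1.0});
  const SVDSet usv = SVD(A);
  // S[0][0] is the largest singular value after SortSingularValues.
  std::printf("S[0][0] = %f\n", usv.S[0][0]);
  std::printf("SpectralNorm(A) = %f\n", SpectralNorm(A));
  return 0;
}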

+ 225 - 0
thirdparty/manifold/src/tri_dist.h

@@ -0,0 +1,225 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <array>
+
+#include "manifold/common.h"
+
+namespace manifold {
+
+// From NVIDIA-Omniverse PhysX - BSD 3-Clause "New" or "Revised" License
+// https://github.com/NVIDIA-Omniverse/PhysX/blob/main/LICENSE.md
+// https://github.com/NVIDIA-Omniverse/PhysX/blob/main/physx/source/geomutils/src/sweep/GuSweepCapsuleCapsule.cpp
+// With minor modifications
+
+/**
+ * Returns the distance between two line segments.
+ *
+ * @param[out] x Closest point on the first segment (p, p + a).
+ * @param[out] y Closest point on the second segment (q, q + b).
+ * @param[in]  p  Origin of the first line segment.
+ * @param[in]  a  Direction vector of the first line segment.
+ * @param[in]  q  Origin of the second line segment.
+ * @param[in]  b  Direction vector of the second line segment.
+ */
+inline void EdgeEdgeDist(vec3& x, vec3& y,  // closest points
+                         const vec3& p,
+                         const vec3& a,  // seg 1 origin, vector
+                         const vec3& q,
+                         const vec3& b)  // seg 2 origin, vector
+{
+  const vec3 T = q - p;
+  const auto ADotA = la::dot(a, a);
+  const auto BDotB = la::dot(b, b);
+  const auto ADotB = la::dot(a, b);
+  const auto ADotT = la::dot(a, T);
+  const auto BDotT = la::dot(b, T);
+
+  // t parameterizes ray (p, a)
+  // u parameterizes ray (q, b)
+
+  // Compute t for the closest point on ray (p, a) to ray (q, b)
+  const auto Denom = ADotA * BDotB - ADotB * ADotB;
+
+  double t;  // We will clamp result so t is on the segment (p, a)
+  t = Denom != 0.0
+          ? la::clamp((ADotT * BDotB - BDotT * ADotB) / Denom, 0.0, 1.0)
+          : 0.0;
+
+  // find u for point on ray (q, b) closest to point at t
+  double u;
+  if (BDotB != 0.0) {
+    u = (t * ADotB - BDotT) / BDotB;
+
+    // if u is on segment (q, b), t and u correspond to closest points,
+    // otherwise, clamp u, recompute and clamp t
+    if (u < 0.0) {
+      u = 0.0;
+      t = ADotA != 0.0 ? la::clamp(ADotT / ADotA, 0.0, 1.0) : 0.0;
+    } else if (u > 1.0) {
+      u = 1.0;
+      t = ADotA != 0.0 ? la::clamp((ADotB + ADotT) / ADotA, 0.0, 1.0) : 0.0;
+    }
+  } else {
+    u = 0.0;
+    t = ADotA != 0.0 ? la::clamp(ADotT / ADotA, 0.0, 1.0) : 0.0;
+  }
+  x = p + a * t;
+  y = q + b * u;
+}
+
+// From NVIDIA-Omniverse PhysX - BSD 3-Clause "New" or "Revised" License
+// https://github.com/NVIDIA-Omniverse/PhysX/blob/main/LICENSE.md
+// https://github.com/NVIDIA-Omniverse/PhysX/blob/main/physx/source/geomutils/src/distance/GuDistanceTriangleTriangle.cpp
+// With minor modifications
+
+/**
+ * Returns the minimum squared distance between two triangles.
+ *
+ * @param  p  First  triangle.
+ * @param  q  Second triangle.
+ */
+inline auto DistanceTriangleTriangleSquared(const std::array<vec3, 3>& p,
+                                            const std::array<vec3, 3>& q) {
+  std::array<vec3, 3> Sv;
+  Sv[0] = p[1] - p[0];
+  Sv[1] = p[2] - p[1];
+  Sv[2] = p[0] - p[2];
+
+  std::array<vec3, 3> Tv;
+  Tv[0] = q[1] - q[0];
+  Tv[1] = q[2] - q[1];
+  Tv[2] = q[0] - q[2];
+
+  bool shown_disjoint = false;
+
+  auto mindd = std::numeric_limits<double>::max();
+
+  for (uint32_t i = 0; i < 3; i++) {
+    for (uint32_t j = 0; j < 3; j++) {
+      vec3 cp;
+      vec3 cq;
+      EdgeEdgeDist(cp, cq, p[i], Sv[i], q[j], Tv[j]);
+      const vec3 V = cq - cp;
+      const auto dd = la::dot(V, V);
+
+      if (dd <= mindd) {
+        mindd = dd;
+
+        uint32_t id = i + 2;
+        if (id >= 3) id -= 3;
+        vec3 Z = p[id] - cp;
+        auto a = la::dot(Z, V);
+        id = j + 2;
+        if (id >= 3) id -= 3;
+        Z = q[id] - cq;
+        auto b = la::dot(Z, V);
+
+        if ((a <= 0.0) && (b >= 0.0)) {
+          return la::dot(V, V);
+        };
+
+        if (a <= 0.0)
+          a = 0.0;
+        else if (b > 0.0)
+          b = 0.0;
+
+        if ((mindd - a + b) > 0.0) shown_disjoint = true;
+      }
+    }
+  }
+
+  vec3 Sn = la::cross(Sv[0], Sv[1]);
+  auto Snl = la::dot(Sn, Sn);
+
+  if (Snl > 1e-15) {
+    const vec3 Tp(la::dot(p[0] - q[0], Sn), la::dot(p[0] - q[1], Sn),
+                  la::dot(p[0] - q[2], Sn));
+
+    int index = -1;
+    if ((Tp[0] > 0.0) && (Tp[1] > 0.0) && (Tp[2] > 0.0)) {
+      index = Tp[0] < Tp[1] ? 0 : 1;
+      if (Tp[2] < Tp[index]) index = 2;
+    } else if ((Tp[0] < 0.0) && (Tp[1] < 0.0) && (Tp[2] < 0.0)) {
+      index = Tp[0] > Tp[1] ? 0 : 1;
+      if (Tp[2] > Tp[index]) index = 2;
+    }
+
+    if (index >= 0) {
+      shown_disjoint = true;
+
+      const vec3& qIndex = q[index];
+
+      vec3 V = qIndex - p[0];
+      vec3 Z = la::cross(Sn, Sv[0]);
+      if (la::dot(V, Z) > 0.0) {
+        V = qIndex - p[1];
+        Z = la::cross(Sn, Sv[1]);
+        if (la::dot(V, Z) > 0.0) {
+          V = qIndex - p[2];
+          Z = la::cross(Sn, Sv[2]);
+          if (la::dot(V, Z) > 0.0) {
+            vec3 cp = qIndex + Sn * Tp[index] / Snl;
+            vec3 cq = qIndex;
+            return la::dot(cp - cq, cp - cq);
+          }
+        }
+      }
+    }
+  }
+
+  vec3 Tn = la::cross(Tv[0], Tv[1]);
+  auto Tnl = la::dot(Tn, Tn);
+
+  if (Tnl > 1e-15) {
+    const vec3 Sp(la::dot(q[0] - p[0], Tn), la::dot(q[0] - p[1], Tn),
+                  la::dot(q[0] - p[2], Tn));
+
+    int index = -1;
+    if ((Sp[0] > 0.0) && (Sp[1] > 0.0) && (Sp[2] > 0.0)) {
+      index = Sp[0] < Sp[1] ? 0 : 1;
+      if (Sp[2] < Sp[index]) index = 2;
+    } else if ((Sp[0] < 0.0) && (Sp[1] < 0.0) && (Sp[2] < 0.0)) {
+      index = Sp[0] > Sp[1] ? 0 : 1;
+      if (Sp[2] > Sp[index]) index = 2;
+    }
+
+    if (index >= 0) {
+      shown_disjoint = true;
+
+      const vec3& pIndex = p[index];
+
+      vec3 V = pIndex - q[0];
+      vec3 Z = la::cross(Tn, Tv[0]);
+      if (la::dot(V, Z) > 0.0) {
+        V = pIndex - q[1];
+        Z = la::cross(Tn, Tv[1]);
+        if (la::dot(V, Z) > 0.0) {
+          V = pIndex - q[2];
+          Z = la::cross(Tn, Tv[2]);
+          if (la::dot(V, Z) > 0.0) {
+            vec3 cp = pIndex;
+            vec3 cq = pIndex + Tn * Sp[index] / Tnl;
+            return la::dot(cp - cq, cp - cq);
+          }
+        }
+      }
+    }
+  }
+
+  return shown_disjoint ? mindd : 0.0;
+};
+}  // namespace manifold
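
A usage sketch for DistanceTriangleTriangleSquared: two parallel unit triangles two units apart should report a squared distance of 4. The include path is an assumption about the vendored layout:

#include <array>
#include <cstdio>

#include "tri_dist.h"  // assumed path; adjust to wherever the vendored header lives

int main() {
  using namespace manifold;
  // A unit right triangle in the z = 0 plane and a copy lifted to z = 2.
  const std::array<vec3, 3> a = {vec3(0, 0, 0), vec3(1, 0, 0), vec3(0, 1, 0)};
  const std::array<vec3, 3> b = {vec3(0, 0, 2), vec3(1, 0, 2), vec3(0, 1, 2)};
  std::printf("squared distance = %f\n",
              DistanceTriangleTriangleSquared(a, b));  // expect 4.0
  return 0;
}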

+ 227 - 0
thirdparty/manifold/src/utils.h

@@ -0,0 +1,227 @@
+// Copyright 2020 The Manifold Authors, Jared Hoberock and Nathan Bell of
+// NVIDIA Research
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "./vec.h"
+#include "manifold/common.h"
+
+#ifndef MANIFOLD_PAR
+#error "MANIFOLD_PAR must be defined to either 1 (parallel) or -1 (series)"
+#else
+#if (MANIFOLD_PAR != 1) && (MANIFOLD_PAR != -1)
+#define XSTR(x) STR(x)
+#define STR(x) #x
+#pragma message "Current value of MANIFOLD_PAR is: " XSTR(MANIFOLD_PAR)
+#error "MANIFOLD_PAR must be defined to either 1 (parallel) or -1 (series)"
+#endif
+#endif
+
+#include "./parallel.h"
+
+#if __has_include(<tracy/Tracy.hpp>)
+#include <tracy/Tracy.hpp>
+#else
+#define FrameMarkStart(x)
+#define FrameMarkEnd(x)
+// putting ZoneScoped in a function will instrument the function execution when
+// TRACY_ENABLE is set, which allows the profiler to record more accurate
+// timing.
+#define ZoneScoped
+#define ZoneScopedN(name)
+#endif
+
+namespace manifold {
+
+/**
+ * Stand-in for C++23's operator""uz (P0330R8)[https://wg21.link/P0330R8].
+ */
+[[nodiscard]] constexpr std::size_t operator""_uz(
+    unsigned long long n) noexcept {
+  return n;
+}
+
+constexpr double kPrecision = 1e-12;
+
+inline int Next3(int i) {
+  constexpr ivec3 next3(1, 2, 0);
+  return next3[i];
+}
+
+inline int Prev3(int i) {
+  constexpr ivec3 prev3(2, 0, 1);
+  return prev3[i];
+}
+
+template <typename T, typename T1>
+void Permute(Vec<T>& inOut, const Vec<T1>& new2Old) {
+  Vec<T> tmp(std::move(inOut));
+  inOut.resize(new2Old.size());
+  gather(new2Old.begin(), new2Old.end(), tmp.begin(), inOut.begin());
+}
+
+template <typename T, typename T1>
+void Permute(std::vector<T>& inOut, const Vec<T1>& new2Old) {
+  std::vector<T> tmp(std::move(inOut));
+  inOut.resize(new2Old.size());
+  gather(new2Old.begin(), new2Old.end(), tmp.begin(), inOut.begin());
+}
+
+template <typename T>
+T AtomicAdd(T& target, T add) {
+  std::atomic<T>& tar = reinterpret_cast<std::atomic<T>&>(target);
+  T old_val = tar.load();
+  while (!tar.compare_exchange_weak(old_val, old_val + add,
+                                    std::memory_order_seq_cst)) {
+  }
+  return old_val;
+}
+
+template <>
+inline int AtomicAdd(int& target, int add) {
+  std::atomic<int>& tar = reinterpret_cast<std::atomic<int>&>(target);
+  int old_val = tar.fetch_add(add, std::memory_order_seq_cst);
+  return old_val;
+}
+
+template <typename T>
+class ConcurrentSharedPtr {
+ public:
+  ConcurrentSharedPtr(T value) : impl(std::make_shared<T>(value)) {}
+  ConcurrentSharedPtr(const ConcurrentSharedPtr<T>& other)
+      : impl(other.impl), mutex(other.mutex) {}
+  class SharedPtrGuard {
+   public:
+    SharedPtrGuard(std::recursive_mutex* mutex, T* content)
+        : mutex(mutex), content(content) {
+      mutex->lock();
+    }
+    ~SharedPtrGuard() { mutex->unlock(); }
+
+    T& operator*() { return *content; }
+    T* operator->() { return content; }
+
+   private:
+    std::recursive_mutex* mutex;
+    T* content;
+  };
+  SharedPtrGuard GetGuard() { return SharedPtrGuard(mutex.get(), impl.get()); };
+  unsigned int UseCount() { return impl.use_count(); };
+
+ private:
+  std::shared_ptr<T> impl;
+  std::shared_ptr<std::recursive_mutex> mutex =
+      std::make_shared<std::recursive_mutex>();
+};
+
+template <typename I = int, typename R = unsigned char>
+struct UnionFind {
+  Vec<I> parents;
+  // we do union by rank
+  // note that we shift rank by 1, rank 0 means it is not connected to anything
+  // else
+  Vec<R> ranks;
+
+  UnionFind(I numNodes) : parents(numNodes), ranks(numNodes, 0) {
+    sequence(parents.begin(), parents.end());
+  }
+
+  I find(I x) {
+    while (parents[x] != x) {
+      parents[x] = parents[parents[x]];
+      x = parents[x];
+    }
+    return x;
+  }
+
+  void unionXY(I x, I y) {
+    if (x == y) return;
+    if (ranks[x] == 0) ranks[x] = 1;
+    if (ranks[y] == 0) ranks[y] = 1;
+    x = find(x);
+    y = find(y);
+    if (x == y) return;
+    if (ranks[x] < ranks[y]) std::swap(x, y);
+    if (ranks[x] == ranks[y]) ranks[x]++;
+    parents[y] = x;
+  }
+
+  I connectedComponents(std::vector<I>& components) {
+    components.resize(parents.size());
+    I lonelyNodes = 0;
+    std::unordered_map<I, I> toLabel;
+    for (size_t i = 0; i < parents.size(); ++i) {
+      // we optimize for connected component of size 1
+      // no need to put them into the hashmap
+      if (ranks[i] == 0) {
+        components[i] = static_cast<I>(toLabel.size()) + lonelyNodes++;
+        continue;
+      }
+      parents[i] = find(i);
+      auto iter = toLabel.find(parents[i]);
+      if (iter == toLabel.end()) {
+        I s = static_cast<I>(toLabel.size()) + lonelyNodes;
+        toLabel.insert(std::make_pair(parents[i], s));
+        components[i] = s;
+      } else {
+        components[i] = iter->second;
+      }
+    }
+    return toLabel.size() + lonelyNodes;
+  }
+};
+
+template <typename T>
+struct Identity {
+  T operator()(T v) const { return v; }
+};
+
+template <typename T>
+struct Negate {
+  T operator()(T v) const { return -v; }
+};
+
+/**
+ * Determines if the three points are wound counter-clockwise, clockwise, or
+ * colinear within the specified tolerance.
+ *
+ * @param p0 First point
+ * @param p1 Second point
+ * @param p2 Third point
+ * @param tol Tolerance value for colinearity
+ * @return int, like Signum, this returns 1 for CCW, -1 for CW, and 0 if within
+ * tol of colinear.
+ */
+inline int CCW(vec2 p0, vec2 p1, vec2 p2, double tol) {
+  vec2 v1 = p1 - p0;
+  vec2 v2 = p2 - p0;
+  double area = fma(v1.x, v2.y, -v1.y * v2.x);
+  double base2 = la::max(la::dot(v1, v1), la::dot(v2, v2));
+  if (area * area * 4 <= base2 * tol * tol)
+    return 0;
+  else
+    return area > 0 ? 1 : -1;
+}
+
+inline mat4 Mat4(mat3x4 a) {
+  return mat4({a[0], 0}, {a[1], 0}, {a[2], 0}, {a[3], 1});
+}
+inline mat3 Mat3(mat2x3 a) { return mat3({a[0], 0}, {a[1], 0}, {a[2], 1}); }
+
+}  // namespace manifold
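
A small usage sketch for the CCW and UnionFind helpers above. It assumes the vendored include directories are on the search path and that the translation unit is compiled with MANIFOLD_PAR defined to -1, the serial fallback accepted by the guard at the top of this header:

#include <cstdio>
#include <vector>

#include "utils.h"  // assumed path; adjust to wherever the vendored header lives

int main() {
  using namespace manifold;

  // CCW: 1 for counter-clockwise, -1 for clockwise, 0 within tolerance.
  std::printf("CCW = %d\n", CCW(vec2(0, 0), vec2(1, 0), vec2(0, 1), 1e-9));

  // UnionFind: three nodes, join 0 and 2 -> two connected components.
  UnionFind<int> uf(3);
  uf.unionXY(0, 2);
  std::vector<int> components;
  std::printf("components = %d\n", uf.connectedComponents(components));
  return 0;
}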

+ 219 - 0
thirdparty/manifold/src/vec.h

@@ -0,0 +1,219 @@
+// Copyright 2021 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#if TRACY_ENABLE && TRACY_MEMORY_USAGE
+#include "tracy/Tracy.hpp"
+#else
+#define TracyAllocS(ptr, size, n) (void)0
+#define TracyFreeS(ptr, n) (void)0
+#endif
+#include <vector>
+
+#include "./parallel.h"
+#include "manifold/vec_view.h"
+
+namespace manifold {
+
+template <typename T>
+class Vec;
+
+/*
+ * Specialized vector implementation with multithreaded fill and uninitialized
+ * memory optimizations.
+ * Note that the constructor and resize function will not perform
+ * initialization if the parameter val is not set. This is a minimal
+ * implementation that does not handle non-trivial constructors or
+ * destructors, so keep T trivial.
+ */
+template <typename T>
+class Vec : public VecView<T> {
+ public:
+  Vec() {}
+
+  // Note that the vector constructed with this constructor will contain
+  // uninitialized memory. Please specify `val` if you need to make sure that
+  // the data is initialized.
+  Vec(size_t size) {
+    reserve(size);
+    this->size_ = size;
+  }
+
+  Vec(size_t size, T val) { resize(size, val); }
+
+  Vec(const Vec<T> &vec) { *this = Vec(vec.view()); }
+
+  Vec(const VecView<const T> &vec) {
+    this->size_ = vec.size();
+    this->capacity_ = this->size_;
+    auto policy = autoPolicy(this->size_);
+    if (this->size_ != 0) {
+      this->ptr_ = reinterpret_cast<T *>(malloc(this->size_ * sizeof(T)));
+      ASSERT(this->ptr_ != nullptr, std::bad_alloc());
+      TracyAllocS(this->ptr_, this->size_ * sizeof(T), 3);
+      copy(policy, vec.begin(), vec.end(), this->ptr_);
+    }
+  }
+
+  Vec(const std::vector<T> &vec) {
+    this->size_ = vec.size();
+    this->capacity_ = this->size_;
+    auto policy = autoPolicy(this->size_);
+    if (this->size_ != 0) {
+      this->ptr_ = reinterpret_cast<T *>(malloc(this->size_ * sizeof(T)));
+      ASSERT(this->ptr_ != nullptr, std::bad_alloc());
+      TracyAllocS(this->ptr_, this->size_ * sizeof(T), 3);
+      copy(policy, vec.begin(), vec.end(), this->ptr_);
+    }
+  }
+
+  Vec(Vec<T> &&vec) {
+    this->ptr_ = vec.ptr_;
+    this->size_ = vec.size_;
+    capacity_ = vec.capacity_;
+    vec.ptr_ = nullptr;
+    vec.size_ = 0;
+    vec.capacity_ = 0;
+  }
+
+  operator VecView<T>() { return {this->ptr_, this->size_}; }
+  operator VecView<const T>() const { return {this->ptr_, this->size_}; }
+
+  ~Vec() {
+    if (this->ptr_ != nullptr) {
+      TracyFreeS(this->ptr_, 3);
+      free(this->ptr_);
+    }
+    this->ptr_ = nullptr;
+    this->size_ = 0;
+    capacity_ = 0;
+  }
+
+  Vec<T> &operator=(const Vec<T> &other) {
+    if (&other == this) return *this;
+    if (this->ptr_ != nullptr) {
+      TracyFreeS(this->ptr_, 3);
+      free(this->ptr_);
+    }
+    this->size_ = other.size_;
+    capacity_ = other.size_;
+    if (this->size_ != 0) {
+      this->ptr_ = reinterpret_cast<T *>(malloc(this->size_ * sizeof(T)));
+      ASSERT(this->ptr_ != nullptr, std::bad_alloc());
+      TracyAllocS(this->ptr_, this->size_ * sizeof(T), 3);
+      manifold::copy(other.begin(), other.end(), this->ptr_);
+    }
+    return *this;
+  }
+
+  Vec<T> &operator=(Vec<T> &&other) {
+    if (&other == this) return *this;
+    if (this->ptr_ != nullptr) {
+      TracyFreeS(this->ptr_, 3);
+      free(this->ptr_);
+    }
+    this->size_ = other.size_;
+    capacity_ = other.capacity_;
+    this->ptr_ = other.ptr_;
+    other.ptr_ = nullptr;
+    other.size_ = 0;
+    other.capacity_ = 0;
+    return *this;
+  }
+
+  operator VecView<T>() const { return {this->ptr_, this->size_}; }
+
+  void swap(Vec<T> &other) {
+    std::swap(this->ptr_, other.ptr_);
+    std::swap(this->size_, other.size_);
+    std::swap(capacity_, other.capacity_);
+  }
+
+  inline void push_back(const T &val, bool seq = false) {
+    if (this->size_ >= capacity_) {
+      // avoid dangling pointer in case val is a reference of our array
+      T val_copy = val;
+      reserve(capacity_ == 0 ? 128 : capacity_ * 2, seq);
+      this->ptr_[this->size_++] = val_copy;
+      return;
+    }
+    this->ptr_[this->size_++] = val;
+  }
+
+  inline void extend(size_t n, bool seq = false) {
+    if (this->size_ + n >= capacity_)
+      reserve(capacity_ == 0 ? 128 : std::max(capacity_ * 2, this->size_ + n),
+              seq);
+    this->size_ += n;
+  }
+
+  void reserve(size_t n, bool seq = false) {
+    if (n > capacity_) {
+      T *newBuffer = reinterpret_cast<T *>(malloc(n * sizeof(T)));
+      ASSERT(newBuffer != nullptr, std::bad_alloc());
+      TracyAllocS(newBuffer, n * sizeof(T), 3);
+      if (this->size_ > 0)
+        manifold::copy(seq ? ExecutionPolicy::Seq : autoPolicy(this->size_),
+                       this->ptr_, this->ptr_ + this->size_, newBuffer);
+      if (this->ptr_ != nullptr) {
+        TracyFreeS(this->ptr_, 3);
+        free(this->ptr_);
+      }
+      this->ptr_ = newBuffer;
+      capacity_ = n;
+    }
+  }
+
+  void resize(size_t newSize, T val = T()) {
+    bool shrink = this->size_ > 2 * newSize;
+    reserve(newSize);
+    if (this->size_ < newSize) {
+      fill(autoPolicy(newSize - this->size_), this->ptr_ + this->size_,
+           this->ptr_ + newSize, val);
+    }
+    this->size_ = newSize;
+    if (shrink) shrink_to_fit();
+  }
+
+  void pop_back() { resize(this->size_ - 1); }
+
+  void clear(bool shrink = true) {
+    this->size_ = 0;
+    if (shrink) shrink_to_fit();
+  }
+
+  void shrink_to_fit() {
+    T *newBuffer = nullptr;
+    if (this->size_ > 0) {
+      newBuffer = reinterpret_cast<T *>(malloc(this->size_ * sizeof(T)));
+      ASSERT(newBuffer != nullptr, std::bad_alloc());
+      TracyAllocS(newBuffer, this->size_ * sizeof(T), 3);
+      manifold::copy(this->ptr_, this->ptr_ + this->size_, newBuffer);
+    }
+    if (this->ptr_ != nullptr) {
+      TracyFreeS(this->ptr_, 3);
+      free(this->ptr_);
+    }
+    this->ptr_ = newBuffer;
+    capacity_ = this->size_;
+  }
+
+  size_t capacity() const { return capacity_; }
+
+ private:
+  size_t capacity_ = 0;
+
+  static_assert(std::is_trivially_destructible<T>::value);
+};
+}  // namespace manifold
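
A minimal sketch of the Vec semantics documented above: Vec(size) leaves its storage uninitialized, Vec(size, val) fills it, and push_back doubles the capacity when full (or starts at 128 when empty). The include path, building with MANIFOLD_PAR=-1, and the element access via the inherited VecView are assumptions:

#include <cstdio>

#include "vec.h"  // assumed path; adjust to wherever the vendored header lives

int main() {
  using manifold::Vec;

  Vec<int> a(4);  // uninitialized storage: write every element before reading
  for (size_t i = 0; i < a.size(); ++i) a[i] = static_cast<int>(i);

  Vec<int> b(4, 7);  // value-initialized: every element is 7
  b.push_back(42);   // capacity doubles from 4 to 8 here

  std::printf("a[3] = %d, b[4] = %d, b.capacity() = %zu\n", a[3], b[4],
              b.capacity());
  return 0;
}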