meshoptimizer: Update to 0.24

Jakub Marcowski, 2 months ago
parent
commit
893f5b37f4

+ 4 - 3
modules/meshoptimizer/SCsub

@@ -14,18 +14,19 @@ thirdparty_dir = "#thirdparty/meshoptimizer/"
 thirdparty_sources = [
     "allocator.cpp",
     "clusterizer.cpp",
+    "indexanalyzer.cpp",
     "indexcodec.cpp",
     "indexgenerator.cpp",
-    "overdrawanalyzer.cpp",
     "overdrawoptimizer.cpp",
+    "partition.cpp",
+    "quantization.cpp",
+    "rasterizer.cpp",
     "simplifier.cpp",
     "spatialorder.cpp",
     "stripifier.cpp",
-    "vcacheanalyzer.cpp",
     "vcacheoptimizer.cpp",
     "vertexcodec.cpp",
     "vertexfilter.cpp",
-    "vfetchanalyzer.cpp",
     "vfetchoptimizer.cpp",
 ]
 thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]

+ 1 - 1
thirdparty/README.md

@@ -680,7 +680,7 @@ Patches:
 ## meshoptimizer
 
 - Upstream: https://github.com/zeux/meshoptimizer
-- Version: 0.23 (3e9d1ff3135794f519f3237515277c8d9a3fd3f2, 2025)
+- Version: 0.24 (7b2d4f4c817aea55d74dcd65d9763ac2ca608026, 2025)
 - License: MIT
 
 Files extracted from upstream repository:

+ 454 - 14
thirdparty/meshoptimizer/clusterizer.cpp

@@ -10,6 +10,8 @@
 // Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
 // Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
 // Jack Ritter. An Efficient Bounding Sphere. 1990
+// Thomas Larsson. Fast and Tight Fitting Bounding Spheres. 2008
+// Ingo Wald, Vlastimil Havran. On building fast kd-Trees for Ray Tracing, and on doing that in O(N log N). 2006
 namespace meshopt
 {
 
@@ -23,6 +25,9 @@ const size_t kMeshletMaxTriangles = 512;
 const size_t kMeshletMaxSeeds = 256;
 const size_t kMeshletAddSeeds = 4;
 
+// To avoid excessive recursion for malformed inputs, we limit the maximum depth of the tree
+const int kMeshletMaxTreeDepth = 50;
+
 struct TriangleAdjacency2
 {
 	unsigned int* counts;
@@ -144,37 +149,62 @@ static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const un
 	}
 }
 
-static void computeBoundingSphere(float result[4], const float* points, size_t count, size_t points_stride, const float* radii, size_t radii_stride)
+static void computeBoundingSphere(float result[4], const float* points, size_t count, size_t points_stride, const float* radii, size_t radii_stride, size_t axis_count)
 {
+	static const float kAxes[7][3] = {
+	    // X, Y, Z
+	    {1, 0, 0},
+	    {0, 1, 0},
+	    {0, 0, 1},
+
+	    // XYZ, -XYZ, X-YZ, XY-Z; normalized to unit length
+	    {0.57735026f, 0.57735026f, 0.57735026f},
+	    {-0.57735026f, 0.57735026f, 0.57735026f},
+	    {0.57735026f, -0.57735026f, 0.57735026f},
+	    {0.57735026f, 0.57735026f, -0.57735026f},
+	};
+
 	assert(count > 0);
+	assert(axis_count <= sizeof(kAxes) / sizeof(kAxes[0]));
 
 	size_t points_stride_float = points_stride / sizeof(float);
 	size_t radii_stride_float = radii_stride / sizeof(float);
 
-	// find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
-	size_t pmin[3] = {0, 0, 0};
-	size_t pmax[3] = {0, 0, 0};
+	// find extremum points along all axes; for each axis we get a pair of points with min/max coordinates
+	size_t pmin[7], pmax[7];
+	float tmin[7], tmax[7];
+
+	for (size_t axis = 0; axis < axis_count; ++axis)
+	{
+		pmin[axis] = pmax[axis] = 0;
+		tmin[axis] = FLT_MAX;
+		tmax[axis] = -FLT_MAX;
+	}
 
 	for (size_t i = 0; i < count; ++i)
 	{
 		const float* p = points + i * points_stride_float;
 		float r = radii[i * radii_stride_float];
 
-		for (int axis = 0; axis < 3; ++axis)
+		for (size_t axis = 0; axis < axis_count; ++axis)
 		{
-			float bmin = points[pmin[axis] * points_stride_float + axis] - radii[pmin[axis] * radii_stride_float];
-			float bmax = points[pmax[axis] * points_stride_float + axis] + radii[pmax[axis] * radii_stride_float];
+			const float* ax = kAxes[axis];
+
+			float tp = ax[0] * p[0] + ax[1] * p[1] + ax[2] * p[2];
+			float tpmin = tp - r, tpmax = tp + r;
 
-			pmin[axis] = (p[axis] - r < bmin) ? i : pmin[axis];
-			pmax[axis] = (p[axis] + r > bmax) ? i : pmax[axis];
+			pmin[axis] = (tpmin < tmin[axis]) ? i : pmin[axis];
+			pmax[axis] = (tpmax > tmax[axis]) ? i : pmax[axis];
+			tmin[axis] = (tpmin < tmin[axis]) ? tpmin : tmin[axis];
+			tmax[axis] = (tpmax > tmax[axis]) ? tpmax : tmax[axis];
 		}
 	}
 
 	// find the pair of points with largest distance
-	int paxis = 0;
+	size_t paxis = 0;
 	float paxisdr = 0;
 
-	for (int axis = 0; axis < 3; ++axis)
+	for (size_t axis = 0; axis < axis_count; ++axis)
 	{
 		const float* p1 = points + pmin[axis] * points_stride_float;
 		const float* p2 = points + pmax[axis] * points_stride_float;
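
For intuition on the new axis_count parameter: the extremum search now projects every point onto up to seven directions (the three cardinal axes plus four normalized diagonals, following Larsson 2008), which gives a tighter initial sphere for corner-heavy point sets. A minimal standalone sketch of the projection step; the project helper is hypothetical and only mirrors the dot product in the loop above:

    #include <cstdio>

    // hypothetical helper mirroring the per-axis projection in computeBoundingSphere
    static float project(const float p[3], const float ax[3])
    {
        return ax[0] * p[0] + ax[1] * p[1] + ax[2] * p[2];
    }

    int main()
    {
        const float corner[3] = {1, 1, 1};
        const float diagonal[3] = {0.57735026f, 0.57735026f, 0.57735026f};

        // along any cardinal axis the corner projects to 1, but along the unit
        // diagonal it projects to ~1.732 (sqrt(3)); the extra axes catch
        // extrema that a 3-axis search would miss
        std::printf("%f\n", project(corner, diagonal));
        return 0;
    }
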
@@ -698,6 +728,314 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points,
 	}
 }
 
+struct BVHBox
+{
+	float min[3];
+	float max[3];
+};
+
+static void boxMerge(BVHBox& box, const BVHBox& other)
+{
+	for (int k = 0; k < 3; ++k)
+	{
+		box.min[k] = other.min[k] < box.min[k] ? other.min[k] : box.min[k];
+		box.max[k] = other.max[k] > box.max[k] ? other.max[k] : box.max[k];
+	}
+}
+
+inline float boxSurface(const BVHBox& box)
+{
+	float sx = box.max[0] - box.min[0], sy = box.max[1] - box.min[1], sz = box.max[2] - box.min[2];
+	return sx * sy + sx * sz + sy * sz;
+}
+
+inline unsigned int radixFloat(unsigned int v)
+{
+	// if sign bit is 0, flip sign bit
+	// if sign bit is 1, flip everything
+	unsigned int mask = (int(v) >> 31) | 0x80000000;
+	return v ^ mask;
+}
+
+static void computeHistogram(unsigned int (&hist)[1024][3], const float* data, size_t count)
+{
+	memset(hist, 0, sizeof(hist));
+
+	const unsigned int* bits = reinterpret_cast<const unsigned int*>(data);
+
+	// compute 3 10-bit histograms in parallel (dropping 2 LSB)
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned int id = radixFloat(bits[i]);
+
+		hist[(id >> 2) & 1023][0]++;
+		hist[(id >> 12) & 1023][1]++;
+		hist[(id >> 22) & 1023][2]++;
+	}
+
+	unsigned int sum0 = 0, sum1 = 0, sum2 = 0;
+
+	// replace histogram data with prefix histogram sums in-place
+	for (int i = 0; i < 1024; ++i)
+	{
+		unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2];
+
+		hist[i][0] = sum0;
+		hist[i][1] = sum1;
+		hist[i][2] = sum2;
+
+		sum0 += hx;
+		sum1 += hy;
+		sum2 += hz;
+	}
+
+	assert(sum0 == count && sum1 == count && sum2 == count);
+}
+
+static void radixPass(unsigned int* destination, const unsigned int* source, const float* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
+{
+	const unsigned int* bits = reinterpret_cast<const unsigned int*>(keys);
+	int bitoff = pass * 10 + 2; // drop 2 LSB to be able to use 3 10-bit passes
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned int id = (radixFloat(bits[source[i]]) >> bitoff) & 1023;
+
+		destination[hist[id][pass]++] = source[i];
+	}
+}
+
+static void bvhPrepare(BVHBox* boxes, float* centroids, const unsigned int* indices, size_t face_count, const float* vertex_positions, size_t vertex_count, size_t vertex_stride_float)
+{
+	(void)vertex_count;
+
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+		const float* va = vertex_positions + vertex_stride_float * a;
+		const float* vb = vertex_positions + vertex_stride_float * b;
+		const float* vc = vertex_positions + vertex_stride_float * c;
+
+		BVHBox& box = boxes[i];
+
+		for (int k = 0; k < 3; ++k)
+		{
+			box.min[k] = va[k] < vb[k] ? va[k] : vb[k];
+			box.min[k] = vc[k] < box.min[k] ? vc[k] : box.min[k];
+
+			box.max[k] = va[k] > vb[k] ? va[k] : vb[k];
+			box.max[k] = vc[k] > box.max[k] ? vc[k] : box.max[k];
+
+			centroids[i + face_count * k] = (box.min[k] + box.max[k]) / 2.f;
+		}
+	}
+}
+
+static bool bvhPackLeaf(unsigned char* boundary, const unsigned int* order, size_t count, short* used, const unsigned int* indices, size_t max_vertices)
+{
+	// count number of unique vertices
+	size_t used_vertices = 0;
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned int index = order[i];
+		unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2];
+
+		used_vertices += (used[a] < 0) + (used[b] < 0) + (used[c] < 0);
+		used[a] = used[b] = used[c] = 1;
+	}
+
+	// reset used[] for future invocations
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned int index = order[i];
+		unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2];
+
+		used[a] = used[b] = used[c] = -1;
+	}
+
+	if (used_vertices > max_vertices)
+		return false;
+
+	// mark meshlet boundary for future reassembly
+	assert(count > 0);
+
+	boundary[0] = 1;
+	memset(boundary + 1, 0, count - 1);
+
+	return true;
+}
+
+static void bvhPackTail(unsigned char* boundary, const unsigned int* order, size_t count, short* used, const unsigned int* indices, size_t max_vertices, size_t max_triangles)
+{
+	for (size_t i = 0; i < count;)
+	{
+		size_t chunk = i + max_triangles <= count ? max_triangles : count - i;
+
+		if (bvhPackLeaf(boundary + i, order + i, chunk, used, indices, max_vertices))
+		{
+			i += chunk;
+			continue;
+		}
+
+		// chunk is vertex bound, split it into smaller meshlets
+		assert(chunk > max_vertices / 3);
+
+		bvhPackLeaf(boundary + i, order + i, max_vertices / 3, used, indices, max_vertices);
+		i += max_vertices / 3;
+	}
+}
+
+static bool bvhDivisible(size_t count, size_t min, size_t max)
+{
+	// count is representable as a sum of values in [min..max] if it is in range of [k*min..k*min+k*(max-min)]
+	// equivalent to ceil(count / max) <= floor(count / min), but the form below allows using idiv (see nv_cluster_builder)
+	// we avoid expensive integer divisions in the common case where min is <= max/2
+	return min * 2 <= max ? count >= min : count % min <= (count / min) * (max - min);
+}
+
+static size_t bvhPivot(const BVHBox* boxes, const unsigned int* order, size_t count, void* scratch, size_t step, size_t min, size_t max, float fill, float* out_cost)
+{
+	BVHBox accuml = boxes[order[0]], accumr = boxes[order[count - 1]];
+	float* costs = static_cast<float*>(scratch);
+
+	// accumulate SAH cost in forward and backward directions
+	for (size_t i = 0; i < count; ++i)
+	{
+		boxMerge(accuml, boxes[order[i]]);
+		boxMerge(accumr, boxes[order[count - 1 - i]]);
+
+		costs[i] = boxSurface(accuml);
+		costs[i + count] = boxSurface(accumr);
+	}
+
+	bool aligned = count >= min * 2 && bvhDivisible(count, min, max);
+	size_t end = aligned ? count - min : count - 1;
+
+	float rmaxf = 1.f / float(int(max));
+
+	// find best split that minimizes SAH
+	size_t bestsplit = 0;
+	float bestcost = FLT_MAX;
+
+	for (size_t i = min - 1; i < end; i += step)
+	{
+		size_t lsplit = i + 1, rsplit = count - (i + 1);
+
+		if (!bvhDivisible(lsplit, min, max))
+			continue;
+		if (aligned && !bvhDivisible(rsplit, min, max))
+			continue;
+
+		// costs[x] = inclusive surface area of boxes[0..x]
+		// costs[count-1-x] = inclusive surface area of boxes[x..count-1]
+		float larea = costs[i], rarea = costs[(count - 1 - (i + 1)) + count];
+		float cost = larea * float(int(lsplit)) + rarea * float(int(rsplit));
+
+		if (cost > bestcost)
+			continue;
+
+		// fill cost; use floating point math to avoid expensive integer modulo
+		int lrest = int(float(int(lsplit + max - 1)) * rmaxf) * int(max) - int(lsplit);
+		int rrest = int(float(int(rsplit + max - 1)) * rmaxf) * int(max) - int(rsplit);
+
+		cost += fill * (float(lrest) * larea + float(rrest) * rarea);
+
+		if (cost < bestcost)
+		{
+			bestcost = cost;
+			bestsplit = i + 1;
+		}
+	}
+
+	*out_cost = bestcost;
+	return bestsplit;
+}
+
+static void bvhPartition(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count)
+{
+	size_t l = 0, r = split;
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned char side = sides[order[i]];
+		target[side ? r : l] = order[i];
+		l += 1 - side;
+		r += side;
+	}
+
+	assert(l == split && r == count);
+}
+
+static void bvhSplit(const BVHBox* boxes, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, unsigned char* boundary, size_t count, int depth, void* scratch, short* used, const unsigned int* indices, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight)
+{
+	if (depth >= kMeshletMaxTreeDepth)
+		return bvhPackTail(boundary, orderx, count, used, indices, max_vertices, max_triangles);
+
+	if (count <= max_triangles && bvhPackLeaf(boundary, orderx, count, used, indices, max_vertices))
+		return;
+
+	unsigned int* axes[3] = {orderx, ordery, orderz};
+
+	// we can use step=1 unconditionally but to reduce the cost for min=max case we use step=max
+	size_t step = min_triangles == max_triangles && count > max_triangles ? max_triangles : 1;
+
+	// if we could not pack the meshlet, we must be vertex bound
+	size_t mint = count <= max_triangles && max_vertices / 3 < min_triangles ? max_vertices / 3 : min_triangles;
+
+	// only use fill weight if we are optimizing for triangle count
+	float fill = count <= max_triangles ? 0.f : fill_weight;
+
+	// find best split that minimizes SAH
+	int bestk = -1;
+	size_t bestsplit = 0;
+	float bestcost = FLT_MAX;
+
+	for (int k = 0; k < 3; ++k)
+	{
+		float axiscost = FLT_MAX;
+		size_t axissplit = bvhPivot(boxes, axes[k], count, scratch, step, mint, max_triangles, fill, &axiscost);
+
+		if (axissplit && axiscost < bestcost)
+		{
+			bestk = k;
+			bestcost = axiscost;
+			bestsplit = axissplit;
+		}
+	}
+
+	// this may happen if SAH costs along the admissible splits are NaN
+	if (bestk < 0)
+		return bvhPackTail(boundary, orderx, count, used, indices, max_vertices, max_triangles);
+
+	// mark sides of split for partitioning
+	unsigned char* sides = static_cast<unsigned char*>(scratch) + count * sizeof(unsigned int);
+
+	for (size_t i = 0; i < bestsplit; ++i)
+		sides[axes[bestk][i]] = 0;
+
+	for (size_t i = bestsplit; i < count; ++i)
+		sides[axes[bestk][i]] = 1;
+
+	// partition all axes into two sides, maintaining order
+	unsigned int* temp = static_cast<unsigned int*>(scratch);
+
+	for (int k = 0; k < 3; ++k)
+	{
+		if (k == bestk)
+			continue;
+
+		unsigned int* axis = axes[k];
+		memcpy(temp, axis, sizeof(unsigned int) * count);
+		bvhPartition(axis, temp, sides, bestsplit, count);
+	}
+
+	bvhSplit(boxes, orderx, ordery, orderz, boundary, bestsplit, depth + 1, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight);
+	bvhSplit(boxes, orderx + bestsplit, ordery + bestsplit, orderz + bestsplit, boundary + bestsplit, count - bestsplit, depth + 1, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight);
+}
+
 } // namespace meshopt
 
 size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
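
A quick sanity check of the bvhDivisible arithmetic above, restated standalone with illustrative values (not part of the patch):

    #include <cassert>
    #include <cstddef>

    // same expression as bvhDivisible: can `count` be written as a sum of
    // chunk sizes that each lie in [min..max]?
    static bool divisible(size_t count, size_t min, size_t max)
    {
        return min * 2 <= max ? count >= min : count % min <= (count / min) * (max - min);
    }

    int main()
    {
        assert(divisible(7, 4, 8));        // min*2 <= max: any count >= min works
        assert(!divisible(130, 126, 128)); // one chunk holds at most 128, two need at least 252
        assert(divisible(254, 126, 128));  // 126 + 128 = 254 is representable
        return 0;
    }
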
@@ -962,6 +1300,108 @@ size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshle
 	return meshlet_offset;
 }
 
+size_t meshopt_buildMeshletsSpatial(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+	assert(min_triangles >= 1 && min_triangles <= max_triangles && max_triangles <= kMeshletMaxTriangles);
+	assert(min_triangles % 4 == 0 && max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+	if (index_count == 0)
+		return 0;
+
+	size_t face_count = index_count / 3;
+	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+	meshopt_Allocator allocator;
+
+	// 3 floats plus 1 uint for sorting, or
+	// 2 floats for SAH costs, or
+	// 1 uint plus 1 byte for partitioning
+	float* scratch = allocator.allocate<float>(face_count * 4);
+
+	// compute bounding boxes and centroids for sorting
+	BVHBox* boxes = allocator.allocate<BVHBox>(face_count);
+	bvhPrepare(boxes, scratch, indices, face_count, vertex_positions, vertex_count, vertex_stride_float);
+
+	unsigned int* axes = allocator.allocate<unsigned int>(face_count * 3);
+	unsigned int* temp = reinterpret_cast<unsigned int*>(scratch) + face_count * 3;
+
+	for (int k = 0; k < 3; ++k)
+	{
+		unsigned int* order = axes + k * face_count;
+		const float* keys = scratch + k * face_count;
+
+		unsigned int hist[1024][3];
+		computeHistogram(hist, keys, face_count);
+
+		// 3-pass radix sort computes the resulting order into axes
+		for (size_t i = 0; i < face_count; ++i)
+			temp[i] = unsigned(i);
+
+		radixPass(order, temp, keys, face_count, hist, 0);
+		radixPass(temp, order, keys, face_count, hist, 1);
+		radixPass(order, temp, keys, face_count, hist, 2);
+	}
+
+	// index of the vertex in the meshlet, -1 if the vertex isn't used
+	short* used = allocator.allocate<short>(vertex_count);
+	memset(used, -1, vertex_count * sizeof(short));
+
+	unsigned char* boundary = allocator.allocate<unsigned char>(face_count);
+
+	bvhSplit(boxes, &axes[0], &axes[face_count], &axes[face_count * 2], boundary, face_count, 0, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight);
+
+	// compute the desired number of meshlets; note that on some meshes with a lot of vertex bound clusters this might go over the bound
+	size_t meshlet_count = 0;
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		assert(boundary[i] <= 1);
+		meshlet_count += boundary[i];
+	}
+
+	size_t meshlet_bound = meshopt_buildMeshletsBound(index_count, max_vertices, min_triangles);
+
+	// pack triangles into meshlets according to the order and boundaries marked by bvhSplit
+	meshopt_Meshlet meshlet = {};
+	size_t meshlet_offset = 0;
+	size_t meshlet_pending = meshlet_count;
+
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		assert(boundary[i] <= 1);
+		bool split = i > 0 && boundary[i] == 1;
+
+		// while we are over the limit, we ignore boundary[] data and disable splits until we free up enough space
+		if (split && meshlet_count > meshlet_bound && meshlet_offset + meshlet_pending >= meshlet_bound)
+			split = false;
+
+		unsigned int index = axes[i];
+		assert(index < face_count);
+
+		unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2];
+
+		// appends triangle to the meshlet and writes previous meshlet to the output if full
+		meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles, split);
+		meshlet_pending -= boundary[i];
+	}
+
+	if (meshlet.triangle_count)
+	{
+		finishMeshlet(meshlet, meshlet_triangles);
+
+		meshlets[meshlet_offset++] = meshlet;
+	}
+
+	assert(meshlet_offset <= meshlet_bound);
+	return meshlet_offset;
+}
+
 meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
 	using namespace meshopt;
@@ -1022,13 +1462,13 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
 
 	// compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
 	float psphere[4] = {};
-	computeBoundingSphere(psphere, corners[0][0], triangles * 3, sizeof(float) * 3, &rzero, 0);
+	computeBoundingSphere(psphere, corners[0][0], triangles * 3, sizeof(float) * 3, &rzero, 0, 7);
 
 	float center[3] = {psphere[0], psphere[1], psphere[2]};
 
 	// treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
 	float nsphere[4] = {};
-	computeBoundingSphere(nsphere, normals[0], triangles, sizeof(float) * 3, &rzero, 0);
+	computeBoundingSphere(nsphere, normals[0], triangles, sizeof(float) * 3, &rzero, 0, 3);
 
 	float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
 	float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
@@ -1155,7 +1595,7 @@ meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count,
 	const float rzero = 0.f;
 
 	float psphere[4] = {};
-	computeBoundingSphere(psphere, positions, count, positions_stride, radii ? radii : &rzero, radii ? radii_stride : 0);
+	computeBoundingSphere(psphere, positions, count, positions_stride, radii ? radii : &rzero, radii ? radii_stride : 0, 7);
 
 	bounds.center[0] = psphere[0];
 	bounds.center[1] = psphere[1];
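
A hedged usage sketch for the new spatial meshlet builder; the 64/64/128 configuration and fill_weight of 0.5 are assumptions taken from the header comments, and buffer sizing follows the documented worst cases (note the bound uses min_triangles, not max):

    #include "meshoptimizer.h"
    #include <vector>

    size_t build_rt_meshlets(const std::vector<unsigned int>& indices,
                             const std::vector<float>& positions, // tightly packed xyz
                             std::vector<meshopt_Meshlet>& meshlets,
                             std::vector<unsigned int>& meshlet_vertices,
                             std::vector<unsigned char>& meshlet_triangles)
    {
        const size_t max_vertices = 64, min_triangles = 64, max_triangles = 128;

        // worst case must be computed with min_triangles, not max (see header comment)
        size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, min_triangles);

        meshlets.resize(max_meshlets);
        meshlet_vertices.resize(max_meshlets * max_vertices);
        meshlet_triangles.resize(max_meshlets * max_triangles * 3);

        size_t count = meshopt_buildMeshletsSpatial(
            meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
            indices.data(), indices.size(),
            positions.data(), positions.size() / 3, sizeof(float) * 3,
            max_vertices, min_triangles, max_triangles, /* fill_weight= */ 0.5f);

        meshlets.resize(count);
        return count;
    }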

+ 53 - 0
thirdparty/meshoptimizer/vcacheanalyzer.cpp → thirdparty/meshoptimizer/indexanalyzer.cpp

@@ -71,3 +71,56 @@ meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* ind
 
 	return result;
 }
+
+meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+{
+	assert(index_count % 3 == 0);
+	assert(vertex_size > 0 && vertex_size <= 256);
+
+	meshopt_Allocator allocator;
+
+	meshopt_VertexFetchStatistics result = {};
+
+	unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
+	memset(vertex_visited, 0, vertex_count);
+
+	const size_t kCacheLine = 64;
+	const size_t kCacheSize = 128 * 1024;
+
+	// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
+	size_t cache[kCacheSize / kCacheLine] = {};
+
+	for (size_t i = 0; i < index_count; ++i)
+	{
+		unsigned int index = indices[i];
+		assert(index < vertex_count);
+
+		vertex_visited[index] = 1;
+
+		size_t start_address = index * vertex_size;
+		size_t end_address = start_address + vertex_size;
+
+		size_t start_tag = start_address / kCacheLine;
+		size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
+
+		assert(start_tag < end_tag);
+
+		for (size_t tag = start_tag; tag < end_tag; ++tag)
+		{
+			size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
+
+			// we store +1 since cache is filled with 0 by default
+			result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
+			cache[line] = tag + 1;
+		}
+	}
+
+	size_t unique_vertex_count = 0;
+
+	for (size_t i = 0; i < vertex_count; ++i)
+		unique_vertex_count += vertex_visited[i];
+
+	result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
+
+	return result;
+}
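
The vertex fetch analyzer is unchanged by the move from vfetchanalyzer.cpp; a minimal call, with the 16-byte vertex_size being an assumption:

    #include "meshoptimizer.h"
    #include <cstdio>
    #include <vector>

    void report_fetch(const std::vector<unsigned int>& indices, size_t vertex_count)
    {
        // overfetch of 1.0 is the best case: every vertex byte fetched exactly once
        meshopt_VertexFetchStatistics stats = meshopt_analyzeVertexFetch(
            indices.data(), indices.size(), vertex_count, /* vertex_size= */ 16);

        std::printf("fetched %u bytes, overfetch %.2f\n", stats.bytes_fetched, stats.overfetch);
    }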

+ 12 - 16
thirdparty/meshoptimizer/indexcodec.cpp

@@ -210,6 +210,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons
 
 		if (fer >= 0 && (fer >> 2) < 15)
 		{
+			// note: getEdgeFifo implicitly rotates triangles by matching a/b to existing edge
 			const unsigned int* order = kTriangleIndexOrder[fer & 3];
 
 			unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
@@ -267,6 +268,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons
 			int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
 
 			// after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
+			// note: decoder implicitly assumes that if feb=fec=0, then fea=0 (reset code); this is enforced by rotation
 			int fea = (a == next) ? (next++, 0) : 15;
 			int feb = (fb >= 0 && fb < 14) ? fb + 1 : (b == next ? (next++, 0) : 15);
 			int fec = (fc >= 0 && fc < 14) ? fc + 1 : (c == next ? (next++, 0) : 15);
@@ -433,6 +435,7 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde
 			// fifo reads are wrapped around 16 entry buffer
 			unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
 			unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
+			unsigned int c = 0;
 
 			int fec = codetri & 15;
 
@@ -442,37 +445,30 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde
 			{
 				// fifo reads are wrapped around 16 entry buffer
 				unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
-				unsigned int c = (fec == 0) ? next : cf;
+				c = (fec == 0) ? next : cf;
 
 				int fec0 = fec == 0;
 				next += fec0;
 
-				// output triangle
-				writeTriangle(destination, i, index_size, a, b, c);
-
-				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+				// push vertex fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
 				pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
-
-				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
-				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
 			}
 			else
 			{
-				unsigned int c = 0;
-
 				// fec - (fec ^ 3) decodes 13, 14 into -1, 1
 				// note that we need to update the last index since free indices are delta-encoded
 				last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
 
-				// output triangle
-				writeTriangle(destination, i, index_size, a, b, c);
-
 				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
 				pushVertexFifo(vertexfifo, c, vertexfifooffset);
-
-				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
-				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
 			}
+
+			// push edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+			pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+			pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+
+			// output triangle
+			writeTriangle(destination, i, index_size, a, b, c);
 		}
 		else
 		{
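
The decoder refactor above hoists the FIFO pushes and the triangle write out of both branches without changing the format, so the usual lossless round trip through the public API still holds; a sketch assuming a triangle-list input:

    #include "meshoptimizer.h"
    #include <cassert>
    #include <vector>

    void roundtrip(const std::vector<unsigned int>& indices, size_t vertex_count)
    {
        std::vector<unsigned char> buffer(meshopt_encodeIndexBufferBound(indices.size(), vertex_count));
        buffer.resize(meshopt_encodeIndexBuffer(buffer.data(), buffer.size(), indices.data(), indices.size()));

        std::vector<unsigned int> decoded(indices.size());
        int rc = meshopt_decodeIndexBuffer(decoded.data(), decoded.size(), sizeof(unsigned int), buffer.data(), buffer.size());

        // decoding reproduces the input bit-exactly
        assert(rc == 0 && decoded == indices);
        (void)rc;
    }
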

+ 116 - 112
thirdparty/meshoptimizer/indexgenerator.cpp

@@ -5,6 +5,7 @@
 #include <string.h>
 
 // This work is based on:
+// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
 // John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
 // John Hable. Variable Rate Shading with Visibility Buffer Rendering. 2024
 namespace meshopt
@@ -86,6 +87,46 @@ struct VertexStreamHasher
 	}
 };
 
+struct VertexCustomHasher
+{
+	const float* vertex_positions;
+	size_t vertex_stride_float;
+
+	int (*callback)(void*, unsigned int, unsigned int);
+	void* context;
+
+	size_t hash(unsigned int index) const
+	{
+		const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float);
+
+		unsigned int x = key[0], y = key[1], z = key[2];
+
+		// replace negative zero with zero
+		x = (x == 0x80000000) ? 0 : x;
+		y = (y == 0x80000000) ? 0 : y;
+		z = (z == 0x80000000) ? 0 : z;
+
+		// scramble bits to make sure that integer coordinates have entropy in lower bits
+		x ^= x >> 17;
+		y ^= y >> 17;
+		z ^= z >> 17;
+
+		// Optimized Spatial Hashing for Collision Detection of Deformable Objects
+		return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
+	}
+
+	bool equal(unsigned int lhs, unsigned int rhs) const
+	{
+		const float* lp = vertex_positions + lhs * vertex_stride_float;
+		const float* rp = vertex_positions + rhs * vertex_stride_float;
+
+		if (lp[0] != rp[0] || lp[1] != rp[1] || lp[2] != rp[2])
+			return false;
+
+		return callback ? callback(context, lhs, rhs) : true;
+	}
+};
+
 struct EdgeHasher
 {
 	const unsigned int* remap;
@@ -183,6 +224,43 @@ static void buildPositionRemap(unsigned int* remap, const float* vertex_position
 	allocator.deallocate(vertex_table);
 }
 
+template <typename Hash>
+static size_t generateVertexRemap(unsigned int* remap, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
+{
+	memset(remap, -1, vertex_count * sizeof(unsigned int));
+
+	size_t table_size = hashBuckets(vertex_count);
+	unsigned int* table = allocator.allocate<unsigned int>(table_size);
+	memset(table, -1, table_size * sizeof(unsigned int));
+
+	unsigned int next_vertex = 0;
+
+	for (size_t i = 0; i < index_count; ++i)
+	{
+		unsigned int index = indices ? indices[i] : unsigned(i);
+		assert(index < vertex_count);
+
+		if (remap[index] != ~0u)
+			continue;
+
+		unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
+
+		if (*entry == ~0u)
+		{
+			*entry = index;
+			remap[index] = next_vertex++;
+		}
+		else
+		{
+			assert(remap[*entry] != ~0u);
+			remap[index] = remap[*entry];
+		}
+	}
+
+	assert(next_vertex <= vertex_count);
+	return next_vertex;
+}
+
 template <size_t BlockSize>
 static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
 {
@@ -197,55 +275,49 @@ static void remapVertices(void* destination, const void* vertices, size_t vertex
 		}
 }
 
-} // namespace meshopt
-
-size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+template <typename Hash>
+static void generateShadowBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
 {
-	using namespace meshopt;
-
-	assert(indices || index_count == vertex_count);
-	assert(!indices || index_count % 3 == 0);
-	assert(vertex_size > 0 && vertex_size <= 256);
-
-	meshopt_Allocator allocator;
-
-	memset(destination, -1, vertex_count * sizeof(unsigned int));
-
-	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};
+	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+	memset(remap, -1, vertex_count * sizeof(unsigned int));
 
 	size_t table_size = hashBuckets(vertex_count);
 	unsigned int* table = allocator.allocate<unsigned int>(table_size);
 	memset(table, -1, table_size * sizeof(unsigned int));
 
-	unsigned int next_vertex = 0;
-
 	for (size_t i = 0; i < index_count; ++i)
 	{
-		unsigned int index = indices ? indices[i] : unsigned(i);
+		unsigned int index = indices[i];
 		assert(index < vertex_count);
 
-		if (destination[index] == ~0u)
+		if (remap[index] == ~0u)
 		{
-			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+			unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
 
 			if (*entry == ~0u)
-			{
 				*entry = index;
 
-				destination[index] = next_vertex++;
-			}
-			else
-			{
-				assert(destination[*entry] != ~0u);
-
-				destination[index] = destination[*entry];
-			}
+			remap[index] = *entry;
 		}
+
+		destination[i] = remap[index];
 	}
+}
 
-	assert(next_vertex <= vertex_count);
+} // namespace meshopt
 
-	return next_vertex;
+size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+{
+	using namespace meshopt;
+
+	assert(indices || index_count == vertex_count);
+	assert(!indices || index_count % 3 == 0);
+	assert(vertex_size > 0 && vertex_size <= 256);
+
+	meshopt_Allocator allocator;
+	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};
+
+	return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
 }
 
 size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
@@ -263,44 +335,24 @@ size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigne
 	}
 
 	meshopt_Allocator allocator;
-
-	memset(destination, -1, vertex_count * sizeof(unsigned int));
-
 	VertexStreamHasher hasher = {streams, stream_count};
 
-	size_t table_size = hashBuckets(vertex_count);
-	unsigned int* table = allocator.allocate<unsigned int>(table_size);
-	memset(table, -1, table_size * sizeof(unsigned int));
-
-	unsigned int next_vertex = 0;
-
-	for (size_t i = 0; i < index_count; ++i)
-	{
-		unsigned int index = indices ? indices[i] : unsigned(i);
-		assert(index < vertex_count);
-
-		if (destination[index] == ~0u)
-		{
-			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
-
-			if (*entry == ~0u)
-			{
-				*entry = index;
+	return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
+}
 
-				destination[index] = next_vertex++;
-			}
-			else
-			{
-				assert(destination[*entry] != ~0u);
+size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context)
+{
+	using namespace meshopt;
 
-				destination[index] = destination[*entry];
-			}
-		}
-	}
+	assert(indices || index_count == vertex_count);
+	assert(!indices || index_count % 3 == 0);
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
 
-	assert(next_vertex <= vertex_count);
+	meshopt_Allocator allocator;
+	VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), callback, context};
 
-	return next_vertex;
+	return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
 }
 
 void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
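
A hedged sketch of the new custom remap from the C API, welding vertices that share a position only when a hypothetical per-vertex material id also matches (WeldContext and the attribute are illustrative, not part of the library):

    #include "meshoptimizer.h"
    #include <vector>

    struct WeldContext
    {
        const int* material_ids; // hypothetical per-vertex attribute
    };

    static int weld_equal(void* context, unsigned int lhs, unsigned int rhs)
    {
        // positions were already compared equal by the library; only the
        // extra attribute check happens here
        const WeldContext* ctx = static_cast<const WeldContext*>(context);
        return ctx->material_ids[lhs] == ctx->material_ids[rhs];
    }

    size_t weld(std::vector<unsigned int>& remap,
                const std::vector<unsigned int>& indices,
                const std::vector<float>& positions, // tightly packed xyz
                const std::vector<int>& material_ids)
    {
        remap.resize(positions.size() / 3);

        WeldContext ctx = {material_ids.data()};
        return meshopt_generateVertexRemapCustom(remap.data(), indices.data(), indices.size(),
                                                 positions.data(), positions.size() / 3, sizeof(float) * 3,
                                                 weld_equal, &ctx);
    }
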
@@ -362,33 +414,9 @@ void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned
 	assert(vertex_size <= vertex_stride);
 
 	meshopt_Allocator allocator;
-
-	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
-	memset(remap, -1, vertex_count * sizeof(unsigned int));
-
 	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};
 
-	size_t table_size = hashBuckets(vertex_count);
-	unsigned int* table = allocator.allocate<unsigned int>(table_size);
-	memset(table, -1, table_size * sizeof(unsigned int));
-
-	for (size_t i = 0; i < index_count; ++i)
-	{
-		unsigned int index = indices[i];
-		assert(index < vertex_count);
-
-		if (remap[index] == ~0u)
-		{
-			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
-
-			if (*entry == ~0u)
-				*entry = index;
-
-			remap[index] = *entry;
-		}
-
-		destination[i] = remap[index];
-	}
+	generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
 }
 
 void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
@@ -406,33 +434,9 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns
 	}
 
 	meshopt_Allocator allocator;
-
-	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
-	memset(remap, -1, vertex_count * sizeof(unsigned int));
-
 	VertexStreamHasher hasher = {streams, stream_count};
 
-	size_t table_size = hashBuckets(vertex_count);
-	unsigned int* table = allocator.allocate<unsigned int>(table_size);
-	memset(table, -1, table_size * sizeof(unsigned int));
-
-	for (size_t i = 0; i < index_count; ++i)
-	{
-		unsigned int index = indices[i];
-		assert(index < vertex_count);
-
-		if (remap[index] == ~0u)
-		{
-			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
-
-			if (*entry == ~0u)
-				*entry = index;
-
-			remap[index] = *entry;
-		}
-
-		destination[i] = remap[index];
-	}
+	generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
 }
 
 void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)

+ 153 - 23
thirdparty/meshoptimizer/meshoptimizer.h

@@ -1,5 +1,5 @@
 /**
- * meshoptimizer - version 0.23
+ * meshoptimizer - version 0.24
  *
  * Copyright (C) 2016-2025, by Arseny Kapoulkine ([email protected])
  * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
@@ -12,7 +12,7 @@
 #include <stddef.h>
 
 /* Version macro; major * 1000 + minor * 10 + patch */
-#define MESHOPTIMIZER_VERSION 230 /* 0.23 */
+#define MESHOPTIMIZER_VERSION 240 /* 0.24 */
 
 /* If no API is defined, assume default */
 #ifndef MESHOPTIMIZER_API
@@ -74,6 +74,19 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination,
  */
 MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
 
+/**
+ * Experimental: Generates a vertex remap table from the vertex buffer and an optional index buffer and returns the number of unique vertices
+ * As a result, all vertices that are equivalent map to the same (new) location, with no gaps in the resulting sequence.
+ * Equivalence is checked in two steps: vertex positions are compared for equality, and then the user-specified equality function is called (if provided).
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+ *
+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ * indices can be NULL if the input is unindexed
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ * callback can be NULL if no additional equality check is needed; otherwise, it should return 1 if vertices with specified indices are equivalent and 0 if they are not
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context);
+
 /**
  * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap
  *
@@ -141,7 +154,7 @@ MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destin
 MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
 /**
- * Experimental: Generate index buffer that can be used for visibility buffer rendering and returns the size of the reorder table
+ * Generate index buffer that can be used for visibility buffer rendering and returns the size of the reorder table
  * Each triangle's provoking vertex index is equal to primitive id; this allows passing it to the fragment shader using nointerpolate attribute.
  * This is important for performance on hardware where primitive id can't be accessed efficiently in fragment shader.
  * The reorder table stores the original vertex id for each vertex in the new index buffer, and should be used in the vertex shader to load vertex data.
@@ -151,7 +164,7 @@ MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* des
  * destination must contain enough space for the resulting index buffer (index_count elements)
  * reorder must contain enough space for the worst case reorder table (vertex_count + index_count/3 elements)
  */
-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count);
+MESHOPTIMIZER_API size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count);
 
 /**
  * Vertex transform cache optimizer
@@ -287,12 +300,13 @@ MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, si
 /**
  * Experimental: Vertex buffer encoder
  * Encodes vertex data just like meshopt_encodeVertexBuffer, but allows to override compression level.
- * For compression level to take effect, the vertex encoding version must be set to 1 via meshopt_encodeVertexVersion.
+ * For compression level to take effect, the vertex encoding version must be set to 1.
  * The default compression level implied by meshopt_encodeVertexBuffer is 2.
  *
  * level should be in the range [0, 3] with 0 being the fastest and 3 being the slowest and producing the best compression ratio.
+ * version should be -1 to use the default version (specified via meshopt_encodeVertexVersion), or 0/1 to override the version; per above, level won't take effect if version is 0.
  */
-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version);
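
A sketch of the updated signature in use, encoding at the slowest level and forcing version 1 so the level takes effect (values per the comment above; buffer sizing via the existing bound helper):

    #include "meshoptimizer.h"
    #include <vector>

    std::vector<unsigned char> encode_best(const void* vertices, size_t vertex_count, size_t vertex_size)
    {
        std::vector<unsigned char> buffer(meshopt_encodeVertexBufferBound(vertex_count, vertex_size));

        // level 3 is slowest/smallest; it only takes effect with encoding version 1,
        // forced here via the new version parameter (-1 would use the global default)
        buffer.resize(meshopt_encodeVertexBufferLevel(buffer.data(), buffer.size(),
                                                      vertices, vertex_count, vertex_size,
                                                      /* level= */ 3, /* version= */ 1));
        return buffer;
    }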
 
 /**
  * Set vertex encoder format version
@@ -425,6 +439,19 @@ MESHOPTIMIZER_API size_t meshopt_simplifyWithAttributes(unsigned int* destinatio
  */
 MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
 
+/**
+ * Experimental: Mesh simplifier (pruner)
+ * Reduces the number of triangles in the mesh by removing small isolated parts of the mesh
+ * Returns the number of indices after simplification, with destination containing new index data
+ * The resulting index buffer references vertices from the original vertex buffer.
+ * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+ *
+ * destination must contain enough space for the target index buffer, worst case is index_count elements
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error);
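
A usage sketch for the pruner under the documented contract (worst case output is index_count elements; 0.01 tolerates 1% deformation):

    #include "meshoptimizer.h"
    #include <vector>

    std::vector<unsigned int> prune(const std::vector<unsigned int>& indices,
                                    const std::vector<float>& positions) // tightly packed xyz
    {
        std::vector<unsigned int> pruned(indices.size()); // worst case: index_count elements

        // removes small disconnected parts of the mesh within the error budget
        pruned.resize(meshopt_simplifyPrune(pruned.data(), indices.data(), indices.size(),
                                            positions.data(), positions.size() / 3, sizeof(float) * 3,
                                            /* target_error= */ 0.01f));
        return pruned;
    }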
+
 /**
  * Point cloud simplifier
  * Reduces the number of points in the cloud to reach the given target
@@ -485,6 +512,19 @@ struct meshopt_VertexCacheStatistics
  */
 MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size);
 
+struct meshopt_VertexFetchStatistics
+{
+	unsigned int bytes_fetched;
+	float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */
+};
+
+/**
+ * Vertex fetch cache analyzer
+ * Returns cache hit statistics using a simplified direct mapped model
+ * Results may not match actual GPU performance
+ */
+MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+
 struct meshopt_OverdrawStatistics
 {
 	unsigned int pixels_covered;
@@ -501,18 +541,19 @@ struct meshopt_OverdrawStatistics
  */
 MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
-struct meshopt_VertexFetchStatistics
+struct meshopt_CoverageStatistics
 {
-	unsigned int bytes_fetched;
-	float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */
+	float coverage[3];
+	float extent; /* viewport size in mesh coordinates */
 };
 
 /**
- * Vertex fetch cache analyzer
- * Returns cache hit statistics using a simplified direct mapped model
- * Results may not match actual GPU performance
+ * Experimental: Coverage analyzer
+ * Returns coverage statistics (ratio of viewport pixels covered from each axis) using a software rasterizer
+ *
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
  */
-MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL struct meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
 /**
  * Meshlet is a small mesh cluster (subset) that consists of:
@@ -567,6 +608,19 @@ MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t m
  */
 MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsFlex(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor);
 
+/**
+ * Experimental: Meshlet builder that produces clusters optimized for raytracing
+ * Splits the mesh into a set of meshlets, similarly to meshopt_buildMeshlets, but optimizes cluster subdivision for raytracing and allows to specify minimum and maximum number of triangles per meshlet.
+ *
+ * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound using min_triangles (not max!)
+ * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
+ * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ * max_vertices, min_triangles and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512; min_triangles <= max_triangles; both min_triangles and max_triangles must be divisible by 4)
+ * fill_weight allows to prioritize clusters that are closer to maximum size at some cost to SAH quality; 0.5 is a safe default
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsSpatial(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight);
+
 /**
  * Meshlet optimizer
  * Reorders meshlet vertices and triangles to maximize locality to improve rasterizer throughput
@@ -630,15 +684,16 @@ MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeSphereBounds(con
 
 /**
  * Experimental: Cluster partitioner
- * Partitions clusters into groups of similar size, prioritizing grouping clusters that share vertices.
+ * Partitions clusters into groups of similar size, prioritizing grouping clusters that share vertices or are close to each other.
  *
 * destination must contain enough space for the resulting partition data (cluster_count elements)
  * destination[i] will contain the partition id for cluster i, with the total number of partitions returned by the function
  * cluster_indices should have the vertex indices referenced by each cluster, stored sequentially
  * cluster_index_counts should have the number of indices in each cluster; sum of all cluster_index_counts must be equal to total_index_count
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex (or can be NULL if not used)
  * target_partition_size is a target size for each partition, in clusters; the resulting partitions may be smaller or larger
  */
-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size);
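
Existing callers must add the two position arguments; per the comment above, NULL positions keep connectivity-only grouping (the stride value passed alongside NULL is an assumption here):

    #include "meshoptimizer.h"
    #include <cstddef>

    // 0.24 adds vertex_positions/stride; passing NULL (with stride 0, assumed
    // acceptable when positions are unused) preserves the 0.23 behavior
    size_t partition_connectivity_only(unsigned int* destination,
                                       const unsigned int* cluster_indices, size_t total_index_count,
                                       const unsigned int* cluster_index_counts, size_t cluster_count,
                                       size_t vertex_count, size_t target_partition_size)
    {
        return meshopt_partitionClusters(destination, cluster_indices, total_index_count,
                                         cluster_index_counts, cluster_count,
                                         NULL, vertex_count, 0, target_partition_size);
    }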
 
 /**
  * Spatial sorter
@@ -651,13 +706,23 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_partitionClusters(unsigned int* destin
 MESHOPTIMIZER_API void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
 /**
- * Experimental: Spatial sorter
+ * Spatial sorter
  * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
  *
  * destination must contain enough space for the resulting index buffer (index_count elements)
  * vertex_positions should have float3 position in the first 12 bytes of each vertex
  */
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+MESHOPTIMIZER_API void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Experimental: Spatial clusterizer
+ * Reorders points into clusters optimized for spatial locality, and generates a new index buffer.
+ * Ensures the output can be split into cluster_size chunks where each chunk has good positional locality. Only the last chunk will be smaller than cluster_size.
+ *
+ * destination must contain enough space for the resulting index buffer (vertex_count elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size);
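
A usage sketch for the point clusterizer, assuming tightly packed float3 positions and an illustrative cluster size of 256:

    #include "meshoptimizer.h"
    #include <vector>

    std::vector<unsigned int> cluster_points(const std::vector<float>& positions)
    {
        size_t vertex_count = positions.size() / 3;
        std::vector<unsigned int> order(vertex_count); // destination: vertex_count elements

        // every 256-point chunk of the output references spatially coherent points;
        // only the last chunk may be smaller
        meshopt_spatialClusterPoints(order.data(), positions.data(), vertex_count,
                                     sizeof(float) * 3, /* cluster_size= */ 256);
        return order;
    }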
 
 /**
  * Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value
@@ -722,6 +787,10 @@ template <typename T>
 inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
 template <typename T>
 inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
+template <typename F>
+inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback);
+template <typename T, typename F>
+inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback);
 template <typename T>
 inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap);
 template <typename T>
@@ -754,6 +823,7 @@ template <typename T>
 inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
 template <typename T>
 inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
+inline size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level);
 template <typename T>
 inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL);
 template <typename T>
@@ -761,15 +831,19 @@ inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, s
 template <typename T>
 inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL);
 template <typename T>
+inline size_t meshopt_simplifyPrune(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error);
+template <typename T>
 inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
 template <typename T>
 inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index);
 template <typename T>
 inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size);
 template <typename T>
+inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+template <typename T>
 inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 template <typename T>
-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+inline meshopt_CoverageStatistics meshopt_analyzeCoverage(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 template <typename T>
 inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
 template <typename T>
@@ -777,9 +851,11 @@ inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int*
 template <typename T>
 inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor);
 template <typename T>
+inline size_t meshopt_buildMeshletsSpatial(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight);
+template <typename T>
 inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 template <typename T>
-inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size);
+inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size);
 template <typename T>
 inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 #endif
@@ -930,6 +1006,30 @@ inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const
 	return meshopt_generateVertexRemapMulti(destination, indices ? in.data : NULL, index_count, vertex_count, streams, stream_count);
 }
 
+template <typename F>
+inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback)
+{
+	struct Call
+	{
+		static int compare(void* context, unsigned int lhs, unsigned int rhs) { return (*static_cast<F*>(context))(lhs, rhs) ? 1 : 0; }
+	};
+
+	return meshopt_generateVertexRemapCustom(destination, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, &Call::compare, &callback);
+}
+
+template <typename T, typename F>
+inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback)
+{
+	struct Call
+	{
+		static int compare(void* context, unsigned int lhs, unsigned int rhs) { return (*static_cast<F*>(context))(lhs, rhs) ? 1 : 0; }
+	};
+
+	meshopt_IndexAdapter<T> in(NULL, indices, indices ? index_count : 0);
+
+	return meshopt_generateVertexRemapCustom(destination, indices ? in.data : NULL, index_count, vertex_positions, vertex_count, vertex_positions_stride, &Call::compare, &callback);
+}
+
 template <typename T>
 inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap)
 {
@@ -1074,6 +1174,11 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const
 	return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size);
 }
 
+inline size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level)
+{
+	return meshopt_encodeVertexBufferLevel(buffer, buffer_size, vertices, vertex_count, vertex_size, level, -1);
+}
+
 template <typename T>
 inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error)
 {
@@ -1101,6 +1206,15 @@ inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t in
 	return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error);
 }
 
+template <typename T>
+inline size_t meshopt_simplifyPrune(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error)
+{
+	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
+	meshopt_IndexAdapter<T> out(destination, NULL, index_count);
+
+	return meshopt_simplifyPrune(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_error);
+}
+
 template <typename T>
 inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index)
 {
@@ -1127,6 +1241,14 @@ inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices
 	return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size);
 }
 
+template <typename T>
+inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+{
+	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
+
+	return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
+}
+
 template <typename T>
 inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
@@ -1136,11 +1258,11 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
 }
 
 template <typename T>
-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+inline meshopt_CoverageStatistics meshopt_analyzeCoverage(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
 	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
 
-	return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
+	return meshopt_analyzeCoverage(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
 }
 
 template <typename T>
@@ -1167,6 +1289,14 @@ inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int*
 	return meshopt_buildMeshletsFlex(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, min_triangles, max_triangles, cone_weight, split_factor);
 }
 
+template <typename T>
+inline size_t meshopt_buildMeshletsSpatial(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight)
+{
+	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
+
+	return meshopt_buildMeshletsSpatial(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, min_triangles, max_triangles, fill_weight);
+}
+
 template <typename T>
 inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
@@ -1176,11 +1306,11 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde
 }
 
 template <typename T>
-inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size)
+inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
 {
 	meshopt_IndexAdapter<T> in(NULL, cluster_indices, total_index_count);
 
-	return meshopt_partitionClusters(destination, in.data, total_index_count, cluster_index_counts, cluster_count, vertex_count, target_partition_size);
+	return meshopt_partitionClusters(destination, in.data, total_index_count, cluster_index_counts, cluster_count, vertex_positions, vertex_count, vertex_positions_stride, target_partition_size);
 }
 
 template <typename T>

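The new meshopt_generateVertexRemapCustom wrappers above bridge a C++ functor or lambda into the library's C-style compare callback through the local Call::compare trampoline. A minimal usage sketch, assuming the callback fires for vertex pairs whose positions already match and returns whether they should be welded; the normals array and the 0.99 threshold are illustrative, not part of the diff:

```cpp
#include "meshoptimizer.h"

#include <vector>

// Hypothetical helper: weld vertices that share a position and have
// near-identical normals (xyz per vertex in both arrays).
size_t remapWithNormals(std::vector<unsigned int>& remap,
                        const std::vector<unsigned int>& indices,
                        const std::vector<float>& positions,
                        const std::vector<float>& normals)
{
	size_t vertex_count = positions.size() / 3;
	remap.resize(vertex_count);

	return meshopt_generateVertexRemapCustom(
	    remap.data(), indices.data(), indices.size(),
	    positions.data(), vertex_count, sizeof(float) * 3,
	    [&](unsigned int lhs, unsigned int rhs) {
		    // positions already compare equal here; weld only when the
		    // normals are close as well
		    float dot = normals[lhs * 3 + 0] * normals[rhs * 3 + 0] +
		                normals[lhs * 3 + 1] * normals[rhs * 3 + 1] +
		                normals[lhs * 3 + 2] * normals[rhs * 3 + 2];
		    return dot > 0.99f;
	    });
}
```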
+ 150 - 80
thirdparty/meshoptimizer/partition.cpp

@@ -5,6 +5,8 @@
 #include <math.h>
 #include <string.h>
 
+// This work is based on:
+// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991
 namespace meshopt
 {
 
@@ -15,26 +17,97 @@ struct ClusterAdjacency
 	unsigned int* shared;
 };
 
-static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, unsigned char* used, size_t vertex_count, meshopt_Allocator& allocator)
+static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count)
 {
-	unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);
+	(void)vertex_count;
+	(void)total_index_count;
 
-	// compute number of clusters referenced by each vertex
-	memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));
+	size_t cluster_start = 0;
+	size_t cluster_write = 0;
 
 	for (size_t i = 0; i < cluster_count; ++i)
 	{
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
+		offsets[i] = unsigned(cluster_write);
+
+		// copy cluster indices, skipping duplicates
+		for (size_t j = 0; j < cluster_index_counts[i]; ++j)
 		{
-			unsigned int v = cluster_indices[j];
+			unsigned int v = cluster_indices[cluster_start + j];
 			assert(v < vertex_count);
 
-			ref_offsets[v] += 1 - used[v];
+			data[cluster_write] = v;
+			cluster_write += 1 - used[v];
 			used[v] = 1;
 		}
 
+		// reset used flags for the next cluster
+		for (size_t j = offsets[i]; j < cluster_write; ++j)
+			used[data[j]] = 0;
+
+		cluster_start += cluster_index_counts[i];
+	}
+
+	assert(cluster_start == total_index_count);
+	assert(cluster_write <= total_index_count);
+	offsets[cluster_count] = unsigned(cluster_write);
+}
+
+static void computeClusterBounds(float* cluster_bounds, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, const float* vertex_positions, size_t vertex_positions_stride)
+{
+	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+	for (size_t i = 0; i < cluster_count; ++i)
+	{
+		float center[3] = {0, 0, 0};
+
+		// approximate center of the cluster by averaging all vertex positions
+		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
+		{
+			const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
+
+			center[0] += p[0];
+			center[1] += p[1];
+			center[2] += p[2];
+		}
+
+		// note: technically clusters can't be empty per meshopt_partitionClusters but we check for a division by zero in case that changes
+		if (size_t cluster_size = cluster_offsets[i + 1] - cluster_offsets[i])
+		{
+			center[0] /= float(cluster_size);
+			center[1] /= float(cluster_size);
+			center[2] /= float(cluster_size);
+		}
+
+		// compute radius of the bounding sphere for each cluster
+		float radiussq = 0;
+
 		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-			used[cluster_indices[j]] = 0;
+		{
+			const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
+
+			float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
+
+			radiussq = radiussq < d2 ? d2 : radiussq;
+		}
+
+		cluster_bounds[i * 4 + 0] = center[0];
+		cluster_bounds[i * 4 + 1] = center[1];
+		cluster_bounds[i * 4 + 2] = center[2];
+		cluster_bounds[i * 4 + 3] = sqrtf(radiussq);
+	}
+}
+
+static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator)
+{
+	unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);
+
+	// compute number of clusters referenced by each vertex
+	memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));
+
+	for (size_t i = 0; i < cluster_count; ++i)
+	{
+		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
+			ref_offsets[cluster_indices[j]]++;
 	}
 
 	// compute (worst-case) number of adjacent clusters for each cluster
@@ -43,21 +116,13 @@ static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned in
 	for (size_t i = 0; i < cluster_count; ++i)
 	{
 		size_t count = 0;
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-		{
-			unsigned int v = cluster_indices[j];
-			assert(v < vertex_count);
 
-			// worst case is every vertex has a disjoint cluster list
-			count += used[v] ? 0 : ref_offsets[v] - 1;
-			used[v] = 1;
-		}
+		// worst case is every vertex has a disjoint cluster list
+		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
+			count += ref_offsets[cluster_indices[j]] - 1;
 
 		// ... but only every other cluster can be adjacent in the end
 		total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1;
-
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-			used[cluster_indices[j]] = 0;
 	}
 
 	// we can now allocate adjacency buffers
@@ -81,19 +146,7 @@ static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned in
 	for (size_t i = 0; i < cluster_count; ++i)
 	{
 		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-		{
-			unsigned int v = cluster_indices[j];
-			assert(v < vertex_count);
-
-			if (used[v])
-				continue;
-
-			ref_data[ref_offsets[v]++] = unsigned(i);
-			used[v] = 1;
-		}
-
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-			used[cluster_indices[j]] = 0;
+			ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i);
 	}
 
 	// after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start
@@ -112,10 +165,6 @@ static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned in
 		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
 		{
 			unsigned int v = cluster_indices[j];
-			assert(v < vertex_count);
-
-			if (used[v])
-				continue;
 
 			// merge the entire cluster list of each vertex into current list
 			for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k)
@@ -144,13 +193,8 @@ static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned in
 					count++;
 				}
 			}
-
-			used[v] = 1;
 		}
 
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-			used[cluster_indices[j]] = 0;
-
 		// mark the end of the adjacency list; the next cluster will start there as well
 		adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count);
 	}
@@ -223,29 +267,6 @@ static GroupOrder heapPop(GroupOrder* heap, size_t size)
 	return top;
 }
 
-static unsigned int countTotal(const ClusterGroup* groups, int id, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, unsigned char* used)
-{
-	unsigned int total = 0;
-
-	for (int i = id; i >= 0; i = groups[i].next)
-	{
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-		{
-			unsigned int v = cluster_indices[j];
-			total += 1 - used[v];
-			used[v] = 1;
-		}
-	}
-
-	for (int i = id; i >= 0; i = groups[i].next)
-	{
-		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
-			used[cluster_indices[j]] = 0;
-	}
-
-	return total;
-}
-
 static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency)
 {
 	unsigned int total = 0;
@@ -264,7 +285,41 @@ static unsigned int countShared(const ClusterGroup* groups, int group1, int grou
 	return total;
 }
 
-static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size)
+static void mergeBounds(float* target, const float* source)
+{
+	float r1 = target[3], r2 = source[3];
+	float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
+	float d = sqrtf(dx * dx + dy * dy + dz * dz);
+
+	if (d + r1 < r2)
+	{
+		memcpy(target, source, 4 * sizeof(float));
+		return;
+	}
+
+	if (d + r2 > r1)
+	{
+		float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f;
+
+		target[0] += dx * k;
+		target[1] += dy * k;
+		target[2] += dz * k;
+		target[3] = (d + r2 + r1) / 2;
+	}
+}
+
+static float boundsScore(const float* target, const float* source)
+{
+	float r1 = target[3], r2 = source[3];
+	float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
+	float d = sqrtf(dx * dx + dy * dy + dz * dz);
+
+	float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2);
+
+	return mr > 0 ? r1 / mr : 0.f;
+}
+
+static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, const float* cluster_bounds)
 {
 	assert(groups[id].size > 0);
 
@@ -291,6 +346,10 @@ static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdj
 			// normalize shared count by the expected boundary of each group (+ keeps scoring symmetric)
 			float score = float(int(shared)) * (group_rsqrt + other_rsqrt);
 
+			// incorporate spatial score to favor merging nearby groups
+			if (cluster_bounds)
+				score *= 1.f + 0.4f * boundsScore(&cluster_bounds[id * 4], &cluster_bounds[other * 4]);
+
 			if (score > best_score)
 			{
 				best_group = other;
@@ -304,10 +363,12 @@ static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdj
 
 } // namespace meshopt
 
-size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size)
+size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
 {
 	using namespace meshopt;
 
+	assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
 	assert(target_partition_size > 0);
 
 	size_t max_partition_size = target_partition_size + target_partition_size * 3 / 8;
@@ -317,24 +378,25 @@ size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int*
 	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
 	memset(used, 0, vertex_count);
 
-	// build cluster index offsets as a prefix sum
+	unsigned int* cluster_newindices = allocator.allocate<unsigned int>(total_index_count);
 	unsigned int* cluster_offsets = allocator.allocate<unsigned int>(cluster_count + 1);
-	unsigned int cluster_nextoffset = 0;
 
-	for (size_t i = 0; i < cluster_count; ++i)
-	{
-		assert(cluster_index_counts[i] > 0);
+	// make new cluster index list that filters out duplicate indices
+	filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count);
+	cluster_indices = cluster_newindices;
 
-		cluster_offsets[i] = cluster_nextoffset;
-		cluster_nextoffset += cluster_index_counts[i];
-	}
+	// compute bounding sphere for each cluster if positions are provided
+	float* cluster_bounds = NULL;
 
-	assert(cluster_nextoffset == total_index_count);
-	cluster_offsets[cluster_count] = unsigned(total_index_count);
+	if (vertex_positions)
+	{
+		cluster_bounds = allocator.allocate<float>(cluster_count * 4);
+		computeClusterBounds(cluster_bounds, cluster_indices, cluster_offsets, cluster_count, vertex_positions, vertex_positions_stride);
+	}
 
 	// build cluster adjacency along with edge weights (shared vertex count)
 	ClusterAdjacency adjacency = {};
-	buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, used, vertex_count, allocator);
+	buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator);
 
 	ClusterGroup* groups = allocator.allocate<ClusterGroup>(cluster_count);
 
@@ -347,7 +409,8 @@ size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int*
 		groups[i].group = int(i);
 		groups[i].next = -1;
 		groups[i].size = 1;
-		groups[i].vertices = countTotal(groups, int(i), cluster_indices, cluster_offsets, used);
+		groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i];
+		assert(groups[i].vertices > 0);
 
 		GroupOrder item = {};
 		item.id = unsigned(i);
@@ -376,7 +439,7 @@ size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int*
 		if (groups[top.id].size >= target_partition_size)
 			continue;
 
-		int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size);
+		int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, cluster_bounds);
 
 		// we can't grow the group any more, emit as is
 		if (best_group == -1)
@@ -395,7 +458,7 @@ size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int*
 				break;
 			}
 
-		// update group sizes; note, the vertex update is an approximation which avoids recomputing the true size via countTotal
+		// update group sizes; note, the vertex update is an O(1) approximation which avoids recomputing the true size
 		groups[top.id].size += groups[best_group].size;
 		groups[top.id].vertices += groups[best_group].vertices;
 		groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1;
@@ -403,6 +466,13 @@ size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int*
 		groups[best_group].size = 0;
 		groups[best_group].vertices = 0;
 
+		// merge bounding spheres if bounds are available
+		if (cluster_bounds)
+		{
+			mergeBounds(&cluster_bounds[top.id * 4], &cluster_bounds[best_group * 4]);
+			memset(&cluster_bounds[best_group * 4], 0, 4 * sizeof(float));
+		}
+
 		// re-associate all clusters back to the merged group
 		for (int i = top.id; i >= 0; i = groups[i].next)
 			groups[i].group = int(top.id);

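meshopt_partitionClusters now optionally takes vertex positions: when provided, each cluster gets a bounding sphere (computeClusterBounds), merged groups keep a conservative sphere union (mergeBounds), and pickGroupToMerge scales its score by up to 1.4x for spatially close candidates via boundsScore. A minimal calling sketch under the new signature, with illustrative input names; passing NULL positions keeps the previous connectivity-only behavior:

```cpp
#include "meshoptimizer.h"

#include <vector>

std::vector<unsigned int> partitionMeshlets(
    const std::vector<unsigned int>& cluster_indices,      // concatenated per-cluster index lists
    const std::vector<unsigned int>& cluster_index_counts, // index count of each cluster
    const float* vertex_positions,                         // may be NULL (topology-only merging)
    size_t vertex_count)
{
	// one partition id is written per cluster
	std::vector<unsigned int> destination(cluster_index_counts.size());

	size_t partition_count = meshopt_partitionClusters(
	    destination.data(),
	    cluster_indices.data(), cluster_indices.size(),
	    cluster_index_counts.data(), cluster_index_counts.size(),
	    vertex_positions, vertex_count, sizeof(float) * 3,
	    /* target_partition_size= */ 8);

	(void)partition_count; // ids are in [0, partition_count)
	return destination;
}
```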
+ 113 - 53
thirdparty/meshoptimizer/overdrawanalyzer.cpp → thirdparty/meshoptimizer/rasterizer.cpp

@@ -18,14 +18,6 @@ struct OverdrawBuffer
 	unsigned int overdraw[kViewport][kViewport][2];
 };
 
-#ifndef min
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
-
-#ifndef max
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#endif
-
 static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
 {
 	// z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
@@ -36,8 +28,8 @@ static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1,
 	float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
 	float invdet = (det == 0) ? 0 : 1 / det;
 
-	dzdx = (z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1) * invdet;
-	dzdy = (x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1) * invdet;
+	dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
+	dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;
 
 	return det;
 }
@@ -76,11 +68,26 @@ static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, f
 	// bounding rectangle, clipped against viewport
 	// since we rasterize pixels with covered centers, min >0.5 should round up
 	// as for max, due to top-left filling convention we will never rasterize right/bottom edges
-	// so max >= 0.5 should round down
-	int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
-	int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
-	int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
-	int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);
+	// so max >= 0.5 should round down for inclusive bounds, and up for exclusive (in our case)
+	int minx = X1 < X2 ? X1 : X2;
+	minx = minx < X3 ? minx : X3;
+	minx = (minx + 7) >> 4;
+	minx = minx < 0 ? 0 : minx;
+
+	int miny = Y1 < Y2 ? Y1 : Y2;
+	miny = miny < Y3 ? miny : Y3;
+	miny = (miny + 7) >> 4;
+	miny = miny < 0 ? 0 : miny;
+
+	int maxx = X1 > X2 ? X1 : X2;
+	maxx = maxx > X3 ? maxx : X3;
+	maxx = (maxx + 7) >> 4;
+	maxx = maxx > kViewport ? kViewport : maxx;
+
+	int maxy = Y1 > Y2 ? Y1 : Y2;
+	maxy = maxy > Y3 ? maxy : Y3;
+	maxy = (maxy + 7) >> 4;
+	maxy = maxy > kViewport ? kViewport : maxy;
 
 	// deltas, 28.4 fixed point
 	int DX12 = X1 - X2;
@@ -139,22 +146,10 @@ static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, f
 	}
 }
 
-} // namespace meshopt
-
-meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+static float transformTriangles(float* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
-	using namespace meshopt;
-
-	assert(index_count % 3 == 0);
-	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
-	assert(vertex_positions_stride % sizeof(float) == 0);
-
-	meshopt_Allocator allocator;
-
 	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
 
-	meshopt_OverdrawStatistics result = {};
-
 	float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
 	float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
 
@@ -164,15 +159,20 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices,
 
 		for (int j = 0; j < 3; ++j)
 		{
-			minv[j] = min(minv[j], v[j]);
-			maxv[j] = max(maxv[j], v[j]);
+			float vj = v[j];
+
+			minv[j] = minv[j] > vj ? vj : minv[j];
+			maxv[j] = maxv[j] < vj ? vj : maxv[j];
 		}
 	}
 
-	float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2]));
-	float scale = kViewport / extent;
+	float extent = 0.f;
 
-	float* triangles = allocator.allocate<float>(index_count * 3);
+	extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
+	extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
+	extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
+
+	float scale = kViewport / extent;
 
 	for (size_t i = 0; i < index_count; ++i)
 	{
@@ -186,31 +186,55 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices,
 		triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
 	}
 
-	OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
+	return extent;
+}
 
-	for (int axis = 0; axis < 3; ++axis)
+static void rasterizeTriangles(OverdrawBuffer* buffer, const float* triangles, size_t index_count, int axis)
+{
+	for (size_t i = 0; i < index_count; i += 3)
 	{
-		memset(buffer, 0, sizeof(OverdrawBuffer));
+		const float* vn0 = &triangles[3 * (i + 0)];
+		const float* vn1 = &triangles[3 * (i + 1)];
+		const float* vn2 = &triangles[3 * (i + 2)];
 
-		for (size_t i = 0; i < index_count; i += 3)
+		switch (axis)
 		{
-			const float* vn0 = &triangles[3 * (i + 0)];
-			const float* vn1 = &triangles[3 * (i + 1)];
-			const float* vn2 = &triangles[3 * (i + 2)];
-
-			switch (axis)
-			{
-			case 0:
-				rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
-				break;
-			case 1:
-				rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
-				break;
-			case 2:
-				rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
-				break;
-			}
+		case 0:
+			rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
+			break;
+		case 1:
+			rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
+			break;
+		case 2:
+			rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
+			break;
 		}
+	}
+}
+
+} // namespace meshopt
+
+meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	meshopt_Allocator allocator;
+
+	meshopt_OverdrawStatistics result = {};
+
+	float* triangles = allocator.allocate<float>(index_count * 3);
+	transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+
+	OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
+
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		memset(buffer, 0, sizeof(OverdrawBuffer));
+		rasterizeTriangles(buffer, triangles, index_count, axis);
 
 		for (int y = 0; y < kViewport; ++y)
 			for (int x = 0; x < kViewport; ++x)
@@ -227,3 +251,39 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices,
 
 	return result;
 }
+
+meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	meshopt_Allocator allocator;
+
+	meshopt_CoverageStatistics result = {};
+
+	float* triangles = allocator.allocate<float>(index_count * 3);
+	float extent = transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+
+	OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
+
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		memset(buffer, 0, sizeof(OverdrawBuffer));
+		rasterizeTriangles(buffer, triangles, index_count, axis);
+
+		unsigned int covered = 0;
+
+		for (int y = 0; y < kViewport; ++y)
+			for (int x = 0; x < kViewport; ++x)
+				covered += (buffer->overdraw[y][x][0] | buffer->overdraw[y][x][1]) > 0;
+
+		result.coverage[axis] = float(covered) / float(kViewport * kViewport);
+	}
+
+	result.extent = extent;
+
+	return result;
+}

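meshopt_analyzeCoverage reuses the overdraw rasterizer but only counts pixels touched at least once, reporting per-axis coverage plus the extent that transformTriangles derived. A usage sketch; the field names follow the code above, and the input arrays are illustrative:

```cpp
#include "meshoptimizer.h"

#include <stdio.h>

void printCoverage(const unsigned int* indices, size_t index_count,
                   const float* positions, size_t vertex_count)
{
	meshopt_CoverageStatistics stats = meshopt_analyzeCoverage(
	    indices, index_count, positions, vertex_count, sizeof(float) * 3);

	// coverage[axis] is the fraction of the viewport covered when the mesh
	// is projected along that axis; extent is the bounding box's largest side
	printf("coverage %.3f %.3f %.3f, extent %g\n",
	       stats.coverage[0], stats.coverage[1], stats.coverage[2],
	       stats.extent);
}
```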
+ 54 - 6
thirdparty/meshoptimizer/simplifier.cpp

@@ -118,10 +118,17 @@ struct PositionHasher
 		unsigned int ri = sparse_remap ? sparse_remap[index] : index;
 		const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + ri * vertex_stride_float);
 
+		unsigned int x = key[0], y = key[1], z = key[2];
+
+		// replace negative zero with zero
+		x = (x == 0x80000000) ? 0 : x;
+		y = (y == 0x80000000) ? 0 : y;
+		z = (z == 0x80000000) ? 0 : z;
+
 		// scramble bits to make sure that integer coordinates have entropy in lower bits
-		unsigned int x = key[0] ^ (key[0] >> 17);
-		unsigned int y = key[1] ^ (key[1] >> 17);
-		unsigned int z = key[2] ^ (key[2] >> 17);
+		x ^= x >> 17;
+		y ^= y >> 17;
+		z ^= z >> 17;
 
 		// Optimized Spatial Hashing for Collision Detection of Deformable Objects
 		return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
@@ -132,7 +139,10 @@ struct PositionHasher
 		unsigned int li = sparse_remap ? sparse_remap[lhs] : lhs;
 		unsigned int ri = sparse_remap ? sparse_remap[rhs] : rhs;
 
-		return memcmp(vertex_positions + li * vertex_stride_float, vertex_positions + ri * vertex_stride_float, sizeof(float) * 3) == 0;
+		const float* lv = vertex_positions + li * vertex_stride_float;
+		const float* rv = vertex_positions + ri * vertex_stride_float;
+
+		return lv[0] == rv[0] && lv[1] == rv[1] && lv[2] == rv[2];
 	}
 };
 
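The hasher change above canonicalizes negative zero because the equality test now compares floats, and -0.f == 0.f even though the two values have different bit patterns; without the fix, equal positions could land in different hash buckets. A standalone check of that property:

```cpp
#include <assert.h>
#include <string.h>

int main()
{
	float pz = 0.f, nz = -0.f;
	unsigned int pbits, nbits;
	memcpy(&pbits, &pz, sizeof(float));
	memcpy(&nbits, &nz, sizeof(float));

	assert(pz == nz);       // equal under float comparison
	assert(pbits != nbits); // 0x00000000 vs 0x80000000 before canonicalization
	return 0;
}
```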
@@ -208,6 +218,11 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f
 		remap[index] = *entry;
 	}
 
+	allocator.deallocate(table);
+
+	if (!wedge)
+		return;
+
 	// build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex?
 	// entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i
 	for (size_t i = 0; i < vertex_count; ++i)
@@ -221,8 +236,6 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f
 			wedge[i] = wedge[r];
 			wedge[r] = unsigned(i);
 		}
-
-	allocator.deallocate(table);
 }
 
 static unsigned int* buildSparseRemap(unsigned int* indices, size_t index_count, size_t vertex_count, size_t* out_vertex_count, meshopt_Allocator& allocator)
@@ -1862,6 +1875,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
 	updateEdgeAdjacency(adjacency, result, index_count, vertex_count, NULL);
 
 	// build position remap that maps each vertex to the one with identical position
+	// wedge table stores next vertex with identical position for each vertex
 	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
 	unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
 	buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, allocator);
@@ -2216,6 +2230,40 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
 	return write;
 }
 
+size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, float target_error)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+	assert(target_error >= 0);
+
+	meshopt_Allocator allocator;
+
+	unsigned int* result = destination;
+	if (result != indices)
+		memcpy(result, indices, index_count * sizeof(unsigned int));
+
+	// build position remap that maps each vertex to the one with identical position
+	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+	buildPositionRemap(remap, NULL, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, allocator);
+
+	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, NULL);
+
+	unsigned int* components = allocator.allocate<unsigned int>(vertex_count);
+	size_t component_count = buildComponents(components, vertex_count, indices, index_count, remap);
+
+	float* component_errors = allocator.allocate<float>(component_count * 4); // overallocate for temporary use inside measureComponents
+	measureComponents(component_errors, component_count, components, vertex_positions, vertex_count);
+
+	float component_nexterror = 0;
+	size_t result_count = pruneComponents(result, index_count, components, component_errors, component_count, target_error * target_error, component_nexterror);
+
+	return result_count;
+}
+
 size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count)
 {
 	using namespace meshopt;

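meshopt_simplifyPrune removes small disconnected components whose error estimate falls below target_error; since rescalePositions maps the mesh into a unit cube, the threshold reads as relative to the mesh extent. A minimal sketch with an illustrative 1% threshold; note the destination may alias the input thanks to the memcpy guard above:

```cpp
#include "meshoptimizer.h"

#include <vector>

size_t pruneDebris(std::vector<unsigned int>& indices,
                   const float* positions, size_t vertex_count)
{
	// in-place pruning: destination == indices is handled by the function
	size_t new_count = meshopt_simplifyPrune(
	    indices.data(), indices.data(), indices.size(),
	    positions, vertex_count, sizeof(float) * 3,
	    /* target_error= */ 0.01f);

	indices.resize(new_count);
	return new_count;
}
```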
+ 188 - 42
thirdparty/meshoptimizer/spatialorder.cpp

@@ -10,18 +10,19 @@
 namespace meshopt
 {
 
-// "Insert" two 0 bits after each of the 10 low bits of x
-inline unsigned int part1By2(unsigned int x)
+// "Insert" two 0 bits after each of the 20 low bits of x
+inline unsigned long long part1By2(unsigned long long x)
 {
-	x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
-	x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
-	x = (x ^ (x << 8)) & 0x0300f00f;  // x = ---- --98 ---- ---- 7654 ---- ---- 3210
-	x = (x ^ (x << 4)) & 0x030c30c3;  // x = ---- --98 ---- 76-- --54 ---- 32-- --10
-	x = (x ^ (x << 2)) & 0x09249249;  // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+	x &= 0x000fffffull;                          // x = ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- jihg fedc ba98 7654 3210
+	x = (x ^ (x << 32)) & 0x000f00000000ffffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- ---- ---- ---- ---- fedc ba98 7654 3210
+	x = (x ^ (x << 16)) & 0x000f0000ff0000ffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- fedc ba98 ---- ---- ---- ---- 7654 3210
+	x = (x ^ (x << 8)) & 0x000f00f00f00f00full;  // x = ---- ---- ---- jihg ---- ---- fedc ---- ---- ba98 ---- ---- 7654 ---- ---- 3210
+	x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull;  // x = ---- ---- ji-- --hg ---- fe-- --dc ---- ba-- --98 ---- 76-- --54 ---- 32-- --10
+	x = (x ^ (x << 2)) & 0x0249249249249249ull;  // x = ---- --j- -i-- h--g --f- -e-- d--c --b- -a-- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
 	return x;
 }
 
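A worked example of the widened interleave, using a standalone copy of the function above: input bits 0 and 2 land at output bits 0 and 6, so three interleaved 20-bit coordinates pack into a 60-bit Morton code.

```cpp
#include <assert.h>

static unsigned long long spread(unsigned long long x) // copy of part1By2
{
	x &= 0x000fffffull;
	x = (x ^ (x << 32)) & 0x000f00000000ffffull;
	x = (x ^ (x << 16)) & 0x000f0000ff0000ffull;
	x = (x ^ (x << 8)) & 0x000f00f00f00f00full;
	x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull;
	x = (x ^ (x << 2)) & 0x0249249249249249ull;
	return x;
}

int main()
{
	assert(spread(0x5) == 0x41); // 0b101 -> 0b1000001
	assert((spread(1) | (spread(1) << 1) | (spread(1) << 2)) == 0x7); // xyz of (1,1,1)
	return 0;
}
```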
-static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+static void computeOrder(unsigned long long* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, bool morton)
 {
 	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
 
@@ -47,66 +48,170 @@ static void computeOrder(unsigned int* result, const float* vertex_positions_dat
 	extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
 	extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
 
-	float scale = extent == 0 ? 0.f : 1.f / extent;
+	// rescale each axis to 16 bits to get 48-bit Morton codes
+	float scale = extent == 0 ? 0.f : 65535.f / extent;
 
 	// generate Morton order based on the position inside a unit cube
 	for (size_t i = 0; i < vertex_count; ++i)
 	{
 		const float* v = vertex_positions_data + i * vertex_stride_float;
 
-		int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f);
-		int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f);
-		int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f);
+		int x = int((v[0] - minv[0]) * scale + 0.5f);
+		int y = int((v[1] - minv[1]) * scale + 0.5f);
+		int z = int((v[2] - minv[2]) * scale + 0.5f);
 
-		result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
+		if (morton)
+			result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
+		else
+			result[i] = ((unsigned long long)x << 0) | ((unsigned long long)y << 20) | ((unsigned long long)z << 40);
 	}
 }
 
-static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
+static void radixSort10(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count)
 {
+	unsigned int hist[1024];
 	memset(hist, 0, sizeof(hist));
 
-	// compute 3 10-bit histograms in parallel
+	// compute histogram (assume keys are 10-bit)
+	for (size_t i = 0; i < count; ++i)
+		hist[keys[i]]++;
+
+	unsigned int sum = 0;
+
+	// replace histogram data with prefix histogram sums in-place
+	for (int i = 0; i < 1024; ++i)
+	{
+		unsigned int h = hist[i];
+		hist[i] = sum;
+		sum += h;
+	}
+
+	assert(sum == count);
+
+	// reorder values
 	for (size_t i = 0; i < count; ++i)
 	{
-		unsigned int id = data[i];
+		unsigned int id = keys[source[i]];
 
-		hist[(id >> 0) & 1023][0]++;
-		hist[(id >> 10) & 1023][1]++;
-		hist[(id >> 20) & 1023][2]++;
+		destination[hist[id]++] = source[i];
 	}
+}
 
-	unsigned int sumx = 0, sumy = 0, sumz = 0;
+static void computeHistogram(unsigned int (&hist)[256][2], const unsigned short* data, size_t count)
+{
+	memset(hist, 0, sizeof(hist));
+
+	// compute 2 8-bit histograms in parallel
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned long long id = data[i];
+
+		hist[(id >> 0) & 255][0]++;
+		hist[(id >> 8) & 255][1]++;
+	}
+
+	unsigned int sum0 = 0, sum1 = 0;
 
 	// replace histogram data with prefix histogram sums in-place
-	for (int i = 0; i < 1024; ++i)
+	for (int i = 0; i < 256; ++i)
 	{
-		unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2];
+		unsigned int h0 = hist[i][0], h1 = hist[i][1];
 
-		hist[i][0] = sumx;
-		hist[i][1] = sumy;
-		hist[i][2] = sumz;
+		hist[i][0] = sum0;
+		hist[i][1] = sum1;
 
-		sumx += hx;
-		sumy += hy;
-		sumz += hz;
+		sum0 += h0;
+		sum1 += h1;
 	}
 
-	assert(sumx == count && sumy == count && sumz == count);
+	assert(sum0 == count && sum1 == count);
 }
 
-static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
+static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count, unsigned int (&hist)[256][2], int pass)
 {
-	int bitoff = pass * 10;
+	int bitoff = pass * 8;
 
 	for (size_t i = 0; i < count; ++i)
 	{
-		unsigned int id = (keys[source[i]] >> bitoff) & 1023;
+		unsigned int id = unsigned(keys[source[i]] >> bitoff) & 255;
 
 		destination[hist[id][pass]++] = source[i];
 	}
 }
 
+static void partitionPoints(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count)
+{
+	size_t l = 0, r = split;
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned char side = sides[order[i]];
+		target[side ? r : l] = order[i];
+		l += 1;
+		l -= side;
+		r += side;
+	}
+
+	assert(l == split && r == count);
+}
+
+static void splitPoints(unsigned int* destination, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, const unsigned long long* keys, size_t count, void* scratch, size_t cluster_size)
+{
+	if (count <= cluster_size)
+	{
+		memcpy(destination, orderx, count * sizeof(unsigned int));
+		return;
+	}
+
+	unsigned int* axes[3] = {orderx, ordery, orderz};
+
+	int bestk = -1;
+	unsigned int bestdim = 0;
+
+	for (int k = 0; k < 3; ++k)
+	{
+		const unsigned int mask = (1 << 20) - 1;
+		unsigned int dim = (unsigned(keys[axes[k][count - 1]] >> (k * 20)) & mask) - (unsigned(keys[axes[k][0]] >> (k * 20)) & mask);
+
+		if (dim >= bestdim)
+		{
+			bestk = k;
+			bestdim = dim;
+		}
+	}
+
+	assert(bestk >= 0);
+
+	// split roughly in half, with the left split always being aligned to cluster size
+	size_t split = ((count / 2) + cluster_size - 1) / cluster_size * cluster_size;
+	assert(split > 0 && split < count);
+
+	// mark sides of split for partitioning
+	unsigned char* sides = static_cast<unsigned char*>(scratch) + count * sizeof(unsigned int);
+
+	for (size_t i = 0; i < split; ++i)
+		sides[axes[bestk][i]] = 0;
+
+	for (size_t i = split; i < count; ++i)
+		sides[axes[bestk][i]] = 1;
+
+	// partition all axes into two sides, maintaining order
+	unsigned int* temp = static_cast<unsigned int*>(scratch);
+
+	for (int k = 0; k < 3; ++k)
+	{
+		if (k == bestk)
+			continue;
+
+		unsigned int* axis = axes[k];
+		memcpy(temp, axis, sizeof(unsigned int) * count);
+		partitionPoints(axis, temp, sides, split, count);
+	}
+
+	splitPoints(destination, orderx, ordery, orderz, keys, split, scratch, cluster_size);
+	splitPoints(destination + split, orderx + split, ordery + split, orderz + split, keys, count - split, scratch, cluster_size);
+}
+
 } // namespace meshopt
 
 void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
@@ -118,21 +223,26 @@ void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_pos
 
 	meshopt_Allocator allocator;
 
-	unsigned int* keys = allocator.allocate<unsigned int>(vertex_count);
-	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);
-
-	unsigned int hist[1024][3];
-	computeHistogram(hist, keys, vertex_count);
+	unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
+	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ true);
 
-	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count);
+	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 2b for keys
+	unsigned short* keyk = (unsigned short*)(scratch + vertex_count);
 
 	for (size_t i = 0; i < vertex_count; ++i)
 		destination[i] = unsigned(i);
 
-	// 3-pass radix sort computes the resulting order into scratch
-	radixPass(scratch, destination, keys, vertex_count, hist, 0);
-	radixPass(destination, scratch, keys, vertex_count, hist, 1);
-	radixPass(scratch, destination, keys, vertex_count, hist, 2);
+	unsigned int* order[] = {scratch, destination};
+
+	// 5-pass radix sort computes the resulting order into scratch
+	for (int k = 0; k < 5; ++k)
+	{
+		// copy 10-bit key segments into keyk to reduce cache pressure during radix pass
+		for (size_t i = 0; i < vertex_count; ++i)
+			keyk[i] = (unsigned short)((keys[i] >> (k * 10)) & 1023);
+
+		radixSort10(order[k % 2], order[(k + 1) % 2], keyk, vertex_count);
+	}
 
 	// since our remap table is mapping old=>new, we need to reverse it
 	for (size_t i = 0; i < vertex_count; ++i)
@@ -192,3 +302,39 @@ void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int*
 		destination[r * 3 + 2] = c;
 	}
 }
+
+void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size)
+{
+	using namespace meshopt;
+
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+	assert(cluster_size > 0);
+
+	meshopt_Allocator allocator;
+
+	unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
+	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ false);
+
+	unsigned int* order = allocator.allocate<unsigned int>(vertex_count * 3);
+	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 1b for side or 2b for keys
+	unsigned short* keyk = reinterpret_cast<unsigned short*>(scratch + vertex_count);
+
+	for (int k = 0; k < 3; ++k)
+	{
+		// copy 16-bit key segments into keyk to reduce cache pressure during radix pass
+		for (size_t i = 0; i < vertex_count; ++i)
+			keyk[i] = (unsigned short)(keys[i] >> (k * 20));
+
+		unsigned int hist[256][2];
+		computeHistogram(hist, keyk, vertex_count);
+
+		for (size_t i = 0; i < vertex_count; ++i)
+			order[k * vertex_count + i] = unsigned(i);
+
+		radixPass(scratch, order + k * vertex_count, keyk, vertex_count, hist, 0);
+		radixPass(order + k * vertex_count, scratch, keyk, vertex_count, hist, 1);
+	}
+
+	splitPoints(destination, order, order + vertex_count, order + 2 * vertex_count, keys, vertex_count, scratch, cluster_size);
+}

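meshopt_spatialClusterPoints combines the three per-axis radix sorts with the recursive median split above: destination receives a permutation of [0, vertex_count) in which each consecutive cluster_size block (except possibly the last) forms a spatially coherent cluster. A usage sketch with an illustrative cluster size:

```cpp
#include "meshoptimizer.h"

#include <vector>

std::vector<unsigned int> clusterPoints(const float* positions, size_t count)
{
	std::vector<unsigned int> order(count);

	meshopt_spatialClusterPoints(order.data(), positions, count,
	    sizeof(float) * 3, /* cluster_size= */ 128);

	// each consecutive block of 128 entries in order forms one cluster
	return order;
}
```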
+ 5 - 4
thirdparty/meshoptimizer/vertexcodec.cpp

@@ -1643,13 +1643,16 @@ static unsigned int cpuid = getCpuFeatures();
 
 } // namespace meshopt
 
-size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level)
+size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version)
 {
 	using namespace meshopt;
 
 	assert(vertex_size > 0 && vertex_size <= 256);
 	assert(vertex_size % 4 == 0);
 	assert(level >= 0 && level <= 9); // only a subset of this range is used right now
+	assert(version < 0 || unsigned(version) <= kDecodeVertexVersion);
+
+	version = version < 0 ? gEncodeVertexVersion : version;
 
 #if TRACE
 	memset(vertexstats, 0, sizeof(vertexstats));
@@ -1663,8 +1666,6 @@ size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size
 	if (size_t(data_end - data) < 1)
 		return 0;
 
-	int version = gEncodeVertexVersion;
-
 	*data++ = (unsigned char)(kVertexHeader | version);
 
 	unsigned char first_vertex[256] = {};
@@ -1777,7 +1778,7 @@ size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size
 
 size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size)
 {
-	return meshopt_encodeVertexBufferLevel(buffer, buffer_size, vertices, vertex_count, vertex_size, meshopt::kEncodeDefaultLevel);
+	return meshopt_encodeVertexBufferLevel(buffer, buffer_size, vertices, vertex_count, vertex_size, meshopt::kEncodeDefaultLevel, meshopt::gEncodeVertexVersion);
 }
 
 size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size)

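The extra version parameter lets callers pin the encoded bitstream format, with -1 selecting the library default (gEncodeVertexVersion); the assert restricts it to versions the decoder understands. A sketch of pinning an older version for compatibility; level 2 and version 0 are illustrative values assumed to lie within the asserted ranges:

```cpp
#include "meshoptimizer.h"

#include <vector>

std::vector<unsigned char> encodeCompat(const void* vertices,
                                        size_t vertex_count,
                                        size_t vertex_size) // multiple of 4, <= 256
{
	std::vector<unsigned char> buffer(
	    meshopt_encodeVertexBufferBound(vertex_count, vertex_size));

	size_t size = meshopt_encodeVertexBufferLevel(
	    buffer.data(), buffer.size(), vertices, vertex_count, vertex_size,
	    /* level= */ 2, /* version= */ 0);

	buffer.resize(size); // 0 means the buffer was too small
	return buffer;
}
```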
+ 11 - 10
thirdparty/meshoptimizer/vertexfilter.cpp

@@ -201,7 +201,7 @@ inline uint64_t rotateleft64(uint64_t v, int x)
 #endif
 
 #ifdef SIMD_SSE
-static void decodeFilterOctSimd(signed char* data, size_t count)
+static void decodeFilterOctSimd8(signed char* data, size_t count)
 {
 	const __m128 sign = _mm_set1_ps(-0.f);
 
@@ -246,7 +246,7 @@ static void decodeFilterOctSimd(signed char* data, size_t count)
 	}
 }
 
-static void decodeFilterOctSimd(short* data, size_t count)
+static void decodeFilterOctSimd16(short* data, size_t count)
 {
 	const __m128 sign = _mm_set1_ps(-0.f);
 
@@ -295,8 +295,9 @@ static void decodeFilterOctSimd(short* data, size_t count)
 		__m128i res_1 = _mm_unpackhi_epi16(xzr, y0r);
 
 		// patch in .w
-		res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000)));
-		res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000)));
+		__m128i maskw = _mm_set_epi32(0xffff0000, 0, 0xffff0000, 0);
+		res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), maskw));
+		res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), maskw));
 
 		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
 		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
@@ -404,7 +405,7 @@ inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y)
 #endif
 
 #ifdef SIMD_NEON
-static void decodeFilterOctSimd(signed char* data, size_t count)
+static void decodeFilterOctSimd8(signed char* data, size_t count)
 {
 	const int32x4_t sign = vdupq_n_s32(0x80000000);
 
@@ -453,7 +454,7 @@ static void decodeFilterOctSimd(signed char* data, size_t count)
 	}
 }
 
-static void decodeFilterOctSimd(short* data, size_t count)
+static void decodeFilterOctSimd16(short* data, size_t count)
 {
 	const int32x4_t sign = vdupq_n_s32(0x80000000);
 
@@ -598,7 +599,7 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
 #endif
 
 #ifdef SIMD_WASM
-static void decodeFilterOctSimd(signed char* data, size_t count)
+static void decodeFilterOctSimd8(signed char* data, size_t count)
 {
 	const v128_t sign = wasm_f32x4_splat(-0.f);
 
@@ -647,7 +648,7 @@ static void decodeFilterOctSimd(signed char* data, size_t count)
 	}
 }
 
-static void decodeFilterOctSimd(short* data, size_t count)
+static void decodeFilterOctSimd16(short* data, size_t count)
 {
 	const v128_t sign = wasm_f32x4_splat(-0.f);
 	const v128_t zmask = wasm_i32x4_splat(0x7fff);
@@ -833,9 +834,9 @@ void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride)
 
 #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
 	if (stride == 4)
-		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), count, 4);
+		dispatchSimd(decodeFilterOctSimd8, static_cast<signed char*>(buffer), count, 4);
 	else
-		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), count, 4);
+		dispatchSimd(decodeFilterOctSimd16, static_cast<short*>(buffer), count, 4);
 #else
 	if (stride == 4)
 		decodeFilterOct(static_cast<signed char*>(buffer), count);

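Renaming the overloads to decodeFilterOctSimd8/decodeFilterOctSimd16 makes the stride-based dispatch explicit: stride 4 decodes 8-bit octahedral normals in place, stride 8 decodes 16-bit ones. Caller-side usage is unchanged:

```cpp
#include "meshoptimizer.h"

// decode count 4-byte octahedral-encoded normals in place
void decodeNormals8(signed char* normals, size_t count)
{
	meshopt_decodeFilterOct(normals, count, 4);
}

// decode count 8-byte (4 x 16-bit) octahedral-encoded normals in place
void decodeNormals16(short* normals, size_t count)
{
	meshopt_decodeFilterOct(normals, count, 8);
}
```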
+ 0 - 58
thirdparty/meshoptimizer/vfetchanalyzer.cpp

@@ -1,58 +0,0 @@
-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
-#include "meshoptimizer.h"
-
-#include <assert.h>
-#include <string.h>
-
-meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
-{
-	assert(index_count % 3 == 0);
-	assert(vertex_size > 0 && vertex_size <= 256);
-
-	meshopt_Allocator allocator;
-
-	meshopt_VertexFetchStatistics result = {};
-
-	unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
-	memset(vertex_visited, 0, vertex_count);
-
-	const size_t kCacheLine = 64;
-	const size_t kCacheSize = 128 * 1024;
-
-	// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
-	size_t cache[kCacheSize / kCacheLine] = {};
-
-	for (size_t i = 0; i < index_count; ++i)
-	{
-		unsigned int index = indices[i];
-		assert(index < vertex_count);
-
-		vertex_visited[index] = 1;
-
-		size_t start_address = index * vertex_size;
-		size_t end_address = start_address + vertex_size;
-
-		size_t start_tag = start_address / kCacheLine;
-		size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
-
-		assert(start_tag < end_tag);
-
-		for (size_t tag = start_tag; tag < end_tag; ++tag)
-		{
-			size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
-
-			// we store +1 since cache is filled with 0 by default
-			result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
-			cache[line] = tag + 1;
-		}
-	}
-
-	size_t unique_vertex_count = 0;
-
-	for (size_t i = 0; i < vertex_count; ++i)
-		unique_vertex_count += vertex_visited[i];
-
-	result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
-
-	return result;
-}
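For reference, the deleted analyzer modeled vertex fetch with a 128 KiB direct-mapped cache of 64-byte lines, charging one full line for every cold tag a vertex touches. A worked example of that cost model: a 36-byte vertex at index 10 spans addresses [360, 396), which covers cache line tags 5 and 6, so a cold fetch is charged 2 * 64 = 128 bytes.

```cpp
#include <assert.h>
#include <stddef.h>

int main()
{
	const size_t kCacheLine = 64;
	size_t vertex_size = 36, index = 10;

	size_t start_address = index * vertex_size;       // 360
	size_t end_address = start_address + vertex_size; // 396

	size_t start_tag = start_address / kCacheLine;                // 5
	size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; // 7 (exclusive)

	assert(end_tag - start_tag == 2); // two cold lines -> 128 bytes fetched
	return 0;
}
```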