
Updated meshoptimizer.

Бранимир Караџић 2 years ago
parent commit 6bab752aca

+ 33 - 6
3rdparty/meshoptimizer/src/indexgenerator.cpp

@@ -178,6 +178,22 @@ static void buildPositionRemap(unsigned int* remap, const float* vertex_position
 
 		remap[index] = *entry;
 	}
+
+	allocator.deallocate(vertex_table);
+}
+
+template <size_t BlockSize>
+static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
+{
+	size_t block_size = BlockSize == 0 ? vertex_size : BlockSize;
+	assert(block_size == vertex_size);
+
+	for (size_t i = 0; i < vertex_count; ++i)
+		if (remap[i] != ~0u)
+		{
+			assert(remap[i] < vertex_count);
+			memcpy(static_cast<unsigned char*>(destination) + remap[i] * block_size, static_cast<const unsigned char*>(vertices) + i * block_size, block_size);
+		}
 }
 
 } // namespace meshopt
@@ -288,6 +304,8 @@ size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigne
 
 void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
 {
+	using namespace meshopt;
+
 	assert(vertex_size > 0 && vertex_size <= 256);
 
 	meshopt_Allocator allocator;
@@ -300,14 +318,23 @@ void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t v
 		vertices = vertices_copy;
 	}
 
-	for (size_t i = 0; i < vertex_count; ++i)
+	// specialize the loop for common vertex sizes to ensure memcpy is compiled as an inlined intrinsic
+	switch (vertex_size)
 	{
-		if (remap[i] != ~0u)
-		{
-			assert(remap[i] < vertex_count);
+	case 4:
+		return remapVertices<4>(destination, vertices, vertex_count, vertex_size, remap);
 
-			memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
-		}
+	case 8:
+		return remapVertices<8>(destination, vertices, vertex_count, vertex_size, remap);
+
+	case 12:
+		return remapVertices<12>(destination, vertices, vertex_count, vertex_size, remap);
+
+	case 16:
+		return remapVertices<16>(destination, vertices, vertex_count, vertex_size, remap);
+
+	default:
+		return remapVertices<0>(destination, vertices, vertex_count, vertex_size, remap);
 	}
 }
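
The switch above maps the runtime vertex_size onto a compile-time BlockSize so that, for the common strides, the memcpy inside remapVertices has a constant size and can be lowered to a fixed-width load/store pair instead of a library call; the BlockSize == 0 instantiation keeps the generic runtime-sized path. A minimal standalone sketch of the same pattern, with illustrative names that are not part of the meshoptimizer API:

#include <cassert>
#include <cstddef>
#include <cstring>

// When Size is a non-zero compile-time constant, memcpy with that size is
// typically inlined as a fixed-width move; Size == 0 selects the generic path
// where the copy size is only known at runtime.
template <size_t Size>
static void copyBlocks(unsigned char* dst, const unsigned char* src, size_t count, size_t stride)
{
	size_t block = Size == 0 ? stride : Size;
	assert(block == stride);

	for (size_t i = 0; i < count; ++i)
		memcpy(dst + i * block, src + i * block, block);
}

void copyDispatch(unsigned char* dst, const unsigned char* src, size_t count, size_t stride)
{
	switch (stride)
	{
	case 16:
		return copyBlocks<16>(dst, src, count, stride);
	default:
		return copyBlocks<0>(dst, src, count, stride);
	}
}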
 

+ 29 - 0
3rdparty/meshoptimizer/src/meshoptimizer.h

@@ -347,6 +347,17 @@ enum
  */
 MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error);
 
+/**
+ * Experimental: Mesh simplifier with attribute metric
+ * The algorithm enhances meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details.
+ * Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes.
+ *
+ * vertex_attributes should have attribute_count floats for each vertex
+ * attribute_weights should have attribute_count floats in total; the weights determine the relative priority of attributes with respect to each other and to position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in the [0..1] range.
+ * TODO target_error/result_error currently use combined distance+attribute error; this may change in the future
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error);
+
 /**
  * Experimental: Mesh simplifier (sloppy)
  * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
@@ -627,6 +638,8 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const
 template <typename T>
 inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0);
 template <typename T>
+inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0);
+template <typename T>
 inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0);
 template <typename T>
 inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
@@ -751,6 +764,13 @@ public:
 		return result;
 	}
 
+	void deallocate(void* ptr)
+	{
+		assert(count > 0 && blocks[count - 1] == ptr);
+		Storage::deallocate(ptr);
+		count--;
+	}
+
 private:
 	void* blocks[24];
 	size_t count;
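
The new deallocate complements allocate: the assert only accepts the most recently allocated block, so meshopt_Allocator behaves as a stack and short-lived scratch buffers (such as the hash tables released by buildPositionRemap in the diffs above) can be returned early, but only in reverse allocation order. A hedged usage sketch against the header as shown:

#include "meshoptimizer.h"

void allocatorStackExample(size_t vertex_count)
{
	meshopt_Allocator allocator;

	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(vertex_count); // short-lived scratch

	// ... fill remap using table ...

	// table is the newest block, so it can be released immediately; releasing
	// remap first instead would trip the LIFO assert in deallocate
	allocator.deallocate(table);

	// remap stays valid until the allocator goes out of scope
	(void)remap;
}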
@@ -968,6 +988,15 @@ inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_co
 	return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error);
 }
 
+template <typename T>
+inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error)
+{
+	meshopt_IndexAdapter<T> in(0, indices, index_count);
+	meshopt_IndexAdapter<T> out(destination, 0, index_count);
+
+	return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, result_error);
+}
+
 template <typename T>
 inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error)
 {
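
For reference, a sketch of how the new meshopt_simplifyWithAttributes entry point might be called; the vertex layout, weights, and targets below are illustrative assumptions rather than part of the API, beyond what the declaration's comment states (per-vertex float attributes with a byte stride, weights roughly in [1e-3..1e-1] for attribute data in [0..1], and at most 16 attributes per the implementation's kMaxAttributes):

#include "meshoptimizer.h"

#include <vector>

struct Vertex
{
	float px, py, pz; // position
	float tu, tv;     // texture coordinates in [0..1], used as the attributes here
};

// Hypothetical helper: simplify to roughly half the triangles while letting UV
// error influence the collapse order. Assumes non-empty index/vertex buffers.
size_t simplifyHalf(std::vector<unsigned int>& indices, const std::vector<Vertex>& vertices)
{
	const float attribute_weights[2] = {1e-2f, 1e-2f}; // illustrative values

	std::vector<unsigned int> lod(indices.size());
	float result_error = 0.f;

	size_t lod_size = meshopt_simplifyWithAttributes(
	    lod.data(), indices.data(), indices.size(),
	    &vertices[0].px, vertices.size(), sizeof(Vertex),
	    &vertices[0].tu, sizeof(Vertex), attribute_weights, 2,
	    indices.size() / 2, /* target_error */ 1e-2f, /* options */ 0, &result_error);

	lod.resize(lod_size);
	indices.swap(lod);
	return lod_size;
}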

+ 283 - 106
3rdparty/meshoptimizer/src/simplifier.cpp

@@ -26,6 +26,7 @@
 // Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000
 // Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
 // Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019
+// Hugues Hoppe. New Quadric Metric for Simplifying Meshes with Appearance Attributes. 1999
 namespace meshopt
 {
 
@@ -37,31 +38,31 @@ struct EdgeAdjacency
 		unsigned int prev;
 	};
 
-	unsigned int* counts;
 	unsigned int* offsets;
 	Edge* data;
 };
 
 static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
 {
-	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
-	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count + 1);
 	adjacency.data = allocator.allocate<EdgeAdjacency::Edge>(index_count);
 }
 
 static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap)
 {
 	size_t face_count = index_count / 3;
+	unsigned int* offsets = adjacency.offsets + 1;
+	EdgeAdjacency::Edge* data = adjacency.data;
 
 	// fill edge counts
-	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+	memset(offsets, 0, vertex_count * sizeof(unsigned int));
 
 	for (size_t i = 0; i < index_count; ++i)
 	{
 		unsigned int v = remap ? remap[indices[i]] : indices[i];
 		assert(v < vertex_count);
 
-		adjacency.counts[v]++;
+		offsets[v]++;
 	}
 
 	// fill offset table
@@ -69,8 +70,9 @@ static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* in
 
 	for (size_t i = 0; i < vertex_count; ++i)
 	{
-		adjacency.offsets[i] = offset;
-		offset += adjacency.counts[i];
+		unsigned int count = offsets[i];
+		offsets[i] = offset;
+		offset += count;
 	}
 
 	assert(offset == index_count);
@@ -87,26 +89,22 @@ static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* in
 			c = remap[c];
 		}
 
-		adjacency.data[adjacency.offsets[a]].next = b;
-		adjacency.data[adjacency.offsets[a]].prev = c;
-		adjacency.offsets[a]++;
+		data[offsets[a]].next = b;
+		data[offsets[a]].prev = c;
+		offsets[a]++;
 
-		adjacency.data[adjacency.offsets[b]].next = c;
-		adjacency.data[adjacency.offsets[b]].prev = a;
-		adjacency.offsets[b]++;
+		data[offsets[b]].next = c;
+		data[offsets[b]].prev = a;
+		offsets[b]++;
 
-		adjacency.data[adjacency.offsets[c]].next = a;
-		adjacency.data[adjacency.offsets[c]].prev = b;
-		adjacency.offsets[c]++;
+		data[offsets[c]].next = a;
+		data[offsets[c]].prev = b;
+		offsets[c]++;
 	}
 
-	// fix offsets that have been disturbed by the previous pass
-	for (size_t i = 0; i < vertex_count; ++i)
-	{
-		assert(adjacency.offsets[i] >= adjacency.counts[i]);
-
-		adjacency.offsets[i] -= adjacency.counts[i];
-	}
+	// finalize offsets
+	adjacency.offsets[0] = 0;
+	assert(adjacency.offsets[vertex_count] == index_count);
 }
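
The rewrite above folds the separate counts array into a single offsets array of vertex_count + 1 entries: counts are accumulated into offsets + 1, converted into running start offsets, advanced while scattering edges, and finally left as a standard prefix-sum table with offsets[0] = 0, so the per-vertex edge count is recovered as offsets[v + 1] - offsets[v] (as hasEdge and classifyVertices now do below). A small self-contained sketch of the same counting-sort idiom, with illustrative names:

#include <cassert>
#include <vector>

// Build a CSR-style offset table mapping buckets to their items.
// After the scatter pass, items of bucket b live in data[offsets[b]..offsets[b + 1]).
void bucketize(std::vector<unsigned int>& offsets, std::vector<unsigned int>& data,
               const unsigned int* buckets, size_t item_count, size_t bucket_count)
{
	offsets.assign(bucket_count + 1, 0);
	data.resize(item_count);

	// counts land in offsets[1..bucket_count]
	for (size_t i = 0; i < item_count; ++i)
		offsets[buckets[i] + 1]++;

	// convert counts to start offsets (offsets[b + 1] = start of bucket b for now)
	unsigned int running = 0;
	for (size_t b = 0; b < bucket_count; ++b)
	{
		unsigned int count = offsets[b + 1];
		offsets[b + 1] = running;
		running += count;
	}
	assert(running == item_count);

	// scatter items, advancing each bucket's cursor; afterwards offsets[b + 1]
	// points one past bucket b and offsets[0] is already 0, i.e. the final table
	for (size_t i = 0; i < item_count; ++i)
		data[offsets[buckets[i] + 1]++] = unsigned(i);
}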
 
 struct PositionHasher
@@ -203,6 +201,8 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f
 			wedge[i] = wedge[r];
 			wedge[r] = unsigned(i);
 		}
+
+	allocator.deallocate(table);
 }
 
 enum VertexKind
@@ -242,7 +242,7 @@ const unsigned char kHasOpposite[Kind_Count][Kind_Count] = {
 
 static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b)
 {
-	unsigned int count = adjacency.counts[a];
+	unsigned int count = adjacency.offsets[a + 1] - adjacency.offsets[a];
 	const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a];
 
 	for (size_t i = 0; i < count; ++i)
@@ -267,7 +267,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
 	{
 		unsigned int vertex = unsigned(i);
 
-		unsigned int count = adjacency.counts[vertex];
+		unsigned int count = adjacency.offsets[vertex + 1] - adjacency.offsets[vertex];
 		const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex];
 
 		for (size_t j = 0; j < count; ++j)
@@ -426,6 +426,23 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat
 	return extent;
 }
 
+static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count)
+{
+	size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float);
+
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		for (size_t k = 0; k < attribute_count; ++k)
+		{
+			float a = vertex_attributes_data[i * vertex_attributes_stride_float + k];
+
+			result[i * attribute_count + k] = a * attribute_weights[k];
+		}
+	}
+}
+
+static const size_t kMaxAttributes = 16;
+
 struct Quadric
 {
 	float a00, a11, a22;
@@ -434,6 +451,11 @@ struct Quadric
 	float w;
 };
 
+struct QuadricGrad
+{
+	float gx, gy, gz, gw;
+};
+
 struct Collapse
 {
 	unsigned int v0;
@@ -476,6 +498,17 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
 	Q.w += R.w;
 }
 
+static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_count)
+{
+	for (size_t k = 0; k < attribute_count; ++k)
+	{
+		G[k].gx += R[k].gx;
+		G[k].gy += R[k].gy;
+		G[k].gz += R[k].gz;
+		G[k].gw += R[k].gw;
+	}
+}
+
 static float quadricError(const Quadric& Q, const Vector3& v)
 {
 	float rx = Q.b0;
@@ -504,6 +537,45 @@ static float quadricError(const Quadric& Q, const Vector3& v)
 	return fabsf(r) * s;
 }
 
+static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribute_count, const Vector3& v, const float* va)
+{
+	float rx = Q.b0;
+	float ry = Q.b1;
+	float rz = Q.b2;
+
+	rx += Q.a10 * v.y;
+	ry += Q.a21 * v.z;
+	rz += Q.a20 * v.x;
+
+	rx *= 2;
+	ry *= 2;
+	rz *= 2;
+
+	rx += Q.a00 * v.x;
+	ry += Q.a11 * v.y;
+	rz += Q.a22 * v.z;
+
+	float r = Q.c;
+	r += rx * v.x;
+	r += ry * v.y;
+	r += rz * v.z;
+
+	// see quadricFromAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
+	for (size_t k = 0; k < attribute_count; ++k)
+	{
+		float a = va[k];
+		float g = v.x * G[k].gx + v.y * G[k].gy + v.z * G[k].gz + G[k].gw;
+
+		r += a * a * Q.w;
+		r -= 2 * a * g;
+	}
+
+	// TODO: weight normalization is breaking attribute error somehow
+	float s = 1;// Q.w == 0.f ? 0.f : 1.f / Q.w;
+
+	return fabsf(r) * s;
+}
+
 static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
 {
 	float aw = a * w;
@@ -574,6 +646,82 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
 	quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
 }
 
+static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, const Vector3& p1, const Vector3& p2, const float* va0, const float* va1, const float* va2, size_t attribute_count)
+{
+	// for each attribute we want to encode the following function into the quadric:
+	// (eval(pos) - attr)^2
+	// where eval(pos) interpolates attribute across the triangle like so:
+	// eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
+	// where gx/gy/gz/gw are gradients
+	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+
+	// weight is scaled linearly with edge length
+	Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
+	float area = sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z);
+	float w = sqrtf(area); // TODO this needs more experimentation
+
+	// we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
+	// v = (d11 * d20 - d01 * d21) / denom
+	// w = (d00 * d21 - d01 * d20) / denom
+	// u = 1 - v - w
+	// here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
+	const Vector3& v0 = p10;
+	const Vector3& v1 = p20;
+	float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
+	float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
+	float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
+	float denom = d00 * d11 - d01 * d01;
+	float denomr = denom == 0 ? 0.f : 1.f / denom;
+
+	// precompute gradient factors
+	// these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes
+	float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
+	float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
+	float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
+	float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
+	float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
+	float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
+
+	memset(&Q, 0, sizeof(Quadric));
+
+	Q.w = w;
+
+	for (size_t k = 0; k < attribute_count; ++k)
+	{
+		float a0 = va0[k], a1 = va1[k], a2 = va2[k];
+
+		// compute gradient of eval(pos) for x/y/z/w
+		// the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
+		float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
+		float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
+		float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
+		float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
+
+		// quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
+		// since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
+		Q.a00 += w * (gx * gx);
+		Q.a11 += w * (gy * gy);
+		Q.a22 += w * (gz * gz);
+
+		Q.a10 += w * (gy * gx);
+		Q.a20 += w * (gz * gx);
+		Q.a21 += w * (gz * gy);
+
+		Q.b0 += w * (gx * gw);
+		Q.b1 += w * (gy * gw);
+		Q.b2 += w * (gz * gw);
+
+		Q.c += w * (gw * gw);
+
+		// the only remaining sum components are ones that depend on attr; these will be added during error evaluation, see quadricError
+		G[k].gx = w * gx;
+		G[k].gy = w * gy;
+		G[k].gz = w * gz;
+		G[k].gw = w * gw;
+	}
+}
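
To make the split between quadricFromAttributes and the attribute-aware quadricError explicit (following the Hoppe 1999 reference added at the top of the file): for a single attribute with interpolation eval(p) = g · p + g_w, triangle weight w, and candidate attribute value a, the weighted squared error decomposes as

\[
w\,(g \cdot p + g_w - a)^2
= \underbrace{w (g \cdot p)^2 + 2 w g_w (g \cdot p) + w g_w^2}_{\text{folded into } Q.a_{ij},\ Q.b_i,\ Q.c}
+ \underbrace{w a^2}_{a^2 \cdot Q.w}
- \underbrace{2 a \, (w\, g \cdot p + w g_w)}_{2 a \cdot (p \cdot G_{xyz} + G_w)}
\]

The first group depends only on position, so quadricFromAttributes folds it into the ordinary quadric fields per attribute; the two a-dependent terms are exactly what quadricError adds via r += a*a*Q.w - 2*a*g, using the gradients G that were pre-scaled by w above.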
+
 static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
 {
 	for (size_t i = 0; i < index_count; i += 3)
@@ -639,6 +787,29 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
 	}
 }
 
+static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const float* vertex_attributes, size_t attribute_count, const unsigned int* remap)
+{
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		unsigned int i0 = indices[i + 0];
+		unsigned int i1 = indices[i + 1];
+		unsigned int i2 = indices[i + 2];
+
+		Quadric QA;
+		QuadricGrad G[kMaxAttributes];
+		quadricFromAttributes(QA, G, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], &vertex_attributes[i0 * attribute_count], &vertex_attributes[i1 * attribute_count], &vertex_attributes[i2 * attribute_count], attribute_count);
+
+		// TODO: This blends together attribute weights across attribute discontinuities, which is probably not a great idea
+		quadricAdd(attribute_quadrics[remap[i0]], QA);
+		quadricAdd(attribute_quadrics[remap[i1]], QA);
+		quadricAdd(attribute_quadrics[remap[i2]], QA);
+
+		quadricAdd(&attribute_gradients[remap[i0] * attribute_count], G, attribute_count);
+		quadricAdd(&attribute_gradients[remap[i1] * attribute_count], G, attribute_count);
+		quadricAdd(&attribute_gradients[remap[i2] * attribute_count], G, attribute_count);
+	}
+}
+
 // does triangle ABC flip when C is replaced with D?
 static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d)
 {
@@ -649,7 +820,7 @@ static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c
 	Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x};
 	Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x};
 
-	return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z < 0;
+	return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z <= 0;
 }
 
 static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1)
@@ -661,16 +832,15 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert
 	const Vector3& v1 = vertex_positions[i1];
 
 	const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]];
-	size_t count = adjacency.counts[i0];
+	size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0];
 
 	for (size_t i = 0; i < count; ++i)
 	{
 		unsigned int a = collapse_remap[edges[i].next];
 		unsigned int b = collapse_remap[edges[i].prev];
 
-		// skip triangles that get collapsed
-		// note: this is mathematically redundant as if either of these is true, the dot product in hasTriangleFlip should be 0
-		if (a == i1 || b == i1)
+		// skip triangles that will be collapsed by the i0->i1 collapse or that have already been collapsed
+		if (a == i1 || b == i1 || a == b)
 			continue;
 
 		// early-out when at least one triangle flips due to a collapse
@@ -681,7 +851,25 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert
 	return false;
 }
 
-static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop)
+static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_count, size_t index_count, unsigned char* vertex_kind)
+{
+	size_t dual_count = 0;
+
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		unsigned char k = vertex_kind[i];
+		unsigned int e = adjacency.offsets[i + 1] - adjacency.offsets[i];
+
+		dual_count += (k == Kind_Manifold || k == Kind_Seam) ? e : 0;
+	}
+
+	assert(dual_count <= index_count);
+
+	// pad capacity by 3 so that we can check for overflow once per triangle instead of once per edge
+	return (index_count - dual_count / 2) + 3;
+}
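
As a quick sanity check of this bound, take a closed manifold mesh with F triangles where every vertex is Kind_Manifold: the adjacency holds index_count = 3F half-edges, so dual_count = 3F and the capacity evaluates to 3F - 3F/2 + 3 = 3F/2 + 3, i.e. one slot per undirected edge plus the per-triangle overflow padding. This is consistent with pickEdgeCollapses below emitting a single (possibly bidirectional) candidate for each manifold or seam edge even though such an edge appears as two opposing half-edges, while border and other one-sided edges keep one candidate per half-edge.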
+
+static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop)
 {
 	size_t collapse_count = 0;
 
@@ -689,6 +877,10 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
 	{
 		static const int next[3] = {1, 2, 0};
 
+		// this should never happen as boundEdgeCollapses should give an upper bound for the collapse count, but in the unlikely event it does, we can just drop extra collapses
+		if (collapse_count + 3 > collapse_capacity)
+			break;
+
 		for (int e = 0; e < 3; ++e)
 		{
 			unsigned int i0 = indices[i + e];
@@ -739,7 +931,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
 	return collapse_count;
 }
 
-static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap)
+static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const float* vertex_attributes, const Quadric* vertex_quadrics, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap)
 {
 	for (size_t i = 0; i < collapse_count; ++i)
 	{
@@ -753,11 +945,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
 		unsigned int j0 = c.bidi ? i1 : i0;
 		unsigned int j1 = c.bidi ? i0 : i1;
 
-		const Quadric& qi = vertex_quadrics[remap[i0]];
-		const Quadric& qj = vertex_quadrics[remap[j0]];
+		float ei = quadricError(vertex_quadrics[remap[i0]], vertex_positions[i1]);
+		float ej = quadricError(vertex_quadrics[remap[j0]], vertex_positions[j1]);
 
-		float ei = quadricError(qi, vertex_positions[i1]);
-		float ej = quadricError(qj, vertex_positions[j1]);
+		if (attribute_count)
+		{
+			ei += quadricError(attribute_quadrics[remap[i0]], &attribute_gradients[remap[i0] * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]);
+			ej += quadricError(attribute_quadrics[remap[j0]], &attribute_gradients[remap[j0] * attribute_count], attribute_count, vertex_positions[j1], &vertex_attributes[j1 * attribute_count]);
+		}
 
 		// pick edge direction with minimal error
 		c.v0 = ei <= ej ? i0 : j0;
@@ -766,61 +961,6 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
 	}
 }
 
-#if TRACE > 1
-static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind)
-{
-	size_t ckinds[Kind_Count][Kind_Count] = {};
-	float cerrors[Kind_Count][Kind_Count] = {};
-
-	for (int k0 = 0; k0 < Kind_Count; ++k0)
-		for (int k1 = 0; k1 < Kind_Count; ++k1)
-			cerrors[k0][k1] = FLT_MAX;
-
-	for (size_t i = 0; i < collapse_count; ++i)
-	{
-		unsigned int i0 = collapses[i].v0;
-		unsigned int i1 = collapses[i].v1;
-
-		unsigned char k0 = vertex_kind[i0];
-		unsigned char k1 = vertex_kind[i1];
-
-		ckinds[k0][k1]++;
-		cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? collapses[i].error : cerrors[k0][k1];
-	}
-
-	for (int k0 = 0; k0 < Kind_Count; ++k0)
-		for (int k1 = 0; k1 < Kind_Count; ++k1)
-			if (ckinds[k0][k1])
-				printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), ckinds[k0][k1] ? sqrtf(cerrors[k0][k1]) : 0.f);
-}
-
-static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind)
-{
-	size_t locked_collapses[Kind_Count][Kind_Count] = {};
-
-	for (size_t i = 0; i < index_count; i += 3)
-	{
-		static const int next[3] = {1, 2, 0};
-
-		for (int e = 0; e < 3; ++e)
-		{
-			unsigned int i0 = indices[i + e];
-			unsigned int i1 = indices[i + next[e]];
-
-			unsigned char k0 = vertex_kind[i0];
-			unsigned char k1 = vertex_kind[i1];
-
-			locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0];
-		}
-	}
-
-	for (int k0 = 0; k0 < Kind_Count; ++k0)
-		for (int k1 = 0; k1 < Kind_Count; ++k1)
-			if (locked_collapses[k0][k1])
-				printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1]));
-}
-#endif
-
 static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count)
 {
 	const int sort_bits = 11;
@@ -859,7 +999,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse
 	}
 }
 
-static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
+static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
 {
 	size_t edge_collapses = 0;
 	size_t triangle_collapses = 0;
@@ -922,6 +1062,12 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
 
 		quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
 
+		if (attribute_count)
+		{
+			quadricAdd(attribute_quadrics[r1], attribute_quadrics[r0]);
+			quadricAdd(&attribute_gradients[r1 * attribute_count], &attribute_gradients[r0 * attribute_count], attribute_count);
+		}
+
 		if (vertex_kind[i0] == Kind_Complex)
 		{
 			unsigned int v = i0;
@@ -1277,7 +1423,7 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0;
 MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0;
 #endif
 
-size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
+size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
 {
 	using namespace meshopt;
 
@@ -1286,6 +1432,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	assert(vertex_positions_stride % sizeof(float) == 0);
 	assert(target_index_count <= index_count);
 	assert((options & ~(meshopt_SimplifyLockBorder)) == 0);
+	assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256);
+	assert(vertex_attributes_stride % sizeof(float) == 0);
+	assert(attribute_count <= kMaxAttributes);
 
 	meshopt_Allocator allocator;
 
@@ -1325,12 +1474,35 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
 	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
 
+	float* vertex_attributes = NULL;
+
+	if (attribute_count)
+	{
+		vertex_attributes = allocator.allocate<float>(vertex_count * attribute_count);
+		rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count);
+	}
+
 	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
 	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
 
+	Quadric* attribute_quadrics = NULL;
+	QuadricGrad* attribute_gradients = NULL;
+
+	if (attribute_count)
+	{
+		attribute_quadrics = allocator.allocate<Quadric>(vertex_count);
+		memset(attribute_quadrics, 0, vertex_count * sizeof(Quadric));
+
+		attribute_gradients = allocator.allocate<QuadricGrad>(vertex_count * attribute_count);
+		memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad));
+	}
+
 	fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap);
 	fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
 
+	if (attribute_count)
+		fillAttributeQuadrics(attribute_quadrics, attribute_gradients, indices, index_count, vertex_positions, vertex_attributes, attribute_count, remap);
+
 	if (result != indices)
 		memcpy(result, indices, index_count * sizeof(unsigned int));
 
@@ -1338,8 +1510,10 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	size_t pass_count = 0;
 #endif
 
-	Collapse* edge_collapses = allocator.allocate<Collapse>(index_count);
-	unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count);
+	size_t collapse_capacity = boundEdgeCollapses(adjacency, vertex_count, index_count, vertex_kind);
+
+	Collapse* edge_collapses = allocator.allocate<Collapse>(collapse_capacity);
+	unsigned int* collapse_order = allocator.allocate<unsigned int>(collapse_capacity);
 	unsigned int* collapse_remap = allocator.allocate<unsigned int>(vertex_count);
 	unsigned char* collapse_locked = allocator.allocate<unsigned char>(vertex_count);
 
@@ -1354,17 +1528,14 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 		// note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress
 		updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap);
 
-		size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop);
+		size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, collapse_capacity, result, result_count, remap, vertex_kind, loop);
+		assert(edge_collapse_count <= collapse_capacity);
 
 		// no edges can be collapsed any more due to topology restrictions
 		if (edge_collapse_count == 0)
 			break;
 
-		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap);
-
-#if TRACE > 1
-		dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
-#endif
+		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_attributes, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, remap);
 
 		sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count);
 
@@ -1379,7 +1550,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 		printf("pass %d: ", int(pass_count++));
 #endif
 
-		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
+		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
 
 		// no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
 		if (collapses == 0)
@@ -1398,10 +1569,6 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count));
 #endif
 
-#if TRACE > 1
-	dumpLockedCollapses(result, result_count, vertex_kind);
-#endif
-
 #ifndef NDEBUG
 	if (meshopt_simplifyDebugKind)
 		memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count);
@@ -1420,6 +1587,16 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	return result_count;
 }
 
+size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
+{
+	return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, target_index_count, target_error, options, out_result_error);
+}
+
+size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
+{
+	return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, out_result_error);
+}
+
 size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
 {
 	using namespace meshopt;

+ 10 - 11
3rdparty/meshoptimizer/src/vcacheoptimizer.cpp

@@ -221,9 +221,9 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
 		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
 	}
 
-	unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
+	unsigned int cache_holder[2 * (kCacheSizeMax + 4)];
 	unsigned int* cache = cache_holder;
-	unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
+	unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;
 	size_t cache_count = 0;
 
 	unsigned int current_triangle = 0;
@@ -260,10 +260,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
 		{
 			unsigned int index = cache[i];
 
-			if (index != a && index != b && index != c)
-			{
-				cache_new[cache_write++] = index;
-			}
+			cache_new[cache_write] = index;
+			cache_write += (index != a && index != b && index != c);
 		}
 
 		unsigned int* cache_temp = cache;
@@ -305,6 +303,10 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
 		{
 			unsigned int index = cache[i];
 
+			// no need to update scores if we are never going to use this vertex
+			if (adjacency.counts[index] == 0)
+				continue;
+
 			int cache_position = i >= cache_size ? -1 : int(i);
 
 			// update vertex score
@@ -325,11 +327,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
 				float tri_score = triangle_scores[tri] + score_diff;
 				assert(tri_score > 0);
 
-				if (best_score < tri_score)
-				{
-					best_triangle = tri;
-					best_score = tri_score;
-				}
+				best_triangle = best_score < tri_score ? tri : best_triangle;
+				best_score = best_score < tri_score ? tri_score : best_score;
 
 				triangle_scores[tri] = tri_score;
 			}