2
0
Эх сурвалжийг харах

Optimized mesh shape creation (#147)

* When passing indexed triangles, building is approx. 30% faster
* Reduced number of allocations in MeshShape::sFindActiveEdges
* Reduced number of allocations in TriangleSplitterBinning
* Optimized triangle codec by removing an unordered_map in favor of a vector
* Removed unused stat collecting
Jorrit Rouwe 3 жил өмнө
parent
commit
dfec0f2250

+ 12 - 86
Jolt/AABBTree/AABBTreeToBuffer.h

@@ -13,41 +13,6 @@ JPH_SUPPRESS_WARNINGS_STD_END
 
 JPH_NAMESPACE_BEGIN
 
-/// How the tree should be converted
-enum class EAABBTreeToBufferConvertMode
-{
-	DepthFirst,							///< Arrange the nodes depth first, put the triangles right after the leaf nodes (so interleaving them with nodes)
-	DepthFirstTrianglesLast,			///< Arrange the nodes depth first and put all triangles blocks after the last node block
-	BreadthFirst,						///< Arrange the nodes breadth first, put the triangles right after the leaf nodes (so interleaving them with nodes)
-	BreadthFirstTrianglesLast,			///< Arrange the nodes breadth first and put all triangles blocks after the last node block
-};
-
-/// Convert mode to string
-inline string ConvertToString(EAABBTreeToBufferConvertMode inConvertMode)
-{
-	switch (inConvertMode)
-	{
-	case EAABBTreeToBufferConvertMode::DepthFirst:					return "DepthFirst";
-	case EAABBTreeToBufferConvertMode::DepthFirstTrianglesLast:		return "DepthFirstTrianglesLast";
-	case EAABBTreeToBufferConvertMode::BreadthFirst:				return "BreadthFirst";
-	case EAABBTreeToBufferConvertMode::BreadthFirstTrianglesLast:	return "BreadthFirstTrianglesLast";
-	}
-
-	JPH_ASSERT(false);
-	return "Invalid";
-}
-
-/// Struct that holds statistics about the AABB tree that was built
-struct AABBTreeToBufferStats
-{
-	uint			mTotalSize = 0;									///< Total size of the built tree in bytes
-	uint			mNodesSize = 0;									///< Total size of all nodes in the tree in bytes
-	uint			mTrianglesSize = 0;								///< Total size of all triangles in the tree in bytes
-	float			mBytesPerTriangle = 0.0f;						///< Average number of bytes per triangle (includes all tree overhead)
-	string			mTriangleCodecName;								///< Name of the codec that was used to build the tree
-	float			mVerticesPerTriangle = 0.0f;					///< How many vertices a triangle on average has
-};
-		
 /// Conversion algorithm that converts an AABB tree to an optimized binary buffer
 template <class TriangleCodec, class NodeCodec>
 class AABBTreeToBuffer
@@ -69,10 +34,10 @@ public:
 	static const int TriangleHeaderSize = TriangleCodec::TriangleHeaderSize;
 
 	/// Convert AABB tree. Returns false if failed.
-	bool							Convert(const VertexList &inVertices, const AABBTreeBuilder::Node *inRoot, AABBTreeToBufferStats &outStats, const char *&outError, EAABBTreeToBufferConvertMode inConvertMode = EAABBTreeToBufferConvertMode::DepthFirst)
+	bool							Convert(const VertexList &inVertices, const AABBTreeBuilder::Node *inRoot, const char *&outError)
 	{
 		const typename NodeCodec::EncodingContext node_ctx;
-		typename TriangleCodec::EncodingContext tri_ctx;
+		typename TriangleCodec::EncodingContext tri_ctx(inVertices);
 
 		// Estimate the amount of memory required
 		uint tri_count = inRoot->GetTriangleCountInTree();
@@ -124,25 +89,8 @@ public:
 			while (!to_process.empty())
 			{
 				// Get the next node to process
-				NodeData *node_data = nullptr;
-				switch (inConvertMode)
-				{
-				case EAABBTreeToBufferConvertMode::DepthFirst:
-				case EAABBTreeToBufferConvertMode::DepthFirstTrianglesLast:
-					node_data = to_process.back();
-					to_process.pop_back();
-					break;
-
-				case EAABBTreeToBufferConvertMode::BreadthFirst:
-				case EAABBTreeToBufferConvertMode::BreadthFirstTrianglesLast:
-					node_data = to_process.front();
-					to_process.pop_front();
-					break;
-
-				default:
-					JPH_ASSERT(false);
-					break;
-				}
+				NodeData *node_data = to_process.back();
+				to_process.pop_back();
 
 				// Due to quantization box could have become bigger, not smaller
 				JPH_ASSERT(AABox(node_data->mNodeBoundsMin, node_data->mNodeBoundsMax).Contains(node_data->mNode->mBounds), "AABBTreeToBuffer: Bounding box became smaller!");
@@ -175,12 +123,9 @@ public:
 
 				if (node_data->mNode->HasChildren())
 				{
-					for (size_t i = 0; i < child_nodes.size(); ++i)
+					// Insert in reverse order so we process left child first when taking nodes from the back
+					for (int idx = int(child_nodes.size()) - 1; idx >= 0; --idx)
 					{
-						// Depth first: Insert in reverse order so we process left child first when taking nodes from the back
-						size_t idx = (inConvertMode == EAABBTreeToBufferConvertMode::DepthFirst || inConvertMode == EAABBTreeToBufferConvertMode::DepthFirstTrianglesLast)? 
-							child_nodes.size() - 1 - i : i;
-					
 						// Due to quantization box could have become bigger, not smaller
 						JPH_ASSERT(AABox(child_bounds_min[idx], child_bounds_max[idx]).Contains(child_nodes[idx]->mBounds), "AABBTreeToBuffer: Bounding box became smaller!");
 
@@ -199,27 +144,17 @@ public:
 							return false;
 						}
 
-						switch (inConvertMode)
-						{
-						case EAABBTreeToBufferConvertMode::DepthFirst:
-						case EAABBTreeToBufferConvertMode::BreadthFirst:
+						// Store triangles in separate list so we process them last
+						if (node_list.back().mNode->HasChildren())
 							to_process.push_back(&node_list.back());
-							break;
-
-						case EAABBTreeToBufferConvertMode::DepthFirstTrianglesLast:
-						case EAABBTreeToBufferConvertMode::BreadthFirstTrianglesLast:
-							if (node_list.back().mNode->HasChildren())
-								to_process.push_back(&node_list.back());
-							else
-								to_process_triangles.push_back(&node_list.back());
-							break;
-						}
+						else
+							to_process_triangles.push_back(&node_list.back());
 					}
 				}
 				else
 				{				
 					// Add triangles
-					node_data->mTriangleStart = tri_ctx.Pack(inVertices, node_data->mNode->mTriangles, mTree, outError);
+					node_data->mTriangleStart = tri_ctx.Pack(node_data->mNode->mTriangles, mTree, outError);
 					if (node_data->mTriangleStart == uint(-1))
 						return false;
 				}
@@ -245,10 +180,7 @@ public:
 				return false;
 		
 		// Finalize the triangles
-		tri_ctx.Finalize(triangle_header, mTree);
-
-		// Get stats
-		tri_ctx.GetStats(outStats.mTriangleCodecName, outStats.mVerticesPerTriangle);
+		tri_ctx.Finalize(inVertices, triangle_header, mTree);
 
 		// Validate that we reserved enough memory
 		if (nodes_size < mNodesSize)
@@ -269,12 +201,6 @@ public:
 		// Shrink the tree, this will invalidate the header and triangle_header variables
 		mTree.shrink_to_fit();
 
-		// Output stats
-		outStats.mTotalSize = (uint)mTree.size();
-		outStats.mNodesSize = mNodesSize;
-		outStats.mTrianglesSize = (uint)mTree.size() - mNodesSize;
-		outStats.mBytesPerTriangle = (float)mTree.size() / tri_count;
-
 		return true;
 	}
 

+ 19 - 25
Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h

@@ -95,6 +95,14 @@ public:
 	class EncodingContext
 	{
 	public:
+		/// Construct the encoding context, creating one vertex map entry per vertex in inVertices
+		explicit					EncodingContext(const VertexList &inVertices) :
+			mVertexMap(inVertices.size(), 0xffffffff) // Every entry starts as 0xffffffff, the value Pack tests for to detect a vertex that has not been added yet
+		{
+			// Reserve one slot per source vertex up front to avoid most allocations in the inner loop; Pack adds a vertex again when its earlier copy lies before start_vertex, so this is not a strict upper bound
+			mVertices.reserve(inVertices.size());
+		}
+
 		/// Get an upper bound on the amount of bytes needed to store inTriangleCount triangles
 		uint						GetPessimisticMemoryEstimate(uint inTriangleCount) const
 		{
@@ -104,7 +112,7 @@ public:
 
 		/// Pack the triangles in inContainer to ioBuffer. This stores the mMaterialIndex of a triangle in the 8 bit flags.
 		/// Returns uint(-1) on error.
-		uint						Pack(const VertexList &inVertices, const IndexedTriangleList &inTriangles, ByteBuffer &ioBuffer, const char *&outError)
+		uint						Pack(const IndexedTriangleList &inTriangles, ByteBuffer &ioBuffer, const char *&outError)
 		{
 			// Determine position of triangles start
 			uint offset = (uint)ioBuffer.size();
@@ -136,19 +144,12 @@ public:
 						uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[tri_count - 1].mIdx[0];
 
 						// Check if we've seen this vertex before and if it is in the range that we can encode
-						uint32 vertex_index;
-						VertexMap::const_iterator found = mVertexMap.find(src_vertex_index);
-						if (found == mVertexMap.end() || found->second < start_vertex)
+						uint32 &vertex_index = mVertexMap[src_vertex_index];
+						if (vertex_index == 0xffffffff || vertex_index < start_vertex)
 						{
 							// Add vertex
 							vertex_index = (uint32)mVertices.size();
-							mVertexMap[src_vertex_index] = vertex_index;
-							mVertices.push_back(inVertices[src_vertex_index]);
-						}
-						else
-						{
-							// Reuse vertex
-							vertex_index = found->second;
+							mVertices.push_back(src_vertex_index);
 						}
 
 						// Store vertex index
@@ -175,7 +176,7 @@ public:
 		}
 
 		/// After all triangles have been packed, this finalizes the header and triangle buffer
-		void						Finalize(TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const
+		void						Finalize(const VertexList &inVertices, TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const
 		{
 			// Check if anything to do
 			if (mVertices.empty())
@@ -190,15 +191,15 @@ public:
 
 			// Calculate bounding box
 			AABox bounds;
-			for (const Float3 &v : mVertices)
-				bounds.Encapsulate(Vec3(v));
+			for (uint32 v : mVertices)
+				bounds.Encapsulate(Vec3(inVertices[v]));
 
 			// Compress vertices
 			VertexData *vertices = ioBuffer.Allocate<VertexData>(mVertices.size());
 			Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(bounds.GetSize(), Vec3::sReplicate(1.0e-20f));
-			for (const Float3 &v : mVertices)
+			for (uint32 v : mVertices)
 			{
-				UVec4 c = ((Vec3(v) - bounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
+				UVec4 c = ((Vec3(inVertices[v]) - bounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
 				JPH_ASSERT(c.GetX() <= COMPONENT_MASK);
 				JPH_ASSERT(c.GetY() <= COMPONENT_MASK);
 				JPH_ASSERT(c.GetZ() <= COMPONENT_MASK);
@@ -212,18 +213,11 @@ public:
 			(bounds.GetSize() / Vec3::sReplicate(COMPONENT_MASK)).StoreFloat3(&ioHeader->mScale);
 		}
 
-		void						GetStats(string &outTriangleCodecName, float &outVerticesPerTriangle) const
-		{
-			// Store stats
-			outTriangleCodecName = "Indexed8BitPackSOA4";
-			outVerticesPerTriangle = (float)mVertices.size() / mNumTriangles;
-		}
-
 	private:
-		using VertexMap = unordered_map<uint32, uint32>;
+		using VertexMap = vector<uint32>;
 
 		uint						mNumTriangles = 0;
-		vector<Float3>				mVertices;				///< Output vertices, sorted according to occurrence
+		vector<uint32>				mVertices;				///< Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence
 		VertexMap					mVertexMap;				///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices)
 		vector<uint>				mOffsetsToPatch;		///< Offsets to the vertex buffer that need to be patched in once all nodes have been packed
 	};

+ 43 - 24
Jolt/Physics/Collision/Shape/MeshShape.cpp

@@ -189,10 +189,9 @@ MeshShape::MeshShape(const MeshShapeSettings &inSettings, ShapeResult &outResult
 	AABBTreeBuilder::Node *root = builder.Build(builder_stats);
 
 	// Convert to buffer
-	AABBTreeToBufferStats buffer_stats;
 	AABBTreeToBuffer<TriangleCodec, NodeCodec> buffer;
 	const char *error = nullptr;
-	if (!buffer.Convert(inSettings.mTriangleVertices, root, buffer_stats, error, EAABBTreeToBufferConvertMode::DepthFirstTrianglesLast))
+	if (!buffer.Convert(inSettings.mTriangleVertices, root, error))
 	{
 		outResult.SetError(error);
 		delete root;
@@ -217,6 +216,7 @@ MeshShape::MeshShape(const MeshShapeSettings &inSettings, ShapeResult &outResult
 
 void MeshShape::sFindActiveEdges(const VertexList &inVertices, IndexedTriangleList &ioIndices)
 {
+	// A struct to hold the two vertex indices of an edge
 	struct Edge
 	{
 				Edge(int inIdx1, int inIdx2) : mIdx1(min(inIdx1, inIdx2)), mIdx2(max(inIdx1, inIdx2)) { }
@@ -245,33 +245,54 @@ void MeshShape::sFindActiveEdges(const VertexList &inVertices, IndexedTriangleLi
 
 	JPH_MAKE_HASH_STRUCT(Edge, EdgeHash, t.mIdx1, t.mIdx2)
 
+	// Holds the first two triangles found for an edge; mNumTriangles is the number of valid entries in mTriangleIndices and is never raised above 2
+	struct TriangleIndices
+	{
+		uint	mNumTriangles = 0;
+		uint	mTriangleIndices[2];
+	};
+
 	// Build a list of edge to triangles
-	using EdgeToTriangle = unordered_map<Edge, vector<uint>, EdgeHash>;
+	using EdgeToTriangle = unordered_map<Edge, TriangleIndices, EdgeHash>;
 	EdgeToTriangle edge_to_triangle;
+	edge_to_triangle.reserve(ioIndices.size() * 3);
 	for (uint triangle_idx = 0; triangle_idx < ioIndices.size(); ++triangle_idx)
 	{
-		const IndexedTriangle &triangle = ioIndices[triangle_idx];
+		IndexedTriangle &triangle = ioIndices[triangle_idx];
 		for (uint edge_idx = 0; edge_idx < 3; ++edge_idx)
 		{
 			Edge edge(triangle.mIdx[edge_idx], triangle.mIdx[(edge_idx + 1) % 3]);
-			edge_to_triangle[edge].push_back(triangle_idx);
+			TriangleIndices &indices = edge_to_triangle[edge];
+			if (indices.mNumTriangles < 2)
+			{
+				// Store index of triangle that connects to this edge
+				indices.mTriangleIndices[indices.mNumTriangles] = triangle_idx;
+				indices.mNumTriangles++;
+			}
+			else
+			{
+				// 3 or more triangles share an edge, mark this edge as active
+				uint32 mask = 1 << (edge_idx + FLAGS_ACTIVE_EGDE_SHIFT);
+				JPH_ASSERT((triangle.mMaterialIndex & mask) == 0);
+				triangle.mMaterialIndex |= mask;
+			}
 		}
 	}
 
 	// Walk over all edges and determine which ones are active
 	for (const EdgeToTriangle::value_type &edge : edge_to_triangle)
 	{
-		bool active = false;
-		if (edge.second.size() == 1)
+		uint num_active = 0;
+		if (edge.second.mNumTriangles == 1)
 		{
 			// Edge is not shared, it is an active edge
-			active = true;
+			num_active = 1;
 		}
-		else if (edge.second.size() == 2)
+		else if (edge.second.mNumTriangles == 2)
 		{
 			// Simple shared edge, determine if edge is active based on the two adjacent triangles
-			const IndexedTriangle &triangle1 = ioIndices[edge.second[0]];
-			const IndexedTriangle &triangle2 = ioIndices[edge.second[1]];
+			const IndexedTriangle &triangle1 = ioIndices[edge.second.mTriangleIndices[0]];
+			const IndexedTriangle &triangle2 = ioIndices[edge.second.mTriangleIndices[1]];
 
 			// Find which edge this is for both triangles
 			uint edge_idx1 = edge.first.GetIndexInTriangle(triangle1);
@@ -290,25 +311,23 @@ void MeshShape::sFindActiveEdges(const VertexList &inVertices, IndexedTriangleLi
 			Plane triangle2_plane = Plane::sFromPointsCCW(triangle2_e1, triangle2_e2, triangle2_op);
 
 			// Determine if the edge is active
-			active = ActiveEdges::IsEdgeActive(triangle1_plane.GetNormal(), triangle2_plane.GetNormal(), triangle1_e2 - triangle1_e1);
+			num_active = ActiveEdges::IsEdgeActive(triangle1_plane.GetNormal(), triangle2_plane.GetNormal(), triangle1_e2 - triangle1_e1)? 2 : 0;
 		}
 		else
 		{
-			// Multiple edges incoming, assume active
-			active = true;
+			// More edges incoming, we've already marked all edges beyond the 2nd as active
+			num_active = 2;
 		}
 
-		if (active)
+		// Mark edges of all original triangles active
+		for (uint i = 0; i < num_active; ++i)
 		{
-			// Mark edges of all original triangles active
-			for (uint triangle_idx : edge.second)
-			{
-				IndexedTriangle &triangle = ioIndices[triangle_idx];
-				uint edge_idx = edge.first.GetIndexInTriangle(triangle);
-				uint32 mask = 1 << (edge_idx + FLAGS_ACTIVE_EGDE_SHIFT);
-				JPH_ASSERT((triangle.mMaterialIndex & mask) == 0);
-				triangle.mMaterialIndex |= mask;
-			}
+			uint triangle_idx = edge.second.mTriangleIndices[i];
+			IndexedTriangle &triangle = ioIndices[triangle_idx];
+			uint edge_idx = edge.first.GetIndexInTriangle(triangle);
+			uint32 mask = 1 << (edge_idx + FLAGS_ACTIVE_EGDE_SHIFT);
+			JPH_ASSERT((triangle.mMaterialIndex & mask) == 0);
+			triangle.mMaterialIndex |= mask;
 		}
 	}
 }

+ 6 - 6
Jolt/TriangleSplitter/TriangleSplitterBinning.cpp

@@ -13,6 +13,7 @@ TriangleSplitterBinning::TriangleSplitterBinning(const VertexList &inVertices, c
 	mMaxNumBins(inMaxNumBins),
 	mNumTrianglesPerBin(inNumTrianglesPerBin)
 {
+	mBins.resize(mMaxNumBins);
 }
 
 bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Range &outRight)
@@ -28,7 +29,6 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 
 	// Bin in all dimensions
 	uint num_bins = Clamp(inTriangles.Count() / mNumTrianglesPerBin, mMinNumBins, mMaxNumBins);	
-	vector<Bin> bins(num_bins);
 	for (uint dim = 0; dim < 3; ++dim)
 	{
 		float bounds_min = centroid_bounds.mMin[dim];
@@ -41,7 +41,7 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 		// Initialize bins
 		for (uint b = 0; b < num_bins; ++b)
 		{
-			Bin &bin = bins[b];
+			Bin &bin = mBins[b];
 			bin.mBounds.SetEmpty();
 			bin.mMinCentroid = bounds_min + bounds_size * (b + 1) / num_bins;
 			bin.mNumTriangles = 0;
@@ -54,7 +54,7 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 
 			// Select bin 
 			uint bin_no = min(uint((centroid_pos - bounds_min) / bounds_size * num_bins), num_bins - 1);
-			Bin &bin = bins[bin_no];
+			Bin &bin = mBins[bin_no];
 
 			// Accumulate triangle in bin
 			bin.mBounds.Encapsulate(mVertices, GetTriangle(t));
@@ -67,7 +67,7 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 		int prev_triangles = 0;
 		for (uint b = 0; b < num_bins; ++b)
 		{
-			Bin &bin = bins[b];
+			Bin &bin = mBins[b];
 			bin.mBoundsAccumulatedLeft = prev_bounds; // Don't include this node as we'll take a split on the left side of the bin
 			bin.mNumTrianglesAccumulatedLeft = prev_triangles;
 			prev_bounds.Encapsulate(bin.mBounds);
@@ -79,7 +79,7 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 		prev_triangles = 0;
 		for (int b = num_bins - 1; b >= 0; --b)
 		{
-			Bin &bin = bins[b];
+			Bin &bin = mBins[b];
 			prev_bounds.Encapsulate(bin.mBounds);
 			prev_triangles += bin.mNumTriangles;
 			bin.mBoundsAccumulatedRight = prev_bounds;
@@ -90,7 +90,7 @@ bool TriangleSplitterBinning::Split(const Range &inTriangles, Range &outLeft, Ra
 		for (uint b = 1; b < num_bins; ++b) // Start at 1 since selecting bin 0 would result in everything ending up on the right side
 		{
 			// Calculate surface area heuristic and see if it is better than the current best
-			const Bin &bin = bins[b];
+			const Bin &bin = mBins[b];
 			float cp = bin.mBoundsAccumulatedLeft.GetSurfaceArea() * bin.mNumTrianglesAccumulatedLeft + bin.mBoundsAccumulatedRight.GetSurfaceArea() * bin.mNumTrianglesAccumulatedRight;
 			if (cp < best_cp)
 			{

+ 3 - 0
Jolt/TriangleSplitter/TriangleSplitterBinning.h

@@ -43,6 +43,9 @@ private:
 		uint				mNumTrianglesAccumulatedLeft;		
 		uint				mNumTrianglesAccumulatedRight;		
 	};
+
+	// Scratch area to store the bins
+	vector<Bin>				mBins;
 };
 
 JPH_NAMESPACE_END