Explorar el Código

Feature/optimize broadphase locks (#4)

Moved from 2 mutexes per tree to 2 mutexes per broadphase to avoid locking too many mutexes
jrouwe hace 4 años
padre
commit
b1ddda0a85

+ 31 - 2
Jolt/Physics/Collision/BroadPhase/BroadPhaseQuadTree.cpp

@@ -55,6 +55,12 @@ void BroadPhaseQuadTree::FrameSync()
 {
 	JPH_PROFILE_FUNCTION();
 
+	// Take a unique lock on the old query lock so that we know no one is using the old nodes anymore.
+	// Note that nothing should be locked at this point to avoid risking a lock inversion deadlock.
+	// Note that in other places where we lock this mutex we don't use SharedLock to detect lock inversions. As long as
+	// nothing else is locked this is safe. This is why BroadPhaseQuery should be the highest priority lock.
+	UniqueLock root_lock(mQueryLocks[mQueryLockIdx ^ 1], EPhysicsLockTypes::BroadPhaseQuery);
+
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 		mLayers[l].DiscardOldTree();
 }
@@ -130,6 +136,9 @@ void BroadPhaseQuadTree::UpdateFinalize(UpdateState &inUpdateState)
 		return;
 
 	update_state_impl->mTree->UpdateFinalize(mBodyManager->GetBodies(), mTracking, update_state_impl->mUpdateState);
+
+	// Make all queries from now on use the new lock
+	mQueryLockIdx = mQueryLockIdx ^ 1;
 }
 
 void BroadPhaseQuadTree::UnlockModifications()
@@ -199,7 +208,7 @@ void BroadPhaseQuadTree::AddBodiesFinalize(BodyID *ioBodies, int inNumber, AddSt
 	JPH_PROFILE_FUNCTION();
 
 	// This cannot run concurrently with UpdatePrepare()/UpdateFinalize()
-	SharedLock<SharedMutex> lock(mUpdateMutex, EPhysicsLockTypes::BroadPhaseUpdate);
+	SharedLock lock(mUpdateMutex, EPhysicsLockTypes::BroadPhaseUpdate);
 
 	BodyVector &bodies = mBodyManager->GetBodies();
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());
@@ -269,7 +278,7 @@ void BroadPhaseQuadTree::RemoveBodies(BodyID *ioBodies, int inNumber)
 	JPH_PROFILE_FUNCTION();
 
 	// This cannot run concurrently with UpdatePrepare()/UpdateFinalize()
-	SharedLock<SharedMutex> lock(mUpdateMutex, EPhysicsLockTypes::BroadPhaseUpdate);
+	SharedLock lock(mUpdateMutex, EPhysicsLockTypes::BroadPhaseUpdate);
 
 	JPH_ASSERT(inNumber > 0);
 
@@ -394,6 +403,9 @@ void BroadPhaseQuadTree::CastRay(const RayCast &inRay, RayCastBodyCollector &ioC
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -414,6 +426,9 @@ void BroadPhaseQuadTree::CollideAABox(const AABox &inBox, CollideShapeBodyCollec
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -434,6 +449,9 @@ void BroadPhaseQuadTree::CollideSphere(Vec3Arg inCenter, float inRadius, Collide
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -454,6 +472,9 @@ void BroadPhaseQuadTree::CollidePoint(Vec3Arg inPoint, CollideShapeBodyCollector
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -474,6 +495,9 @@ void BroadPhaseQuadTree::CollideOrientedBox(const OrientedBox &inBox, CollideSha
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -494,6 +518,9 @@ void BroadPhaseQuadTree::CastAABox(const AABoxCast &inBox, CastShapeBodyCollecto
 
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Prevent this from running in parallel with node deletion in FrameSync(), see notes there
+	shared_lock lock(mQueryLocks[mQueryLockIdx]);
+
 	// Loop over all layers and test the ones that could hit
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
 	{
@@ -515,6 +542,8 @@ void BroadPhaseQuadTree::FindCollidingPairs(BodyID *ioActiveBodies, int inNumAct
 	const BodyVector &bodies = mBodyManager->GetBodies();
 	JPH_ASSERT(mMaxBodies == mBodyManager->GetMaxBodies());	
 
+	// Note that we don't take any locks at this point. We know that the tree is not going to be swapped or deleted while finding collision pairs due to the way the jobs are scheduled in the PhysicsSystem::Update.
+
 	// Sort bodies on layer
 	const Tracking *tracking = mTracking.data(); // C pointer or else sort is incredibly slow in debug mode
 	sort(ioActiveBodies, ioActiveBodies + inNumActiveBodies, [tracking](BodyID inLHS, BodyID inRHS) -> bool { return tracking[inLHS.GetIndex()].mObjectLayer < tracking[inRHS.GetIndex()].mObjectLayer; });

+ 7 - 0
Jolt/Physics/Collision/BroadPhase/BroadPhaseQuadTree.h

@@ -78,6 +78,13 @@ private:
 	/// Mutex that prevents object modification during UpdatePrepare/Finalize()
 	SharedMutex				mUpdateMutex;
 
+	/// We double buffer all trees so that we can query while building the next one and we destroy the old tree the next physics update.
+	/// This structure ensures that we wait for queries that are still using the old tree.
+	mutable SharedMutex		mQueryLocks[2];
+
+	/// This index indicates which lock is currently active, it alternates between 0 and 1
+	atomic<uint32>			mQueryLockIdx { 0 };
+
 	/// This is the next tree to update in UpdatePrepare()
 	uint32					mNextLayerToUpdate = 0;
 };

+ 12 - 41
Jolt/Physics/Collision/BroadPhase/QuadTree.cpp

@@ -210,10 +210,6 @@ void QuadTree::DiscardOldTree()
 	RootNode &old_root_node = mRootNode[mRootNodeIndex ^ 1];
 	if (old_root_node.mIndex != cInvalidNodeIndex)
 	{
-		// Take a unique lock on the root of the old tree so that we know no one is using the old nodes anymore
-		// Note that nothing should be locked at this point to avoid risking a lock inversion deadlock
-		UniqueLock<SharedMutex> root_lock(old_root_node.mMutex, EPhysicsLockTypes::BroadPhaseTree);
-
 		// Clear the root
 		old_root_node.mIndex = cInvalidNodeIndex;
 
@@ -935,30 +931,10 @@ void QuadTree::NotifyBodiesAABBChanged(const BodyVector &inBodies, const Trackin
 	}
 }
 
-const QuadTree::RootNode &QuadTree::LockRoot(shared_lock<SharedMutex> &outLock) const
-{
-	for (;;)
-	{
-		// Prevent this from running in parallel with node deletion in DiscardOldTree()
-		// Note that we don't use SharedLock here as it is safe in this case for other locks to have been taken 
-		// (the only guarantee we need to avoid a lock inversion deadlock is that during DiscardOldTree nothing is locked)
-		const RootNode &root_node = GetCurrentRoot();
-		shared_lock<SharedMutex> lock(root_node.mMutex);
-
-		// Check that the tree was not discarded between the call to GetCurrentRoot() and when the lock was taken
-		if (root_node.mIndex != cInvalidNodeIndex)
-		{
-			outLock = move(lock);
-			return root_node;
-		}
-	}
-}
-
 void QuadTree::CastRay(const RayCast &inRay, RayCastBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	// Properties of ray
 	Vec3 origin(inRay.mOrigin);
@@ -1041,9 +1017,8 @@ void QuadTree::CastRay(const RayCast &inRay, RayCastBodyCollector &ioCollector,
 
 void QuadTree::CollideAABox(const AABox &inBox, CollideShapeBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	NodeID node_stack[cStackSize];
 	node_stack[0] = root_node.GetNodeID();
@@ -1107,9 +1082,8 @@ void QuadTree::CollideAABox(const AABox &inBox, CollideShapeBodyCollector &ioCol
 
 void QuadTree::CollideSphere(Vec3Arg inCenter, float inRadius, CollideShapeBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	// Splat sphere to 4 component vectors
 	Vec4 center_x = Vec4(inCenter).SplatX();
@@ -1186,9 +1160,8 @@ void QuadTree::CollideSphere(Vec3Arg inCenter, float inRadius, CollideShapeBodyC
 
 void QuadTree::CollidePoint(Vec3Arg inPoint, CollideShapeBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	NodeID node_stack[cStackSize];
 	node_stack[0] = root_node.GetNodeID();
@@ -1253,9 +1226,8 @@ void QuadTree::CollidePoint(Vec3Arg inPoint, CollideShapeBodyCollector &ioCollec
 
 void QuadTree::CollideOrientedBox(const OrientedBox &inBox, CollideShapeBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	NodeID node_stack[cStackSize];
 	node_stack[0] = root_node.GetNodeID();
@@ -1320,9 +1292,8 @@ void QuadTree::CollideOrientedBox(const OrientedBox &inBox, CollideShapeBodyColl
 
 void QuadTree::CastAABox(const AABoxCast &inBox, CastShapeBodyCollector &ioCollector, const ObjectLayerFilter &inObjectLayerFilter, const TrackingVector &inTracking) const
 {
-	// Get and lock the root
-	shared_lock<SharedMutex> lock;
-	const RootNode &root_node = LockRoot(lock);
+	// Get the root
+	const RootNode &root_node = GetCurrentRoot();
 
 	// Properties of ray
 	Vec3 origin(inBox.mBox.GetCenter());

+ 0 - 7
Jolt/Physics/Collision/BroadPhase/QuadTree.h

@@ -4,7 +4,6 @@
 #pragma once
 
 #include <Core/FixedSizeFreeList.h>
-#include <Core/Mutex.h>
 #include <Core/Atomics.h>
 #include <Core/NonCopyable.h>
 #include <Physics/Body/BodyManager.h>
@@ -251,9 +250,6 @@ private:
 
 		/// Index of the root node of the tree (this is always a node, never a body id)
 		atomic<uint32>			mIndex { cInvalidNodeIndex };
-
-		/// Mutex that protects mIndex
-		mutable SharedMutex		mMutex;
 	};
 
 	/// Caches location of body inBodyID in the tracker, body can be found in mNodes[inNodeIdx].mChildNodeID[inChildIdx]
@@ -265,9 +261,6 @@ private:
 	inline const RootNode &		GetCurrentRoot() const				{ return mRootNode[mRootNodeIndex]; }
 	inline RootNode &			GetCurrentRoot()					{ return mRootNode[mRootNodeIndex]; }
 
-	/// Get the current root of the tree and lock it
-	const RootNode &			LockRoot(shared_lock<SharedMutex> &outLock) const;
-
 	/// Depending on if inNodeID is a body or tree node return the bounding box
 	inline AABox				GetNodeOrBodyBounds(const BodyVector &inBodies, NodeID inNodeID) const;
 

+ 1 - 1
Jolt/Physics/PhysicsLock.h

@@ -10,7 +10,7 @@ namespace JPH {
 /// This is the list of locks used by the physics engine, they need to be locked in a particular order (from top of the list to bottom of the list) in order to prevent deadlocks
 enum class EPhysicsLockTypes
 {
-	BroadPhaseTree			= 1 << 0,
+	BroadPhaseQuery			= 1 << 0,
 	PerBody					= 1 << 1,
 	BodiesList				= 1 << 2,
 	BroadPhaseUpdate		= 1 << 3,