Explorar el Código

Added aligned allocation to the frame allocator
Added a scratch buffer for physx
Reworked joint anchors, again

BearishSun hace 9 años
padre
commit
94133c3a15

+ 14 - 17
BansheeCore/Source/BsCJoint.cpp

@@ -216,32 +216,29 @@ namespace BansheeEngine
 
 	void CJoint::updateTransform(JointBody body)
 	{
-		Vector3 parentPos = SO()->getWorldPosition();
-		Quaternion parentRot = SO()->getWorldRotation();
-		
-		// Add local position/rotation offset to the joint's transform
-		Vector3 worldPos = parentPos + parentRot.rotate(mPositions[(int)body]);
-		Quaternion worldRot = parentRot * mRotations[(int)body];
+		Vector3 localPos; // Anchor position; starts as the stored offset and ends up in the joint's local space
+		Quaternion localRot; // Anchor rotation; converted the same way as the position
 
-		// Transform body's world position/rotation into space local to the joint + offset space
-		Vector3 bodyPos;
-		Quaternion bodyRot;
+		localPos = mPositions[(int)body];
+		localRot = mRotations[(int)body];
 
+		// Offsets are relative to the attached body; with no body they are left untouched and treated as world space
 		HRigidbody rigidbody = mBodies[(int)body];
-		if(rigidbody != nullptr)
+		if (rigidbody != nullptr)
 		{
-			bodyPos = rigidbody->SO()->getWorldPosition(); 
-			bodyRot = rigidbody->SO()->getWorldRotation();
+			localRot = rigidbody->SO()->getWorldRotation() * localRot;
+			localPos = localRot.rotate(localPos) + rigidbody->SO()->getWorldPosition(); // NOTE(review): rotates by body * anchor rotation, not the body rotation alone - confirm intended
 		}
 
-		Quaternion invRotation = worldRot.inverse();
+		// Re-express the world space anchor relative to this joint's scene object
+		Quaternion invRotation = SO()->getWorldRotation().inverse();
 
-		bodyPos = invRotation.rotate(bodyPos - worldPos);
-		bodyRot = invRotation * bodyRot;
+		localPos = invRotation.rotate(localPos - SO()->getWorldPosition());
+		localRot = invRotation * localRot;
 
-		mInternal->setTransform(body, bodyPos, bodyRot);
+		mInternal->setTransform(body, localPos, localRot);
 	}
-
+	
 	void CJoint::triggerOnJointBroken()
 	{
 		onJointBreak();

+ 2 - 0
BansheePhysX/Include/BsPhysX.h

@@ -213,6 +213,8 @@ namespace BansheeEngine
 
 		physx::PxMaterial* mDefaultMaterial = nullptr;
 		physx::PxTolerancesScale mScale;
+
+		static const UINT32 SCRATCH_BUFFER_SIZE;
 	};
 
 	/** Provides easier access to PhysX. */

+ 23 - 7
BansheePhysX/Source/BsPhysX.cpp

@@ -399,6 +399,9 @@ namespace BansheeEngine
 	static PhysXEventCallback gPhysXEventCallback;
 	static PhysXBroadPhaseCallback gPhysXBroadphaseCallback;
 
+	static const UINT32 SIZE_16K = 1 << 14; // 16384 bytes
+	const UINT32 PhysX::SCRATCH_BUFFER_SIZE = SIZE_16K * 64; // 64 x 16KB = 1MB by default; size of the scratch block handed to the scene's simulate() call (PhysX documents it as needing to be a multiple of 16K)
+
 	PhysX::PhysX(const PHYSICS_INIT_DESC& input)
 		:Physics(input)
 	{
@@ -471,10 +474,25 @@ namespace BansheeEngine
 		{
 			// Note: Consider delaying fetchResults one frame. This could improve performance because Physics update would be
 			//       able to run parallel to the simulation thread, but at a cost to input latency.
-			// TODO - Provide a scratch buffer for the simulation (use the frame allocator, but I must extend it so it allocates
-			//	      on a 16 byte boundary).
-			mScene->simulate(mSimulationStep);
-			mScene->fetchResults(true);
+
+			bs_frame_mark();
+			UINT8* scratchBuffer = bs_frame_alloc_aligned(SCRATCH_BUFFER_SIZE, 16);
+
+			mScene->simulate(mSimulationStep, nullptr, scratchBuffer, SCRATCH_BUFFER_SIZE);
+			simulationAmount -= mSimulationStep;
+
+			UINT32 errorState;
+			if(!mScene->fetchResults(true, &errorState))
+			{
+				LOGWRN("Physics simualtion failed. Error code: " + toString(errorState));
+
+				bs_frame_free_aligned(scratchBuffer);
+				bs_frame_clear();
+				continue;
+			}
+
+			bs_frame_free_aligned(scratchBuffer);
+			bs_frame_clear();
 
 			// Update rigidbodies with new transforms
 			PxU32 numActiveTransforms;
@@ -489,8 +507,6 @@ namespace BansheeEngine
 				//       use non-temporal writes
 				rigidbody->_setTransform(fromPxVector(transform.p), fromPxQuaternion(transform.q));
 			}
-
-			simulationAmount -= mSimulationStep;
 		}
 
 		// TODO - Consider extrapolating for the remaining "simulationAmount" value
@@ -552,7 +568,7 @@ namespace BansheeEngine
 					point.normal = -point.normal;
 			}
 
-			SPtr<Rigidbody> rigidbody = obj->getRigidbody();
+			Rigidbody* rigidbody = obj->getRigidbody();
 			if(rigidbody != nullptr)
 			{
 				switch (type)

+ 11 - 0
BansheeUtility/Include/BsFrameAlloc.h

@@ -52,6 +52,17 @@ namespace BansheeEngine
 		 */
 		UINT8* alloc(UINT32 amount);
 
+		/**
+		 * Allocates a new block of memory of the specified size aligned to the specified boundary. If the alignment is less
+		 * or equal to 16 an allocAligned16() alternative is suggested as more efficient - NOTE(review): no such method is
+		 * visible next to this declaration, confirm it exists before relying on this advice.
+		 *
+		 * @param[in]	amount		Amount of memory to allocate, in bytes.
+		 * @param[in]	alignment	Alignment of the allocated memory, in bytes. Must be power of two.
+		 * @return					Pointer to the allocated memory.
+		 * 					
+		 * @note	Not thread safe.
+		 */
+		UINT8* allocAligned(UINT32 amount, UINT32 alignment);
+
 		/**
 		 * Allocates and constructs a new object.
 		 *	

+ 55 - 2
BansheeUtility/Include/BsGlobalFrameAlloc.h

@@ -27,6 +27,12 @@ namespace BansheeEngine
 	 */
 	inline BS_UTILITY_EXPORT UINT8* bs_frame_alloc(UINT32 numBytes);
 
+	/** 
+	 * Allocates the specified number of bytes aligned to the provided boundary, using the global frame allocator. Memory
+	 * obtained this way is released with bs_frame_free_aligned().
+	 *
+	 * @param[in]	count	Number of bytes to allocate.
+	 * @param[in]	align	Boundary to align the allocation to, in bytes. Must be a power of two.
+	 */
+	inline BS_UTILITY_EXPORT UINT8* bs_frame_alloc_aligned(UINT32 count, UINT32 align);
+
 	/**
 	 * Deallocates memory allocated with the global frame allocator.
 	 *
@@ -34,6 +40,13 @@ namespace BansheeEngine
 	 */
 	inline BS_UTILITY_EXPORT void bs_frame_free(void* data);
 
+	/** 
+	 * Frees memory previously allocated with bs_frame_alloc_aligned(). 
+	 *
+	 * @param[in]	data	Pointer previously returned by bs_frame_alloc_aligned().
+	 *
+	 * @note	Must be called on the same thread the memory was allocated on.
+	 */
+	inline BS_UTILITY_EXPORT void bs_frame_free_aligned(void* data);
+
 	/**
 	 * Allocates enough memory to hold the object of specified type using the global frame allocator, but does not 
 	 * construct the object. 
@@ -158,17 +171,57 @@ namespace BansheeEngine
 	class MemoryAllocator<FrameAlloc> : public MemoryAllocatorBase
 	{
 	public:
-		/** Allocates @p bytes bytes. */
+		/** @copydoc MemoryAllocator<T>::allocate */
 		static void* allocate(size_t bytes)
 		{
 			return bs_frame_alloc((UINT32)bytes);
 		}
 
-		/** Frees the memory at the specified location. */
+		/** @copydoc MemoryAllocator<T>::allocateAligned */
+		static void* allocateAligned(size_t bytes, size_t alignment)
+		{
+#if BS_PROFILING_ENABLED
+			incAllocCount();
+#endif
+
+			return bs_frame_alloc_aligned((UINT32)bytes, (UINT32)alignment);
+		}
+
+		/** @copydoc MemoryAllocator<T>::allocateAligned16 */
+		static void* allocateAligned16(size_t bytes)
+		{
+#if BS_PROFILING_ENABLED
+			incAllocCount();
+#endif
+
+			return bs_frame_alloc_aligned((UINT32)bytes, 16);
+		}
+
+		/** @copydoc MemoryAllocator<T>::free */
 		static void free(void* ptr)
 		{
 			bs_frame_free(ptr);
 		}
+
+		/** @copydoc MemoryAllocator<T>::freeAligned */
+		static void freeAligned(void* ptr)
+		{
+#if BS_PROFILING_ENABLED
+			incFreeCount();
+#endif
+
+			bs_frame_free_aligned(ptr);
+		}
+
+		/** @copydoc MemoryAllocator<T>::freeAligned16 */
+		static void freeAligned16(void* ptr)
+		{
+#if BS_PROFILING_ENABLED
+			incFreeCount();
+#endif
+
+			bs_frame_free_aligned(ptr);
+		}
 	};
 
 	/** @endcond */

+ 9 - 9
BansheeUtility/Include/BsMemoryAllocator.h

@@ -166,34 +166,34 @@ namespace BansheeEngine
 			return platformAlignedAlloc16(bytes);
 		}
 
-		/** Frees memory allocated with allocateAligned() */
-		static void freeAligned(void* ptr)
+		/** Frees the memory at the specified location. */
+		static void free(void* ptr)
 		{
 #if BS_PROFILING_ENABLED
 			incFreeCount();
 #endif
 
-			platformAlignedFree(ptr);
+			::free(ptr);
 		}
 
-		/** Frees memory allocated with allocateAligned16() */
-		static void freeAligned16(void* ptr)
+		/** Frees memory allocated with allocateAligned() */
+		static void freeAligned(void* ptr)
 		{
 #if BS_PROFILING_ENABLED
 			incFreeCount();
 #endif
 
-			platformAlignedFree16(ptr);
+			platformAlignedFree(ptr);
 		}
 
-		/** Frees the memory at the specified location. */
-		static void free(void* ptr)
+		/** Frees memory allocated with allocateAligned16() */
+		static void freeAligned16(void* ptr)
 		{
 #if BS_PROFILING_ENABLED
 			incFreeCount();
 #endif
 
-			::free(ptr);
+			platformAlignedFree16(ptr);
 		}
 	};
 

+ 42 - 3
BansheeUtility/Source/BsFrameAlloc.cpp

@@ -73,10 +73,49 @@ namespace BansheeEngine
 #endif
 	}
 
+	/**
+	 * Allocates @p amount bytes from the current free block so that the returned pointer is aligned to @p alignment.
+	 *
+	 * The padding is computed from the actual address that will be handed out (block data pointer + free offset + debug
+	 * header) instead of from the block-relative offset alone. Otherwise the UINT32 size header that debug builds place
+	 * in front of every allocation pushes the returned pointer off the requested boundary, and alignments larger than
+	 * the alignment of the block's data pointer cannot be honoured reliably.
+	 *
+	 * @param[in]	amount		Amount of memory to allocate, in bytes.
+	 * @param[in]	alignment	Alignment of the returned pointer, in bytes. Must be a power of two.
+	 * @return					Pointer to the aligned memory.
+	 */
+	UINT8* FrameAlloc::allocAligned(UINT32 amount, UINT32 alignment)
+	{
+#if BS_DEBUG_MODE
+		assert(mOwnerThread == BS_THREAD_CURRENT_ID && "Frame allocator called from invalid thread.");
+		assert(alignment != 0 && (alignment & (alignment - 1)) == 0 && "Alignment must be a power of two.");
+
+		// Debug builds store the allocation size directly in front of the returned pointer (read back by dealloc())
+		const UINT32 headerSize = sizeof(UINT32);
+#else
+		const UINT32 headerSize = 0;
+#endif
+
+		amount += headerSize;
+
+		// Number of padding bytes needed so that (next free address + header) lands on the requested boundary.
+		// Assumes MemBlock::alloc() hands out mData + mFreePtr, same as the block-relative math this replaces.
+		auto calcAlignOffset = [&]() -> UINT32
+		{
+			size_t address = reinterpret_cast<size_t>(mFreeBlock->mData + mFreeBlock->mFreePtr) + headerSize;
+			return (UINT32)((alignment - (address & (alignment - 1))) & (alignment - 1));
+		};
+
+		UINT32 alignOffset = calcAlignOffset();
+
+		UINT32 freeMem = mFreeBlock->mSize - mFreeBlock->mFreePtr;
+		if ((amount + alignOffset) > freeMem)
+		{
+			// The address of the next block's data isn't known up front, so reserve room for the worst case padding
+			// and compute the real padding once that block has become the current free block
+			allocBlock(amount + alignment - 1);
+			alignOffset = calcAlignOffset();
+		}
+
+		amount += alignOffset;
+		UINT8* data = mFreeBlock->alloc(amount);
+
+#if BS_DEBUG_MODE
+		mTotalAllocBytes += amount;
+
+		UINT32* storedSize = reinterpret_cast<UINT32*>(data + alignOffset);
+		*storedSize = amount;
+#endif
+
+		return data + alignOffset + headerSize;
+	}
+
 	void FrameAlloc::dealloc(UINT8* data)
 	{
 		// Dealloc is only used for debug and can be removed if needed. All the actual deallocation
-		// happens in ::clear
+		// happens in clear()
 			
 #if BS_DEBUG_MODE
 		data -= sizeof(UINT32);
@@ -218,7 +257,7 @@ namespace BansheeEngine
 
 		if (newBlock == nullptr)
 		{
-			UINT8* data = (UINT8*)reinterpret_cast<UINT8*>(bs_alloc(blockSize + sizeof(MemBlock)));
+			UINT8* data = (UINT8*)reinterpret_cast<UINT8*>(bs_alloc_aligned16(blockSize + sizeof(MemBlock)));
 			newBlock = new (data) MemBlock(blockSize);
 			data += sizeof(MemBlock);
 			newBlock->mData = data;
@@ -235,7 +274,7 @@ namespace BansheeEngine
 	void FrameAlloc::deallocBlock(MemBlock* block)
 	{
 		block->~MemBlock();
-		bs_free(block);
+		// Blocks are allocated with bs_alloc_aligned16(), so release them with the matching 16-byte aligned free
+		bs_free_aligned16(block);
 	}
 
 	void FrameAlloc::setOwnerThread(BS_THREAD_ID_TYPE thread)

+ 10 - 0
BansheeUtility/Source/BsGlobalFrameAlloc.cpp

@@ -25,11 +25,21 @@ namespace BansheeEngine
 		return gFrameAlloc().alloc(numBytes);
 	}
 
+	inline BS_UTILITY_EXPORT UINT8* bs_frame_alloc_aligned(UINT32 count, UINT32 align)
+	{
+		return gFrameAlloc().allocAligned(count, align); // Forwards to the allocator returned by gFrameAlloc()
+	}
+
 	inline BS_UTILITY_EXPORT void bs_frame_free(void* data)
 	{
 		gFrameAlloc().dealloc(data);
 	}
 
+	inline BS_UTILITY_EXPORT void bs_frame_free_aligned(void* data)
+	{
+		gFrameAlloc().dealloc(data); // Same dealloc() call as bs_frame_free(); aligned allocations need no separate release path
+	}
+
 	inline BS_UTILITY_EXPORT void bs_frame_mark()
 	{
 		gFrameAlloc().markFrame();