Răsfoiți Sursa

Enhanced frame allocator that can allocate multiple frames in a stack-like fashion

Marko Pintera 10 ani în urmă
părinte
comite
e81ac10b99

+ 5 - 0
BansheeEditor/Include/BsEditorTestSuite.h

@@ -85,5 +85,10 @@ namespace BansheeEngine
 		 *			and re-applying the modifications.
 		 */
 		void TestPrefabDiff();
+
+		/**
+		 * @brief	Tests the frame allocator.
+		 */
+		void TestFrameAlloc();
 	};
 }

+ 48 - 0
BansheeEditor/Source/BsEditorTestSuite.cpp

@@ -10,6 +10,7 @@
 #include "BsPrefab.h"
 #include "BsResources.h"
 #include "BsPrefabDiff.h"
+#include "BsFrameAlloc.h"
 
 namespace BansheeEngine
 {
@@ -422,6 +423,7 @@ namespace BansheeEngine
 		BS_ADD_TEST(EditorTestSuite::SceneObjectRecord_UndoRedo);
 		BS_ADD_TEST(EditorTestSuite::BinaryDiff);
 		BS_ADD_TEST(EditorTestSuite::TestPrefabDiff);
+		BS_ADD_TEST(EditorTestSuite::TestFrameAlloc)
 	}
 
 	void EditorTestSuite::SceneObjectRecord_UndoRedo()
@@ -657,4 +659,50 @@ namespace BansheeEngine
 
 		root->destroy();
 	}
+
+	/**
+	 * Smoke test for FrameAlloc. Runs the same batch of allocations inside a single marked frame, inside two
+	 * stacked frames and finally without any frame, clearing the allocator after each stage.
+	 */
+	void EditorTestSuite::TestFrameAlloc()
+	{
+		FrameAlloc alloc(128);
+
+		// Allocates four buffers (the 130 byte one is larger than the 128 byte block size requested above)
+		// and hands them back in allocation order.
+		auto allocAndRelease = [&alloc]()
+		{
+			const int sizes[] = { 5, 10, 130, 5 };
+			UINT8* buffers[4];
+
+			for (int i = 0; i < 4; i++)
+				buffers[i] = alloc.alloc(sizes[i]);
+
+			for (int i = 0; i < 4; i++)
+				alloc.dealloc(buffers[i]);
+		};
+
+		// Stage 1: one marked frame, released by a single clear
+		alloc.markFrame();
+		allocAndRelease();
+		alloc.clear();
+
+		// Stage 2: two frames marked one after the other, followed by one clear per frame
+		alloc.markFrame();
+		allocAndRelease();
+
+		alloc.markFrame();
+		allocAndRelease();
+
+		alloc.clear();
+		alloc.clear();
+
+		// Stage 3: allocation with no frame marked
+		UINT8* unframed = alloc.alloc(5);
+		alloc.dealloc(unframed);
+		alloc.clear();
+	}
 }

+ 0 - 2
BansheeEngine/Source/BsGUIManager.cpp

@@ -191,12 +191,10 @@ namespace BansheeEngine
 		DragAndDropManager::instance()._update();
 
 		// Update layouts
-		gProfilerCPU().beginSample("UpdateLayout");
 		for(auto& widgetInfo : mWidgets)
 		{
 			widgetInfo.widget->_updateLayout();
 		}
-		gProfilerCPU().endSample("UpdateLayout");
 
 		// Destroy all queued elements (and loop in case any new ones get queued during destruction)
 		do

+ 11 - 4
BansheeUtility/Include/BsFrameAlloc.h

@@ -80,7 +80,14 @@ namespace BansheeEngine
 		}
 
 		/**
-		 * @brief	Deallocates all allocated memory.
+		 * @brief	Starts a new frame. Next call to ::clear will only clear memory
+		 *			allocated past this point.
+		 */
+		void markFrame();
+
+		/**
+		 * @brief	Deallocates all allocated memory since the last call to ::markFrame
+		 *			(or all the memory if there was no call to ::markFrame).
 		 * 			
 		 * @note	Not thread safe.
 		 */
@@ -90,17 +97,17 @@ namespace BansheeEngine
 		 * @brief	Changes the frame allocator owner thread. After the owner
 		 *			thread has changed only allocations from that thread can be made.
 		 */
-		void setOwnerThread(BS_THREAD_ID_TYPE thread) { mOwnerThread = thread; }
+		void setOwnerThread(BS_THREAD_ID_TYPE thread);
 
 	private:
 		UINT32 mBlockSize;
 		Vector<MemBlock*> mBlocks;
 		MemBlock* mFreeBlock;
+		UINT32 mNextBlockIdx;
 		std::atomic<UINT32> mTotalAllocBytes;
+		void* mLastFrame;
 
 #if BS_DEBUG_MODE
-		UINT32 mAllocId;
-		Set<UINT32> mActiveAllocs;
 		BS_THREAD_ID_TYPE mOwnerThread;
 #endif
 

+ 124 - 33
BansheeUtility/Source/BsFrameAlloc.cpp

@@ -25,9 +25,8 @@ namespace BansheeEngine
 
 	FrameAlloc::FrameAlloc(UINT32 blockSize)
 		:mTotalAllocBytes(0), mFreeBlock(nullptr), mBlockSize(blockSize),
-		mOwnerThread(BS_THREAD_CURRENT_ID)
+		mOwnerThread(BS_THREAD_CURRENT_ID), mLastFrame(nullptr), mNextBlockIdx(0)
 	{
-		
 		allocBlock(mBlockSize);
 	}
 
@@ -42,7 +41,7 @@ namespace BansheeEngine
 #if BS_DEBUG_MODE
 		assert(mOwnerThread == BS_THREAD_CURRENT_ID && "Frame allocator called from invalid thread.");
 
-		amount += sizeof(UINT32) * 2;
+		amount += sizeof(UINT32);
 #endif
 
 		UINT32 freeMem = mFreeBlock->mSize - mFreeBlock->mFreePtr;
@@ -57,13 +56,7 @@ namespace BansheeEngine
 		UINT32* storedSize = reinterpret_cast<UINT32*>(data);
 		*storedSize = amount;
 
-		UINT32* storedId = reinterpret_cast<UINT32*>(data + sizeof(UINT32));
-		*storedId = mAllocId;
-
-		mActiveAllocs.insert(mAllocId);
-		mAllocId++;
-
-		return data + sizeof(UINT32) * 2;
+		return data + sizeof(UINT32);
 #else
 		return data;
 #endif
@@ -72,40 +65,108 @@ namespace BansheeEngine
 	void FrameAlloc::dealloc(UINT8* data)
 	{
 		// Dealloc is only used for debug and can be removed if needed. All the actual deallocation
-		// happens in "clear"
+		// happens in ::clear
 			
 #if BS_DEBUG_MODE
-		data -= sizeof(UINT32) * 2;
+		data -= sizeof(UINT32); // Step back onto the size header that alloc writes in front of the returned pointer
 		UINT32* storedSize = reinterpret_cast<UINT32*>(data);
-		UINT32* storedId = reinterpret_cast<UINT32*>(data + sizeof(UINT32));
 		mTotalAllocBytes -= *storedSize;
-		mActiveAllocs.erase(*storedId);
 #endif
 	}
 
+	/**
+	 * Pushes a new frame marker. The marker is a pointer-sized allocation taken from this allocator that
+	 * stores the address of the previous marker, so the markers form a chain that ::clear pops one entry
+	 * at a time.
+	 */
+	void FrameAlloc::markFrame()
+	{
+		// NOTE(review): the marker slot is written through a void** without any explicit alignment -
+		// confirm that alloc returns suitably aligned memory on all target platforms.
+		UINT8* markerMem = alloc(sizeof(void*));
+		void** marker = reinterpret_cast<void**>(markerMem);
+
+		// Remember the previous frame inside the new marker, then make the new marker the current frame
+		*marker = mLastFrame;
+		mLastFrame = marker;
+	}
+
 	void FrameAlloc::clear()
 	{
-#if BS_DEBUG_MODE
 		assert(mOwnerThread == BS_THREAD_CURRENT_ID && "Frame allocator called from invalid thread.");
 
-		if(mTotalAllocBytes.load() > 0)
-			BS_EXCEPT(InvalidStateException, "Not all frame allocated bytes were properly released.");
+		if(mLastFrame != nullptr)
+		{
+			assert(mBlocks.size() > 0 && mNextBlockIdx > 0);
+
+			dealloc(mLastFrame);
 
-		mAllocId = 0;
-		mActiveAllocs.clear();
+			UINT8* framePtr = (UINT8*)mLastFrame;
+			mLastFrame = *(void**)mLastFrame;
+
+#if BS_DEBUG_MODE
+			framePtr -= sizeof(UINT32);
 #endif
 
-		// Merge all blocks into one
-		UINT32 totalBytes = 0;
-		for(auto& block : mBlocks)
-		{
-			totalBytes += block->mSize;
-			deallocBlock(block);
+			UINT32 startBlockIdx = mNextBlockIdx - 1;
+			UINT32 numFreedBlocks = 0;
+			for (UINT32 i = startBlockIdx; i >= 0; i--)
+			{
+				MemBlock* curBlock = mBlocks[i];
+				UINT8* blockEnd = curBlock->mData + curBlock->mSize;
+				if (framePtr >= curBlock->mData && framePtr < blockEnd)
+				{
+					UINT8* dataEnd = curBlock->mData + curBlock->mFreePtr;
+					UINT32 sizeInBlock = (UINT32)(dataEnd - framePtr);
+					assert(sizeInBlock <= curBlock->mFreePtr);
+
+					curBlock->mFreePtr -= sizeInBlock;
+					if (curBlock->mFreePtr == 0)
+					{
+						numFreedBlocks++;
+						mNextBlockIdx = i;
+					}
+
+					break;
+				}
+				else
+				{
+					curBlock->mFreePtr = 0;
+					mNextBlockIdx = i;
+					numFreedBlocks++;
+				}
+			}
+
+			UINT32 oldNextBlockIdx = mNextBlockIdx;
+			if (numFreedBlocks > 1)
+			{
+				UINT32 totalBytes = 0;
+				for (UINT32 i = 0; i < numFreedBlocks; i++)
+				{
+					MemBlock* curBlock = mBlocks[mNextBlockIdx];
+					totalBytes += curBlock->mSize;
+
+					deallocBlock(curBlock);
+					mBlocks.erase(mBlocks.begin() + mNextBlockIdx);
+				}
+				
+				allocBlock(totalBytes);
+			}
+			
+			// Point to the first non-full block, or if none available then point the the block we just allocated
+			if (oldNextBlockIdx > 0)
+				mFreeBlock = mBlocks[oldNextBlockIdx - 1];
 		}
+		else
+		{
+#if BS_DEBUG_MODE
+			if (mTotalAllocBytes.load() > 0)
+				BS_EXCEPT(InvalidStateException, "Not all frame allocated bytes were properly released.");
+#endif
 
-		mBlocks.clear();
-			
-		allocBlock(totalBytes);			
+			// Merge all blocks into one
+			UINT32 totalBytes = 0;
+			for (auto& block : mBlocks)
+			{
+				totalBytes += block->mSize;
+				deallocBlock(block);
+			}
+
+			mBlocks.clear();
+			mNextBlockIdx = 0;
+
+			allocBlock(totalBytes);
+		}
 	}
 
 	FrameAlloc::MemBlock* FrameAlloc::allocBlock(UINT32 wantedSize)
@@ -114,12 +175,35 @@ namespace BansheeEngine
 		if(wantedSize > blockSize)
 			blockSize = wantedSize;
 
-		UINT8* data = (UINT8*)reinterpret_cast<UINT8*>(bs_alloc(blockSize + sizeof(MemBlock)));
-		MemBlock* newBlock = new (data) MemBlock(blockSize);
-		data += sizeof(MemBlock);
-		newBlock->mData = data;
+		MemBlock* newBlock = nullptr;
+		while (mNextBlockIdx < mBlocks.size())
+		{
+			MemBlock* curBlock = mBlocks[mNextBlockIdx];
+			if (blockSize <= curBlock->mSize)
+			{
+				newBlock = curBlock;
+				mNextBlockIdx++;
+				break;
+			}
+			else
+			{
+				// Found an empty block that doesn't fit our data, delete it
+				deallocBlock(curBlock);
+				mBlocks.erase(mBlocks.begin() + mNextBlockIdx);
+			}
+		}
+
+		if (newBlock == nullptr)
+		{
+			UINT8* data = (UINT8*)reinterpret_cast<UINT8*>(bs_alloc(blockSize + sizeof(MemBlock)));
+			newBlock = new (data) MemBlock(blockSize);
+			data += sizeof(MemBlock);
+			newBlock->mData = data;
+
+			mBlocks.push_back(newBlock);
+			mNextBlockIdx++;
+		}
 
-		mBlocks.push_back(newBlock);
 		mFreeBlock = newBlock; // If previous block had some empty space it is lost until next "clear"
 
 		return newBlock;
@@ -130,4 +214,11 @@ namespace BansheeEngine
 		block->~MemBlock();
 		bs_free(block);
 	}
+
+	/**
+	 * Records the thread that is allowed to use the allocator. The owner is only stored when
+	 * BS_DEBUG_MODE is enabled, otherwise the call does nothing.
+	 *
+	 * @param	thread	Identifier of the new owner thread.
+	 */
+	void FrameAlloc::setOwnerThread(BS_THREAD_ID_TYPE thread)
+	{
+#if BS_DEBUG_MODE
+		this->mOwnerThread = thread;
+#endif
+	}
 }

+ 2 - 1
TODO.txt

@@ -77,8 +77,9 @@ First screenshot work:
 - (Optionally) Console window
 
 Optimization:
+ - Remove profiling calls in GUIWidget and text sprite
+ - Remove pause mechanic from profiler overlay
  - HResource.isLoaded checks are using up a lot of allocations in GUI updates
- - (Profiler has a large overhead as it allocates strings with each call. Consider using stack allocated strings which are either hashed or stored internally on stack as well)
  - There are more issues with GUI allocations
 
 -----------