瀏覽代碼

Added a stack allocator

Marko Pintera 12 年之前
父節點
當前提交
9c598ab4ef

+ 6 - 6
BansheeEngine/Source/BsGUILayoutX.cpp

@@ -87,13 +87,13 @@ namespace BansheeEngine
 		UINT32 numNonClampedElements = 0;
 		UINT32 numFlexibleSpaces = 0;
 
-		bool* processedElements = CM_NEW_ARRAY(bool, (UINT32)mChildren.size(), ScratchAlloc);
+		bool* processedElements = stackAllocN<bool>((UINT32)mChildren.size(), HID_Main);
 		memset(processedElements, 0, mChildren.size() * sizeof(bool));
 
-		UINT32* elementSizes = CM_NEW_ARRAY(UINT32, (UINT32)mChildren.size(), ScratchAlloc);
+		UINT32* elementSizes = stackAllocN<UINT32>((UINT32)mChildren.size(), HID_Main);
 		memset(elementSizes, 0, mChildren.size() * sizeof(UINT32));
 
-		float* elementScaleWeights = CM_NEW_ARRAY(float, (UINT32)mChildren.size(), ScratchAlloc);
+		float* elementScaleWeights = stackAllocN<float>((UINT32)mChildren.size(), HID_Main);
 		memset(elementScaleWeights, 0, mChildren.size() * sizeof(float));
 
 		// Set initial sizes, count number of children per type and mark fixed elements as already processed
@@ -351,8 +351,8 @@ namespace BansheeEngine
 			childIdx++;
 		}
 
-		CM_DELETE_ARRAY(processedElements, bool, (UINT32)mChildren.size(), ScratchAlloc);
-		CM_DELETE_ARRAY(elementSizes, UINT32, (UINT32)mChildren.size(), ScratchAlloc);
-		CM_DELETE_ARRAY(elementScaleWeights, float, (UINT32)mChildren.size(), ScratchAlloc);
+		stackDeallocLast(elementScaleWeights, HID_Main);
+		stackDeallocLast(elementSizes, HID_Main);
+		stackDeallocLast(processedElements, HID_Main);
 	}
 }

+ 6 - 6
BansheeEngine/Source/BsGUILayoutY.cpp

@@ -87,13 +87,13 @@ namespace BansheeEngine
 		UINT32 numNonClampedElements = 0;
 		UINT32 numFlexibleSpaces = 0;
 
-		bool* processedElements = CM_NEW_ARRAY(bool, (UINT32)mChildren.size(), ScratchAlloc);
+		bool* processedElements = stackAllocN<bool>((UINT32)mChildren.size(), HID_Main);
 		memset(processedElements, 0, mChildren.size() * sizeof(bool));
 
-		UINT32* elementSizes = CM_NEW_ARRAY(UINT32, (UINT32)mChildren.size(), ScratchAlloc);
+		UINT32* elementSizes = stackAllocN<UINT32>((UINT32)mChildren.size(), HID_Main);
 		memset(elementSizes, 0, mChildren.size() * sizeof(UINT32));
 
-		float* elementScaleWeights = CM_NEW_ARRAY(float, (UINT32)mChildren.size(), ScratchAlloc);
+		float* elementScaleWeights = stackAllocN<float>((UINT32)mChildren.size(), HID_Main);
 		memset(elementScaleWeights, 0, mChildren.size() * sizeof(float));
 
 		// Set initial sizes, count number of children per type and mark fixed elements as already processed
@@ -356,8 +356,8 @@ namespace BansheeEngine
 			childIdx++;
 		}
 
-		CM_DELETE_ARRAY(processedElements, bool, (UINT32)mChildren.size(), ScratchAlloc);
-		CM_DELETE_ARRAY(elementSizes, UINT32, (UINT32)mChildren.size(), ScratchAlloc);
-		CM_DELETE_ARRAY(elementScaleWeights, float, (UINT32)mChildren.size(), ScratchAlloc);
+		stackDeallocLast(elementScaleWeights, HID_Main);
+		stackDeallocLast(elementSizes, HID_Main);
+		stackDeallocLast(processedElements, HID_Main);
 	}
 }

+ 6 - 0
CamelotCore/Include/CmPrerequisites.h

@@ -88,6 +88,12 @@ THE SOFTWARE
 
 namespace CamelotFramework {
 
+	/**
+	 * @brief	Identifiers of the stack allocator heaps. A value is passed as the heap id to
+	 * 			MemStack::setupHeap and to the stackAlloc / stackDeallocLast family of functions.
+	 */
+	enum HeapID
+	{
+		HID_Main = 0, /**< Heap set up at the start of Application::startUp. */
+		HID_Render = 1 /**< Heap set up by RenderSystem right before initRenderThread is called. */
+	};
+
 // Pre-declare classes
 // Allows use of pointers in header files without including individual .h
 // so decreases dependencies between files

+ 5 - 1
CamelotCore/Source/CmApplication.cpp

@@ -38,10 +38,14 @@ namespace CamelotFramework
 {
 	Application::Application()
 		:mPrimaryWindow(nullptr), mIsFrameRenderingFinished(true), mRunMainLoop(false)
-	{ }
+	{
+		
+	}
 
 	void Application::startUp(START_UP_DESC& desc)
 	{
+		MemStack::setupHeap(HID_Main);
+
 		Time::startUp(CM_NEW(Time, GenAlloc) Time());
 		Input::startUp(CM_NEW(Input, GenAlloc) Input());
 		DynLibManager::startUp(CM_NEW(DynLibManager, GenAlloc) DynLibManager());

+ 1 - 0
CamelotCore/Source/CmRenderSystem.cpp

@@ -94,6 +94,7 @@ namespace CamelotFramework {
 		mCommandQueue = CM_NEW(CommandQueue<CommandQueueSync>, GenAlloc) CommandQueue<CommandQueueSync>(CM_THREAD_CURRENT_ID, true);
 		mPrimaryWindowDesc = primaryWindowDesc;
 
+		MemStack::setupHeap(HID_Render);
 		initRenderThread();
 
 		AsyncOp op = queueReturnCommand(boost::bind(&RenderSystem::initialize_internal, this, _1), true);

+ 2 - 0
CamelotUtility/CamelotUtility.vcxproj

@@ -165,6 +165,7 @@
     <ClCompile Include="Source\CmBox.cpp" />
     <ClCompile Include="Source\CmInt2.cpp" />
     <ClCompile Include="Source\CmManagedDataBlock.cpp" />
+    <ClCompile Include="Source\CmMemStack.cpp" />
     <ClCompile Include="Source\CmORect.cpp" />
     <ClCompile Include="Source\CmRect.cpp" />
     <ClCompile Include="Source\CmTexAtlasGenerator.cpp" />
@@ -200,6 +201,7 @@
     <ClInclude Include="Include\CmRTTIReflectableField.h" />
     <ClInclude Include="Include\CmRTTIReflectablePtrField.h" />
     <ClInclude Include="Include\CmRTTIType.h" />
+    <ClInclude Include="Include\CmMemStack.h" />
     <ClInclude Include="Include\CmString.h" />
     <ClInclude Include="Include\CmThreadDefines.h" />
     <ClInclude Include="Include\CmTime.h" />

+ 6 - 0
CamelotUtility/CamelotUtility.vcxproj.filters

@@ -216,6 +216,9 @@
     <ClInclude Include="Include\CmORect.h">
       <Filter>Header Files\Math</Filter>
     </ClInclude>
+    <ClInclude Include="Include\CmMemStack.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Include\CmAxisAlignedBox.cpp">
@@ -323,5 +326,8 @@
     <ClCompile Include="Source\CmRect.cpp">
       <Filter>Source Files\Math</Filter>
     </ClCompile>
+    <ClCompile Include="Source\CmMemStack.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>

+ 198 - 0
CamelotUtility/Include/CmMemStack.h

@@ -0,0 +1,198 @@
+#pragma once
+
+#include <stack>
+#include <assert.h>
+#include "CmThreadDefines.h"
+
+namespace CamelotFramework
+{
+	/**
+	 * @brief	Implementation of a single stack allocator heap. Memory is handed out linearly from
+	 * 			a stack of blocks and has to be released in the exact reverse order of allocation.
+	 *
+	 * @note	The size of every allocation is remembered in a separate stack, so dealloc only needs
+	 * 			the pointer. No alignment padding is added to the returned pointers.
+	 *
+	 * @tparam	BlockCapacity	Minimum size of a block, in bytes. A request larger than this gets a
+	 * 							block of exactly the requested size.
+	 */
+	template <int BlockCapacity = 1024 * 1024>
+	class MemStackInternal
+	{
+	private:
+		/**
+		 * @brief	One contiguous buffer that allocations are carved out of, front to back.
+		 */
+		class MemBlock
+		{
+		public:
+			MemBlock(UINT32 size)
+				:mData(nullptr), mFreePtr(0), mSize(size), mNumAllocs(0)
+			{
+				mData = static_cast<UINT8*>(CM_NEW_BYTES(mSize, GenAlloc));
+			}
+
+			~MemBlock()
+			{
+				CM_DELETE_BYTES(mData, GenAlloc);
+			}
+
+			/**
+			 * @brief	Reserves the next "amount" bytes of the block. The caller is responsible for
+			 * 			checking that the block has enough free space.
+			 *
+			 * @note	The amount is a full UINT32. A narrower type would silently truncate any
+			 * 			request above 255 bytes and make later allocations overlap.
+			 */
+			UINT8* alloc(UINT32 amount)
+			{
+				UINT8* freePtr = &mData[mFreePtr];
+				mFreePtr += amount;
+				mNumAllocs++;
+
+				return freePtr;
+			}
+
+			/**
+			 * @brief	Releases the most recent allocation of the block.
+			 *
+			 * @param	data	Pointer that was returned by the matching alloc call.
+			 * @param	amount	Size in bytes that was passed to the matching alloc call.
+			 */
+			void dealloc(UINT8* data, UINT32 amount)
+			{
+				mFreePtr -= amount;
+				mNumAllocs--;
+				assert((&mData[mFreePtr]) == data && "Out of order stack deallocation detected. Deallocations need to happen in order opposite of allocations.");
+			}
+
+			UINT8* mData;
+			UINT32 mFreePtr; // Offset of the first unused byte in mData
+			UINT32 mSize; // Total size of mData, in bytes
+			UINT32 mNumAllocs; // Number of allocations that have not been released yet
+		};
+
+	public:
+		MemStackInternal()
+		{ }
+
+		~MemStackInternal()
+		{
+			assert(mBlocks.size() == 0 && "Not all blocks were released before shutting down the stack allocator.");
+
+			// Release builds don't assert, so clean up whatever was left behind
+			while(!mBlocks.empty())
+			{
+				MemBlock* curPtr = mBlocks.top();
+				mBlocks.pop();
+
+				CM_DELETE(curPtr, MemBlock, GenAlloc);
+			}
+		}
+
+		/**
+		 * @brief	Allocates "amount" bytes from the top block, starting a new block if the
+		 * 			stack is empty or the top block doesn't have enough free space.
+		 *
+		 * @return	Pointer to the start of the allocated range.
+		 */
+		UINT8* alloc(UINT32 amount)
+		{
+			MemBlock* topBlock = mBlocks.empty() ? nullptr : mBlocks.top();
+			if(topBlock == nullptr || amount > (topBlock->mSize - topBlock->mFreePtr))
+				topBlock = allocNewBlock(amount);
+
+			mAllocSizes.push(amount);
+			return topBlock->alloc(amount);
+		}
+
+		/**
+		 * @brief	Releases the most recent allocation.
+		 *
+		 * @param	data	Pointer returned by the most recent alloc call that wasn't released yet.
+		 */
+		void dealloc(UINT8* data)
+		{
+			assert(mAllocSizes.size() > 0 && "Out of order stack deallocation detected. Deallocations need to happen in order opposite of allocations.");
+
+			UINT32 amount = mAllocSizes.top();
+			mAllocSizes.pop();
+
+			MemBlock* topBlock = mBlocks.top();
+			topBlock->dealloc(data, amount);
+
+			// Release the block only once nothing lives in it anymore. Checking mFreePtr == 0 is not
+			// enough because zero sized allocations leave the free pointer at the start of the block,
+			// and the block would be destroyed while they are still waiting to be released.
+			if(topBlock->mNumAllocs == 0)
+			{
+				mBlocks.pop();
+				CM_DELETE(topBlock, MemBlock, GenAlloc);
+			}
+		}
+
+	private:
+		std::stack<MemBlock*> mBlocks;
+		std::stack<UINT32> mAllocSizes; // Sizes of all live allocations, most recent on top
+
+		/**
+		 * @brief	Creates a block that can hold at least "wantedSize" bytes and makes it the top block.
+		 */
+		MemBlock* allocNewBlock(UINT32 wantedSize)
+		{
+			UINT32 blockSize = BlockCapacity;
+			if(wantedSize > blockSize)
+				blockSize = wantedSize;
+
+			MemBlock* newBlock = CM_NEW(MemBlock, GenAlloc) MemBlock(blockSize);
+			mBlocks.push(newBlock);
+
+			return newBlock;
+		}
+	};
+
+	/**
+	 * @brief	Fastest, but also most limiting type of allocator. All deallocations
+	 * 			must happen in opposite order from allocations.
+	 * 			
+	 * @note	It's mostly useful when you need to allocate something temporarily on the heap,
+	 * 			usually something that gets allocated and freed within the same function.
+	 * 			
+	 *			The size of each allocation is stored as a separate 4 byte value, which is a significant
+	 *			relative overhead for small allocations.
+	 *			
+	 *			Heaps do no locking of their own. A heap must only be used by the thread it was set up for;
+	 *			when CM_DEBUG_MODE is enabled a thread id is kept per heap so this can be checked.
+	 *
+	 *			Every heap is a MemStackInternal with a block capacity of 1024 * 1024 bytes.
+	 */
+	class CM_UTILITY_EXPORT MemStack
+	{
+	public:
+		/**
+		 * @brief	Sets up the heap you can later use with alloc/dealloc calls. It is most common to have one heap
+		 * 			per thread.
+		 *
+		 * @param	heapId	Unique heap ID. Each heap can only be used from one thread, it cannot be shared.
+		 * 					You cannot have more than 256 heaps.
+		 *
+		 * @note	NOTE(review): heapId is a UINT8 here but a UINT32 in alloc/deallocLast - confirm which is intended.
+		 */
+		static void setupHeap(UINT8 heapId);
+
+		/**
+		 * @brief	Allocates the specified number of bytes from the specified heap.
+		 *
+		 * @param	numBytes	Number of bytes to allocate.
+		 * @param	heapId		ID of a heap previously passed to setupHeap.
+		 */
+		static UINT8* alloc(UINT32 numBytes, UINT32 heapId);
+
+		/**
+		 * @brief	Releases the most recent allocation made on the specified heap.
+		 *
+		 * @param	data	Pointer returned by the last alloc call on this heap that wasn't released yet.
+		 * @param	heapId	ID of the heap the memory was allocated from.
+		 */
+		static void deallocLast(UINT8* data, UINT32 heapId);
+
+	private:
+		// One stack allocator per heap, indexed by heap id
+		static std::vector<MemStackInternal<1024 * 1024>> mStacks;
+
+#if CM_DEBUG_MODE
+		// Thread id stored for each heap, indexed by heap id
+		static std::vector<CM_THREAD_ID_TYPE> mThreadIds;
+#endif
+	};
+
+	/**
+	 * @brief	Allocates the specified number of bytes from the stack allocator heap with the specified id.
+	 *
+	 * @note	Not declared inline: the definition lives in a source file, and an inline function
+	 * 			has to be defined in every translation unit that calls it.
+	 */
+	CM_UTILITY_EXPORT UINT8* stackAlloc(UINT32 numBytes, UINT32 heapId);
+
+	/**
+	 * @brief	Allocates storage large enough for a single object of type T from the specified heap.
+	 * 			No constructor is called.
+	 */
+	template<class T>
+	T* stackAlloc(UINT32 heapId)
+	{
+		UINT8* memory = MemStack::alloc(sizeof(T), heapId);
+
+		return reinterpret_cast<T*>(memory);
+	}
+
+	/**
+	 * @brief	Allocates storage large enough for "count" objects of type T from the specified heap.
+	 * 			No constructors are called.
+	 */
+	template<class T>
+	T* stackAllocN(UINT32 count, UINT32 heapId)
+	{
+		UINT8* memory = MemStack::alloc(sizeof(T) * count, heapId);
+
+		return reinterpret_cast<T*>(memory);
+	}
+
+	/**
+	 * @brief	Allocates storage for "count" objects of type T from the specified heap and
+	 * 			default-initializes each of them in place, first to last.
+	 */
+	template<class T>
+	T* stackConstructN(UINT32 count, UINT32 heapId)
+	{
+		T* const first = stackAllocN<T>(count, heapId);
+		T* const last = first + count;
+
+		for(T* current = first; current != last; ++current)
+			new ((void*)current) T;
+
+		return first;
+	}
+
+	/**
+	 * @brief	Calls the destructor of a single object and then releases its memory as the
+	 * 			most recent allocation of the specified heap.
+	 */
+	template<class T>
+	void stackDestruct(T* data, UINT32 heapId)
+	{
+		(*data).~T();
+
+		UINT8* memory = reinterpret_cast<UINT8*>(data);
+		MemStack::deallocLast(memory, heapId);
+	}
+
+	/**
+	 * @brief	Calls the destructor of "count" objects, first to last, and then releases their
+	 * 			memory as the most recent allocation of the specified heap.
+	 */
+	template<class T>
+	void stackDestructN(T* data, UINT32 count, UINT32 heapId)
+	{
+		T* const last = data + count;
+		for(T* current = data; current != last; ++current)
+			current->~T();
+
+		UINT8* memory = reinterpret_cast<UINT8*>(data);
+		MemStack::deallocLast(memory, heapId);
+	}
+
+	/**
+	 * @brief	Releases the most recent allocation made on the stack allocator heap with the specified id.
+	 *
+	 * @note	Not declared inline: the definition lives in a source file, and an inline function
+	 * 			has to be defined in every translation unit that calls it.
+	 */
+	CM_UTILITY_EXPORT void stackDeallocLast(void* data, UINT32 heapId);
+}

+ 13 - 5
CamelotUtility/Include/CmMemoryAllocator.h

@@ -10,12 +10,12 @@ namespace CamelotFramework
 	class MemoryAllocator
 	{
 	public:
-		static inline void* allocate(unsigned int bytes)
+		static inline void* allocate(UINT32 bytes)
 		{
 			return malloc(bytes);
 		}
 
-		static inline void* allocateArray(unsigned int bytes, UINT32 count)
+		static inline void* allocateArray(UINT32 bytes, UINT32 count)
 		{
 			return malloc(bytes * count);
 		}
@@ -52,7 +52,7 @@ namespace CamelotFramework
 	};
 
 	template<class T, class category> 
-	inline T* __cm_construct_array(unsigned int count)
+	inline T* __cm_construct_array(UINT32 count)
 	{
 		T* ptr = (T*)MemoryAllocator<category>::allocateArray(sizeof(T), count);
 
@@ -71,7 +71,7 @@ namespace CamelotFramework
 	}
 
 	template<class T, class category> 
-	inline void __cm_destruct_array(T* ptr, unsigned int count)
+	inline void __cm_destruct_array(T* ptr, UINT32 count)
 	{
 		// This might seem a bit weird if T is a built-in type or a pointer, but
 		// standard allows us to call destructor on such types (they don't do anything)
@@ -81,6 +81,10 @@ namespace CamelotFramework
 		MemoryAllocator<category>::freeArray(ptr, count);
 	}
 
+	/**
+	 * @brief	General allocator provided by the OS. Use for persistent long term allocations,
+	 * 			and allocations that don't happen often.
+	 */
 	class GenAlloc
 	{ };
 
@@ -91,6 +95,10 @@ namespace CamelotFramework
 	class ScratchAlloc
 	{ };
 
+	/**
+	 * @brief	Pool allocator that is only suited for allocating one specific type of data. Most useful when you are
+	 * 			often allocating one certain data type, with no specific allocation or deallocation order.
+	 */
 	class PoolAlloc
 	{ };
 }
@@ -102,4 +110,4 @@ namespace CamelotFramework
 #define CM_DELETE_BYTES(ptr, category) CamelotFramework::MemoryAllocator<category>::free(ptr)
 #define CM_DELETE_ARRAY(ptr, T, count, category) CamelotFramework::__cm_destruct_array<T, category>(ptr, count)
 
-
+#include "CmMemStack.h"

+ 52 - 0
CamelotUtility/Source/CmMemStack.cpp

@@ -0,0 +1,52 @@
+#include "CmPrerequisitesUtil.h"
+#include "CmMemStack.h"
+
+namespace CamelotFramework
+{
+	// Storage for the static members of MemStack. Both vectors start out empty and are
+	// resized by MemStack::setupHeap.
+	std::vector<MemStackInternal<1024 * 1024>> MemStack::mStacks;
+
+#if CM_DEBUG_MODE
+	// Thread that called setupHeap for each heap, compared against the caller in alloc/deallocLast
+	std::vector<CM_THREAD_ID_TYPE> MemStack::mThreadIds;
+#endif
+
+	/**
+	 * Makes sure the heap table has 256 slots and places a fresh, empty stack into the slot
+	 * of the given heap. In debug mode the calling thread is stored as the thread of that heap.
+	 */
+	void MemStack::setupHeap(UINT8 heapId)
+	{
+		const UINT32 maxHeaps = 256;
+		assert(heapId < maxHeaps);
+
+		mStacks.resize(maxHeaps);
+#if CM_DEBUG_MODE
+		mThreadIds.resize(maxHeaps);
+#endif
+
+		mStacks[heapId] = MemStackInternal<1024 * 1024>();
+#if CM_DEBUG_MODE
+		mThreadIds[heapId] = CM_THREAD_CURRENT_ID;
+#endif
+	}
+
+	/**
+	 * Allocates numBytes from the heap with the given id.
+	 *
+	 * The heap table is empty until setupHeap has been called at least once, and heapId is a
+	 * full UINT32, so the index is validated before it is used to avoid reading out of bounds.
+	 */
+	UINT8* MemStack::alloc(UINT32 numBytes, UINT32 heapId)
+	{
+		assert(heapId < mStacks.size() && "Invalid heap id, or MemStack::setupHeap was never called.");
+
+#if CM_DEBUG_MODE
+		assert(mThreadIds[heapId] == CM_THREAD_CURRENT_ID && "Accessing a heap from an invalid thread.");
+#endif
+
+		return mStacks[heapId].alloc(numBytes);
+	}
+
+	/**
+	 * Releases the most recent allocation of the heap with the given id.
+	 *
+	 * The heap table is empty until setupHeap has been called at least once, and heapId is a
+	 * full UINT32, so the index is validated before it is used to avoid reading out of bounds.
+	 */
+	void MemStack::deallocLast(UINT8* data, UINT32 heapId)
+	{
+		assert(heapId < mStacks.size() && "Invalid heap id, or MemStack::setupHeap was never called.");
+
+#if CM_DEBUG_MODE
+		assert(mThreadIds[heapId] == CM_THREAD_CURRENT_ID && "Accessing a heap from an invalid thread.");
+#endif
+
+		mStacks[heapId].dealloc(data);
+	}
+
+	/** Free function shorthand that forwards to MemStack::alloc. */
+	UINT8* stackAlloc(UINT32 numBytes, UINT32 heapId)
+	{
+		UINT8* memory = MemStack::alloc(numBytes, heapId);
+
+		return memory;
+	}
+
+	/** Free function shorthand that forwards to MemStack::deallocLast. */
+	void stackDeallocLast(void* data, UINT32 heapId)
+	{
+		UINT8* bytes = static_cast<UINT8*>(data);
+
+		MemStack::deallocLast(bytes, heapId);
+	}
+}

+ 17 - 3
TODO.txt

@@ -36,6 +36,23 @@ IMMEDIATE:
   - Add support for bold (and maybe italic) fonts in Font importer
   - Add support to disable aliasing when importing font
 
+Mem allocation related:
+Following the example of MemStack add:
+ - MemArena - Allows memory to be allocated as needed but everything is freed in one go. (Useful for TextUtility for example)
+ - MemPool - Uses bit masks for querying empty slots, using the fast bit compare operators from the GEG2 book allocator. Also make sure slots are split into blocks so we can easily dismiss large portions of allocated elements.
+ - Organize calls to CM_NEW/CM_DELETE/etc better
+ - Add an allocation counter to the default allocator
+
+Allocation critical areas:
+ - TextUtility does a massive amount of allocs
+ - Binding gpu params. It gets copied in DeferredRenderContext
+ - GameObjectHandle often allocates its internal data
+ - ResourceHandle often allocates its internal data
+ - AsyncOp allocates  AsyncOpData internally
+ - Deserialization, a lot of temporary allocations going on - But how much impact on performance will allocations have considering this is probably limited by disk read?
+ - Creating SceneObjects and Components - I might want to pool them, as I suspect user might alloc many per frame
+ - Log logMsg
+
 -----------
 
 I need to be able to provide smaller bounds used for UI input 
@@ -100,9 +117,6 @@ Longterm plans:
 ----------------------------------------------------------------------------------------------
 Other:
  - Move Debug to CamelotCore and add SetFillMode
- - Add TextUtil class
-  - Ability to calculate a size of a line (will need this if I want to add "..." to text that doesn't fit)
-  - Maybe even move line and word classes to it
 
 -----------------------BACKLOG TODO---------------------------------------------------------------