
Check in mem block pool before I delete it

Marko Pintera, 12 years ago
parent
Commit
e8a735a10d

+ 6 - 0
CamelotClient/CamelotClient.cpp

@@ -30,6 +30,9 @@
 #include "CmRTTIType.h"
 #include "CmCursor.h"
 
+#include "CmMemBlockPool.h"
+
+
 #define DX11
 //#define DX9
 //#define GL
@@ -63,6 +66,9 @@ int CALLBACK WinMain(
 	//CommandQueue::addBreakpoint(0, 22);
 	//CommandQueue::addBreakpoint(0, 12);
 
+	MemBlockPool<> blockPool;
+	void* someMemory = blockPool.alloc(32);
+
 	RenderSystem* renderSystem = RenderSystem::instancePtr();
 	RenderWindowPtr renderWindow = gApplication().getPrimaryWindow();
 

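Editor's note: the test snippet added above only allocates. Going by the MemBlockPool interface introduced in CmMemBlockPool.h below, the matching release call takes the original allocation size, so a hypothetical continuation would be:

	blockPool.dealloc(someMemory, 32);
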
+ 1 - 0
CamelotUtility/CamelotUtility.vcxproj

@@ -189,6 +189,7 @@
     <ClInclude Include="Include\CmLog.h" />
     <ClInclude Include="Include\CmManagedDataBlock.h" />
     <ClInclude Include="Include\CmMathAsm.h" />
+    <ClInclude Include="Include\CmMemBlockPool.h" />
     <ClInclude Include="Include\CmMemoryAllocator.h" />
     <ClInclude Include="Include\CmModule.h" />
     <ClInclude Include="Include\CmORect.h" />

+ 3 - 0
CamelotUtility/CamelotUtility.vcxproj.filters

@@ -219,6 +219,9 @@
     <ClInclude Include="Include\CmMemStack.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="Include\CmMemBlockPool.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Include\CmAxisAlignedBox.cpp">

+ 194 - 0
CamelotUtility/Include/CmMemBlockPool.h

@@ -0,0 +1,194 @@
+#pragma once
+
+#include <vector>
+#include <cmath>
+#include <cassert>
+#include <algorithm>
+#include "CmMemoryAllocator.h" // for CM_NEW_BYTES/CM_DELETE_BYTES used below
+
+namespace CamelotFramework
+{
+	/**
+	 * @brief	Memory block pool.
+	 *
+	 * @tparam	PageSizePow2	2^PageSizePow2 is the size of one page in the pool. This size divided by the number of blocks per chunk
+	 * 							determines the maximum size you can allocate in the pool. Larger allocations will use a general allocator.
+	 * @tparam	BlocksPerChunkPow2 Pages are split into chunks and chunks are split into blocks based on this value.
+	 * 							   Higher values generally mean fewer allocations and better performance, but potentially 
+	 * 							   more wasted memory. The actual number of blocks per chunk is 2^BlocksPerChunkPow2.
+	 * 							   Must be less than or equal to PageSizePow2, and no greater than 8.
+	 * 							   (e.g. 0 means 1 block per chunk, 1 means 2 blocks per chunk, etc.)
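+	 *
+	 * 							   For example, with the defaults (PageSizePow2 = 20, BlocksPerChunkPow2 = 5) a page is 1 MB,
+	 * 							   each chunk holds 32 blocks, and the smallest pool hands out 32-byte blocks.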
+	 */
+	template <int PageSizePow2 = 20, int BlocksPerChunkPow2 = 5>
+	class MemBlockPool
+	{
+		struct MemChunk
+		{
+			MemChunk()
+				:firstFreeBlock(0), numAvailableBlocks(0), data(nullptr)
+			{ }
+
+			void init(unsigned char* _data, unsigned int blockSize, unsigned char numBlocks)
+			{
+				data = _data;
+				numAvailableBlocks = numBlocks;
+				firstFreeBlock = 0;
+
+				unsigned char* dataPtr = data;
+				for(unsigned char i = 0; i < numBlocks; ++i)
+				{
+					*dataPtr = i + 1; // each free block stores the index of the next free block
+					dataPtr += blockSize;
+				}
+			}
+
+			void release()
+			{
+
+			}
+
+			void* alloc(unsigned int blockSize)
+			{
+				if(numAvailableBlocks == 0) return nullptr;
+
+				unsigned char* result = data + (firstFreeBlock * blockSize);
+				firstFreeBlock = *result;
+
+				--numAvailableBlocks;
+				return result;
+			}
+
+			void dealloc(void* dataToRls, unsigned int blockSize)
+			{
+				unsigned char* toRelease = static_cast<unsigned char*>(dataToRls);
+				*toRelease = firstFreeBlock;
+				firstFreeBlock = (toRelease - data) / blockSize;
+
+				++numAvailableBlocks;
+			}
+
+			unsigned char* data;
+			unsigned char firstFreeBlock;
+			unsigned char numAvailableBlocks;
+		};
+
+		struct MemPool
+		{
+			std::vector<MemChunk> mChunks;
+			unsigned int mBlockSize;
+			MemChunk* mAllocChunk;
+			MemChunk* mDeallocChunk;
+		};
+
+	public:
+		MemBlockPool()
+		{
+			unsigned int blockSize = 1 << BlocksPerChunkPow2;
+			mNumPools = PageSizePow2 - BlocksPerChunkPow2;
+			for(int i = 0; i < mNumPools + 1; i++)
+			{
+				mPools[i].mBlockSize = blockSize;
+				mPools[i].mAllocChunk = nullptr;
+				mPools[i].mDeallocChunk = nullptr;
+				blockSize = blockSize << 1;
+			}
+
+			mBlocksPerChunk = 1;
+			for(int i = 0; i < BlocksPerChunkPow2; i++)
+				mBlocksPerChunk <<= 1;
+		}
+
+		~MemBlockPool()
+		{
+			// TODO - Handle release
+
+			//for(auto iter = mChunks.begin(); iter != mChunks.end(); ++iter)
+			//	iter->release();
+		}
+
+		void* alloc(unsigned int size)
+		{
+			unsigned int poolIdx = sizeToPool(size) - BlocksPerChunkPow2;
+
+			if(poolIdx >= mNumPools) 
+				return CM_NEW_BYTES(size, GenAlloc);
+
+			MemPool& pool = mPools[poolIdx];
+			if(pool.mAllocChunk == nullptr || pool.mAllocChunk->numAvailableBlocks == 0)
+			{
+				for(auto iter = pool.mChunks.begin();; ++iter)
+				{
+					if(iter == pool.mChunks.end())
+					{
+						pool.mChunks.reserve(pool.mChunks.size() + 1);
+
+						MemChunk newChunk;
+						newChunk.init((unsigned char*)alloc(pool.mBlockSize * mBlocksPerChunk), pool.mBlockSize, mBlocksPerChunk);
+
+						pool.mChunks.push_back(newChunk);
+						pool.mAllocChunk = &pool.mChunks.back();
+						pool.mDeallocChunk = &pool.mChunks.back();
+						break;
+					}
+
+					if(iter->numAvailableBlocks > 0)
+					{
+						pool.mAllocChunk = &*iter;
+						break;
+					}
+				}
+			}
+
+			return pool.mAllocChunk->alloc(pool.mBlockSize);
+		}
+
+		void dealloc(void* dataToRls, unsigned int size)
+		{
+			unsigned int poolIdx = sizeToPool(size) - BlocksPerChunkPow2;
+
+			if(poolIdx >= mNumPools) 
+				return CM_DELETE_BYTES(dataToRls, GenAlloc);
+
+			MemPool& pool = mPools[poolIdx];
+			if(pool.mDeallocChunk == nullptr || dataToRls < pool.mDeallocChunk->data || dataToRls >= (pool.mDeallocChunk->data + pool.mBlockSize * mBlocksPerChunk))
+			{
+				for(auto iter = pool.mChunks.begin();; ++iter)
+				{
+					assert(iter != pool.mChunks.end()); // Trying to dealloc memory that wasn't allocated by this allocator
+
+					if(dataToRls >= iter->data && dataToRls < (iter->data + pool.mBlockSize * mBlocksPerChunk))
+					{
+						pool.mDeallocChunk = &*iter;
+						break;
+					}
+				}
+			}
+		
+			pool.mDeallocChunk->dealloc(dataToRls, pool.mBlockSize);
+
+			// If chunk is empty, release it (either to a higher level chunk to re-use, or to the OS if the chunk is highest level)
+			if(pool.mDeallocChunk->numAvailableBlocks == mBlocksPerChunk) // Chunk is completely empty
+			{
+				unsigned char* chunkData = pool.mDeallocChunk->data; // Grab the data pointer before erasing the chunk below
+
+				auto findIter = std::find_if(pool.mChunks.begin(), pool.mChunks.end(), 
+					[&](const MemChunk& chunk) { return &chunk == pool.mDeallocChunk; });
+				pool.mChunks.erase(findIter);
+
+				dealloc(chunkData, pool.mBlockSize * mBlocksPerChunk);
+
+				pool.mDeallocChunk = nullptr;
+				pool.mAllocChunk = nullptr; // the erase may have invalidated this pointer as well
+			}
+		}
+
+	private:
+		MemPool mPools[PageSizePow2 + 1];
+		unsigned int mBlocksPerChunk;
+		unsigned int mNumPools;
+
+		unsigned int sizeToPool(unsigned int size)
+		{
+			// TODO - Size cannot be zero
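+			// Note: this returns floor(log2(size)), so non-power-of-two sizes map to the pool with the next smaller power-of-two block size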
+
+			unsigned int targetlevel = 0;
+			while (size >>= 1) // I can speed this up using a BSR instruction, in case compiler doesn't already do it
+				++targetlevel;
+
+			return targetlevel;
+		}
+	};
+}
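
Editor's note: the header above can't be compiled outside the framework (it relies on CM_NEW_BYTES/CM_DELETE_BYTES), so here is a minimal standalone sketch of just the embedded free-list scheme MemChunk uses: each free block stores the index of the next free block in its first byte, so a chunk needs no bookkeeping beyond a data pointer and two bytes of state. The names (Chunk, etc.) and the malloc-backed storage are illustrative, not part of the Camelot code.

#include <cassert>
#include <cstdlib>

struct Chunk
{
	unsigned char* data;
	unsigned char firstFreeBlock;
	unsigned char numAvailableBlocks;

	void init(unsigned int blockSize, unsigned char numBlocks)
	{
		data = static_cast<unsigned char*>(std::malloc(blockSize * numBlocks));
		firstFreeBlock = 0;
		numAvailableBlocks = numBlocks;

		// Thread the free list through the blocks themselves:
		// block i starts out holding the index of the next free block (i + 1).
		unsigned char* p = data;
		for (unsigned char i = 0; i < numBlocks; ++i, p += blockSize)
			*p = static_cast<unsigned char>(i + 1);
	}

	void* alloc(unsigned int blockSize)
	{
		if (numAvailableBlocks == 0)
			return nullptr;

		unsigned char* result = data + firstFreeBlock * blockSize;
		firstFreeBlock = *result; // pop the head of the embedded free list
		--numAvailableBlocks;
		return result;
	}

	void dealloc(void* ptr, unsigned int blockSize)
	{
		unsigned char* p = static_cast<unsigned char*>(ptr);
		*p = firstFreeBlock; // the freed block now points at the old head
		firstFreeBlock = static_cast<unsigned char>((p - data) / blockSize);
		++numAvailableBlocks;
	}

	void release() { std::free(data); }
};

int main()
{
	Chunk chunk;
	chunk.init(32, 8);         // 8 blocks of 32 bytes
	void* a = chunk.alloc(32);
	void* b = chunk.alloc(32);
	chunk.dealloc(a, 32);
	void* c = chunk.alloc(32); // reuses the slot 'a' occupied
	assert(c == a);
	chunk.dealloc(b, 32);
	chunk.dealloc(c, 32);
	chunk.release();
	return 0;
}

Because the bookkeeping lives inside the free blocks, per-chunk overhead stays constant regardless of block count, which is what lets MemBlockPool keep a plain vector of chunks per block size.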

+ 2 - 2
CamelotUtility/Include/CmMemStack.h

@@ -145,10 +145,10 @@ namespace CamelotFramework
 		static void deallocLast(UINT8* data, UINT32 heapId);
 
 	private:
-		static std::vector<MemStackInternal<1024 * 1024>> mStacks;
+		static MemStackInternal<1024 * 1024> mStacks[256];
 
 #if CM_DEBUG_MODE
-		static std::vector<CM_THREAD_ID_TYPE> mThreadIds;
+		static CM_THREAD_ID_TYPE mThreadIds[256];
 #endif
 	};
 

+ 1 - 1
CamelotUtility/Include/CmMemoryAllocator.h

@@ -110,4 +110,4 @@ namespace CamelotFramework
 #define CM_DELETE_BYTES(ptr, category) CamelotFramework::MemoryAllocator<category>::free(ptr)
 #define CM_DELETE_ARRAY(ptr, T, count, category) CamelotFramework::__cm_destruct_array<T, category>(ptr, count)
 
-#include "CmMemStack.h"
+#include "CmMemStack.h"

+ 2 - 4
CamelotUtility/Source/CmMemStack.cpp

@@ -3,21 +3,19 @@
 
 namespace CamelotFramework
 {
-	std::vector<MemStackInternal<1024 * 1024>> MemStack::mStacks;
+	MemStackInternal<1024 * 1024> MemStack::mStacks[256];
 
 #if CM_DEBUG_MODE
-	std::vector<CM_THREAD_ID_TYPE> MemStack::mThreadIds;
+	CM_THREAD_ID_TYPE MemStack::mThreadIds[256];
 #endif
 
 	void MemStack::setupHeap(UINT8 heapId)
 	{
 		assert(heapId < 256);
 
-		mStacks.resize(256);
 		mStacks[heapId] = MemStackInternal<1024 * 1024>();
 
 #if CM_DEBUG_MODE
-		mThreadIds.resize(256);
 		mThreadIds[heapId] = CM_THREAD_CURRENT_ID;
 #endif
 	}

+ 49 - 1
TODO.txt

@@ -38,8 +38,11 @@ IMMEDIATE:
 
 Mem allocation related:
 Following the example of MemStack add:
- - MemArena - Allows memory to be allocated as needed but everything is freed in one go. (Useful for TextUtility for example)
+ - MemRegion - Allows user to mark regions and then free entire regions at once (FrameAllocator would be another name for it)
  - MemPool - Uses bit masks for querying empty slots, using the fast bit compare operators from GEG2 book allocator. Also make sure slots are split into blocks so we can easily dismiss large portions of allocated elements.
+    - Instead of using bit compare, use something smarter like in RTSmallAlloc
+	- Also I can't place SceneObjects in the pool, since they hold a vector to a varying number of components
+ - Implement RTSmallAlloc for use in shared_ptrs and handles/asyncop
  - Organize calls to CM_NEW/CM_DELETE/etc better
  - Add an allocation counter to the default allocator
 
@@ -53,6 +56,51 @@ Allocation critical areas:
  - Creating SceneObjects and Components - I might want to pool them, as I suspect user might alloc many per frame
  - Log logMsg
 
+SOLUTION to TextUtility allocation:
+ TextLine & TextWord structs are allocated from a per-thread pool as needed (MemObjectPool)
+ Array of TextLines in TextData, array of TextWords in TextLine, and array of CHAR_DESC in TextWord are also allocated using the pool allocator. (MemBlockPool)
+  - This pool allocator should allocate based on a certain size rather than a data type, but it is otherwise the same
+  - But this means the array doesn't grow one by one, and instead grows N elements at a time (depending on the pool block size we choose)
+  - WHEN the array needs to be resized I need to access a larger pool
+   - Use the BSR instruction to quickly determine the pool where to fit the data (a sketch is included at the end of this page)
+    - Add some guards to ensure that we update the BSR instruction if we ever move to another platform
+
+HIERARCHICAL POOL MEM BLOCK ALLOCATOR: - Attempt to implement outside of camelot and test for speed compared to CRT
+MemAlloc::alloc(size(128))
+{
+  int chunkIdx = log2(size(128)); 
+
+  if(chunkIdx > maxChunkIdx)
+    new UINT8[2^chunkIdx]; // Just alloc directly if it's over some maximum
+
+  if(!chunks[chunkIdx].hasEmptySpace) // Using SmallAlloc approach which is very fast
+     chunks[chunkIdx].addNewChunk(alloc(2^(std::min(maxChunkIdx, chunkIdx + log2(blocksPerChunk))))
+  return chunks[chunkIdx].alloc()
+}
+
+MemAlloc::dealloc(UINT8* data, size(128))
+{
+   int chunkIdx = log2(size(128)); 
+
+   if(chunkIdx > maxChunkIdx)
+      delete[] data;
+
+   chunk myChunk = findChunkForData(data, size) // Using the SmallAlloc approach which is very fast
+   myChunk.dealloc(data)
+   
+   if(myChunk.empty())
+   {
+      chunks.removeChunk(myChunk)
+      dealloc(myChunk.dataPtr, myChunk.size)
+   }
+}
+
+ALLOCATOR - When I try to allocate and cannot find a chunk, try a higher level chunk to see if it has any empty space (without immediately jumping multiple levels?)
+
+ MAYBE I should have a MemoryManager class, which can be manually allocated per-thread and then just passed around to methods that need it
+  (like TextUtility). It can contain all the needed allocators - although it might introduce too much coupling, as every class will use it. Plus it's hard to extend.
+   - Maybe the actual allocators exist on specific classes, but the allocators accept MemoryManager as input. It's a cleaner solution than using enums, I think.
+
 -----------
 
 I need to be able to provide smaller bounds used for UI input
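
Editor's note: both the TODO above and the comment in sizeToPool mention replacing the shift loop with the BSR instruction. Below is a minimal sketch of that mapping, assuming MSVC's _BitScanReverse intrinsic with a GCC/Clang __builtin_clz fallback; the function name sizeToPoolFast is hypothetical, not part of the codebase. It computes floor(log2(size)), which is exactly what the shift loop in sizeToPool returns, and like the original it requires size to be non-zero.

#include <cstdint>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

inline unsigned int sizeToPoolFast(std::uint32_t size)
{
#if defined(_MSC_VER)
	unsigned long index;
	_BitScanReverse(&index, size); // index of the highest set bit
	return static_cast<unsigned int>(index);
#else
	return 31u - static_cast<unsigned int>(__builtin_clz(size)); // count leading zeros, then flip
#endif
}

// e.g. sizeToPoolFast(32) == 5, sizeToPoolFast(33) == 5, sizeToPoolFast(1 << 20) == 20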