Explorar o código

Added frame allocator

Marko Pintera hai 12 anos
pai
achega
99f545b06f

+ 2 - 2
BansheeForwardRenderer/Source/BsForwardRenderer.cpp

@@ -93,7 +93,7 @@ namespace BansheeEngine
 			RenderTargetPtr target = camerasPerTarget.target;
 			const Vector<HCamera>::type& cameras = camerasPerTarget.cameras;
 
-			coreAccessor.beginFrame();
+			coreAccessor.beginRender();
 
 			for(auto& camera : cameras)
 			{
@@ -116,7 +116,7 @@ namespace BansheeEngine
 				render(camera);
 			}
 
-			coreAccessor.endFrame();
+			coreAccessor.endRender();
 			coreAccessor.swapBuffers(target);
 		}
 

+ 2 - 5
CamelotCore/Include/CmBindableGpuParams.h

@@ -19,6 +19,7 @@ namespace CamelotFramework
 	class CM_EXPORT BindableGpuParams
 	{
 	public:
+		BindableGpuParams(const GpuParamsPtr& sourceParams, FrameAlloc* allocator);
 		BindableGpuParams(const BindableGpuParams& source);
 		~BindableGpuParams();
 
@@ -34,11 +35,6 @@ namespace CamelotFramework
 
 		const GpuParamDesc& getParamDesc() const { return mParamDesc; }
 
-	private:
-		friend class GpuParams;
-
-		BindableGpuParams(const GpuParamDesc& sourceParamDesc);
-
 	private:
 		mutable bool mOwnsData;
 		const GpuParamDesc& mParamDesc;
@@ -48,6 +44,7 @@ namespace CamelotFramework
 		UINT32 mNumTextures;
 		UINT32 mNumSamplerStates;
 
+		FrameAlloc* mAllocator;
 		BindableGpuParamBlock** mParamBlocks;
 		GpuParamBlockBufferPtr* mParamBlockBuffers;
 		HTexture* mTextures;

+ 20 - 0
CamelotCore/Include/CmCoreThread.h

@@ -60,6 +60,21 @@ public:
 		*/
 	void queueCommand(boost::function<void()> commandCallback, bool blockUntilComplete = false);
 
+	/**
+	 * @brief	Called once every frame.
+	 * 			
+	 * @note	Must be called before sim thread schedules any CoreThread operations that frame. 
+	 */
+	void update();
+
+	/**
+	 * @brief	Returns a frame allocator that should be used for allocating temporary data being passed to the
+	 * 			core thread. As the name implies the data only lasts one frame, so you need to be careful not
+	 * 			to use it for longer than that.
+	 * 			
+	 * @note	Sim thread only.
+	 */
+	FrameAlloc* getFrameAlloc() const;
 private:
 	class CoreThreadWorkerFunc CM_THREAD_WORKER_INHERIT
 	{
@@ -72,6 +87,11 @@ private:
 		CoreThread* mOwner;
 	};
 
+	// Double buffered frame allocators - Means sim thread cannot be more than 1 frame ahead of core thread
+	// (If that changes you should be able to easily add more)
+	FrameAlloc* mFrameAllocs[2]; 
+	UINT32 mActiveFrameAlloc;
+
 	CoreThreadWorkerFunc* mCoreThreadFunc;
 	volatile bool mCoreThreadStarted;
 	volatile bool mCoreThreadShutdown;

+ 7 - 11
CamelotCore/Include/CmCoreThreadAccessor.h

@@ -9,6 +9,7 @@
 #include "CmCoreThread.h"
 #include "CmAsyncOp.h"
 #include "CmColor.h"
+#include "CmFrameAlloc.h"
 
 namespace CamelotFramework
 {
@@ -42,9 +43,6 @@ namespace CamelotFramework
 			mCommandQueue->queue(boost::bind(&RenderSystem::disableTextureUnit, RenderSystem::instancePtr(), gptype, texUnit));
 		}
 
-		/** @copydoc RenderSystem::setPointParameters() */
-		void setPointParameters(float size, bool attenuationEnabled, float constant, float linear, float quadratic, float minSize, float maxSize);
-
 		/** @copydoc RenderSystem::setTexture() */
 		void setTexture(GpuProgramType gptype, UINT16 unit, bool enabled, const TexturePtr &texPtr)
 		{
@@ -87,7 +85,6 @@ namespace CamelotFramework
 			mCommandQueue->queue(boost::bind(&RenderSystem::setDrawOperation, RenderSystem::instancePtr(), op));
 		}
 
-
 		/** @copydoc RenderSystem::setClipPlanes() */
 		void setClipPlanes(const PlaneList& clipPlanes)
 		{
@@ -118,7 +115,6 @@ namespace CamelotFramework
 			mCommandQueue->queue(boost::bind(&RenderSystem::setScissorRect, RenderSystem::instancePtr(), left, top, right, bottom));
 		}
 
-
 		/** @copydoc RenderSystem::setRenderTarget() */
 		void setRenderTarget(RenderTargetPtr target)
 		{
@@ -138,20 +134,19 @@ namespace CamelotFramework
 		}
 
 		/** @copydoc RenderSystem::bindGpuParams() */
-		void bindGpuParams(GpuProgramType gptype, BindableGpuParams& params)
+		void bindGpuParams(GpuProgramType gptype, const GpuParamsPtr& params)
 		{
-			mCommandQueue->queue(boost::bind(&RenderSystem::bindGpuParams, RenderSystem::instancePtr(), gptype, params));
+			mCommandQueue->queue(boost::bind(&RenderSystem::bindGpuParams, RenderSystem::instancePtr(), gptype, BindableGpuParams(params, gCoreThread().getFrameAlloc())));
 		}
 
-
 		/** @copydoc RenderSystem::beginFrame() */
-		void beginFrame(void)
+		void beginRender(void)
 		{
 			mCommandQueue->queue(boost::bind(&RenderSystem::beginFrame, RenderSystem::instancePtr()));
 		}
 
 		/** @copydoc RenderSystem::endFrame() */
-		void endFrame(void)
+		void endRender(void)
 		{
 			mCommandQueue->queue(boost::bind(&RenderSystem::endFrame, RenderSystem::instancePtr()));
 		}
@@ -178,7 +173,6 @@ namespace CamelotFramework
 			mCommandQueue->queue(boost::bind(&RenderSystem::swapBuffers, RenderSystem::instancePtr(), target));
 		}
 
-
 		/** @copydoc RenderSystem::render() */
 		void render(const MeshBasePtr& mesh, UINT32 indexOffset = 0, UINT32 indexCount = 0, bool useIndices = true, DrawOperationType drawOp = DOT_TRIANGLE_LIST)
 		{
@@ -267,6 +261,8 @@ namespace CamelotFramework
 		 */
 		void cancelAll()
 		{
+			// Note that this won't free any Frame data allocated for all the canceled commands since
+			// frame data will only get cleared at frame start
 			mCommandQueue->cancelAll();
 		}
 

+ 2 - 6
CamelotCore/Include/CmGpuParams.h

@@ -98,13 +98,9 @@ namespace CamelotFramework
 		void getTextureParam(const String& name, GpuParamTexture& output) const;
 		void getSamplerStateParam(const String& name, GpuParamSampState& output) const;
 
-		/**
-		 * @brief	Creates the copy of this object in a special way. Should only be called
-		 * 			internally by core thread accessor when passing gpu params to the core thread.
-		 */
-		static BindableGpuParams createBindableCopy(const GpuParamsPtr& params);
-
 	private:
+		friend class BindableGpuParams;
+
 		GpuParamDesc& mParamDesc;
 		bool mTransposeMatrices;
 

+ 1 - 0
CamelotCore/Include/CmPrerequisites.h

@@ -131,6 +131,7 @@ namespace CamelotFramework
 	class VertexDataDesc;
 	class EventQuery;
 	class TimerQuery;
+	class FrameAlloc;
 	// Asset import
 	class SpecificImporter;
 	class Importer;

+ 2 - 1
CamelotCore/Source/CmApplication.cpp

@@ -98,7 +98,8 @@ namespace CamelotFramework
 		while(mRunMainLoop)
 		{
 			gProfiler().beginThread("Sim");
-			
+
+			gCoreThread().update();
 			Platform::update();
 			DeferredCallManager::instance().update();
 			RenderWindowManager::instance().update();

+ 60 - 7
CamelotCore/Source/CmBindableGpuParams.cpp

@@ -4,14 +4,69 @@
 #include "CmBindableGpuParamBlock.h"
 #include "CmGpuParamBlockBuffer.h"
 #include "CmDebug.h"
+#include "CmFrameAlloc.h"
 
 namespace CamelotFramework
 {
-	BindableGpuParams::BindableGpuParams(const GpuParamDesc& sourceParamDesc)
-		:mOwnsData(true), mParamDesc(sourceParamDesc), mData(nullptr), mNumParamBlocks(0), 
+	BindableGpuParams::BindableGpuParams(const GpuParamsPtr& params, FrameAlloc* allocator)
+		:mOwnsData(true), mParamDesc(params->getParamDesc()), mData(nullptr), mNumParamBlocks(0), mAllocator(allocator),
 		mNumTextures(0), mNumSamplerStates(0),mParamBlocks(nullptr), mParamBlockBuffers(nullptr), mTextures(nullptr), mSamplerStates(nullptr)
 	{
-		// Actual allocation of all data happens in parent GpuParams
+		// Allocate everything in a single block of memory to get rid of extra memory allocations
+		UINT32 paramBlockBufferSize = params->mNumParamBlocks * sizeof(BindableGpuParamBlock*);
+		UINT32 paramBlockBuffersBufferSize = params->mNumParamBlocks * sizeof(GpuParamBlockBufferPtr);
+		UINT32 textureBufferSize = params->mNumTextures * sizeof(HTexture);
+		UINT32 samplerStateBufferSize = params->mNumSamplerStates * sizeof(HSamplerState);
+
+		UINT32 bufferSize = paramBlockBufferSize + paramBlockBuffersBufferSize + textureBufferSize + samplerStateBufferSize;
+		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
+		{
+			if(params->mParamBlockBuffers[i] != nullptr)
+				bufferSize += sizeof(BindableGpuParamBlock) + params->mParamBlockBuffers[i]->getSize();
+		}
+
+		mData = (UINT8*)allocator->alloc(bufferSize);
+		mNumParamBlocks = params->mNumParamBlocks;
+		mNumTextures = params->mNumTextures;
+		mNumSamplerStates = params->mNumSamplerStates;
+
+		UINT8* dataIter = mData;
+		mParamBlocks = (BindableGpuParamBlock**)dataIter;
+		dataIter += paramBlockBufferSize;
+
+		mParamBlockBuffers = (GpuParamBlockBufferPtr*)dataIter;
+		dataIter += paramBlockBuffersBufferSize;
+
+		mTextures = (HTexture*)dataIter;
+		dataIter += textureBufferSize;
+
+		mSamplerStates = (HSamplerState*)dataIter;
+		dataIter += samplerStateBufferSize;
+
+		// Copy data
+		memcpy(mParamBlockBuffers, params->mParamBlockBuffers, paramBlockBuffersBufferSize);
+		memcpy(mTextures, params->mTextures, textureBufferSize);
+		memcpy(mSamplerStates, params->mSamplerStates, samplerStateBufferSize);
+
+		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
+		{
+			if(params->mParamBlockBuffers[i] != nullptr)
+			{
+				GpuParamBlock* paramBlock = params->mParamBlockBuffers[i]->getParamBlock();
+
+				UINT32 bufferSize = paramBlock->getSize();
+				mParamBlocks[i] = (BindableGpuParamBlock*)dataIter;
+
+				dataIter += sizeof(BindableGpuParamBlock);
+				mParamBlocks[i]->mData = dataIter;
+
+				dataIter += bufferSize;
+				memcpy(mParamBlocks[i]->mData, paramBlock->getData(), bufferSize);
+
+				mParamBlocks[i]->mSize = bufferSize;
+				mParamBlocks[i]->mDirty = paramBlock->isDirty();
+			}
+		}
 	}
 
 	BindableGpuParams::BindableGpuParams(const BindableGpuParams& source)
@@ -20,6 +75,7 @@ namespace CamelotFramework
 		mOwnsData = true;
 		source.mOwnsData = false;
 
+		mAllocator = source.mAllocator;
 		mData = source.mData;
 		mNumParamBlocks = source.mNumParamBlocks;
 		mNumTextures = source.mNumTextures;
@@ -35,13 +91,10 @@ namespace CamelotFramework
 	{
 		if(mOwnsData && mData != nullptr)
 		{
-			cm_free(mData);
-			// TODO - Dealloc using stack
+			mAllocator->dealloc(mData);
 		}
 	}
 
-	// TODO - Forbid copying but allow move semantics
-	
 	GpuParamBlockBufferPtr BindableGpuParams::getParamBlockBuffer(UINT32 slot) const
 	{
 		if(slot < 0 || slot >= mNumParamBlocks)

+ 18 - 0
CamelotCore/Source/CmCoreThread.cpp

@@ -10,7 +10,11 @@ namespace CamelotFramework
 		, mCommandQueue(nullptr)
 		, mMaxCommandNotifyId(0)
 		, mSyncedCoreAccessor(nullptr)
+		, mActiveFrameAlloc(0)
 	{
+		mFrameAllocs[0] = cm_new<FrameAlloc>();
+		mFrameAllocs[1] = cm_new<FrameAlloc>();
+
 		mCoreThreadId = CM_THREAD_CURRENT_ID;
 		mCommandQueue = cm_new<CommandQueue<CommandQueueSync>>(CM_THREAD_CURRENT_ID);
 
@@ -27,6 +31,9 @@ namespace CamelotFramework
 			cm_delete(mCommandQueue);
 			mCommandQueue = nullptr;
 		}
+
+		cm_delete(mFrameAllocs[0]);
+		cm_delete(mFrameAllocs[1]);
 	}
 
 	void CoreThread::initCoreThread()
@@ -191,6 +198,17 @@ namespace CamelotFramework
 			blockUntilCommandCompleted(commandId);
 	}
 
+	void CoreThread::update()
+	{
+		// Flip to the other of the two frame allocators, then empty it so that this frame
+		// starts allocating from a clean state.
+		mActiveFrameAlloc = (mActiveFrameAlloc == 0) ? 1 : 0;
+
+		FrameAlloc* recycled = mFrameAllocs[mActiveFrameAlloc];
+		recycled->clear();
+	}
+
+	FrameAlloc* CoreThread::getFrameAlloc() const
+	{
+		return mFrameAllocs[mActiveFrameAlloc]; // The allocator selected (and cleared) by the last call to update()
+	}
+
 	void CoreThread::blockUntilCommandCompleted(UINT32 commandId)
 	{
 #if !CM_FORCE_SINGLETHREADED_RENDERING

+ 0 - 63
CamelotCore/Source/CmGpuParams.cpp

@@ -260,67 +260,4 @@ namespace CamelotFramework
 
 		return nullptr;
 	}
-
-	BindableGpuParams GpuParams::createBindableCopy(const GpuParamsPtr& params)
-	{
-		// Allocate everything in a single block of memory to get rid of extra memory allocations
-		UINT32 paramBlockBufferSize = params->mNumParamBlocks * sizeof(BindableGpuParamBlock*);
-		UINT32 paramBlockBuffersBufferSize = params->mNumParamBlocks * sizeof(GpuParamBlockBufferPtr);
-		UINT32 textureBufferSize = params->mNumTextures * sizeof(HTexture);
-		UINT32 samplerStateBufferSize = params->mNumSamplerStates * sizeof(HSamplerState);
-
-		UINT32 bufferSize = paramBlockBufferSize + paramBlockBuffersBufferSize + textureBufferSize + samplerStateBufferSize;
-		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
-		{
-			if(params->mParamBlockBuffers[i] != nullptr)
-				bufferSize += sizeof(BindableGpuParamBlock) + params->mParamBlockBuffers[i]->getSize();
-		}
-
-		// TODO - Alloc using stack
-		BindableGpuParams bindableParams(params->mParamDesc);
-		bindableParams.mData = (UINT8*)cm_alloc(bufferSize);
-		bindableParams.mNumParamBlocks = params->mNumParamBlocks;
-		bindableParams.mNumTextures = params->mNumTextures;
-		bindableParams.mNumSamplerStates = params->mNumSamplerStates;
-
-		UINT8* dataIter = bindableParams.mData;
-		bindableParams.mParamBlocks = (BindableGpuParamBlock**)dataIter;
-		dataIter += paramBlockBufferSize;
-
-		bindableParams.mParamBlockBuffers = (GpuParamBlockBufferPtr*)dataIter;
-		dataIter += paramBlockBuffersBufferSize;
-
-		bindableParams.mTextures = (HTexture*)dataIter;
-		dataIter += textureBufferSize;
-
-		bindableParams.mSamplerStates = (HSamplerState*)dataIter;
-		dataIter += samplerStateBufferSize;
-
-		// Copy data
-		memcpy(bindableParams.mParamBlockBuffers, params->mParamBlockBuffers, paramBlockBuffersBufferSize);
-		memcpy(bindableParams.mTextures, params->mTextures, textureBufferSize);
-		memcpy(bindableParams.mSamplerStates, params->mSamplerStates, samplerStateBufferSize);
-
-		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
-		{
-			if(params->mParamBlockBuffers[i] != nullptr)
-			{
-				GpuParamBlock* paramBlock = params->mParamBlockBuffers[i]->getParamBlock();
-
-				UINT32 bufferSize = paramBlock->getSize();
-				bindableParams.mParamBlocks[i] = (BindableGpuParamBlock*)dataIter;
-
-				dataIter += sizeof(BindableGpuParamBlock);
-				bindableParams.mParamBlocks[i]->mData = dataIter;
-
-				dataIter += bufferSize;
-				memcpy(bindableParams.mParamBlocks[i]->mData, paramBlock->getData(), bufferSize);
-
-				bindableParams.mParamBlocks[i]->mSize = bufferSize;
-				bindableParams.mParamBlocks[i]->mDirty = paramBlock->isDirty();
-			}
-		}
-
-		return bindableParams;
-	}
 }

+ 6 - 6
CamelotCore/Source/CmPass.cpp

@@ -166,27 +166,27 @@ namespace CamelotFramework
 	{
 		HGpuProgram vertProgram = getVertexProgram();
 		if(vertProgram)
-			coreAccessor.bindGpuParams(GPT_VERTEX_PROGRAM, GpuParams::createBindableCopy(params->mVertParams));
+			coreAccessor.bindGpuParams(GPT_VERTEX_PROGRAM, params->mVertParams);
 
 		HGpuProgram fragProgram = getFragmentProgram();
 		if(fragProgram)
-			coreAccessor.bindGpuParams(GPT_FRAGMENT_PROGRAM, GpuParams::createBindableCopy(params->mFragParams));
+			coreAccessor.bindGpuParams(GPT_FRAGMENT_PROGRAM, params->mFragParams);
 
 		HGpuProgram geomProgram = getGeometryProgram();
 		if(geomProgram)
-			coreAccessor.bindGpuParams(GPT_GEOMETRY_PROGRAM, GpuParams::createBindableCopy(params->mGeomParams));
+			coreAccessor.bindGpuParams(GPT_GEOMETRY_PROGRAM, params->mGeomParams);
 
 		HGpuProgram hullProgram = getHullProgram();
 		if(hullProgram)
-			coreAccessor.bindGpuParams(GPT_HULL_PROGRAM, GpuParams::createBindableCopy(params->mHullParams));
+			coreAccessor.bindGpuParams(GPT_HULL_PROGRAM, params->mHullParams);
 
 		HGpuProgram domainProgram = getDomainProgram();
 		if(domainProgram)
-			coreAccessor.bindGpuParams(GPT_DOMAIN_PROGRAM, GpuParams::createBindableCopy(params->mDomainParams));
+			coreAccessor.bindGpuParams(GPT_DOMAIN_PROGRAM, params->mDomainParams);
 
 		HGpuProgram computeProgram = getComputeProgram();
 		if(computeProgram)
-			coreAccessor.bindGpuParams(GPT_COMPUTE_PROGRAM, GpuParams::createBindableCopy(params->mComputeParams));
+			coreAccessor.bindGpuParams(GPT_COMPUTE_PROGRAM, params->mComputeParams);
 	}
 	//----------------------------------------------------------------------
 	RTTITypeBase* Pass::getRTTIStatic()

+ 2 - 0
CamelotUtility/CamelotUtility.vcxproj

@@ -247,6 +247,7 @@
     <ClCompile Include="Source\CmAsyncOp.cpp" />
     <ClCompile Include="Source\CmBitmapWriter.cpp" />
     <ClCompile Include="Source\CmBox.cpp" />
+    <ClCompile Include="Source\CmFrameAlloc.cpp" />
     <ClCompile Include="Source\CmFRect.cpp" />
     <ClCompile Include="Source\CmInt2.cpp" />
     <ClCompile Include="Source\CmManagedDataBlock.cpp" />
@@ -269,6 +270,7 @@
     <ClInclude Include="Include\CmException.h" />
     <ClInclude Include="Include\CmFileSerializer.h" />
     <ClInclude Include="Include\CmFileSystem.h" />
+    <ClInclude Include="Include\CmFrameAlloc.h" />
     <ClInclude Include="Include\CmFRect.h" />
     <ClInclude Include="Include\CmHString.h" />
     <ClInclude Include="Include\CmInt2.h" />

+ 6 - 0
CamelotUtility/CamelotUtility.vcxproj.filters

@@ -231,6 +231,9 @@
     <ClInclude Include="Include\CmMemAllocProfiler.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="Include\CmFrameAlloc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Source\CmMath.cpp">
@@ -350,5 +353,8 @@
     <ClCompile Include="Source\CmMemoryAllocator.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="Source\CmFrameAlloc.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>

+ 68 - 0
CamelotUtility/Include/CmFrameAlloc.h

@@ -0,0 +1,68 @@
+#pragma once
+
+#include "CmPrerequisitesUtil.h"
+
+namespace CamelotFramework
+{
+	/**
+	 * @brief	Frame allocator. Very fast allocations but can only free all of its memory at once.
+	 * 			Perfect for allocations that last just a single frame.
+	 * 			
+	 * @note	Not thread safe with an exception. "alloc" and "clear" methods need to be called from the same thread.
+	 * 			"dealloc" is thread safe and can be called from any thread.
+	 * 			
+	 *			Each allocation comes with a pretty hefty 4 byte memory overhead, so don't use it for small allocations.
+	 */
+	class CM_UTILITY_EXPORT FrameAlloc
+	{
+	private:
+		class MemBlock // Header describing one contiguous chunk of frame memory
+		{
+		public:
+			MemBlock(UINT32 size);
+			~MemBlock();
+
+			UINT8* alloc(UINT8 amount); // NOTE(review): "amount" is UINT8, presumably 8 bits, so one call can advance the block by at most 255 bytes - confirm this width is intended
+			void clear(); // Rewinds the free offset to the start of the block
+
+			UINT8* mData; // Start of the usable bytes of this block
+			UINT32 mFreePtr; // Offset into mData of the first unused byte
+			UINT32 mSize; // Usable size of the block, in bytes
+		};
+
+	public:
+		FrameAlloc(UINT32 blockSize = 1024 * 1024);
+		~FrameAlloc();
+
+		/**
+		 * @brief	Allocates a new block of memory of the specified size.
+		 *
+		 * @param	amount	Amount of memory to allocate, in bytes.
+		 *
+		 * @return	Pointer to the usable memory. A UINT32 size header is stored directly in front of it.
+		 */
+		UINT8* alloc(UINT32 amount);
+
+		/**
+		 * @brief	Deallocates a previously allocated block of memory.
+		 *
+		 * @param	data	Pointer previously returned by "alloc".
+		 *
+		 * @note	No deallocation is actually done here. This method is only used for debug purposes
+		 * 			so it is easier to track down memory leaks and corruption.
+		 * 			
+		 *			Thread safe.
+		 */
+		void dealloc(UINT8* data);
+
+		/**
+		 * @brief	Deallocates all allocated memory.
+		 *
+		 * @note	Raises InvalidStateException if some allocated bytes were never passed to "dealloc".
+		 */
+		void clear();
+
+	private:
+		UINT32 mBlockSize; // Minimum usable size of every block that gets allocated
+		Vector<MemBlock*>::type mBlocks; // All blocks currently owned by the allocator
+		MemBlock* mFreeBlock; // Block that new allocations are taken from
+		std::atomic<UINT32> mTotalAllocBytes; // Bytes handed out by "alloc" (size headers included) and not yet passed to "dealloc"
+
+		MemBlock* allocBlock(UINT32 wantedSize);
+		void deallocBlock(MemBlock* block);
+	};
+}

+ 1 - 0
CamelotUtility/Include/CmMemStack.h

@@ -2,6 +2,7 @@
 
 #include <stack>
 #include <assert.h>
+#include "CmStdHeaders.h"
 #include "CmThreadDefines.h"
 
 namespace CamelotFramework

+ 105 - 0
CamelotUtility/Source/CmFrameAlloc.cpp

@@ -0,0 +1,105 @@
+#include "CmFrameAlloc.h"
+#include "CmException.h"
+
+namespace CamelotFramework
+{
+	FrameAlloc::MemBlock::MemBlock(UINT32 size)
+		: mData(nullptr)
+		, mFreePtr(0)
+		, mSize(size)
+	{
+		// mData stays null here; FrameAlloc::allocBlock assigns it once the backing memory is laid out.
+	}
+
+	FrameAlloc::MemBlock::~MemBlock()
+	{ } // Intentionally empty: mData is part of the allocation released by FrameAlloc::deallocBlock
+
+	UINT8* FrameAlloc::MemBlock::alloc(UINT8 amount)
+	{
+		// Hand out the bytes at the current free offset and move the offset past them.
+		// No bounds check is performed here.
+		// NOTE(review): "amount" is UINT8 (presumably 8 bits), so a single call cannot reserve
+		// more than 255 bytes - confirm this width is intended.
+		const UINT32 startOffset = mFreePtr;
+		mFreePtr = startOffset + amount;
+
+		return mData + startOffset;
+	}
+
+	void FrameAlloc::MemBlock::clear()
+	{
+		mFreePtr = 0; // Rewind only: the memory itself is neither freed nor overwritten
+	}
+
+	FrameAlloc::FrameAlloc(UINT32 blockSize)
+		: mBlockSize(blockSize)
+		, mFreeBlock(nullptr)
+		, mTotalAllocBytes(0)
+	{
+		// Start out with one block so that "alloc" always has a block to take memory from.
+		allocBlock(blockSize);
+	}
+
+	FrameAlloc::~FrameAlloc()
+	{
+		// Release every block that is still owned by the allocator.
+		for(auto iter = mBlocks.begin(); iter != mBlocks.end(); ++iter)
+		{
+			deallocBlock(*iter);
+		}
+	}
+
+	/**
+	 * Reserves "amount" bytes from the current block, allocating a new block first when the
+	 * current one does not have enough room left.
+	 *
+	 * @param	amount	Number of usable bytes requested.
+	 *
+	 * @return	Pointer to the usable bytes, located right after a UINT32 header that stores the
+	 *			total size of the allocation (header included).
+	 */
+	UINT8* FrameAlloc::alloc(UINT32 amount)
+	{
+		// The header lets "dealloc" subtract the right number of bytes from the outstanding counter.
+		amount += sizeof(UINT32);
+
+		UINT32 freeMem = mFreeBlock->mSize - mFreeBlock->mFreePtr;
+		if(amount > freeMem)
+			allocBlock(amount);
+
+		// Bump the free pointer directly instead of calling MemBlock::alloc: that method takes its
+		// size as UINT8, so any request above 255 bytes would be truncated and the following
+		// allocations would overlap this one.
+		UINT8* data = &mFreeBlock->mData[mFreeBlock->mFreePtr];
+		mFreeBlock->mFreePtr += amount;
+		mTotalAllocBytes += amount;
+
+		UINT32* storedSize = reinterpret_cast<UINT32*>(data);
+		*storedSize = amount;
+
+		return data + sizeof(UINT32);
+	}
+
+	void FrameAlloc::dealloc(UINT8* data)
+	{
+		// Pure bookkeeping, used for debugging only: no memory is returned here, that happens in
+		// "clear". The size of the allocation is read from the UINT32 header stored in front of
+		// "data" and subtracted from the outstanding-bytes counter.
+		const UINT32* sizeHeader = reinterpret_cast<const UINT32*>(data - sizeof(UINT32));
+		mTotalAllocBytes -= *sizeHeader;
+	}
+
+	/**
+	 * Releases all frame memory at once. Raises InvalidStateException (through CM_EXCEPT) when
+	 * some allocated bytes were never passed to "dealloc". Afterwards the allocator owns exactly
+	 * one empty block, at least as large as all previous blocks combined.
+	 */
+	void FrameAlloc::clear()
+	{
+		if(mTotalAllocBytes.load() > 0)
+			CM_EXCEPT(InvalidStateException, "Not all frame allocated bytes were properly released.");
+
+		// Common case: nothing spilled into additional blocks. Merging a single block would free
+		// it and allocate an identically sized one, so simply rewind it in place.
+		if(mBlocks.size() == 1)
+		{
+			mFreeBlock = mBlocks[0];
+			mFreeBlock->clear();
+			return;
+		}
+
+		// Merge all blocks into one
+		UINT32 totalBytes = 0;
+		for(auto& block : mBlocks)
+		{
+			totalBytes += block->mSize;
+			deallocBlock(block);
+		}
+
+		mBlocks.clear();
+
+		allocBlock(totalBytes);
+	}
+
+	FrameAlloc::MemBlock* FrameAlloc::allocBlock(UINT32 wantedSize)
+	{
+		// Never go below the configured block size; grow only for oversized requests.
+		const UINT32 usableBytes = (wantedSize > mBlockSize) ? wantedSize : mBlockSize;
+
+		// A single heap allocation holds the MemBlock header immediately followed by its payload.
+		UINT8* rawMem = reinterpret_cast<UINT8*>(cm_alloc(usableBytes + sizeof(MemBlock)));
+		MemBlock* block = new (rawMem) MemBlock(usableBytes);
+		block->mData = rawMem + sizeof(MemBlock);
+
+		mBlocks.push_back(block);
+		mFreeBlock = block; // If previous block had some empty space it is lost until next "clear"
+
+		return block;
+	}
+
+	void FrameAlloc::deallocBlock(MemBlock* block)
+	{
+		block->~MemBlock(); // Explicit destructor call: the block was constructed with placement new in allocBlock
+		cm_free(block); // Frees the single allocation that holds both the MemBlock header and its data
+	}
+}

+ 2 - 16
Opts.txt

@@ -1,24 +1,10 @@
 Make sure to also update TextSprite and ImageSprite and anything else in UpdateMesh, then don't forget to find the issue that causes elements to get marked as dirty every single frame
 
-FrameAlloc
-   Keeps two internal stacks
-     - One core, one sim
-   coreThreadBegin()
-    - swaps the active stacks
-   coreThreadEnd() - Must be called after Core thread has finished
-    - frees the previously active stack
-   During frame we call alloc()
-  
  There are many allocations in CPUProfiler (especially report generation), and they aren't counted anywhere since they use
- a special allocator.
+ a special allocator. - Add this as a Low priority task
 
 --------------------
 
 When optimizing UpdateLayout make sure to mark elements that are fully culled as Culled
  - But in order to determine that I first need to update the sprite to find out the elements bounds which defeats the point
- - TODO - FIgure this out
-
-----------
-
-Fix OpenGL MeshHeap. All the meshes are rendering corrupted
-When doing allocInternal I don't check that index/vertex desc in MeshData actually matches the ones in MeshHeap.
+ - TODO - FIgure this out