Quellcode durchsuchen

Allocating GpuParams, and especially a bindable copy of GpuParams, will now cause far fewer allocations
Fixed an issue with CPUProfiler that would happen if beginThread wasn't called before beginSample

Marko Pintera vor 12 Jahren
Ursprung
Commit
b0b11a73da

+ 1 - 1
BansheeEngine.sln

@@ -43,8 +43,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		DrawHelper.txt = DrawHelper.txt
 		EditorWindowDock.txt = EditorWindowDock.txt
 		Notes.txt = Notes.txt
+		Opts.txt = Opts.txt
 		RenderOperation.txt = RenderOperation.txt
-		TextOpts.txt = TextOpts.txt
 		TODO.txt = TODO.txt
 		TODODoc.txt = TODODoc.txt
 		TODOEditor.txt = TODOEditor.txt

+ 0 - 2
BansheeEngine/Source/BsApplication.cpp

@@ -51,7 +51,6 @@ namespace BansheeEngine
 		DrawHelper3D::startUp(cm_new<DrawHelper3D>());
 
 		EngineGUI::startUp(cm_new<EngineGUI>());
-		Profiler::startUp(cm_new<Profiler>());
 
 		updateCallbackConn = CM::gApplication().mainLoopCallback.connect(boost::bind(&Application::update, this));
 	}
@@ -65,7 +64,6 @@ namespace BansheeEngine
 	{
 		CM::gApplication().mainLoopCallback.disconnect(updateCallbackConn);
 
-		Profiler::shutDown();
 		EngineGUI::shutDown();
 
 		DrawHelper3D::shutDown();

+ 4 - 0
CamelotCore/CamelotCore.vcxproj

@@ -272,6 +272,8 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
+    <ClInclude Include="Include\CmBindableGpuParamBlock.h" />
+    <ClInclude Include="Include\CmBindableGpuParams.h" />
     <ClInclude Include="Include\CmCoreThread.h" />
     <ClInclude Include="Include\CmCPUProfiler.h" />
     <ClInclude Include="Include\CmDefaultRenderQueue.h" />
@@ -404,6 +406,8 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Include\CmMaterialManager.cpp" />
+    <ClCompile Include="Source\CmBindableGpuParamBlock.cpp" />
+    <ClCompile Include="Source\CmBindableGpuParams.cpp" />
     <ClCompile Include="Source\CmCoreThread.cpp" />
     <ClCompile Include="Source\CmCPUProfiler.cpp" />
     <ClCompile Include="Source\CmDefaultRenderQueue.cpp" />

+ 12 - 0
CamelotCore/CamelotCore.vcxproj.filters

@@ -483,6 +483,12 @@
     <ClInclude Include="Include\CmTextData.h">
       <Filter>Header Files\Text</Filter>
     </ClInclude>
+    <ClInclude Include="Include\CmBindableGpuParams.h">
+      <Filter>Header Files\RenderSystem</Filter>
+    </ClInclude>
+    <ClInclude Include="Include\CmBindableGpuParamBlock.h">
+      <Filter>Header Files\RenderSystem</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Source\CmApplication.cpp">
@@ -749,5 +755,11 @@
     <ClCompile Include="Source\CmTextData.cpp">
       <Filter>Source Files\Text</Filter>
     </ClCompile>
+    <ClCompile Include="Source\CmBindableGpuParams.cpp">
+      <Filter>Source Files\RenderSystem</Filter>
+    </ClCompile>
+    <ClCompile Include="Source\CmBindableGpuParamBlock.cpp">
+      <Filter>Source Files\RenderSystem</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>

+ 32 - 0
CamelotCore/Include/CmBindableGpuParamBlock.h

@@ -0,0 +1,32 @@
+#pragma once
+
+#include "CmPrerequisites.h"
+#include "CmCommonEnums.h"
+#include "CmCoreObject.h"
+
+namespace CamelotFramework
+{
+	/**
+	* @note	Due to the way allocation is handled, this class is not allowed to have a destructor.
+	*/
+	class CM_EXPORT BindableGpuParamBlock
+	{
+	public:
+		BindableGpuParamBlock();
+
+		/**
+		 * @brief	Uploads the current data to the specified buffer, and marks the block as non-dirty.
+		 * 			Should only be called from the core thread.
+		 */
+		void uploadToBuffer(GpuParamBlockBufferPtr buffer);
+
+		bool isDirty() const { return mDirty; }
+	protected:
+		friend class GpuParams;
+		friend class BindableGpuParams;
+
+		UINT8* mData;
+		UINT32 mSize;
+		bool mDirty;
+	};
+}

+ 56 - 0
CamelotCore/Include/CmBindableGpuParams.h

@@ -0,0 +1,56 @@
+#pragma once
+
+#include "CmPrerequisites.h"
+
+namespace CamelotFramework
+{
+	/**
+		* @brief	Specialized class for binding GPU parameters to the render system. This is a temporary class that
+		* 			is used for temporarily saving parameter data while parameters are scheduled to be bound to the GPU.
+		* 			This allows us to freely modify base GpuParams without worrying about changing parameter binds 
+		* 			that are scheduled but not yet executed.
+		* 			
+		* @note		Upon assignment this class transfers ownership of its data. Internal data
+		* 			is destroyed when last assigned instance goes out of scope.
+		* 			(In short, you should never have more than one active copy of an instance of this class)
+		* 			
+		*			Created on the sim thread and used exclusively on the core thread.
+	**/
+	class CM_EXPORT BindableGpuParams
+	{
+	public:
+		BindableGpuParams(const BindableGpuParams& source);
+		~BindableGpuParams();
+
+		/**
+		 * @brief	Updates all used hardware parameter buffers. Should ONLY be called from core thread.
+		 */
+		void updateHardwareBuffers();
+
+		GpuParamBlockBufferPtr getParamBlockBuffer(UINT32 slot) const;
+		GpuParamBlockBufferPtr getParamBlockBuffer(const String& name) const;
+		HTexture getTexture(UINT32 slot);
+		HSamplerState getSamplerState(UINT32 slot);
+
+		const GpuParamDesc& getParamDesc() const { return mParamDesc; }
+
+	private:
+		friend class GpuParams;
+
+		BindableGpuParams(const GpuParamDesc& sourceParamDesc);
+
+	private:
+		mutable bool mOwnsData;
+		const GpuParamDesc& mParamDesc;
+		UINT8* mData;
+
+		UINT32 mNumParamBlocks;
+		UINT32 mNumTextures;
+		UINT32 mNumSamplerStates;
+
+		BindableGpuParamBlock** mParamBlocks;
+		GpuParamBlockBufferPtr* mParamBlockBuffers;
+		HTexture* mTextures;
+		HSamplerState* mSamplerStates;
+	};
+}

+ 0 - 6
CamelotCore/Include/CmGpuParamBlock.h

@@ -17,12 +17,6 @@ namespace CamelotFramework
 		void write(UINT32 offset, const void* data, UINT32 size);
 		void zeroOut(UINT32 offset, UINT32 size);
 
-		/**
-		 * @brief	Uploads the current data to the specified buffer, and marks the block a non-dirty.
-		 * 			Should only be called from the core thread.
-		 */
-		void uploadToBuffer(GpuParamBlockBufferPtr buffer);
-
 		UINT32 getSize() const { return mSize; }
 		UINT8* getData() const { return mData; }
 		bool isDirty() const { return mDirty; }

+ 11 - 44
CamelotCore/Include/CmGpuParams.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "CmPrerequisites.h"
+#include "CmBindableGpuParams.h"
 
 namespace CamelotFramework
 {
@@ -10,9 +11,6 @@ namespace CamelotFramework
 		GpuParams(GpuParamDesc& paramDesc);
 		~GpuParams();
 
-		GpuParamBlockBufferPtr getParamBlockBuffer(UINT32 slot) const;
-		GpuParamBlockBufferPtr getParamBlockBuffer(const String& name) const;
-
 		void setParamBlockBuffer(UINT32 slot, GpuParamBlockBufferPtr paramBlockBuffer);
 		void setParamBlockBuffer(const String& name, GpuParamBlockBufferPtr paramBlockBuffer);
 
@@ -46,62 +44,31 @@ namespace CamelotFramework
 		void setParam(const String& name, const void* value, UINT32 sizeBytes, UINT32 arrayIndex = 0);
 
 		void setTexture(const String& name, const HTexture& val);
-		HTexture getTexture(UINT32 slot);
-
 		void setSamplerState(const String& name, const HSamplerState& val);
-		HSamplerState getSamplerState(UINT32 slot);
-
+		
 		void setTransposeMatrices(bool transpose) { mTransposeMatrices = transpose; }
 
-		/**
-		 * @brief	Updates all used hardware parameter buffers. Should ONLY be called from core thread.
-		 */
-		void updateHardwareBuffers();
-
 		/**
 		 * @brief	Creates the copy of this object in a special way. Should only be called
 		 * 			internally by core thread accessor when passing gpu params to the core thread.
 		 */
-		static BindableGpuParams createBindableCopy(GpuParamsPtr params);
+		static BindableGpuParams createBindableCopy(const GpuParamsPtr& params);
 
-		/**
-		 * @brief	Needs to be called on any copy created with "createBindableCopy" before the object is deleted.
-		 */
-		static void releaseBindableCopy(BindableGpuParams& bindableParams);
 	private:
 		GpuParamDesc& mParamDesc;
 		bool mTransposeMatrices;
 
 		GpuParamDataDesc* getParamDesc(const String& name) const;
 
-		Vector<GpuParamBlock*>::type mParamBlocks;
-		Vector<GpuParamBlockBufferPtr>::type mParamBlockBuffers;
-		Vector<HTexture>::type mTextures;
-		Vector<HSamplerState>::type mSamplerStates;
-	};
-
-	/**
-	 * @brief	Specialized class for binding GPU parameters to the render system. You should not
-	 * 			handle this class manually.
-	 * 			
-	 * @note	Upon assignment this class transfers ownership of its data. Internal data
-	 * 			is destroyed when last assigned instance goes out of scope.
-	 * 			(In short, you should never have more than one active copy of an instance of this class)
-	 */
-	class CM_EXPORT BindableGpuParams
-	{
-	public:
-		BindableGpuParams(const BindableGpuParams& source);
-		~BindableGpuParams();
-
-		GpuParams& getParams() const { return *mParams; }
-
-	private:
-		friend class GpuParams;
+		UINT8* mData;
 
-		BindableGpuParams(GpuParams* params);
+		UINT32 mNumParamBlocks;
+		UINT32 mNumTextures;
+		UINT32 mNumSamplerStates;
 
-		GpuParams *mParams;
-		mutable bool mIsDataOwner;
+		GpuParamBlock** mParamBlocks;
+		GpuParamBlockBufferPtr* mParamBlockBuffers;
+		HTexture* mTextures;
+		HSamplerState* mSamplerStates;
 	};
 }

+ 1 - 0
CamelotCore/Include/CmPrerequisites.h

@@ -130,6 +130,7 @@ namespace CamelotFramework {
 	class GpuResource;
 	class GpuResourceData;
 	class BindableGpuParams;
+	class BindableGpuParamBlock;
 	struct RenderOperation;
 	class RenderQueue;
 	struct ProfilerReport;

+ 9 - 0
CamelotCore/Include/CmTextData.h

@@ -9,6 +9,9 @@ namespace CamelotFramework
 	class TextData
 	{
 	private:
+		/**
+		 * @note	Due to the way allocation is handled, this class is not allowed to have a destructor.
+		 */
 		class TextWord
 		{
 		public:
@@ -36,6 +39,9 @@ namespace CamelotFramework
 			UINT32 mSpaceWidth;
 		};
 
+		/**
+		 * @note	Due to the way allocation is handled, this class is not allowed to have a destructor.
+		 */
 		struct PageInfo
 		{
 			UINT32 numQuads;
@@ -43,6 +49,9 @@ namespace CamelotFramework
 		};
 
 	public:
+		/**
+		 * @note	Due to the way allocation is handled, this class is not allowed to have a destructor.
+		 */
 		class CM_EXPORT TextLine
 		{
 		public:

+ 2 - 0
CamelotCore/Source/CmApplication.cpp

@@ -51,6 +51,7 @@ namespace CamelotFramework
 		Platform::startUp();
 		MemStack::setupHeap(HID_Main);
 
+		Profiler::startUp(cm_new<Profiler>());
 		StringTable::startUp(cm_new<StringTable>());
 		DeferredCallManager::startUp(cm_new<DeferredCallManager>());
 		Time::startUp(cm_new<Time>());
@@ -186,6 +187,7 @@ namespace CamelotFramework
 		DeferredCallManager::shutDown();
 		StringTable::shutDown();
 
+		Profiler::shutDown();
 		Platform::shutDown();
 	}
 

+ 15 - 0
CamelotCore/Source/CmBindableGpuParamBlock.cpp

@@ -0,0 +1,15 @@
+#include "CmBindableGpuParamBlock.h"
+#include "CmGpuParamBlockBuffer.h"
+
+namespace CamelotFramework
+{
+	BindableGpuParamBlock::BindableGpuParamBlock()
+		:mDirty(true), mData(nullptr), mSize(0)
+	{ }
+
+	void BindableGpuParamBlock::uploadToBuffer(GpuParamBlockBufferPtr buffer)
+	{
+		buffer->writeData(mData);
+		mDirty = false;
+	}
+}

+ 102 - 0
CamelotCore/Source/CmBindableGpuParams.cpp

@@ -0,0 +1,102 @@
+#include "CmBindableGpuParams.h"
+#include "CmGpuParams.h"
+#include "CmGpuParamDesc.h"
+#include "CmBindableGpuParamBlock.h"
+#include "CmGpuParamBlockBuffer.h"
+#include "CmDebug.h"
+
+namespace CamelotFramework
+{
+	BindableGpuParams::BindableGpuParams(const GpuParamDesc& sourceParamDesc)
+		:mOwnsData(true), mParamDesc(sourceParamDesc), mData(nullptr), mNumParamBlocks(0), 
+		mNumTextures(0), mNumSamplerStates(0),mParamBlocks(nullptr), mParamBlockBuffers(nullptr), mTextures(nullptr), mSamplerStates(nullptr)
+	{
+		// Actual allocation of all data happens in parent GpuParams
+	}
+
+	BindableGpuParams::BindableGpuParams(const BindableGpuParams& source)
+		:mParamDesc(source.mParamDesc)
+	{
+		mOwnsData = true;
+		source.mOwnsData = false;
+
+		mData = source.mData;
+		mNumParamBlocks = source.mNumParamBlocks;
+		mNumTextures = source.mNumTextures;
+		mNumSamplerStates = source.mNumSamplerStates;
+
+		mParamBlocks = source.mParamBlocks;
+		mParamBlockBuffers = source.mParamBlockBuffers;
+		mTextures = source.mTextures;
+		mSamplerStates = source.mSamplerStates;
+	}
+
+	BindableGpuParams::~BindableGpuParams()
+	{
+		if(mOwnsData && mData != nullptr)
+		{
+			cm_free(mData);
+			// TODO - Dealloc using stack
+		}
+	}
+
+	// TODO - Forbid copying but allow move semantics
+	
+	GpuParamBlockBufferPtr BindableGpuParams::getParamBlockBuffer(UINT32 slot) const
+	{
+		if(slot >= mNumParamBlocks) // slot is unsigned, so only the upper bound needs checking
+		{
+			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
+				toString((int)mNumParamBlocks - 1) + ". Requested: " + toString(slot)); // signed math: reports -1 instead of 4294967295 when there are no slots
+		}
+
+		return mParamBlockBuffers[slot];
+	}
+
+	GpuParamBlockBufferPtr BindableGpuParams::getParamBlockBuffer(const String& name) const
+	{
+		auto iterFind = mParamDesc.paramBlocks.find(name);
+
+		if(iterFind == mParamDesc.paramBlocks.end())
+		{
+			LOGWRN("Cannot find parameter block with the name: " + name);
+			return nullptr;
+		}
+
+		return mParamBlockBuffers[iterFind->second.slot];
+	}
+
+	HTexture BindableGpuParams::getTexture(UINT32 slot)
+	{
+		if(slot >= mNumTextures) // slot is unsigned, so only the upper bound needs checking
+		{
+			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
+				toString((int)mNumTextures - 1) + ". Requested: " + toString(slot)); // signed math: reports -1 instead of 4294967295 when there are no slots
+		}
+
+		return mTextures[slot];
+	}
+
+	HSamplerState BindableGpuParams::getSamplerState(UINT32 slot)
+	{
+		if(slot >= mNumSamplerStates) // slot is unsigned, so only the upper bound needs checking
+		{
+			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
+				toString((int)mNumSamplerStates - 1) + ". Requested: " + toString(slot)); // signed math: reports -1 instead of 4294967295 when there are no slots
+		}
+
+		return mSamplerStates[slot];
+	}
+
+	void BindableGpuParams::updateHardwareBuffers()
+	{
+		for(size_t i = 0; i < mNumParamBlocks; i++)
+		{
+			if(mParamBlocks[i] != nullptr && mParamBlockBuffers[i] != nullptr)
+			{
+				if(mParamBlocks[i]->isDirty())
+					mParamBlocks[i]->uploadToBuffer(mParamBlockBuffers[i]);
+			}
+		}
+	}
+}

+ 3 - 0
CamelotCore/Source/CmCPUProfiler.cpp

@@ -268,7 +268,10 @@ namespace CamelotFramework
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		if(thread == nullptr || !thread->isActive)
+		{
 			beginThread("Unknown");
+			thread = ThreadInfo::activeThread;
+		}
 
 		ProfiledBlock* parent = thread->activeBlock.block;
 		ProfiledBlock* block = nullptr;

+ 0 - 6
CamelotCore/Source/CmGpuParamBlock.cpp

@@ -58,10 +58,4 @@ namespace CamelotFramework
 
 		mDirty = true;
 	}
-
-	void GpuParamBlock::uploadToBuffer(GpuParamBlockBufferPtr buffer)
-	{
-		buffer->writeData(mData);
-		mDirty = false;
-	}
 }

+ 107 - 102
CamelotCore/Source/CmGpuParams.cpp

@@ -2,6 +2,7 @@
 #include "CmGpuParamDesc.h"
 #include "CmGpuParamBlock.h"
 #include "CmGpuParamBlockBuffer.h"
+#include "CmBindableGpuParamBlock.h"
 #include "CmVector2.h"
 #include "CmDebug.h"
 #include "CmException.h"
@@ -9,75 +10,97 @@
 namespace CamelotFramework
 {
 	GpuParams::GpuParams(GpuParamDesc& paramDesc)
-		:mParamDesc(paramDesc), mTransposeMatrices(false)
+		:mParamDesc(paramDesc), mTransposeMatrices(false), mData(nullptr), mNumParamBlocks(0), mNumTextures(0), mNumSamplerStates(0),
+		mParamBlocks(nullptr), mParamBlockBuffers(nullptr), mTextures(nullptr), mSamplerStates(nullptr)
 	{
-		UINT32 numParamBlockSlots = 0;
 		for(auto iter = mParamDesc.paramBlocks.begin(); iter != mParamDesc.paramBlocks.end(); ++iter)
 		{
-			if((iter->second.slot + 1) > numParamBlockSlots)
-				numParamBlockSlots = iter->second.slot + 1;
+			if((iter->second.slot + 1) > mNumParamBlocks)
+				mNumParamBlocks = iter->second.slot + 1;
 		}
 
-		mParamBlocks.resize(numParamBlockSlots, nullptr);
-		mParamBlockBuffers.resize(numParamBlockSlots);
-
-		UINT32 numTextureSlots = 0;
 		for(auto iter = mParamDesc.textures.begin(); iter != mParamDesc.textures.end(); ++iter)
 		{
-			if((iter->second.slot + 1) > numTextureSlots)
-				numTextureSlots = iter->second.slot + 1;
+			if((iter->second.slot + 1) > mNumTextures)
+				mNumTextures = iter->second.slot + 1;
 		}
 
-		mTextures.resize(numTextureSlots);
-
-		UINT32 numSamplerSlots = 0;
 		for(auto iter = mParamDesc.samplers.begin(); iter != mParamDesc.samplers.end(); ++iter)
 		{
-			if((iter->second.slot + 1) > numSamplerSlots)
-				numSamplerSlots = iter->second.slot + 1;
+			if((iter->second.slot + 1) > mNumSamplerStates)
+				mNumSamplerStates = iter->second.slot + 1;
 		}
 
-		mSamplerStates.resize(numSamplerSlots);
-	}
+		// Allocate everything in a single block of memory to get rid of extra memory allocations
+		UINT32 paramBlockBufferSize = mNumParamBlocks * sizeof(GpuParamBlock*);
+		UINT32 paramBlockBuffersBufferSize = mNumParamBlocks * sizeof(GpuParamBlockBufferPtr);
+		UINT32 textureBufferSize = mNumTextures * sizeof(HTexture);
+		UINT32 samplerStateBufferSize = mNumSamplerStates * sizeof(HSamplerState);
 
-	GpuParams::~GpuParams()
-	{
-		for(auto& paramBlock : mParamBlocks)
+		UINT32 bufferSize = paramBlockBufferSize + paramBlockBuffersBufferSize + textureBufferSize + samplerStateBufferSize;
+
+		mData = (UINT8*)cm_alloc(bufferSize);
+		
+		UINT8* dataIter = mData;
+		mParamBlocks = (GpuParamBlock**)dataIter;
+		dataIter += paramBlockBufferSize;
+
+		mParamBlockBuffers = (GpuParamBlockBufferPtr*)dataIter;
+		dataIter += paramBlockBuffersBufferSize;
+
+		mTextures = (HTexture*)dataIter;
+		dataIter += textureBufferSize;
+
+		mSamplerStates = (HSamplerState*)dataIter;
+
+		// Ensure everything is constructed
+		for(UINT32 i = 0; i < mNumParamBlocks; i++)
 		{
-			cm_delete<PoolAlloc>(paramBlock);
+			mParamBlocks[i] = nullptr;
+
+			GpuParamBlockBufferPtr* ptrToIdx = (&mParamBlockBuffers[i]);
+			ptrToIdx = new (&mParamBlockBuffers[i]) GpuParamBlockBufferPtr(nullptr);
 		}
-	}
 
-	GpuParamBlockBufferPtr GpuParams::getParamBlockBuffer(UINT32 slot) const
-	{
-		if(slot < 0 || slot >= (UINT32)mParamBlocks.size())
+		for(UINT32 i = 0; i < mNumTextures; i++)
 		{
-			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
-				toString((int)mParamBlocks.size() - 1) + ". Requested: " + toString(slot));
+			HTexture* ptrToIdx = (&mTextures[i]);
+			ptrToIdx = new (&mTextures[i]) HTexture();
 		}
 
-		return mParamBlockBuffers[slot];
+		for(UINT32 i = 0; i < mNumSamplerStates; i++)
+		{
+			HSamplerState* ptrToIdx = (&mSamplerStates[i]);
+			ptrToIdx = new (&mSamplerStates[i]) HSamplerState();
+		}
 	}
 
-	GpuParamBlockBufferPtr GpuParams::getParamBlockBuffer(const String& name) const
+	GpuParams::~GpuParams()
 	{
-		auto iterFind = mParamDesc.paramBlocks.find(name);
-
-		if(iterFind == mParamDesc.paramBlocks.end())
+		// Ensure everything is destructed
+		for(UINT32 i = 0; i < mNumParamBlocks; i++)
 		{
-			LOGWRN("Cannot find parameter block with the name: " + name);
-			return nullptr;
+			if(mParamBlocks[i] != nullptr)
+				cm_delete<PoolAlloc>(mParamBlocks[i]);
+
+			mParamBlockBuffers[i].~shared_ptr();
 		}
 
-		return mParamBlockBuffers[iterFind->second.slot];
+		for(UINT32 i = 0; i < mNumTextures; i++)
+			mTextures[i].~ResourceHandle();
+
+		for(UINT32 i = 0; i < mNumSamplerStates; i++)
+			mSamplerStates[i].~ResourceHandle();
+
+		cm_free(mData);
 	}
 
 	void GpuParams::setParamBlockBuffer(UINT32 slot, GpuParamBlockBufferPtr paramBlockBuffer)
 	{
-		if(slot < 0 || slot >= (UINT32)mParamBlocks.size())
+		if(slot < 0 || slot >= mNumParamBlocks)
 		{
 			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
-				toString((int)mParamBlocks.size() - 1) + ". Requested: " + toString(slot));
+				toString(mNumParamBlocks - 1) + ". Requested: " + toString(slot));
 		}
 
 		if(mParamBlocks[slot] != nullptr)
@@ -259,17 +282,6 @@ namespace CamelotFramework
 		mTextures[paramIter->second.slot] = val;
 	}
 
-	HTexture GpuParams::getTexture(UINT32 slot)
-	{
-		if(slot < 0 || slot >= (UINT32)mTextures.size())
-		{
-			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
-				toString((int)mTextures.size() - 1) + ". Requested: " + toString(slot));
-		}
-
-		return mTextures[slot];
-	}
-
 	void GpuParams::setSamplerState(const String& name, const HSamplerState& val)
 	{
 		auto paramIter = mParamDesc.samplers.find(name);
@@ -282,17 +294,6 @@ namespace CamelotFramework
 		mSamplerStates[paramIter->second.slot] = val;
 	}
 
-	HSamplerState GpuParams::getSamplerState(UINT32 slot)
-	{
-		if(slot < 0 || slot >= (UINT32)mSamplerStates.size())
-		{
-			CM_EXCEPT(InvalidParametersException, "Index out of range: Valid range: 0 .. " + 
-				toString((int)mSamplerStates.size() - 1) + ". Requested: " + toString(slot));
-		}
-
-		return mSamplerStates[slot];
-	}
-
 	GpuParamDataDesc* GpuParams::getParamDesc(const String& name) const
 	{
 		auto paramIter = mParamDesc.params.find(name);
@@ -302,60 +303,64 @@ namespace CamelotFramework
 		return nullptr;
 	}
 
-	BindableGpuParams GpuParams::createBindableCopy(GpuParamsPtr params)
+	BindableGpuParams GpuParams::createBindableCopy(const GpuParamsPtr& params)
 	{
-		GpuParams* copy = cm_new<GpuParams, ScratchAlloc>(std::ref(params->mParamDesc));
+		// Single allocation, laid out as: [param block pointers][param block buffer ptrs][textures][sampler states], then one BindableGpuParamBlock header + its raw data per non-null param block
+		UINT32 paramBlockBufferSize = params->mNumParamBlocks * sizeof(GpuParamBlock*);
+		UINT32 paramBlockBuffersBufferSize = params->mNumParamBlocks * sizeof(GpuParamBlockBufferPtr);
+		UINT32 textureBufferSize = params->mNumTextures * sizeof(HTexture);
+		UINT32 samplerStateBufferSize = params->mNumSamplerStates * sizeof(HSamplerState);
 
-		copy->mTransposeMatrices = params->mTransposeMatrices;
-		copy->mSamplerStates = params->mSamplerStates;
-		copy->mTextures = params->mTextures;
-		copy->mParamBlockBuffers =params-> mParamBlockBuffers;
-
-		copy->mParamBlocks.clear();
-		for(auto& paramBlock : params->mParamBlocks)
+		UINT32 bufferSize = paramBlockBufferSize + paramBlockBuffersBufferSize + textureBufferSize + samplerStateBufferSize;
+		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
 		{
-			GpuParamBlock* blockCopy = cm_new<GpuParamBlock, ScratchAlloc>(paramBlock);
-
-			copy->mParamBlocks.push_back(blockCopy);
+			if(params->mParamBlocks[i] != nullptr)
+				bufferSize += sizeof(BindableGpuParamBlock) + params->mParamBlocks[i]->getSize(); // one header per block, matching the fill loop below (counting it only once overflowed the buffer with 2+ blocks)
 		}
 
-		return BindableGpuParams(copy);
-	}
+		// TODO - Alloc using stack
+		BindableGpuParams bindableParams(params->mParamDesc);
+		bindableParams.mData = (UINT8*)cm_alloc(bufferSize);
+		bindableParams.mNumParamBlocks = params->mNumParamBlocks;
+		bindableParams.mNumTextures = params->mNumTextures;
+		bindableParams.mNumSamplerStates = params->mNumSamplerStates;
 
-	void GpuParams::updateHardwareBuffers()
-	{
-		for(size_t i = 0; i < mParamBlocks.size(); i++)
-		{
-			if(mParamBlocks[i] != nullptr && mParamBlockBuffers[i] != nullptr)
-			{
-				if(mParamBlocks[i]->isDirty())
-					mParamBlocks[i]->uploadToBuffer(mParamBlockBuffers[i]);
-			}
-		}
-	}
+		UINT8* dataIter = bindableParams.mData;
+		bindableParams.mParamBlocks = (BindableGpuParamBlock**)dataIter;
+		dataIter += paramBlockBufferSize;
 
-	void GpuParams::releaseBindableCopy(BindableGpuParams& bindableParams)
-	{
-		cm_delete<ScratchAlloc>(bindableParams.mParams);
-	}
+		bindableParams.mParamBlockBuffers = (GpuParamBlockBufferPtr*)dataIter;
+		dataIter += paramBlockBuffersBufferSize;
 
-	BindableGpuParams::BindableGpuParams(GpuParams* params)
-		:mParams(params), mIsDataOwner(true)
-	{
+		bindableParams.mTextures = (HTexture*)dataIter;
+		dataIter += textureBufferSize;
 
-	}
+		bindableParams.mSamplerStates = (HSamplerState*)dataIter;
+		dataIter += samplerStateBufferSize;
 
-	BindableGpuParams::~BindableGpuParams()
-	{
-		if(mIsDataOwner)
-			GpuParams::releaseBindableCopy(*this);
-	}
+		// Copy data. NOTE(review): the shared_ptr/handle arrays are copied bitwise (no reference is added) and ~BindableGpuParams only frees mData, so the source GpuParams presumably must keep these objects alive until the copy is consumed - confirm
+		memcpy(bindableParams.mParamBlocks, params->mParamBlocks, paramBlockBufferSize); // null entries stay null; non-null entries are re-pointed in the loop below
+		memcpy(bindableParams.mParamBlockBuffers, params->mParamBlockBuffers, paramBlockBuffersBufferSize);
+		memcpy(bindableParams.mTextures, params->mTextures, textureBufferSize);
+		memcpy(bindableParams.mSamplerStates, params->mSamplerStates, samplerStateBufferSize);
 
-	BindableGpuParams::BindableGpuParams(const BindableGpuParams& source)
-	{
-		mParams = source.mParams;
+		for(UINT32 i = 0; i < params->mNumParamBlocks; i++)
+		{
+			if(params->mParamBlocks[i] != nullptr)
+			{
+				UINT32 blockSize = params->mParamBlocks[i]->getSize();
+				bindableParams.mParamBlocks[i] = (BindableGpuParamBlock*)dataIter;
+				dataIter += sizeof(BindableGpuParamBlock);
+
+				bindableParams.mParamBlocks[i]->mData = dataIter;
+				dataIter += blockSize;
+
+				memcpy(bindableParams.mParamBlocks[i]->mData, params->mParamBlocks[i]->getData(), blockSize);
+				bindableParams.mParamBlocks[i]->mSize = blockSize;
+				bindableParams.mParamBlocks[i]->mDirty = params->mParamBlocks[i]->isDirty();
+			}
+		}
 
-		mIsDataOwner = true;
-		source.mIsDataOwner = false;
+		return bindableParams;
 	}
 }

+ 3 - 0
CamelotCore/Source/CmMesh.cpp

@@ -10,6 +10,8 @@
 #include "CmAsyncOp.h"
 #include "CmAABox.h"
 
+#include "CmProfiler.h"
+
 namespace CamelotFramework
 {
 	Mesh::Mesh()
@@ -68,6 +70,7 @@ namespace CamelotFramework
 				mVertexData->vertexCount,
 				GBU_STATIC);
 
+
 			mVertexData->setBuffer(i, vertexBuffer);
 
 			UINT8* srcVertBufferData = meshData.getStreamData(i);

+ 6 - 7
CamelotD3D11RenderSystem/Source/CmD3D11RenderSystem.cpp

@@ -21,7 +21,7 @@
 #include "CmD3D11InputLayoutManager.h"
 #include "CmD3D11HLSLProgram.h"
 #include "CmD3D11RenderUtility.h"
-#include "CmGpuParams.h"
+#include "CmBindableGpuParams.h"
 #include "CmCoreThread.h"
 #include "CmDebug.h"
 #include "CmException.h"
@@ -468,14 +468,13 @@ namespace CamelotFramework
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		GpuParams& params = bindableParams.getParams();
-		params.updateHardwareBuffers();
+		bindableParams.updateHardwareBuffers();
 
-		const GpuParamDesc& paramDesc = params.getParamDesc();
+		const GpuParamDesc& paramDesc = bindableParams.getParamDesc();
 		
 		for(auto iter = paramDesc.samplers.begin(); iter != paramDesc.samplers.end(); ++iter)
 		{
-			HSamplerState& samplerState = params.getSamplerState(iter->second.slot);
+			HSamplerState& samplerState = bindableParams.getSamplerState(iter->second.slot);
 
 			if(samplerState == nullptr)
 				setSamplerState(gptype, iter->second.slot, SamplerState::getDefault());
@@ -485,7 +484,7 @@ namespace CamelotFramework
 
 		for(auto iter = paramDesc.textures.begin(); iter != paramDesc.textures.end(); ++iter)
 		{
-			HTexture texture = params.getTexture(iter->second.slot);
+			HTexture texture = bindableParams.getTexture(iter->second.slot);
 
 			if(!texture.isLoaded())
 				setTexture(gptype, iter->second.slot, false, nullptr);
@@ -499,7 +498,7 @@ namespace CamelotFramework
 
 		for(auto iter = paramDesc.paramBlocks.begin(); iter != paramDesc.paramBlocks.end(); ++iter)
 		{
-			GpuParamBlockBufferPtr currentBlockBuffer = params.getParamBlockBuffer(iter->second.slot);
+			GpuParamBlockBufferPtr currentBlockBuffer = bindableParams.getParamBlockBuffer(iter->second.slot);
 
 			if(currentBlockBuffer != nullptr)
 			{

+ 5 - 6
CamelotD3D9Renderer/Source/CmD3D9RenderSystem.cpp

@@ -339,13 +339,12 @@ namespace CamelotFramework
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		GpuParams& params = bindableParams.getParams();
-		params.updateHardwareBuffers();
-		const GpuParamDesc& paramDesc = params.getParamDesc();
+		bindableParams.updateHardwareBuffers();
+		const GpuParamDesc& paramDesc = bindableParams.getParamDesc();
 
 		for(auto iter = paramDesc.samplers.begin(); iter != paramDesc.samplers.end(); ++iter)
 		{
-			HSamplerState& samplerState = params.getSamplerState(iter->second.slot);
+			HSamplerState& samplerState = bindableParams.getSamplerState(iter->second.slot);
 
 			if(samplerState == nullptr)
 				setSamplerState(gptype, iter->second.slot, SamplerState::getDefault());
@@ -355,7 +354,7 @@ namespace CamelotFramework
 
 		for(auto iter = paramDesc.textures.begin(); iter != paramDesc.textures.end(); ++iter)
 		{
-			HTexture texture = params.getTexture(iter->second.slot);
+			HTexture texture = bindableParams.getTexture(iter->second.slot);
 
 			if(!texture.isLoaded())
 				setTexture(gptype, iter->second.slot, false, nullptr);
@@ -374,7 +373,7 @@ namespace CamelotFramework
 
 			if(iterFind == bufferData.end())
 			{
-				GpuParamBlockBufferPtr paramBlock = params.getParamBlockBuffer(paramBlockSlot);
+				GpuParamBlockBufferPtr paramBlock = bindableParams.getParamBlockBuffer(paramBlockSlot);
 
 				UINT8* data = (UINT8*)cm_alloc<ScratchAlloc>(paramBlock->getSize());
 				paramBlock->readData(data);

+ 5 - 6
CamelotGLRenderer/Source/CmGLRenderSystem.cpp

@@ -303,15 +303,14 @@ namespace CamelotFramework
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		GpuParams& params = bindableParams.getParams();
-		params.updateHardwareBuffers();
-		const GpuParamDesc& paramDesc = params.getParamDesc();
+		bindableParams.updateHardwareBuffers();
+		const GpuParamDesc& paramDesc = bindableParams.getParamDesc();
 		GLSLGpuProgramPtr activeProgram = getActiveProgram(gptype);
 		GLuint glProgram = activeProgram->getGLSLProgram()->getGLHandle();
 
 		for(auto iter = paramDesc.textures.begin(); iter != paramDesc.textures.end(); ++iter)
 		{
-			HTexture texture = params.getTexture(iter->second.slot);
+			HTexture texture = bindableParams.getTexture(iter->second.slot);
 
 			if(!texture.isLoaded())
 				setTexture(gptype, iter->second.slot, false, nullptr);
@@ -322,7 +321,7 @@ namespace CamelotFramework
 		UINT32 texUnit = 0;
 		for(auto iter = paramDesc.samplers.begin(); iter != paramDesc.samplers.end(); ++iter)
 		{
-			HSamplerState& samplerState = params.getSamplerState(iter->second.slot);
+			HSamplerState& samplerState = bindableParams.getSamplerState(iter->second.slot);
 
 			if(samplerState == nullptr)
 				setSamplerState(gptype, iter->second.slot, SamplerState::getDefault());
@@ -339,7 +338,7 @@ namespace CamelotFramework
 		UINT32 blockBinding = 0;
 		for(auto iter = paramDesc.paramBlocks.begin(); iter != paramDesc.paramBlocks.end(); ++iter)
 		{
-			GpuParamBlockBufferPtr paramBlockBuffer = params.getParamBlockBuffer(iter->second.slot);
+			GpuParamBlockBufferPtr paramBlockBuffer = bindableParams.getParamBlockBuffer(iter->second.slot);
 			if(paramBlockBuffer == nullptr)
 				continue;
 

+ 34 - 0
Opts.txt

@@ -0,0 +1,34 @@
+Make sure to also update TextSprite and ImageSprite and anything else in UpdateMesh, then don't forget to find the issue that causes elements to get marked as dirty every single frame
+
+Render:
+Each queued command has an AsyncOp which gets constructed whether it is used or not
+mCommands in CommandQueue gets resized a lot
+
+Pass::bindParameters calls GpuParams::createBindableCopy multiple times, and each call does a dynamic memory allocation
+ - Plus GpuParams contains 4 different std::vectors so those are four extra allocations
+ - Plus after they're constructed resize() is called on them almost immediately
+
+Strings when setting material params cause an allocation
+ - Consider using normal char arrays
+ - Or ensure materials are set using some smarter way, e.g. you get material param reference by name, you save it and then use that for setting the value
+
+HardwareBuffer::lock (more exactly ImmediateContext::Map) is very slow, with 35 calls taking up almost 3ms. 
+ - First, mesh creates both vertex and index buffer whenever writeSubresource is called!!! It should instead just update them.
+ - Second, mesh buffers are created with default buffer flags (static). Try changing it to dynamic
+
+When optimizing UpdateLayout make sure to mark elements that are fully culled as Culled
+ - But in order to determine that I first need to update the sprite to find out the element's bounds, which defeats the point
+ - TODO - Figure this out
+
+ // BindableGpuParams
+ // TODO - Make this class non-copyable, only movable
+/*
+
+GpuParams optimization
+- Ensure GpuParams internal data is allocated with a single allocation
+- This means a maximum of two allocations for GpuParams
+When creating bindable copies ensure that both allocations happen on stack
+- Such stack is freed when frame rendering is finished
+- Two stacks will likely be needed, one for data that is currently rendering and another for scheduled data
+
+*/

+ 0 - 5
TextOpts.txt

@@ -1,5 +0,0 @@
-Make sure to also update TextSprite and ImageSprite and anything else in UpdateMesh, then don't forget to find the issue that causes elements to get marked as dirty every single frame
-
-When optimizing UpdateLayout make sure to mark elements that are fully culled as Culled
- - But in order to determine that I first need to update the sprite to find out the elements bounds which defeats the point
- - TODO - FIgure this out