소스 검색

Significantly reduced number of allocations required by the CPU profiler

Marko Pintera 10 년 전
부모
커밋
a54c334990

+ 20 - 14
BansheeCore/Include/BsProfilerCPU.h

@@ -2,6 +2,7 @@
 
 #include "BsCorePrerequisites.h"
 #include "BsModule.h"
+#include "BsFrameAlloc.h"
 
 namespace BansheeEngine
 {
@@ -124,6 +125,8 @@ namespace BansheeEngine
 		 */
 		struct ProfileData
 		{
+			ProfileData(FrameAlloc* alloc);
+
 			/**
 			 * @brief	Begins a new sample and records current sample state. Previous sample must
 			 *			not be active.
@@ -142,7 +145,7 @@ namespace BansheeEngine
 			 */
 			void resumeLastSample();
 
-			ProfilerVector<ProfileSample> samples;
+			Vector<ProfileSample, StdFrameAlloc<ProfileSample>> samples;
 			Timer timer;
 
 			UINT64 memAllocs;
@@ -154,6 +157,8 @@ namespace BansheeEngine
 		 */
 		struct PreciseProfileData
 		{
+			PreciseProfileData(FrameAlloc* alloc);
+
 			/**
 			 * @brief	Begins a new sample and records current sample state. Previous sample must
 			 *			not be active.
@@ -172,7 +177,7 @@ namespace BansheeEngine
 			 */
 			void resumeLastSample();
 
-			ProfilerVector<PreciseProfileSample> samples;
+			Vector<PreciseProfileSample, StdFrameAlloc<PreciseProfileSample>> samples; // allocator value type must match the element type stored in the vector
 			TimerPrecise timer;
 
 			UINT64 memAllocs;
@@ -185,21 +190,21 @@ namespace BansheeEngine
 		 */
 		struct ProfiledBlock
 		{
-			ProfiledBlock();
+			ProfiledBlock(FrameAlloc* alloc); // alloc is forwarded to basic, precise and children
 			~ProfiledBlock();
 
 			/**
 			 * @brief	Attempts to find a child block with the specified name. Returns
 			 *			null if not found.
 			 */
-			ProfiledBlock* findChild(const ProfilerString& name) const;
+			ProfiledBlock* findChild(const char* name) const; // names are compared with strcmp, i.e. by content rather than by pointer
 
-			ProfilerString name;
+			char* name; // C string; ThreadInfo::getBlock() copies it into frame-allocator memory
 			
 			ProfileData basic;
 			PreciseProfileData precise;
 
-			ProfilerVector<ProfiledBlock*> children;
+			Vector<ProfiledBlock*, StdFrameAlloc<ProfiledBlock*>> children; // child blocks, looked up by name through findChild()
 		};
 
 		/**
@@ -239,7 +244,7 @@ namespace BansheeEngine
 			 * @brief	Starts profiling on the thread. New primary profiling block
 			 *			is created with the given name.
 			 */
-			void begin(const ProfilerString& _name);
+			void begin(const char* _name);
 
 			/**
 			 * @brief	Ends profiling on the thread. You should end all samples before calling this,
@@ -256,7 +261,7 @@ namespace BansheeEngine
 			/**
 			 * @brief	Gets the primary profiling block used by the thread.
 			 */
-			ProfiledBlock* getBlock();
+			ProfiledBlock* getBlock(const char* name);
 			
 			/**
 			 * @brief	Deletes the provided block.
@@ -268,8 +273,9 @@ namespace BansheeEngine
 
 			ProfiledBlock* rootBlock;
 
-			ProfilerStack<ActiveBlock> activeBlocks;
+			FrameAlloc frameAlloc;
 			ActiveBlock activeBlock;
+			Stack<ActiveBlock, StdFrameAlloc<ActiveBlock>>* activeBlocks;
 		};
 
 	public:
@@ -282,7 +288,7 @@ namespace BansheeEngine
 		 *
 		 * @param	name	Name that will allow you to more easily identify the thread.
 		 */
-		void beginThread(const ProfilerString& name);
+		void beginThread(const char* name);
 
 		/**
 		 * @brief	Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
@@ -294,7 +300,7 @@ namespace BansheeEngine
 		 *
 		 * @param	name	Unique name for the sample you can later use to find the sampling data.
 		 */
-		void beginSample(const ProfilerString& name);
+		void beginSample(const char* name);
 
 		/**
 		 * @brief	Ends sample measurement.
@@ -304,7 +310,7 @@ namespace BansheeEngine
 		 * @note	Unique name is primarily needed to more easily identify mismatched
 		 * 			begin/end sample pairs. Otherwise the name in beginSample would be enough.
 		 */
-		void endSample(const ProfilerString& name);
+		void endSample(const char* name);
 
 		/**
 		 * @brief	Begins sample measurement. Must be followed by endSample. 
@@ -316,7 +322,7 @@ namespace BansheeEngine
 		 * 			not use this method for larger parts of code. It does not consider context switches so if the OS
 		 * 			decides to switch context between measurements you will get invalid data.
 		 */
-		void beginSamplePrecise(const ProfilerString& name);
+		void beginSamplePrecise(const char* name);
 
 		/**
 		 * @brief	Ends precise sample measurement.
@@ -326,7 +332,7 @@ namespace BansheeEngine
 		 * @note	Unique name is primarily needed to more easily identify mismatched
 		 * 			begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
 		 */
-		void endSamplePrecise(const ProfilerString& name);
+		void endSamplePrecise(const char* name);
 
 		/**
 		 * @brief	Clears all sampling data, and ends any unfinished sampling blocks.

+ 69 - 54
BansheeCore/Source/BsProfilerCPU.cpp

@@ -64,6 +64,10 @@ namespace BansheeEngine
 #endif		
 	}
 
+	ProfilerCPU::ProfileData::ProfileData(FrameAlloc* alloc)
+		:samples(alloc) // the samples vector is declared with StdFrameAlloc and is constructed from this frame allocator
+	{ }
+
 	void ProfilerCPU::ProfileData::beginSample()
 	{
 		memAllocs = MemoryCounter::getNumAllocs();
@@ -89,6 +93,10 @@ namespace BansheeEngine
 		samples.erase(samples.end() - 1);
 	}
 
+	ProfilerCPU::PreciseProfileData::PreciseProfileData(FrameAlloc* alloc)
+		:samples(alloc) // the samples vector is declared with StdFrameAlloc and is constructed from this frame allocator
+	{ }
+
 	void ProfilerCPU::PreciseProfileData::beginSample()
 	{
 		memAllocs = MemoryCounter::getNumAllocs();
@@ -117,12 +125,12 @@ namespace BansheeEngine
 	BS_THREADLOCAL ProfilerCPU::ThreadInfo* ProfilerCPU::ThreadInfo::activeThread = nullptr;
 
 	ProfilerCPU::ThreadInfo::ThreadInfo()
-		:isActive(false), rootBlock(nullptr)
+		:isActive(false), rootBlock(nullptr), frameAlloc(1024 * 512), activeBlocks(nullptr) // 1024 * 512 is the FrameAlloc block size; activeBlocks stays null until begin() allocates it
 	{
 
 	}
 
-	void ProfilerCPU::ThreadInfo::begin(const ProfilerString& _name)
+	void ProfilerCPU::ThreadInfo::begin(const char* _name)
 	{
 		if(isActive)
 		{
@@ -131,12 +139,14 @@ namespace BansheeEngine
 		}
 
 		if(rootBlock == nullptr)
-			rootBlock = getBlock();
+			rootBlock = getBlock(_name);
 
 		activeBlock = ActiveBlock(ActiveSamplingType::Basic, rootBlock);
-		activeBlocks.push(activeBlock);
+		if (activeBlocks == nullptr)
+			activeBlocks = frameAlloc.alloc<Stack<ActiveBlock, StdFrameAlloc<ActiveBlock>>>(&frameAlloc);
+
+		activeBlocks->push(activeBlock);
 		
-		rootBlock->name = _name; 
 		rootBlock->basic.beginSample();
 		isActive = true;
 	}
@@ -148,30 +158,32 @@ namespace BansheeEngine
 		else
 			activeBlock.block->precise.endSample();
 
-		activeBlocks.pop();
+		activeBlocks->pop();
 
 		if(!isActive)
 			LOGWRN("Profiler::endThread called on a thread that isn't being sampled.");
 
-		if(activeBlocks.size() > 0)
+		if (activeBlocks->size() > 0)
 		{
 			LOGWRN("Profiler::endThread called but not all sample pairs were closed. Sampling data will not be valid.");
 
-			while(activeBlocks.size() > 0)
+			while (activeBlocks->size() > 0)
 			{
-				ActiveBlock& curBlock = activeBlocks.top();
+				ActiveBlock& curBlock = activeBlocks->top();
 				if(curBlock.type == ActiveSamplingType::Basic)
 					curBlock.block->basic.endSample();
 				else
 					curBlock.block->precise.endSample();
 
-				activeBlocks.pop();
+				activeBlocks->pop();
 			}
 		}
 
 		isActive = false;
-		activeBlocks = ProfilerStack<ActiveBlock>();
 		activeBlock = ActiveBlock();
+
+		frameAlloc.dealloc(activeBlocks);
+		activeBlocks = nullptr;
 	}
 
 	void ProfilerCPU::ThreadInfo::reset()
@@ -183,21 +195,26 @@ namespace BansheeEngine
 			releaseBlock(rootBlock);
 
 		rootBlock = nullptr;
+		frameAlloc.clear(); // Note: This never actually frees memory
 	}
 
-	ProfilerCPU::ProfiledBlock* ProfilerCPU::ThreadInfo::getBlock()
+	ProfilerCPU::ProfiledBlock* ProfilerCPU::ThreadInfo::getBlock(const char* name)
 	{
-		// TODO - Pool this, if possible using the memory allocator stuff
-		// TODO - Also consider moving all samples in ThreadInfo, and also pool them (otherwise I can't pool ProfiledBlock since it will be variable size)
-		return bs_new<ProfiledBlock, ProfilerAlloc>();
+		ProfiledBlock* newBlock = frameAlloc.alloc<ProfiledBlock>(&frameAlloc); // block is constructed inside this thread's frame allocator
+		const UINT32 nameBytes = ((UINT32)strlen(name) + 1) * sizeof(char); // characters plus the terminating null
+		newBlock->name = (char*)frameAlloc.alloc(nameBytes); // the block keeps its own copy, so the caller's string may be temporary
+		strcpy(newBlock->name, name);
+		return newBlock;
 	}
 
-	void ProfilerCPU::ThreadInfo::releaseBlock(ProfilerCPU::ProfiledBlock* block)
+	void ProfilerCPU::ThreadInfo::releaseBlock(ProfiledBlock* block)
 	{
-		bs_delete<ProfilerAlloc>(block);
+		frameAlloc.dealloc((UINT8*)block->name); // name was a raw byte allocation in getBlock(), so it goes through the raw overload; must happen while block is still alive
+		frameAlloc.dealloc(block); // typed overload: runs ~ProfiledBlock() and then forwards the pointer to the raw dealloc
 	}
 
-	ProfilerCPU::ProfiledBlock::ProfiledBlock()
+	ProfilerCPU::ProfiledBlock::ProfiledBlock(FrameAlloc* alloc)
+		:basic(alloc), precise(alloc), children(alloc) // listed in member declaration order, which is the order C++ actually initializes them in
 	{ }
 
 	ProfilerCPU::ProfiledBlock::~ProfiledBlock()
@@ -210,11 +227,11 @@ namespace BansheeEngine
 		children.clear();
 	}
 
-	ProfilerCPU::ProfiledBlock* ProfilerCPU::ProfiledBlock::findChild(const ProfilerString& name) const
+	ProfilerCPU::ProfiledBlock* ProfilerCPU::ProfiledBlock::findChild(const char* name) const
 	{
 		for(auto& child : children)
 		{
-			if(child->name == name)
+			if(strcmp(child->name, name) == 0)
 				return child;
 		}
 
@@ -240,7 +257,7 @@ namespace BansheeEngine
 			bs_delete<ProfilerAlloc>(threadInfo);
 	}
 
-	void ProfilerCPU::beginThread(const ProfilerString& name)
+	void ProfilerCPU::beginThread(const char* name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		if(thread == nullptr)
@@ -264,7 +281,7 @@ namespace BansheeEngine
 		ThreadInfo::activeThread->end();
 	}
 
-	void ProfilerCPU::beginSample(const ProfilerString& name)
+	void ProfilerCPU::beginSample(const char* name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		if(thread == nullptr || !thread->isActive)
@@ -281,8 +298,7 @@ namespace BansheeEngine
 
 		if(block == nullptr)
 		{
-			block = thread->getBlock();
-			block->name = name;
+			block = thread->getBlock(name);
 
 			if(parent != nullptr)
 				parent->children.push_back(block);
@@ -291,12 +307,12 @@ namespace BansheeEngine
 		}
 
 		thread->activeBlock = ActiveBlock(ActiveSamplingType::Basic, block);
-		thread->activeBlocks.push(thread->activeBlock);
+		thread->activeBlocks->push(thread->activeBlock);
 
 		block->basic.beginSample();
 	}
 
-	void ProfilerCPU::endSample(const ProfilerString& name)
+	void ProfilerCPU::endSample(const char* name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		ProfiledBlock* block = thread->activeBlock.block;
@@ -314,25 +330,25 @@ namespace BansheeEngine
 			return;
 		}
 
-		if(block->name != name)
+		if(strcmp(block->name, name) != 0)
 		{
-			LOGWRN("Mismatched CPUProfiler::endSample. Was expecting \"" + String(block->name.c_str()) + 
-				"\" but got \"" + String(name.c_str()) + "\". Sampling data will not be valid.");
+			LOGWRN("Mismatched CPUProfiler::endSample. Was expecting \"" + String(block->name) + 
+				"\" but got \"" + String(name) + "\". Sampling data will not be valid.");
 			return;
 		}
 #endif
 
 		block->basic.endSample();
 
-		thread->activeBlocks.pop();
+		thread->activeBlocks->pop();
 
-		if(!thread->activeBlocks.empty())
-			thread->activeBlock = thread->activeBlocks.top();
+		if (!thread->activeBlocks->empty())
+			thread->activeBlock = thread->activeBlocks->top();
 		else
 			thread->activeBlock = ActiveBlock();
 	}
 
-	void ProfilerCPU::beginSamplePrecise(const ProfilerString& name)
+	void ProfilerCPU::beginSamplePrecise(const char* name)
 	{
 		// Note: There is a (small) possibility a context switch will happen during this measurement in which case result will be skewed. 
 		// Increasing thread priority might help. This is generally only a problem with code that executes a long time (10-15+ ms - depending on OS quant length)
@@ -349,8 +365,7 @@ namespace BansheeEngine
 
 		if(block == nullptr)
 		{
-			block = thread->getBlock();
-			block->name = name;
+			block = thread->getBlock(name);
 
 			if(parent != nullptr)
 				parent->children.push_back(block);
@@ -359,12 +374,12 @@ namespace BansheeEngine
 		}
 
 		thread->activeBlock = ActiveBlock(ActiveSamplingType::Precise, block);
-		thread->activeBlocks.push(thread->activeBlock);
+		thread->activeBlocks->push(thread->activeBlock);
 
 		block->precise.beginSample();
 	}
 
-	void ProfilerCPU::endSamplePrecise(const ProfilerString& name)
+	void ProfilerCPU::endSamplePrecise(const char* name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		ProfiledBlock* block = thread->activeBlock.block;
@@ -382,20 +397,20 @@ namespace BansheeEngine
 			return;
 		}
 
-		if(block->name != name)
+		if (strcmp(block->name, name) != 0)
 		{
-			LOGWRN("Mismatched Profiler::endSamplePrecise. Was expecting \"" + String(block->name.c_str()) + 
-				"\" but got \"" + String(name.c_str()) + "\". Sampling data will not be valid.");
+			LOGWRN("Mismatched Profiler::endSamplePrecise. Was expecting \"" + String(block->name) + 
+				"\" but got \"" + String(name) + "\". Sampling data will not be valid.");
 			return;
 		}
 #endif
 
 		block->precise.endSample();
 
-		thread->activeBlocks.pop();
+		thread->activeBlocks->pop();
 
-		if(!thread->activeBlocks.empty())
-			thread->activeBlock = thread->activeBlocks.top();
+		if (!thread->activeBlocks->empty())
+			thread->activeBlock = thread->activeBlocks->top();
 		else
 			thread->activeBlock = ActiveBlock();
 	}
@@ -468,7 +483,7 @@ namespace BansheeEngine
 		basicEntries.resize(flatHierarchy.size());
 		preciseEntries.resize(flatHierarchy.size());
 
-		for(auto& iter = flatHierarchy.rbegin(); iter != flatHierarchy.rend(); ++iter)
+		for(auto iter = flatHierarchy.rbegin(); iter != flatHierarchy.rend(); ++iter)
 		{
 			TempEntry& curData = *iter;
 			ProfiledBlock* curBlock = curData.parentBlock;
@@ -477,7 +492,7 @@ namespace BansheeEngine
 			CPUProfilerPreciseSamplingEntry* entryPrecise = &preciseEntries[curData.entryIdx];
 
 			// Calculate basic data
-			entryBasic->data.name = String(curBlock->name.c_str());
+			entryBasic->data.name = String(curBlock->name);
 
 			entryBasic->data.memAllocs = 0;
 			entryBasic->data.memFrees = 0;
@@ -517,7 +532,7 @@ namespace BansheeEngine
 			entryBasic->data.estimatedSelfOverheadMs = mBasicTimerOverhead;
 
 			// Calculate precise data
-			entryPrecise->data.name = String(curBlock->name.c_str());
+			entryPrecise->data.name = String(curBlock->name);
 
 			entryPrecise->data.memAllocs = 0;
 			entryPrecise->data.memFrees = 0;
@@ -783,8 +798,8 @@ namespace BansheeEngine
 
 			for (UINT32 i = 0; i < sampleReps * 5; i++) 
 			{
-				beginSample("TestAvg#" + ProfilerString(toString(i).c_str()));
-				endSample("TestAvg#" + ProfilerString(toString(i).c_str()));
+				beginSample(("TestAvg#" + toString(i)).c_str());
+				endSample(("TestAvg#" + toString(i)).c_str());
 			}
 
 			endThread();
@@ -834,8 +849,8 @@ namespace BansheeEngine
 
 			for (UINT32 i = 0; i < sampleReps * 5; i++) 
 			{
-				beginSample("TestAvg#" + ProfilerString(toString(i).c_str()));
-				endSample("TestAvg#" + ProfilerString(toString(i).c_str()));
+				beginSample(("TestAvg#" + toString(i)).c_str());
+				endSample(("TestAvg#" + toString(i)).c_str());
 			}
 
 			endThread();
@@ -883,8 +898,8 @@ namespace BansheeEngine
 
 			for (UINT32 i = 0; i < sampleReps * 5; i++) 
 			{
-				beginSamplePrecise("TestAvg#" + ProfilerString(toString(i).c_str()));
-				endSamplePrecise("TestAvg#" + ProfilerString(toString(i).c_str()));
+				beginSamplePrecise(("TestAvg#" + toString(i)).c_str());
+				endSamplePrecise(("TestAvg#" + toString(i)).c_str());
 			}
 
 			endThread();
@@ -932,8 +947,8 @@ namespace BansheeEngine
 
 			for (UINT32 i = 0; i < sampleReps * 5; i++) 
 			{
-				beginSamplePrecise("TestAvg#" + ProfilerString(toString(i).c_str()));
-				endSamplePrecise("TestAvg#" + ProfilerString(toString(i).c_str()));
+				beginSamplePrecise(("TestAvg#" + toString(i)).c_str());
+				endSamplePrecise(("TestAvg#" + toString(i)).c_str());
 			}
 
 			endThread();

+ 7 - 1
BansheeEngine/Source/BsGUIManager.cpp

@@ -359,10 +359,12 @@ namespace BansheeEngine
 
 			for(auto& widget : renderData.widgets)
 			{
-				if(widget->isDirty(true))
+				gProfilerCPU().beginSample("Widget::isDirty");
+				if (widget->isDirty(true))
 				{
 					isDirty = true;
 				}
+				gProfilerCPU().endSample("Widget::isDirty");
 			}
 
 			if(!isDirty)
@@ -564,7 +566,9 @@ namespace BansheeEngine
 				UINT32 quadOffset = 0;
 				for(auto& matElement : group->elements)
 				{
+					gProfilerCPU().beginSample("_fillBuffer");
 					matElement.element->_fillBuffer(vertices, uvs, indices, quadOffset, group->numQuads, vertexStride, indexStride, matElement.renderElement);
+					gProfilerCPU().endSample("_fillBuffer");
 
 					UINT32 numQuads = matElement.element->_getNumQuads(matElement.renderElement);
 					UINT32 indexStart = quadOffset * 6;
@@ -577,6 +581,7 @@ namespace BansheeEngine
 					quadOffset += numQuads;
 				}
 
+				gProfilerCPU().beginSample("alloc/dealloc mesh data");
 				if(groupIdx < (UINT32)renderData.cachedMeshes.size())
 				{
 					mMeshHeap->dealloc(renderData.cachedMeshes[groupIdx]);
@@ -586,6 +591,7 @@ namespace BansheeEngine
 				{
 					renderData.cachedMeshes.push_back(mMeshHeap->alloc(meshData));
 				}
+				gProfilerCPU().endSample("alloc/dealloc mesh data");
 
 				groupIdx++;
 			}

+ 4 - 1
BansheeEngine/Source/BsGUIWidget.cpp

@@ -16,6 +16,7 @@
 #include "BsSceneObject.h"
 #include "BsRenderWindow.h"
 #include "BsGUIWidgetRTTI.h"
+#include "BsProfilerCPU.h"
 
 namespace BansheeEngine
 {
@@ -274,7 +275,9 @@ namespace BansheeEngine
 			mWidgetIsDirty = false;
 
 			for (auto& dirtyElement : mDirtyContents)
-				dirtyElement->_updateRenderElements();
+			{
+				PROFILE_CALL(dirtyElement->_updateRenderElements(), "UpdateDirty");
+			}
 
 			mDirtyContents.clear();
 			updateBounds();

+ 10 - 0
BansheeEngine/Source/BsProfilerOverlay.cpp

@@ -531,6 +531,11 @@ namespace BansheeEngine
 
 	void ProfilerOverlayInternal::update()
 	{
+		static float pausedTime = 0.0f; // DEBUG ONLY
+
+		if ((gTime().getTime() - pausedTime) <= 5.0f)
+			return;
+
 		const ProfilerReport& latestSimReport = ProfilingManager::instance().getReport(ProfiledThread::Sim);
 		const ProfilerReport& latestCoreReport = ProfilingManager::instance().getReport(ProfiledThread::Core);
 
@@ -543,6 +548,11 @@ namespace BansheeEngine
 		{
 			updateGPUSampleContents(ProfilerGPU::instance().getNextReport());
 		}
+
+		if (gTime().getFrameDelta() > 0.100f)
+		{
+			pausedTime = gTime().getTime();
+		}
 	}
 
 	void ProfilerOverlayInternal::targetResized()

+ 14 - 4
BansheeEngine/Source/BsSprite.cpp

@@ -1,6 +1,7 @@
 #include "BsTextSprite.h"
 #include "BsGUIMaterialManager.h"
 #include "BsVector2.h"
+#include "BsProfilerCPU.h"
 
 namespace BansheeEngine
 {
@@ -45,7 +46,7 @@ namespace BansheeEngine
 	UINT32 Sprite::fillBuffer(UINT8* vertices, UINT8* uv, UINT32* indices, UINT32 startingQuad, UINT32 maxNumQuads, 
 		UINT32 vertexStride, UINT32 indexStride, UINT32 renderElementIdx, const Vector2I& offset, const Rect2I& clipRect, bool clip) const
 	{
-		auto renderElem = mCachedRenderElements.at(renderElementIdx);
+		const auto& renderElem = mCachedRenderElements.at(renderElementIdx);
 
 		UINT32 startVert = startingQuad * 4;
 		UINT32 startIndex = startingQuad * 6;
@@ -248,14 +249,23 @@ namespace BansheeEngine
 			UINT32 vertexCount = renderElem.numQuads * 4;
 			UINT32 indexCount = renderElem.numQuads * 6;
 
-			if(renderElem.vertices != nullptr)
+			if (renderElem.vertices != nullptr)
+			{
 				bs_deleteN<ScratchAlloc>(renderElem.vertices, vertexCount);
+				renderElem.vertices = nullptr;
+			}
 
-			if(renderElem.uvs != nullptr)
+			if (renderElem.uvs != nullptr)
+			{
 				bs_deleteN<ScratchAlloc>(renderElem.uvs, vertexCount);
+				renderElem.uvs = nullptr;
+			}
 
-			if(renderElem.indexes != nullptr)
+			if (renderElem.indexes != nullptr)
+			{
 				bs_deleteN<ScratchAlloc>(renderElem.indexes, indexCount);
+				renderElem.indexes = nullptr;
+			}
 
 			if(renderElem.matInfo.material != nullptr)
 			{

+ 4 - 0
BansheeEngine/Source/BsTextSprite.cpp

@@ -15,6 +15,8 @@ namespace BansheeEngine
 
 	void TextSprite::update(const TEXT_SPRITE_DESC& desc, UINT64 groupId)
 	{
+		gProfilerCPU().beginSample("UpdateTextSprite");
+
 		TextData textData(desc.text, desc.font, desc.fontSize, desc.width, desc.height, desc.wordWrap, desc.wordBreak);
 
 		UINT32 numLines = textData.getNumLines();
@@ -107,6 +109,8 @@ namespace BansheeEngine
 		}
 
 		updateBounds();
+
+		gProfilerCPU().endSample("UpdateTextSprite");
 	}
 
 	UINT32 TextSprite::genTextQuads(UINT32 page, const TextData& textData, UINT32 width, UINT32 height, 

+ 28 - 0
BansheeUtility/Include/BsFrameAlloc.h

@@ -41,6 +41,17 @@ namespace BansheeEngine
 		 */
 		UINT8* alloc(UINT32 amount);
 
+		/**
+		 * @brief	Allocates and constructs a new object.
+		 *	
+		 * @note	Not thread safe.
+		 */
+		template<class T, class... Args>
+		T* alloc(Args &&...args)
+		{
+			return new ((T*)alloc(sizeof(T))) T(std::forward<Args>(args)...); // placement-new into bytes obtained from the raw alloc(UINT32) overload; args are perfectly forwarded to T's constructor
+		}
+
 		/**
 		 * @brief	Deallocates a previously allocated block of memory.
 		 *
@@ -51,6 +62,23 @@ namespace BansheeEngine
 		 */
 		void dealloc(UINT8* data);
 
+		/**
+		 * @brief	Deallocates and destructs a previously allocated object.
+		 *
+		 * @note	No deallocation is actually done here. This method is only used to call the destructor
+		 *			and for debug purposes so it is easier to track down memory leaks and corruption.
+		 * 			
+		 *			Thread safe.
+		 */
+		template<class T>
+		void dealloc(T* obj)
+		{
+			if (obj != nullptr) // null is tolerated: only the destructor call is skipped
+				obj->~T();
+
+			dealloc((UINT8*)obj); // forwards to the raw dealloc(UINT8*) overload, also when obj is null
+		}
+
 		/**
 		 * @brief	Deallocates all allocated memory.
 		 * 			

+ 3 - 1
BansheeUtility/Source/BsFrameAlloc.cpp

@@ -24,8 +24,10 @@ namespace BansheeEngine
 	}
 
 	FrameAlloc::FrameAlloc(UINT32 blockSize)
-		:mTotalAllocBytes(0), mFreeBlock(nullptr), mBlockSize(blockSize)
+		:mTotalAllocBytes(0), mFreeBlock(nullptr), mBlockSize(blockSize),
+		mOwnerThread(BS_THREAD_CURRENT_ID) // NOTE(review): presumably the id of the constructing thread, kept for ownership checks - confirm against the macro
 	{
+		
 		allocBlock(mBlockSize); // reserve the first block up front, sized by the requested block size
 	}
 

+ 6 - 2
TODO.txt

@@ -58,9 +58,8 @@ Code quality improvements:
 ----------------------------------------------------------------------
 Polish stage 1
 
-IMPORTANT: Overlay camera will also draw renderables
 Track down the large performance spike
-
+ - Seems to be caused by something on core thread
 ProjectLibrary seems to import some files on every start-up
 Crash on shutdown in mono_gchandle_free
 
@@ -77,6 +76,11 @@ First screenshot work:
 - Status bar with last console message
 - (Optionally) Console window
 
+Optimization:
+ - HResource.isLoaded checks are using up a lot of allocations in GUI updates
+ - (Profiler has a large overhead as it allocates strings with each call. Consider using stack allocated strings which are either hashed or stored internally on stack as well)
+ - There are more issues with GUI allocations
+
 -----------
 
 SceneTreeView