Просмотр исходного кода

Reuse command queues in order to save on allocations
Added a bunch of profiling blocks

Marko Pintera 12 лет назад
Родитель
Коммит
f6a9ae7cf9

+ 4 - 0
BansheeEngine/Source/BsGUIManager.cpp

@@ -285,6 +285,8 @@ namespace BansheeEngine
 		{
 			GUIRenderData& renderData = cachedMeshData.second;
 
+			gProfiler().beginSample("UM_A");
+
 			// Check if anything is dirty. If nothing is we can skip the update
 			bool isDirty = renderData.isDirty;
 			renderData.isDirty = false;
@@ -297,6 +299,8 @@ namespace BansheeEngine
 				}
 			}
 
+			gProfiler().endSample("UM_A");
+
 			if(!isDirty)
 				continue;
 

+ 6 - 0
BansheeEngine/Source/BsImageSprite.cpp

@@ -4,6 +4,8 @@
 #include "BsSpriteTexture.h"
 #include "CmTexture.h"
 
+#include "CmProfiler.h"
+
 using namespace CamelotFramework;
 
 namespace BansheeEngine
@@ -22,6 +24,8 @@ namespace BansheeEngine
 			return;
 		}
 
+		gProfiler().beginSample("UpdateImageSprite");
+
 		// Actually generate a mesh
 		if(mCachedRenderElements.size() < 1)
 			mCachedRenderElements.resize(1);
@@ -230,5 +234,7 @@ namespace BansheeEngine
 		}
 
 		updateBounds();
+
+		gProfiler().endSample("UpdateImageSprite");
 	}
 }

+ 32 - 0
BansheeForwardRenderer/Source/BsForwardRenderer.cpp

@@ -18,6 +18,8 @@
 #include "BsDrawHelper3D.h"
 #include "BsGUIManager.h"
 
+#include "CmProfiler.h"
+
 using namespace CamelotFramework;
 
 namespace BansheeEngine
@@ -40,6 +42,8 @@ namespace BansheeEngine
 
 	void ForwardRenderer::renderAll() 
 	{
+		gProfiler().beginSample("renderA");
+
 		gSceneManager().updateRenderableBounds();
 
 		CoreAccessor& coreAccessor = gMainCA();
@@ -80,6 +84,9 @@ namespace BansheeEngine
 			std::sort(begin(cameras), end(cameras), cameraComparer);
 		}
 
+		gProfiler().endSample("renderA");
+		gProfiler().beginSample("renderB");
+
 		// Render everything, target by target
 		for(auto& camerasPerTarget : camerasPerRenderTarget)
 		{
@@ -112,10 +119,14 @@ namespace BansheeEngine
 			coreAccessor.endFrame();
 			coreAccessor.swapBuffers(target);
 		}
+
+		gProfiler().endSample("renderB");
 	}
 
 	void ForwardRenderer::render(const HCamera& camera) 
 	{
+		gProfiler().beginSample("renderC");
+
 		Vector<HRenderable>::type allRenderables;
 		
 		if(!camera->getIgnoreSceneRenderables())
@@ -131,6 +142,9 @@ namespace BansheeEngine
 
 		mRenderQueue->clear();
 
+		gProfiler().endSample("renderC");
+		gProfiler().beginSample("renderD");
+
 		// Get scene render operations
 		for(auto iter = allRenderables.begin(); iter != allRenderables.end(); ++iter)
 		{
@@ -150,6 +164,9 @@ namespace BansheeEngine
 			(*iter)->render(*mRenderQueue);
 		}
 
+		gProfiler().endSample("renderD");
+		gProfiler().beginSample("renderE");
+
 		// Get GUI render operations
 		GUIManager::instance().render(camera->getViewport(), *mRenderQueue);
 
@@ -160,6 +177,9 @@ namespace BansheeEngine
 		DrawHelper3D::instance().render(camera, *mRenderQueue);
 		DrawHelper2D::instance().render(camera, *mRenderQueue);
 
+		gProfiler().endSample("renderE");
+		gProfiler().beginSample("renderF");
+
 		// Get any operations from hooked up callbacks
 		const Viewport* viewportRawPtr = camera->getViewport().get();
 		auto callbacksForViewport = mRenderCallbacks[viewportRawPtr];
@@ -171,18 +191,30 @@ namespace BansheeEngine
 		mRenderQueue->sort();
 		const Vector<SortedRenderOp>::type& sortedROps =  mRenderQueue->getSortedRenderOps();
 
+		gProfiler().endSample("renderF");
+
 		for(auto iter = sortedROps.begin(); iter != sortedROps.end(); ++iter)
 		{
+			gProfiler().beginSample("renderG");
+
 			const RenderOperation& renderOp = *iter->baseOperation;
 			HMaterial material = renderOp.material;
 
 			PassPtr pass = material->getPass(iter->passIdx);
 			pass->activate(coreAccessor);
 
+			gProfiler().endSample("renderG");
+			gProfiler().beginSample("renderH");
+
 			PassParametersPtr paramsPtr = material->getPassParameters(iter->passIdx);
 			pass->bindParameters(coreAccessor, paramsPtr);
 
+			gProfiler().endSample("renderH");
+			gProfiler().beginSample("renderI");
+
 			coreAccessor.render(renderOp.mesh.getInternalPtr(), renderOp.submeshIdx);
+
+			gProfiler().endSample("renderI");
 		}
 	}
 }

+ 1 - 0
CamelotCore/Include/CmCommandQueue.h

@@ -186,6 +186,7 @@ namespace CamelotFramework
 
 	private:
 		CamelotFramework::Queue<QueuedCommand>::type* mCommands;
+		Stack<CamelotFramework::Queue<QueuedCommand>::type*>::type mEmptyCommandQueues; // List of empty queues for reuse
 
 		CM_THREAD_ID_TYPE mMyThreadId;
 

+ 22 - 3
CamelotCore/Source/CmCommandQueue.cpp

@@ -30,6 +30,12 @@ namespace CamelotFramework
 	{
 		if(mCommands != nullptr)
 			cm_delete(mCommands);
+
+		while(!mEmptyCommandQueues.empty())
+		{
+			cm_delete(mEmptyCommandQueues.top());
+			mEmptyCommandQueues.pop();
+		}
 	}
 
 	AsyncOp CommandQueueBase::queueReturn(boost::function<void(AsyncOp&)> commandCallback, bool _notifyWhenComplete, UINT32 _callbackId)
@@ -73,7 +79,16 @@ namespace CamelotFramework
 	CamelotFramework::Queue<QueuedCommand>::type* CommandQueueBase::flush()
 	{
 		CamelotFramework::Queue<QueuedCommand>::type* oldCommands = mCommands;
-		mCommands = cm_new<CamelotFramework::Queue<QueuedCommand>::type, PoolAlloc>();
+
+		if(!mEmptyCommandQueues.empty())
+		{
+			mCommands = mEmptyCommandQueues.top();
+			mEmptyCommandQueues.pop();
+		}
+		else
+		{
+			mCommands = cm_new<CamelotFramework::Queue<QueuedCommand>::type, PoolAlloc>();
+		}
 
 		return oldCommands;
 	}
@@ -113,7 +128,7 @@ namespace CamelotFramework
 			commands->pop();
 		}
 
-		cm_delete<PoolAlloc>(commands);
+		mEmptyCommandQueues.push(commands);
 	}
 
 	void CommandQueueBase::playback(CamelotFramework::Queue<QueuedCommand>::type* commands)
@@ -124,7 +139,11 @@ namespace CamelotFramework
 	void CommandQueueBase::cancelAll()
 	{
 		CamelotFramework::Queue<QueuedCommand>::type* commands = flush();
-		cm_delete<PoolAlloc>(commands);
+
+		while(!commands->empty())
+			commands->pop();
+
+		mEmptyCommandQueues.push(commands);
 	}
 
 	bool CommandQueueBase::isEmpty()

+ 19 - 0
CamelotCore/Source/CmRenderSystem.cpp

@@ -45,6 +45,8 @@ THE SOFTWARE.
 #include "CmMesh.h"
 #include "boost/bind.hpp"
 
+#include "CmProfiler.h"
+
 namespace CamelotFramework {
 
     static const TexturePtr sNullTexPtr;
@@ -214,6 +216,8 @@ namespace CamelotFramework {
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
+		gProfiler().beginSample("render");
+
 		// sort out clip planes
 		// have to do it here in case of matrix issues
 		if (mClipPlanesDirty)
@@ -239,6 +243,8 @@ namespace CamelotFramework {
 		}
 		else
 			draw(subMesh.vertexData->vertexCount);
+
+		gProfiler().endSample("render");
 	}
 
 	void RenderSystem::swapBuffers(RenderTargetPtr target)
@@ -252,17 +258,30 @@ namespace CamelotFramework {
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
+		gProfiler().beginSample("writeSubresource");
+
 		resource->writeSubresource(subresourceIdx, *data);
+
+		gProfiler().endSample("writeSubresource");
+
+		gProfiler().beginSample("writeSubresourceB");
+
 		data->unlock();
 		asyncOp.completeOperation();
+
+		gProfiler().endSample("writeSubresourceB");
 	}
 
 	void RenderSystem::readSubresource(GpuResourcePtr resource, UINT32 subresourceIdx, GpuResourceDataPtr& data, AsyncOp& asyncOp)
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
+		gProfiler().beginSample("readSubresource");
+
 		resource->readSubresource(subresourceIdx, *data);
 		data->unlock();
 		asyncOp.completeOperation();
+
+		gProfiler().endSample("readSubresource");
 	}
 }

+ 4 - 0
CamelotD3D11RenderSystem/Source/CmD3D11HardwareBuffer.cpp

@@ -4,6 +4,8 @@
 #include "CmException.h"
 #include "CmDebug.h"
 
+#include "CmProfiler.h"
+
 namespace CamelotFramework
 {
 	D3D11HardwareBuffer::D3D11HardwareBuffer(BufferType btype, GpuBufferUsage usage, UINT32 elementCount, UINT32 elementSize, 
@@ -171,12 +173,14 @@ namespace CamelotFramework
 			D3D11_MAPPED_SUBRESOURCE mappedSubResource;
 			mappedSubResource.pData = NULL;
 			mDevice.clearErrors();
+			gProfiler().beginSample("Map");
 			HRESULT hr = mDevice.getImmediateContext()->Map(mD3DBuffer, 0, mapType, 0, &mappedSubResource);
 			if (FAILED(hr) || mDevice.hasError())
 			{
 				String msg = mDevice.getErrorDescription();
 				CM_EXCEPT(RenderingAPIException, "Error calling Map: " + msg);
 			}
+			gProfiler().endSample("Map");
 
 			pRet = static_cast<void*>(static_cast<char*>(mappedSubResource.pData) + offset);
 

+ 11 - 15
Opts.txt

@@ -2,11 +2,6 @@ Make sure to also update TextSprite and ImageSprite and anything else in UpdateM
 
 Render:
 Each queued command has an AsyncOp which gets constructed whether it is used or not
-mCommands in CommandQueue gets resized a lot
-
-Pass::bindParameters calls multiple GPuParams::createBindableCopy which does dynamic mem alloc
- - Plus GpuParams contains 4 different std::vectors so those are four extra allocations
- - Plus after they're constructed resize() is called on them almost immediately
 
 Strings when setting material params cause an allocation
  - Consider using normal char arrays
@@ -20,15 +15,16 @@ When optimizing UpdateLayout make sure to mark elements that are fully culled as
  - But in order to determine that I first need to update the sprite to find out the elements bounds which defeats the point
  - TODO - Figure this out
 
- // BindableGpuParams
- // TODO - Make this class non-copyable, only movable
-/*
+FrameAlloc
+   Keeps two internal stacks
+     - One core, one sim
+   coreThreadBegin()
+    - swaps the active stacks
+   coreThreadEnd() - Must be called after Core thread has finished
+    - frees the previously active stack
+   During frame we call alloc()
+  
 
-GpuParams optimization
-- Ensure GpuParams internal data is allocated with a single allocation
-- This means a maximum of two allocations for GpuParams
-When creating bindable copies ensure that both allocations happen on stack
-- Such stack is freed when frame rendering is finished
-- Two stacks will likely be needed, one for data that is currently rendering and another for scheduled data
+Make sure BindableGpuParams are not copyable
 
-*/
+CPUProfiler still uses normal allocator for containers