Browse Source

Add stats in the GPU visibility tests

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
26dbef3b36

+ 2 - 0
AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h

@@ -37,6 +37,8 @@ public:
 		ANKI_ASSERT(!isValid() && "Forgot to delete");
 		ANKI_ASSERT(!isValid() && "Forgot to delete");
 		m_token = b.m_token;
 		m_token = b.m_token;
 		b.m_token = {};
 		b.m_token = {};
+		m_buffer = b.m_buffer;
+		m_mappedMemory = b.m_mappedMemory;
 		return *this;
 		return *this;
 	}
 	}
 
 

+ 49 - 2
AnKi/Renderer/GpuVisibility.cpp

@@ -12,9 +12,13 @@
 #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
 #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
 #include <AnKi/Collision/Functions.h>
 #include <AnKi/Collision/Functions.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
+#include <AnKi/Core/StatsSet.h>
 
 
 namespace anki {
 namespace anki {
 
 
+// Objects that passed the GPU visibility test; set from the value read back from the GPU in populateRenderGraph()
+static StatCounter g_visibleObjects(StatCategory::kMisc, "Visible objects", StatFlag::kZeroEveryFrame);
+// Objects submitted to the GPU visibility test; incremented by aabbCount on every populateRenderGraph() call
+static StatCounter g_testedObjects(StatCategory::kMisc, "Visibility tested objects", StatFlag::kZeroEveryFrame);
+
 static GpuSceneContiguousArrayType techniqueToArrayType(RenderingTechnique technique)
 static GpuSceneContiguousArrayType techniqueToArrayType(RenderingTechnique technique)
 {
 {
 	GpuSceneContiguousArrayType arrayType;
 	GpuSceneContiguousArrayType arrayType;
@@ -42,6 +46,7 @@ Error GpuVisibility::init()
 	{
 	{
 		ShaderProgramResourceVariantInitInfo variantInit(m_prog);
 		ShaderProgramResourceVariantInitInfo variantInit(m_prog);
 		variantInit.addMutation("HZB_TEST", i);
 		variantInit.addMutation("HZB_TEST", i);
+		variantInit.addMutation("STATS", ANKI_STATS_ENABLED);
 
 
 		const ShaderProgramResourceVariant* variant;
 		const ShaderProgramResourceVariant* variant;
 		m_prog->getOrCreateVariant(variantInit, variant);
 		m_prog->getOrCreateVariant(variantInit, variant);
@@ -49,16 +54,48 @@ Error GpuVisibility::init()
 		m_grProgs[i].reset(&variant->getProgram());
 		m_grProgs[i].reset(&variant->getProgram());
 	}
 	}
 
 
+	for(GpuReadbackMemoryAllocation& alloc : m_readbackMemory)
+	{
+		alloc = GpuReadbackMemoryPool::getSingleton().allocate(sizeof(U32));
+	}
+
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
 void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
 void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
 										const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt,
 										const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt,
-										RenderGraphDescription& rgraph, GpuVisibilityOutput& out) const
+										RenderGraphDescription& rgraph, GpuVisibilityOutput& out)
 {
 {
 	const U32 aabbCount = GpuSceneContiguousArrays::getSingleton().getElementCount(techniqueToArrayType(technique));
 	const U32 aabbCount = GpuSceneContiguousArrays::getSingleton().getElementCount(techniqueToArrayType(technique));
 	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
 	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
 
 
+#if ANKI_STATS_ENABLED // Stats: pick the readback slots for this frame and publish the counters
+	Bool firstCallInTheFrame = false; // True only for the first populateRenderGraph() call made in a given frame
+	if(m_lastFrameIdx != getRenderer().getFrameCount())
+	{
+		firstCallInTheFrame = true;
+		m_lastFrameIdx = getRenderer().getFrameCount(); // Remember the frame so later calls in the same frame skip the readback below
+	}
+
+	const GpuReadbackMemoryAllocation& readAlloc = m_readbackMemory[(m_lastFrameIdx + 1) % m_readbackMemory.getSize()]; // Next slot in the ring: it was the write slot (array size - 1) frames ago
+	const GpuReadbackMemoryAllocation& writeAlloc = m_readbackMemory[m_lastFrameIdx % m_readbackMemory.getSize()]; // Slot that this frame's dispatches accumulate into
+
+	Buffer* clearStatsBuffer = &readAlloc.getBuffer(); // Bound at binding 11 in the pass; the shader writes 0 to it
+	const PtrSize clearStatsBufferOffset = readAlloc.getOffset();
+	Buffer* writeStatsBuffer = &writeAlloc.getBuffer(); // Bound at binding 10 in the pass; the shader atomically increments it
+	const PtrSize writeStatsBufferOffset = writeAlloc.getOffset();
+
+	if(firstCallInTheFrame)
+	{
+		U32 visibleCount;
+		memcpy(&visibleCount, readAlloc.getMappedMemory(), sizeof(visibleCount)); // NOTE(review): Readback.cpp adds getOffset() to getMappedMemory() before copying, this does not; confirm which one is right
+
+		g_visibleObjects.set(visibleCount); // Published once per frame, before this frame's dispatches clear the slot
+	}
+
+	g_testedObjects.increment(aabbCount); // Accumulates over every call, i.e. over all techniques tested this frame
+#endif
+
 	// Allocate memory for the indirect commands
 	// Allocate memory for the indirect commands
 	const GpuVisibleTransientMemoryAllocation indirectArgs =
 	const GpuVisibleTransientMemoryAllocation indirectArgs =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
@@ -99,7 +136,12 @@ void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique t
 		(hzbRt) ? *hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
 		(hzbRt) ? *hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
 
 
 	pass.setWork([this, viewProjectionMat, lodReferencePoint, lodDistances, technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle,
 	pass.setWork([this, viewProjectionMat, lodReferencePoint, lodDistances, technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle,
-				  instanceRateRenderables, indirectArgs](RenderPassWorkContext& rpass) {
+				  instanceRateRenderables, indirectArgs
+#if ANKI_STATS_ENABLED
+				  ,
+				  clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset
+#endif
+	](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 
 		cmdb.bindShaderProgram(m_grProgs[hzbRtCopy.isValid()].get());
 		cmdb.bindShaderProgram(m_grProgs[hzbRtCopy.isValid()].get());
@@ -158,6 +200,11 @@ void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique t
 			cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
 			cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
 		}
 		}
 
 
+#if ANKI_STATS_ENABLED
+		cmdb.bindStorageBuffer(0, 10, writeStatsBuffer, writeStatsBufferOffset, sizeof(U32));
+		cmdb.bindStorageBuffer(0, 11, clearStatsBuffer, clearStatsBufferOffset, sizeof(U32));
+#endif
+
 		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
 		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
 	});
 	});
 }
 }

+ 7 - 1
AnKi/Renderer/GpuVisibility.h

@@ -6,6 +6,7 @@
 #pragma once
 #pragma once
 
 
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/RendererObject.h>
+#include <AnKi/Renderer/Readback.h>
 #include <AnKi/Resource/RenderingKey.h>
 #include <AnKi/Resource/RenderingKey.h>
 
 
 namespace anki {
 namespace anki {
@@ -40,11 +41,16 @@ public:
 	/// Populate the rendergraph.
 	/// Populate the rendergraph.
 	void populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
 	void populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
 							 const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt, RenderGraphDescription& rgraph,
 							 const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt, RenderGraphDescription& rgraph,
-							 GpuVisibilityOutput& out) const;
+							 GpuVisibilityOutput& out);
 
 
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
 	Array<ShaderProgramPtr, 2> m_grProgs;
 	Array<ShaderProgramPtr, 2> m_grProgs;
+
+#if ANKI_STATS_ENABLED
+	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_readbackMemory;
+	U64 m_lastFrameIdx = kMaxU64;
+#endif
 };
 };
 /// @}
 /// @}
 
 

+ 3 - 2
AnKi/Renderer/Readback.cpp

@@ -45,6 +45,7 @@ void ReadbackManager::getMostRecentReadDataAndRelease(MultiframeReadbackToken& t
 		if(token.m_frameIds[i] == earliestFrame && token.m_allocations[i].isValid())
 		if(token.m_frameIds[i] == earliestFrame && token.m_allocations[i].isValid())
 		{
 		{
 			bestSlot = i;
 			bestSlot = i;
+			break;
 		}
 		}
 		else if(token.m_frameIds[i] < earliestFrame && token.m_allocations[i].isValid())
 		else if(token.m_frameIds[i] < earliestFrame && token.m_allocations[i].isValid())
 		{
 		{
@@ -59,9 +60,9 @@ void ReadbackManager::getMostRecentReadDataAndRelease(MultiframeReadbackToken& t
 	}
 	}
 
 
 	GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
 	GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
-	dataSize = allocation.getAllocatedSize();
+	dataOut = min(dataSize, PtrSize(allocation.getAllocatedSize()));
 
 
-	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), min(dataSize, dataSize));
+	// dataOut is already clamped to the allocation size above, so it is the exact number of bytes to copy
+	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), dataOut);
 
 
 	GpuReadbackMemoryPool::getSingleton().deferredFree(allocation);
 	GpuReadbackMemoryPool::getSingleton().deferredFree(allocation);
 }
 }

+ 1 - 1
AnKi/Renderer/Renderer.h

@@ -91,7 +91,7 @@ public:
 		return m_sceneDrawer;
 		return m_sceneDrawer;
 	}
 	}
 
 
-	const GpuVisibility& getGpuVisibility() const
+	GpuVisibility& getGpuVisibility()
 	{
 	{
 		return m_visibility;
 		return m_visibility;
 	}
 	}

+ 18 - 0
AnKi/Shaders/GpuVisibility.ankiprog

@@ -4,6 +4,7 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #pragma anki mutator HZB_TEST 0 1
 #pragma anki mutator HZB_TEST 0 1
+#pragma anki mutator STATS 0 1
 
 
 #pragma anki start comp
 #pragma anki start comp
 
 
@@ -33,6 +34,11 @@
 [[vk::binding(9)]] SamplerState g_nearestAnyClampSampler;
 [[vk::binding(9)]] SamplerState g_nearestAnyClampSampler;
 #endif
 #endif
 
 
+#if STATS
+[[vk::binding(10)]] RWStructuredBuffer<U32> g_testsPassed;
+[[vk::binding(11)]] RWStructuredBuffer<U32> g_testsPassedClear; ///< Some previous value. Will be cleared this frame
+#endif
+
 [numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 [numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
 	const U32 aabbIdx = svDispatchThreadId.x;
 	const U32 aabbIdx = svDispatchThreadId.x;
@@ -41,6 +47,13 @@
 		return;
 		return;
 	}
 	}
 
 
+#if STATS
+	if(svDispatchThreadId.x == 0)
+	{
+		g_testsPassedClear[0] = 0;
+	}
+#endif
+
 	const GpuSceneRenderableAabb aabb = g_aabbs[aabbIdx];
 	const GpuSceneRenderableAabb aabb = g_aabbs[aabbIdx];
 
 
 	// Frustum test
 	// Frustum test
@@ -168,6 +181,11 @@
 	GpuSceneRenderable renderableOut = renderableIn;
 	GpuSceneRenderable renderableOut = renderableIn;
 	renderableOut.m_geometryOffset = meshLodOffset;
 	renderableOut.m_geometryOffset = meshLodOffset;
 	g_instanceRateRenderables[indirectIdx] = renderableOut;
 	g_instanceRateRenderables[indirectIdx] = renderableOut;
+
+	// Now update the stats
+#if STATS
+	InterlockedAdd(g_testsPassed[0], 1);
+#endif
 }
 }
 
 
 #pragma anki end
 #pragma anki end

+ 8 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -147,6 +147,14 @@ T max(T a, T b);
 template<typename T>
 template<typename T>
 T saturate(T a);
 T saturate(T a);
 
 
+// Atomics
+
+template<typename T>
+void InterlockedAdd(T dest, T value, T& originalValue);
+
+template<typename T>
+void InterlockedAdd(T dest, T value);
+
 // Wave ops
 // Wave ops
 
 
 template<typename T>
 template<typename T>