Browse Source

Add primitive rendered stats

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
eb3c794434

+ 1 - 0
AnKi/Core/GpuMemory/RebarTransientMemoryPool.cpp

@@ -56,6 +56,7 @@ RebarAllocation RebarTransientMemoryPool::allocateFrame(PtrSize size, void*& map
 
 RebarAllocation RebarTransientMemoryPool::tryAllocateFrame(PtrSize origSize, void*& mappedMem)
 {
+	ANKI_ASSERT(origSize > 0);
 	const PtrSize size = getAlignedRoundUp(m_alignment, origSize);
 
 	// Try in a loop because we may end up with an allocation whose offset crosses the buffer's end

+ 26 - 5
AnKi/Renderer/Utils/Drawer.cpp

@@ -20,6 +20,7 @@ namespace anki {
 
 static StatCounter g_executedDrawcallsStatVar(StatCategory::kRenderer, "Drawcalls executed", StatFlag::kZeroEveryFrame);
 static StatCounter g_maxDrawcallsStatVar(StatCategory::kRenderer, "Drawcalls possible", StatFlag::kZeroEveryFrame);
+static StatCounter g_renderedPrimitivesStatVar(StatCategory::kRenderer, "Rendered primitives", StatFlag::kZeroEveryFrame);
 
 RenderableDrawer::~RenderableDrawer()
 {
@@ -76,6 +77,11 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 
 void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuffer& cmdb)
 {
+	if(RenderStateBucketContainer::getSingleton().getBucketCount(args.m_renderingTechinuqe) == 0) [[unlikely]]
+	{
+		return;
+	}
+
 #if ANKI_STATS_ENABLED
 	U32 variant = 0;
 	switch(args.m_renderingTechinuqe)
@@ -101,16 +107,18 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 			m_stats.m_frameIdx = getRenderer().getFrameCount();
 
 			// Get previous stats
-			U32 prevFrameCount;
+			UVec4 prevFrameStats;
 			PtrSize dataRead;
-			getRenderer().getReadbackManager().readMostRecentData(m_stats.m_readback, &prevFrameCount, sizeof(prevFrameCount), dataRead);
+			getRenderer().getReadbackManager().readMostRecentData(m_stats.m_readback, &prevFrameStats, sizeof(prevFrameStats), dataRead);
 			if(dataRead > 0) [[likely]]
 			{
-				g_executedDrawcallsStatVar.set(prevFrameCount);
+				g_executedDrawcallsStatVar.set(prevFrameStats[0]);
+				g_renderedPrimitivesStatVar.set(prevFrameStats[1] / 3);
 			}
 
 			// Get place to write new stats
-			getRenderer().getReadbackManager().allocateData(m_stats.m_readback, sizeof(U32), m_stats.m_statsBuffer, m_stats.m_statsBufferOffset);
+			getRenderer().getReadbackManager().allocateData(m_stats.m_readback, sizeof(prevFrameStats), m_stats.m_statsBuffer,
+															m_stats.m_statsBufferOffset);
 
 			// Allocate another atomic to count the passes. This is needed because the calls to drawMdi might not be issued in the same order as they run on the GPU
 			U32* counter;
@@ -125,10 +133,23 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 		cmdb.pushDebugMarker("Draw stats", Vec3(0.0f, 1.0f, 0.0f));
 
 		cmdb.bindShaderProgram(m_stats.m_updateStatsGrProgs[variant].get());
-		cmdb.bindUavBuffer(0, 0, m_stats.m_statsBuffer, m_stats.m_statsBufferOffset, sizeof(U32));
+		cmdb.bindUavBuffer(0, 0, m_stats.m_statsBuffer, m_stats.m_statsBufferOffset, sizeof(UVec4));
 		cmdb.bindUavBuffer(0, 1, threadCountBuff);
 		cmdb.bindUavBuffer(0, 2, args.m_mdiDrawCountsBuffer);
 		cmdb.bindUavBuffer(0, 3, m_stats.m_passCountBuffer);
+		cmdb.bindUavBuffer(0, 4, args.m_drawIndexedIndirectArgsBuffer);
+
+		DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>> offsets(&getRenderer().getFrameMemoryPool());
+		U32 allUserCount = 0;
+		RenderStateBucketContainer::getSingleton().iterateBuckets(args.m_renderingTechinuqe,
+																  [&]([[maybe_unused]] const RenderStateInfo& state, U32 userCount) {
+																	  offsets.emplaceBack(allUserCount);
+																	  allUserCount += userCount;
+																  });
+		U32* firstDrawArgIndices;
+		BufferOffsetRange firstDrawArgIndicesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(offsets.getSize(), firstDrawArgIndices);
+		memcpy(firstDrawArgIndices, &offsets[0], offsets.getSizeInBytes());
+		cmdb.bindUavBuffer(0, 5, firstDrawArgIndicesBuffer);
 
 		cmdb.draw(PrimitiveTopology::kTriangles, 6);
 

+ 4 - 0
AnKi/Resource/MeshBinaryLoader.h

@@ -21,7 +21,11 @@ namespace anki {
 /// * Header
 /// * Submeshes
 /// * Index buffer of max LOD
+/// ** Index buffer of #0 submesh
+/// ** etc ...
 /// * Vertex buffer #0 of max LOD
+/// ** Vertex buffer #0 of #0 submesh
+/// ** etc ...
 /// * etc...
 class MeshBinaryLoader
 {

+ 16 - 4
AnKi/Shaders/DrawerStats.ankiprog

@@ -35,10 +35,12 @@ VertOut main(U32 vertId : SV_VERTEXID)
 
 #pragma anki start frag
 
-[[vk::binding(0)]] RWStructuredBuffer<U32> g_visibleObjectCount;
+[[vk::binding(0)]] RWStructuredBuffer<UVec4> g_stats;
 [[vk::binding(1)]] RWStructuredBuffer<U32> g_fragThreadCount;
 [[vk::binding(2)]] StructuredBuffer<U32> g_mdiDrawCounts;
 [[vk::binding(3)]] RWStructuredBuffer<U32> g_passCount;
+[[vk::binding(4)]] StructuredBuffer<DrawIndexedIndirectArgs> g_drawArguments;
+[[vk::binding(5)]] StructuredBuffer<U32> g_bucketFirstDrawArgIndex;
 
 #if COLOR_ATTACHMENT_COUNT > 0
 struct FragOut
@@ -68,9 +70,17 @@ main()
 		g_mdiDrawCounts.GetDimensions(bucketCount, unused);
 
 		U32 visiblesCount = 0;
+		U32 indexAndVertexCount = 0;
 		for(U32 i = 0; i < bucketCount; ++i)
 		{
 			visiblesCount += g_mdiDrawCounts[i];
+
+			const U32 begin = g_bucketFirstDrawArgIndex[i];
+			const U32 end = begin + g_mdiDrawCounts[i];
+			for(U32 j = begin; j < end; ++j)
+			{
+				indexAndVertexCount += g_drawArguments[j].m_indexCount;
+			}
 		}
 
 		U32 passIdx;
@@ -78,13 +88,15 @@ main()
 
 		if(passIdx == 0)
 		{
-			// 1st pass initializes the atomic
+			// 1st pass initializes the atomics
 			U32 origValue;
-			InterlockedExchange(g_visibleObjectCount[0], visiblesCount, origValue);
+			InterlockedExchange(g_stats[0][0], visiblesCount, origValue);
+			InterlockedExchange(g_stats[0][1], indexAndVertexCount, origValue);
 		}
 		else if(visiblesCount > 0)
 		{
-			InterlockedAdd(g_visibleObjectCount[0], visiblesCount);
+			InterlockedAdd(g_stats[0][0], visiblesCount);
+			InterlockedAdd(g_stats[0][1], indexAndVertexCount);
 		}
 	}