Browse Source

MDI works for GBuffer

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
fe7f75db8c

+ 1 - 1
AnKi/Core/GpuMemory/RebarTransientMemoryPool.cpp

@@ -25,7 +25,7 @@ void RebarTransientMemoryPool::init()
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_size = ConfigSet::getSingleton().getCoreRebarGpuMemorySize();
 	buffInit.m_usage = BufferUsageBit::kAllUniform | BufferUsageBit::kAllStorage | BufferUsageBit::kVertex | BufferUsageBit::kIndex
-					   | BufferUsageBit::kShaderBindingTable;
+					   | BufferUsageBit::kShaderBindingTable | BufferUsageBit::kAllIndirect;
 	m_buffer = GrManager::getSingleton().newBuffer(buffInit);
 
 	m_bufferSize = buffInit.m_size;

+ 2 - 0
AnKi/Gr/Vulkan/CommandBufferImpl.h

@@ -329,10 +329,12 @@ public:
 		const BufferImpl& argBufferImpl = static_cast<const BufferImpl&>(*argBuffer);
 		ANKI_ASSERT(argBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
 		ANKI_ASSERT((argBufferOffset % 4) == 0);
+		ANKI_ASSERT(argBufferOffset + maxDrawCount * sizeof(DrawIndexedIndirectArgs) <= argBuffer->getSize());
 
 		const BufferImpl& countBufferImpl = static_cast<const BufferImpl&>(*countBuffer);
 		ANKI_ASSERT(countBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
 		ANKI_ASSERT((countBufferOffset % 4) == 0);
+		ANKI_ASSERT(countBufferOffset + maxDrawCount * sizeof(U32) <= countBuffer->getSize());
 
 		ANKI_ASSERT(maxDrawCount > 0 && maxDrawCount <= getGrManagerImpl().getDeviceCapabilities().m_maxDrawIndirectCount);
 

+ 2 - 1
AnKi/Renderer/Drawer.cpp

@@ -151,7 +151,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 {
 	setState(args, cmdb);
 
-	cmdb.bindVertexBuffer(0, args.m_instaceRateRenderables, args.m_instaceRateRenderablesOffset, sizeof(GpuSceneRenderablePacked),
+	cmdb.bindVertexBuffer(0, args.m_instaceRateRenderablesBuffer, args.m_instaceRateRenderablesOffset, sizeof(GpuSceneRenderablePacked),
 						  VertexStepRate::kInstance);
 
 	U32 allUserCount = 0;
@@ -169,6 +169,7 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 		cmdb.bindShaderProgram(prog.get());
 
 		const U32 maxDrawCount = userCount;
+
 		cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer,
 									  args.m_drawIndexedIndirectArgsBufferOffset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
 									  args.m_mdiDrawCountsBuffer, args.m_mdiDrawCountsBufferOffset + sizeof(U32) * bucketCount, maxDrawCount);

+ 2 - 1
AnKi/Renderer/Drawer.h

@@ -41,7 +41,7 @@ public:
 	PtrSize m_drawIndexedIndirectArgsBufferOffset = 0;
 	PtrSize m_drawIndexedIndirectArgsBufferRange = 0;
 
-	Buffer* m_instaceRateRenderables = nullptr;
+	Buffer* m_instaceRateRenderablesBuffer = nullptr;
 	PtrSize m_instaceRateRenderablesOffset = 0;
 	PtrSize m_instaceRateRenderablesRange = 0;
 };
@@ -59,6 +59,7 @@ public:
 	void drawRange(const RenderableDrawerArguments& args, const RenderableQueueElement* begin, const RenderableQueueElement* end,
 				   CommandBuffer& cmdb);
 
+	/// Draw using multidraw indirect.
 	void drawMdi(const RenderableDrawerArguments& args, CommandBuffer& cmdb);
 
 private:

+ 19 - 56
AnKi/Renderer/GBuffer.cpp

@@ -8,6 +8,7 @@
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/Scale.h>
+#include <AnKi/Renderer/GpuVisibility.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Core/ConfigSet.h>
@@ -89,28 +90,9 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 	ANKI_TRACE_SCOPED_EVENT(RGBuffer);
 
 	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-	const U32 threadId = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
-	const U32 threadCount = rgraphCtx.m_secondLevelCommandBufferCount;
-
-	// Get some stuff
-	const U32 earlyZCount = ctx.m_renderQueue->m_earlyZRenderables.getSize();
-	const U32 problemSize = ctx.m_renderQueue->m_renderables.getSize() + earlyZCount;
-	U32 start, end;
-	splitThreadedProblem(threadId, threadCount, problemSize, start, end);
-
-	if(end == start) [[unlikely]]
-	{
-		return;
-	}
 
 	// Set some state, leave the rest to default
 	cmdb.setViewport(0, 0, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
-
-	const I32 earlyZStart = max(I32(start), 0);
-	const I32 earlyZEnd = min(I32(end), I32(earlyZCount));
-	const I32 colorStart = max(I32(start) - I32(earlyZCount), 0);
-	const I32 colorEnd = I32(end) - I32(earlyZCount);
-
 	cmdb.setRasterizationOrder(RasterizationOrder::kRelaxed);
 
 	const Bool enableVrs =
@@ -127,38 +109,18 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 	args.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjectionJitter;
 	args.m_previousViewProjectionMatrix = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection;
 	args.m_sampler = getRenderer().getSamplers().m_trilinearRepeatAnisoResolutionScalingBias.get();
-
-	// First do early Z (if needed)
-	if(earlyZStart < earlyZEnd)
-	{
-		for(U32 i = 0; i < kGBufferColorRenderTargetCount; ++i)
-		{
-			cmdb.setColorChannelWriteMask(i, ColorBit::kNone);
-		}
-
-		ANKI_ASSERT(earlyZStart < earlyZEnd && earlyZEnd <= I32(earlyZCount));
-		getRenderer().getSceneDrawer().drawRange(args, ctx.m_renderQueue->m_earlyZRenderables.getBegin() + earlyZStart,
-												 ctx.m_renderQueue->m_earlyZRenderables.getBegin() + earlyZEnd, cmdb);
-
-		// Restore state for the color write
-		if(colorStart < colorEnd)
-		{
-			for(U32 i = 0; i < kGBufferColorRenderTargetCount; ++i)
-			{
-				cmdb.setColorChannelWriteMask(i, ColorBit::kAll);
-			}
-		}
-	}
-
-	// Do the color writes
-	if(colorStart < colorEnd)
-	{
-		cmdb.setDepthCompareOperation(CompareOperation::kLessEqual);
-
-		ANKI_ASSERT(colorStart < colorEnd && colorEnd <= I32(ctx.m_renderQueue->m_renderables.getSize()));
-		getRenderer().getSceneDrawer().drawRange(args, ctx.m_renderQueue->m_renderables.getBegin() + colorStart,
-												 ctx.m_renderQueue->m_renderables.getBegin() + colorEnd, cmdb);
-	}
+	args.m_renderingTechinuqe = RenderingTechnique::kGBuffer;
+
+	const GpuVisibility& gpuVis = getRenderer().getGpuVisibility();
+	rgraphCtx.getBufferState(gpuVis.getMdiDrawCountsBufferHandle(), args.m_mdiDrawCountsBuffer, args.m_mdiDrawCountsBufferOffset,
+							 args.m_mdiDrawCountsBufferRange);
+	rgraphCtx.getBufferState(gpuVis.getDrawIndexedIndirectArgsBufferHandle(), args.m_drawIndexedIndirectArgsBuffer,
+							 args.m_drawIndexedIndirectArgsBufferOffset, args.m_drawIndexedIndirectArgsBufferRange);
+	rgraphCtx.getBufferState(gpuVis.getInstanceRateRenderablesBufferHandle(), args.m_instaceRateRenderablesBuffer,
+							 args.m_instaceRateRenderablesOffset, args.m_instaceRateRenderablesRange);
+
+	cmdb.setDepthCompareOperation(CompareOperation::kLessEqual);
+	getRenderer().getSceneDrawer().drawMdi(args, cmdb);
 }
 
 void GBuffer::importRenderTargets(RenderingContext& ctx)
@@ -226,11 +188,9 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 	pass.setFramebufferInfo(m_fbDescr, ConstWeakArray<RenderTargetHandle>(&rts[0], kGBufferColorRenderTargetCount), m_runCtx.m_crntFrameDepthRt,
 							sriRt);
-	pass.setWork(
-		computeNumberOfSecondLevelCommandBuffers(ctx.m_renderQueue->m_earlyZRenderables.getSize() + ctx.m_renderQueue->m_renderables.getSize()),
-		[this, &ctx](RenderPassWorkContext& rgraphCtx) {
-			runInThread(ctx, rgraphCtx);
-		});
+	pass.setWork(1, [this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		runInThread(ctx, rgraphCtx);
+	});
 
 	for(U i = 0; i < kGBufferColorRenderTargetCount; ++i)
 	{
@@ -246,6 +206,9 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	}
 
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageGeometryRead | BufferUsageBit::kStorageFragmentRead);
+
+	// Only add one dependency to the GPU visibility. No need to track all buffers
+	pass.newBufferDependency(getRenderer().getGpuVisibility().getMdiDrawCountsBufferHandle(), BufferUsageBit::kIndirectDraw);
 }
 
 } // end namespace anki

+ 4 - 5
AnKi/Renderer/GpuVisibility.cpp

@@ -49,14 +49,13 @@ void GpuVisibility::populateRenderGraph(RenderingContext& ctx)
 												   mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
 
 	// Create the renderpass
-	constexpr BufferUsageBit bufferUsage = BufferUsageBit::kStorageComputeRead;
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GPU occlusion GBuffer");
 
-	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), bufferUsage);
+	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
 	pass.newTextureDependency(getRenderer().getHiZ().getHiZRt(), TextureUsageBit::kSampledCompute);
-	pass.newBufferDependency(m_runCtx.m_instanceRateRenderables, bufferUsage);
-	pass.newBufferDependency(m_runCtx.m_drawIndexedIndirectArgs, bufferUsage);
-	pass.newBufferDependency(m_runCtx.m_mdiDrawCounts, bufferUsage);
+	pass.newBufferDependency(m_runCtx.m_instanceRateRenderables, BufferUsageBit::kStorageComputeWrite);
+	pass.newBufferDependency(m_runCtx.m_drawIndexedIndirectArgs, BufferUsageBit::kStorageComputeWrite);
+	pass.newBufferDependency(m_runCtx.m_mdiDrawCounts, BufferUsageBit::kStorageComputeWrite);
 
 	pass.setWork([this, &ctx](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;

+ 15 - 0
AnKi/Renderer/GpuVisibility.h

@@ -21,6 +21,21 @@ public:
 	/// Populate the rendergraph.
 	void populateRenderGraph(RenderingContext& ctx);
 
+	BufferHandle getMdiDrawCountsBufferHandle() const // Accessor: exposes the render-graph handle of the MDI draw-counts buffer to other passes.
+	{
+		return m_runCtx.m_mdiDrawCounts; // Same handle that populateRenderGraph() registers as a compute-write dependency.
+	}
+
+	BufferHandle getDrawIndexedIndirectArgsBufferHandle() const // Accessor: exposes the render-graph handle of the DrawIndexedIndirectArgs buffer.
+	{
+		return m_runCtx.m_drawIndexedIndirectArgs; // Same handle that populateRenderGraph() registers as a compute-write dependency.
+	}
+
+	BufferHandle getInstanceRateRenderablesBufferHandle() const // Accessor: exposes the render-graph handle of the instance-rate renderables buffer.
+	{
+		return m_runCtx.m_instanceRateRenderables; // Same handle that populateRenderGraph() registers as a compute-write dependency.
+	}
+
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;

+ 2 - 1
AnKi/Scene/Components/ModelComponent.cpp

@@ -189,7 +189,8 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 				PtrSize offset;
 				IndexType indexType;
 				mesh.getIndexBufferInfo(l, offset, indexCount, indexType);
-				meshLod.m_indexBufferOffset = U32(offset) / getIndexSize(indexType) + firstIndex;
+				ANKI_ASSERT((U32(offset) % getIndexSize(indexType)) == 0);
+				meshLod.m_firstIndex = U32(offset) / getIndexSize(indexType) + firstIndex;
 				meshLod.m_indexCount = indexCount;
 			}
 

+ 1 - 1
AnKi/Shaders/GpuVisibility.ankiprog

@@ -78,7 +78,7 @@ struct Uniforms
 	DrawIndexedIndirectArgs indirect;
 	indirect.m_indexCount = meshLod.m_indexCount;
 	indirect.m_instanceCount = 1;
-	indirect.m_firstIndex = meshLod.m_indexBufferOffset / 2;
+	indirect.m_firstIndex = meshLod.m_firstIndex;
 	indirect.m_vertexOffset = 0;
 	indirect.m_firstInstance = indirectIdx;
 	g_drawIndexedIndirectArgs[indirectIdx] = indirect;

+ 2 - 1
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -33,11 +33,12 @@ struct GpuSceneRenderableAabb
 };
 static_assert(sizeof(GpuSceneRenderableAabb) == sizeof(Vec4) * 2);
 
+/// Represents the geometry data of a single LOD of an indexed mesh.
 struct GpuSceneMeshLod
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_indexCount;
-	U32 m_indexBufferOffset; // In sizeof(indexType)
+	U32 m_firstIndex; // In sizeof(indexType)
 
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;

+ 3 - 3
AnKi/Shaders/RtShadowsHit.ankiprog

@@ -35,9 +35,9 @@
 
 	const GpuSceneMeshLod mesh = g_gpuScene.Load<GpuSceneMeshLod>(g_gpuSceneRenderable.m_geometryOffset);
 
-	const U32 idx0 = g_unifiedGeom_R16_Uint[mesh.m_indexBufferOffset + PrimitiveIndex() * 3 + 0];
-	const U32 idx1 = g_unifiedGeom_R16_Uint[mesh.m_indexBufferOffset + PrimitiveIndex() * 3 + 1];
-	const U32 idx2 = g_unifiedGeom_R16_Uint[mesh.m_indexBufferOffset + PrimitiveIndex() * 3 + 2];
+	const U32 idx0 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 0];
+	const U32 idx1 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 1];
+	const U32 idx2 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 2];
 
 	const UnpackedMeshVertex vert0 = loadVertex(mesh, idx0, false);
 	const UnpackedMeshVertex vert1 = loadVertex(mesh, idx1, false);