Browse Source

Remove the need for SBT setup job

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
8f07f98a0c

+ 3 - 2
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -32,8 +32,9 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 
 
 		getRenderer().getGpuVisibilityAccelerationStructures().pupulateRenderGraph(in, visOut);
 		getRenderer().getGpuVisibilityAccelerationStructures().pupulateRenderGraph(in, visOut);
 
 
-		m_runCtx.m_visibilityHandle = visOut.m_someBufferHandle;
+		m_runCtx.m_dependency = visOut.m_dependency;
 		m_runCtx.m_visibleRenderablesBuff = visOut.m_renderablesBuffer;
 		m_runCtx.m_visibleRenderablesBuff = visOut.m_renderablesBuffer;
+		m_runCtx.m_buildSbtIndirectArgsBuff = visOut.m_buildSbtIndirectArgsBuffer;
 	}
 	}
 
 
 	// Create the TLAS
 	// Create the TLAS
@@ -55,7 +56,7 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 
 
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Build TLAS");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Build TLAS");
 		rpass.newAccelerationStructureDependency(m_runCtx.m_tlasHandle, AccelerationStructureUsageBit::kBuild);
 		rpass.newAccelerationStructureDependency(m_runCtx.m_tlasHandle, AccelerationStructureUsageBit::kBuild);
-		rpass.newBufferDependency(visOut.m_someBufferHandle, BufferUsageBit::kAccelerationStructureBuild);
+		rpass.newBufferDependency(visOut.m_dependency, BufferUsageBit::kAccelerationStructureBuild);
 
 
 		rpass.setWork([this, scratchBuff](RenderPassWorkContext& rgraphCtx) {
 		rpass.setWork([this, scratchBuff](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(ASBuilder);
 			ANKI_TRACE_SCOPED_EVENT(ASBuilder);

+ 5 - 3
AnKi/Renderer/AccelerationStructureBuilder.h

@@ -32,10 +32,11 @@ public:
 		return m_runCtx.m_tlasHandle;
 		return m_runCtx.m_tlasHandle;
 	}
 	}
 
 
-	void getVisibilityInfo(BufferHandle& handle, BufferView& visibleRenderables) const
+	void getVisibilityInfo(BufferHandle& depedency, BufferView& visibleRenderables, BufferView& buildSbtIndirectArgs) const
 	{
 	{
-		handle = m_runCtx.m_visibilityHandle;
+		depedency = m_runCtx.m_dependency;
 		visibleRenderables = m_runCtx.m_visibleRenderablesBuff;
 		visibleRenderables = m_runCtx.m_visibleRenderablesBuff;
+		buildSbtIndirectArgs = m_runCtx.m_buildSbtIndirectArgsBuff;
 	}
 	}
 
 
 public:
 public:
@@ -45,8 +46,9 @@ public:
 		AccelerationStructurePtr m_tlas;
 		AccelerationStructurePtr m_tlas;
 		AccelerationStructureHandle m_tlasHandle;
 		AccelerationStructureHandle m_tlasHandle;
 
 
-		BufferHandle m_visibilityHandle;
+		BufferHandle m_dependency;
 		BufferView m_visibleRenderablesBuff;
 		BufferView m_visibleRenderablesBuff;
+		BufferView m_buildSbtIndirectArgsBuff;
 	} m_runCtx;
 	} m_runCtx;
 };
 };
 /// @}
 /// @}

+ 6 - 33
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -16,7 +16,6 @@ namespace anki {
 
 
 Error RtMaterialFetchDbg::init()
 Error RtMaterialFetchDbg::init()
 {
 {
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildSetupGrProg, "SbtBuildSetup"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 
 
 	// Ray gen and miss
 	// Ray gen and miss
@@ -50,30 +49,9 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 {
 {
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
 
-	// SBT build setup
-	BufferHandle sbtBuildIndirectArgsHandle;
-	BufferView sbtBuildIndirectArgsBuffer;
-	{
-		sbtBuildIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(1);
-		sbtBuildIndirectArgsHandle = rgraph.importBuffer(sbtBuildIndirectArgsBuffer, BufferUsageBit::kNone);
-
-		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch setup build SBT");
-
-		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kUavCompute);
-		rpass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
-
-		rpass.setWork([this, sbtBuildIndirectArgsBuffer](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetch);
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_sbtBuildSetupGrProg.get());
-
-			cmdb.bindSrv(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferView());
-			cmdb.bindUav(0, 0, sbtBuildIndirectArgsBuffer);
-
-			cmdb.dispatchCompute(1, 1, 1);
-		});
-	}
+	BufferHandle visibilityDep;
+	BufferView visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff;
+	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff);
 
 
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
@@ -98,15 +76,10 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 		// Create the pass
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch build SBT");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch build SBT");
 
 
-		BufferHandle visibilityHandle;
-		BufferView visibleRenderableIndicesBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff);
-
-		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
-		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
+		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kSrvCompute | BufferUsageBit::kIndirectCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 
 
-		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, sbtBuildIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
@@ -125,7 +98,7 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
 			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
 			cmdb.setFastConstants(&consts, sizeof(consts));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 
-			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuffer);
+			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuff);
 		});
 		});
 	}
 	}
 
 

+ 0 - 1
AnKi/Renderer/RtMaterialFetchDbg.h

@@ -36,7 +36,6 @@ public:
 public:
 public:
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_rtProg;
 	ShaderProgramResourcePtr m_rtProg;
-	ShaderProgramPtr m_sbtBuildSetupGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 
 

+ 6 - 32
AnKi/Renderer/RtReflections.cpp

@@ -19,7 +19,6 @@ namespace anki {
 
 
 Error RtReflections::init()
 Error RtReflections::init()
 {
 {
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildSetupGrProg, "SbtBuildSetup"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 
 
 	// Ray gen and miss
 	// Ray gen and miss
@@ -102,34 +101,9 @@ void RtReflections::populateRenderGraph(RenderingContext& ctx)
 	const RenderTargetHandle transientRt2 = rgraph.newRenderTarget(m_transientRtDesc2);
 	const RenderTargetHandle transientRt2 = rgraph.newRenderTarget(m_transientRtDesc2);
 	const RenderTargetHandle hitPosAndDepthRt = rgraph.newRenderTarget(m_hitPosAndDepthRtDesc);
 	const RenderTargetHandle hitPosAndDepthRt = rgraph.newRenderTarget(m_hitPosAndDepthRtDesc);
 
 
-	BufferHandle visibilityHandle;
-	BufferView visibleRenderableIndicesBuff;
-	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff);
-
-	// SBT build setup
-	BufferHandle sbtBuildIndirectArgsHandle;
-	BufferView sbtBuildIndirectArgsBuffer;
-	{
-		sbtBuildIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(1);
-		sbtBuildIndirectArgsHandle = rgraph.importBuffer(sbtBuildIndirectArgsBuffer, BufferUsageBit::kNone);
-
-		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections setup build SBT");
-
-		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kUavCompute);
-		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
-
-		rpass.setWork([this, sbtBuildIndirectArgsBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(RtReflections);
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_sbtBuildSetupGrProg.get());
-
-			cmdb.bindSrv(0, 0, visibleRenderableIndicesBuff);
-			cmdb.bindUav(0, 0, sbtBuildIndirectArgsBuffer);
-
-			cmdb.dispatchCompute(1, 1, 1);
-		});
-	}
+	BufferHandle visibilityDep;
+	BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
+	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
 
 
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
@@ -154,10 +128,10 @@ void RtReflections::populateRenderGraph(RenderingContext& ctx)
 		// Create the pass
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections build SBT");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections build SBT");
 
 
-		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
+		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 
 
-		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
@@ -176,7 +150,7 @@ void RtReflections::populateRenderGraph(RenderingContext& ctx)
 			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
 			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
 			cmdb.setFastConstants(&consts, sizeof(consts));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 
-			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuffer);
+			cmdb.dispatchComputeIndirect(buildSbtIndirectArgsBuff);
 		});
 		});
 	}
 	}
 
 

+ 0 - 1
AnKi/Renderer/RtReflections.h

@@ -42,7 +42,6 @@ public:
 public:
 public:
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_rtProg;
 	ShaderProgramResourcePtr m_rtProg;
-	ShaderProgramPtr m_sbtBuildSetupGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_spatialDenoisingGrProg;
 	ShaderProgramPtr m_spatialDenoisingGrProg;

+ 2 - 2
AnKi/Renderer/RtShadows.cpp

@@ -230,8 +230,8 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtShadows build SBT");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtShadows build SBT");
 
 
 		BufferHandle visibilityHandle;
 		BufferHandle visibilityHandle;
-		BufferView visibleRenderableIndicesBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff);
+		BufferView visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff);
 
 
 		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
 		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);

+ 11 - 9
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -1083,7 +1083,7 @@ Error GpuVisibilityAccelerationStructures::init()
 	ANKI_CHECK(
 	ANKI_CHECK(
 		loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg, m_visibilityGrProg, "Visibility"));
 		loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg, m_visibilityGrProg, "Visibility"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg,
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg,
-								 m_zeroRemainingInstancesGrProg, "ZeroRemaining"));
+								 m_zeroRemainingInstancesGrProg, "ZeroRemainingInstances"));
 
 
 	BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
 	BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
 	inf.m_size = sizeof(U32) * 2;
 	inf.m_size = sizeof(U32) * 2;
@@ -1110,22 +1110,24 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
 	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
 
 
 	out.m_instancesBuffer = allocateStructuredBuffer<AccelerationStructureInstance>(aabbCount);
 	out.m_instancesBuffer = allocateStructuredBuffer<AccelerationStructureInstance>(aabbCount);
-	out.m_someBufferHandle = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kUavCompute);
+	out.m_dependency = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kUavCompute);
 
 
 	out.m_renderablesBuffer = allocateStructuredBuffer<LodAndRenderableIndex>(aabbCount + 1);
 	out.m_renderablesBuffer = allocateStructuredBuffer<LodAndRenderableIndex>(aabbCount + 1);
 
 
-	const BufferView zeroInstancesDispatchArgsBuff = allocateStructuredBuffer<DispatchIndirectArgs>(1);
+	const BufferView zeroInstancesAndSbtBuildDispatchArgsBuff = allocateStructuredBuffer<DispatchIndirectArgs>(2);
+
+	out.m_buildSbtIndirectArgsBuffer = BufferView(zeroInstancesAndSbtBuildDispatchArgsBuff).incrementOffset(sizeof(DispatchIndirectArgs));
 
 
 	// Create vis pass
 	// Create vis pass
 	{
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis: %s", in.m_passesName.cstr()));
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis: %s", in.m_passesName.cstr()));
 
 
 		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
 		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
-		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavCompute);
+		pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavCompute);
 
 
 		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
 		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
 					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, visRenderablesBuff = out.m_renderablesBuffer,
 					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, visRenderablesBuff = out.m_renderablesBuffer,
-					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
+					  zeroInstancesAndSbtBuildDispatchArgsBuff](RenderPassWorkContext& rgraph) {
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
 
 			cmdb.bindShaderProgram(m_visibilityGrProg.get());
 			cmdb.bindShaderProgram(m_visibilityGrProg.get());
@@ -1156,7 +1158,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 			cmdb.bindUav(0, 0, instancesBuff);
 			cmdb.bindUav(0, 0, instancesBuff);
 			cmdb.bindUav(1, 0, visRenderablesBuff);
 			cmdb.bindUav(1, 0, visRenderablesBuff);
 			cmdb.bindUav(2, 0, BufferView(m_counterBuffer.get(), 0, sizeof(U32) * 2));
 			cmdb.bindUav(2, 0, BufferView(m_counterBuffer.get(), 0, sizeof(U32) * 2));
-			cmdb.bindUav(3, 0, zeroInstancesDispatchArgsBuff);
+			cmdb.bindUav(3, 0, zeroInstancesAndSbtBuildDispatchArgsBuff);
 
 
 			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
 			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
 			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
 			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
@@ -1168,9 +1170,9 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 		NonGraphicsRenderPass& pass =
 		NonGraphicsRenderPass& pass =
 			rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis zero remaining instances: %s", in.m_passesName.cstr()));
 			rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis zero remaining instances: %s", in.m_passesName.cstr()));
 
 
-		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavCompute | BufferUsageBit::kIndirectCompute);
+		pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavCompute | BufferUsageBit::kIndirectCompute);
 
 
-		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
+		pass.setWork([this, zeroInstancesAndSbtBuildDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
 					  visRenderablesBuff = out.m_renderablesBuffer](RenderPassWorkContext& rgraph) {
 					  visRenderablesBuff = out.m_renderablesBuffer](RenderPassWorkContext& rgraph) {
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
 
@@ -1179,7 +1181,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 			cmdb.bindSrv(0, 0, visRenderablesBuff);
 			cmdb.bindSrv(0, 0, visRenderablesBuff);
 			cmdb.bindUav(0, 0, instancesBuff);
 			cmdb.bindUav(0, 0, instancesBuff);
 
 
-			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff);
+			cmdb.dispatchComputeIndirect(BufferView(zeroInstancesAndSbtBuildDispatchArgsBuff).setRange(sizeof(DispatchIndirectArgs)));
 		});
 		});
 	}
 	}
 }
 }

+ 3 - 1
AnKi/Renderer/Utils/GpuVisibility.h

@@ -288,10 +288,12 @@ public:
 class GpuVisibilityAccelerationStructuresOutput
 class GpuVisibilityAccelerationStructuresOutput
 {
 {
 public:
 public:
-	BufferHandle m_someBufferHandle; ///< Some handle to track dependencies. No need to track every buffer.
+	BufferHandle m_dependency; ///< Some handle to track dependencies. No need to track every buffer.
 
 
 	BufferView m_instancesBuffer; ///< Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
 	BufferView m_instancesBuffer; ///< Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
 	BufferView m_renderablesBuffer; ///< AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of indices to renderables.
 	BufferView m_renderablesBuffer; ///< AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of indices to renderables.
+
+	BufferView m_buildSbtIndirectArgsBuffer; ///< The DispatchIndirectArgs for the SBT dispatches.
 };
 };
 
 
 /// Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.
 /// Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.

+ 11 - 6
AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog

@@ -4,7 +4,7 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #pragma anki technique Visibility comp
 #pragma anki technique Visibility comp
-#pragma anki technique ZeroRemaining comp
+#pragma anki technique ZeroRemainingInstances comp
 
 
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
@@ -15,7 +15,7 @@
 // ===========================================================================
 // ===========================================================================
 // Visibility                                                                =
 // Visibility                                                                =
 // ===========================================================================
 // ===========================================================================
-#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_Visibility
+#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_Visibility)
 
 
 // Buffers that point to the GPU scene
 // Buffers that point to the GPU scene
 StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes : register(t0);
 StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes : register(t0);
@@ -28,6 +28,7 @@ RWStructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(u1); /
 
 
 globallycoherent RWStructuredBuffer<U32> g_counterBuffer : register(u2); // 2 counters per dispatch
 globallycoherent RWStructuredBuffer<U32> g_counterBuffer : register(u2); // 2 counters per dispatch
 
 
+// Contains 2 elements. 1st is the args of the ZeroRemainingInstances dispatch, 2nd is the args of the SBT build dispatches
 RWStructuredBuffer<DispatchIndirectArgs> g_nextDispatchIndirectArgs : register(u3);
 RWStructuredBuffer<DispatchIndirectArgs> g_nextDispatchIndirectArgs : register(u3);
 
 
 ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
@@ -141,7 +142,7 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 				SBUFF(g_counterBuffer, 0) = 0;
 				SBUFF(g_counterBuffer, 0) = 0;
 				SBUFF(g_counterBuffer, 1) = 0;
 				SBUFF(g_counterBuffer, 1) = 0;
 
 
-				// Update indirect args of some next job
+				// Update indirect args of the ZeroRemainingInstances
 				const U32 remaining = maxVisibleInstances - visible;
 				const U32 remaining = maxVisibleInstances - visible;
 
 
 				DispatchIndirectArgs args;
 				DispatchIndirectArgs args;
@@ -149,6 +150,10 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 				args.m_threadGroupCountY = 1;
 				args.m_threadGroupCountY = 1;
 				args.m_threadGroupCountZ = 1;
 				args.m_threadGroupCountZ = 1;
 				SBUFF(g_nextDispatchIndirectArgs, 0) = args;
 				SBUFF(g_nextDispatchIndirectArgs, 0) = args;
+
+				// Update the args for the various SBT build dispatches
+				args.m_threadGroupCountX = (visible + NUMTHREADS - 1) / NUMTHREADS;
+				SBUFF(g_nextDispatchIndirectArgs, 1) = args;
 			}
 			}
 		}
 		}
 	}
 	}
@@ -156,9 +161,9 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 #endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_Visibility
 #endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_Visibility
 
 
 // ===========================================================================
 // ===========================================================================
-// ZeroRemaining                                                             =
+// ZeroRemainingInstances                                                    =
 // ===========================================================================
 // ===========================================================================
-#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_ZeroRemaining
+#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_ZeroRemainingInstances)
 
 
 StructuredBuffer<U32> g_visibleRenderableIndices : register(t0); // 1st element is the count
 StructuredBuffer<U32> g_visibleRenderableIndices : register(t0); // 1st element is the count
 RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);
 RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);
@@ -179,4 +184,4 @@ RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);
 		SBUFF(g_instances, visibleInstances + svDispatchThreadId) = (AccelerationStructureInstance)0;
 		SBUFF(g_instances, visibleInstances + svDispatchThreadId) = (AccelerationStructureInstance)0;
 	}
 	}
 }
 }
-#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_ZeroRemaining
+#endif

+ 1 - 24
AnKi/Shaders/RtSbtBuild.ankiprog

@@ -5,39 +5,16 @@
 
 
 #pragma anki mutator TECHNIQUE 0 1 // Shadows or MaterialFetch
 #pragma anki mutator TECHNIQUE 0 1 // Shadows or MaterialFetch
 
 
-#pragma anki technique SbtBuildSetup comp
 #pragma anki technique SbtBuild comp
 #pragma anki technique SbtBuild comp
 
 
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 
 
-// ===========================================================================
-// SbtBuildSetup                                                             =
-// ===========================================================================
-#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuildSetup
-StructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(t0); // 1st element is the count
-RWStructuredBuffer<DispatchIndirectArgs> g_args : register(u0);
-
-#	define NUMTHREADS 64
-
-[numthreads(1, 1, 1)] void main()
-{
-	const U32 renderableCount = SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit;
-
-	DispatchIndirectArgs args;
-	args.m_threadGroupCountX = (renderableCount + NUMTHREADS - 1) / NUMTHREADS;
-	args.m_threadGroupCountY = 1;
-	args.m_threadGroupCountZ = 1;
-
-	g_args[0] = args;
-}
-#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuildSetup
-
 // ===========================================================================
 // ===========================================================================
 // SbtBuild                                                                  =
 // SbtBuild                                                                  =
 // ===========================================================================
 // ===========================================================================
-#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuild
+#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_SbtBuild)
 
 
 StructuredBuffer<GpuSceneRenderable> g_renderables : register(t0);
 StructuredBuffer<GpuSceneRenderable> g_renderables : register(t0);