浏览代码

Make GPU visibility memory persistent

Panagiotis Christopoulos Charitos 1 年之前
父节点
当前提交
ff2a300abd

+ 6 - 2
AnKi/Gr/RenderGraph.cpp

@@ -1068,7 +1068,8 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr)
 				const BufferUsageBit depUsage = dep.m_buffer.m_usage;
 				const BufferUsageBit depUsage = dep.m_buffer.m_usage;
 				BufferUsageBit& crntUsage = ctx.m_buffers[buffIdx].m_usage;
 				BufferUsageBit& crntUsage = ctx.m_buffers[buffIdx].m_usage;
 
 
-				if(depUsage == crntUsage)
+				const Bool skipBarrier = crntUsage == depUsage && !(crntUsage & BufferUsageBit::kAllWrite);
+				if(skipBarrier)
 				{
 				{
 					continue;
 					continue;
 				}
 				}
@@ -1112,7 +1113,8 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr)
 				const AccelerationStructureUsageBit depUsage = dep.m_as.m_usage;
 				const AccelerationStructureUsageBit depUsage = dep.m_as.m_usage;
 				AccelerationStructureUsageBit& crntUsage = ctx.m_as[asIdx].m_usage;
 				AccelerationStructureUsageBit& crntUsage = ctx.m_as[asIdx].m_usage;
 
 
-				if(depUsage == crntUsage)
+				const Bool skipBarrier = crntUsage == depUsage && !(crntUsage & AccelerationStructureUsageBit::kAllWrite);
+				if(skipBarrier)
 				{
 				{
 					continue;
 					continue;
 				}
 				}
@@ -1148,6 +1150,8 @@ void RenderGraph::setBatchBarriers(const RenderGraphDescription& descr)
 			}
 			}
 		} // For all passes
 		} // For all passes
 
 
+		ANKI_ASSERT(batch.m_bufferBarriersBefore.getSize() || batch.m_textureBarriersBefore.getSize() || batch.m_asBarriersBefore.getSize());
+
 #if ANKI_DBG_RENDER_GRAPH
 #if ANKI_DBG_RENDER_GRAPH
 		// Sort the barriers to ease the dumped graph
 		// Sort the barriers to ease the dumped graph
 		std::sort(batch.m_textureBarriersBefore.getBegin(), batch.m_textureBarriersBefore.getEnd(),
 		std::sort(batch.m_textureBarriersBefore.getBegin(), batch.m_textureBarriersBefore.getEnd(),

+ 5 - 0
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -258,6 +258,11 @@ VkPipelineStageFlags BufferImpl::computePplineStage(BufferUsageBit usage)
 	{
 	{
 		stageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
 		stageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
 					 | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
 					 | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
+
+		if(getGrManagerImpl().getDeviceCapabilities().m_meshShaders)
+		{
+			stageMask |= VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT;
+		}
 	}
 	}
 
 
 	if(!!(usage & BufferUsageBit::kAllFragment))
 	if(!!(usage & BufferUsageBit::kAllFragment))

+ 17 - 2
AnKi/Renderer/Dbg.cpp

@@ -196,7 +196,11 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 		cmdb.bindIndexBuffer(m_cubeIndicesBuffer.get(), 0, IndexType::kU16);
 		cmdb.bindIndexBuffer(m_cubeIndicesBuffer.get(), 0, IndexType::kU16);
 
 
 		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferOffsetRange());
 		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferOffsetRange());
-		cmdb.bindUavBuffer(0, 3, getRenderer().getGBuffer().getVisibleAabbsBuffer());
+
+		BufferOffsetRange indicesBuff;
+		BufferHandle dep;
+		getRenderer().getGBuffer().getVisibleAabbsBuffer(indicesBuff, dep);
+		cmdb.bindUavBuffer(0, 3, indicesBuff);
 
 
 		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
 		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
 	}
 	}
@@ -206,7 +210,11 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 		const U32 allAabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
 		const U32 allAabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
 
 
 		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferOffsetRange());
 		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferOffsetRange());
-		cmdb.bindUavBuffer(0, 3, getRenderer().getForwardShading().getVisibleAabbsBuffer());
+
+		BufferOffsetRange indicesBuff;
+		BufferHandle dep;
+		getRenderer().getForwardShading().getVisibleAabbsBuffer(indicesBuff, dep);
+		cmdb.bindUavBuffer(0, 3, indicesBuff);
 
 
 		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
 		cmdb.drawIndexed(PrimitiveTopology::kLines, 12 * 2, allAabbCount);
 	}
 	}
@@ -249,6 +257,13 @@ void Dbg::populateRenderGraph(RenderingContext& ctx)
 
 
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite);
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite);
 	pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSampledFragment | TextureUsageBit::kFramebufferRead);
 	pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSampledFragment | TextureUsageBit::kFramebufferRead);
+
+	BufferOffsetRange indicesBuff;
+	BufferHandle dep;
+	getRenderer().getGBuffer().getVisibleAabbsBuffer(indicesBuff, dep);
+	pass.newBufferDependency(dep, BufferUsageBit::kUavGeometryRead);
+	getRenderer().getForwardShading().getVisibleAabbsBuffer(indicesBuff, dep);
+	pass.newBufferDependency(dep, BufferUsageBit::kUavGeometryRead);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 4 - 3
AnKi/Renderer/ForwardShading.h

@@ -33,10 +33,11 @@ public:
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 
 
 	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
 	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
-	const BufferOffsetRange& getVisibleAabbsBuffer() const
+	void getVisibleAabbsBuffer(BufferOffsetRange& visibleAaabbIndicesBuffer, BufferHandle& dep) const
 	{
 	{
-		ANKI_ASSERT(m_runCtx.m_visOut.m_visibleAaabbIndicesBuffer.m_buffer != nullptr);
-		return m_runCtx.m_visOut.m_visibleAaabbIndicesBuffer;
+		visibleAaabbIndicesBuffer = m_runCtx.m_visOut.m_visibleAaabbIndicesBuffer;
+		dep = m_runCtx.m_visOut.m_someBufferHandle;
+		ANKI_ASSERT(visibleAaabbIndicesBuffer.m_buffer != nullptr && dep.isValid());
 	}
 	}
 
 
 private:
 private:

+ 2 - 1
AnKi/Renderer/GBuffer.cpp

@@ -186,7 +186,8 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 
 		getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 		getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 
 
-		m_runCtx.m_visibleAabbsBuffer = visOut.m_visibleAaabbIndicesBuffer;
+		m_runCtx.m_visibleAaabbIndicesBuffer = visOut.m_visibleAaabbIndicesBuffer;
+		m_runCtx.m_visibleAaabbIndicesBufferDepedency = visOut.m_someBufferHandle;
 	}
 	}
 
 
 	const Bool enableVrs = GrManager::getSingleton().getDeviceCapabilities().m_vrs && g_vrsCVar.get() && g_gbufferVrsCVar.get();
 	const Bool enableVrs = GrManager::getSingleton().getDeviceCapabilities().m_vrs && g_vrsCVar.get() && g_gbufferVrsCVar.get();

+ 6 - 4
AnKi/Renderer/GBuffer.h

@@ -60,10 +60,11 @@ public:
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override;
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override;
 
 
 	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
 	/// Returns a buffer with indices of the visible AABBs. Used in debug drawing.
-	const BufferOffsetRange& getVisibleAabbsBuffer() const
+	void getVisibleAabbsBuffer(BufferOffsetRange& visibleAaabbIndicesBuffer, BufferHandle& dep) const
 	{
 	{
-		ANKI_ASSERT(m_runCtx.m_visibleAabbsBuffer.m_buffer != nullptr);
-		return m_runCtx.m_visibleAabbsBuffer;
+		visibleAaabbIndicesBuffer = m_runCtx.m_visibleAaabbIndicesBuffer;
+		dep = m_runCtx.m_visibleAaabbIndicesBufferDepedency;
+		ANKI_ASSERT(visibleAaabbIndicesBuffer.m_buffer != nullptr && dep.isValid());
 	}
 	}
 
 
 private:
 private:
@@ -83,7 +84,8 @@ private:
 		RenderTargetHandle m_prevFrameDepthRt;
 		RenderTargetHandle m_prevFrameDepthRt;
 		RenderTargetHandle m_hzbRt;
 		RenderTargetHandle m_hzbRt;
 
 
-		BufferOffsetRange m_visibleAabbsBuffer; ///< Optional
+		BufferOffsetRange m_visibleAaabbIndicesBuffer; ///< Optional
+		BufferHandle m_visibleAaabbIndicesBufferDepedency;
 	} m_runCtx;
 	} m_runCtx;
 
 
 	Error initInternal();
 	Error initInternal();

+ 1 - 1
AnKi/Renderer/Renderer.cpp

@@ -301,7 +301,6 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	{
 	{
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
 	}
 	}
-	m_forwardShading->populateRenderGraph(ctx); // This may feel out of place but it's only visibility
 	m_gbuffer->populateRenderGraph(ctx);
 	m_gbuffer->populateRenderGraph(ctx);
 	m_shadowMapping->populateRenderGraph(ctx);
 	m_shadowMapping->populateRenderGraph(ctx);
 	m_clusterBinning2->populateRenderGraph(ctx);
 	m_clusterBinning2->populateRenderGraph(ctx);
@@ -320,6 +319,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_volumetricFog->populateRenderGraph(ctx);
 	m_volumetricFog->populateRenderGraph(ctx);
 	m_lensFlare->populateRenderGraph(ctx);
 	m_lensFlare->populateRenderGraph(ctx);
 	m_ssao->populateRenderGraph(ctx);
 	m_ssao->populateRenderGraph(ctx);
+	m_forwardShading->populateRenderGraph(ctx); // This may feel out of place but it's only visibility. Keep it just before light shading
 	m_lightShading->populateRenderGraph(ctx);
 	m_lightShading->populateRenderGraph(ctx);
 	if(!getScale().getUsingGrUpscaler())
 	if(!getScale().getUsingGrUpscaler())
 	{
 	{

+ 126 - 176
AnKi/Renderer/ShadowMapping.cpp

@@ -63,20 +63,6 @@ static LightHash decodeTileHash(U64 hash)
 	return c;
 	return c;
 }
 }
 
 
-class ShadowMapping::ViewportWorkItem
-{
-public:
-	UVec4 m_viewport;
-	Mat4 m_viewProjMat;
-	Mat3x4 m_viewMat;
-
-	GpuVisibilityOutput m_visOut;
-
-	BufferOffsetRange m_clearTileIndirectArgs;
-
-	RenderTargetHandle m_hzbRt;
-};
-
 Error ShadowMapping::init()
 Error ShadowMapping::init()
 {
 {
 	const Error err = initInternal();
 	const Error err = initInternal();
@@ -180,68 +166,6 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 
 
 	// First process the lights
 	// First process the lights
 	processLights(ctx);
 	processLights(ctx);
-
-	// Build the render graph
-	U32 passIdx = 0;
-	for(const ViewportWorkItem& work : m_runCtx.m_workItems)
-	{
-		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("Shadowmapping", passIdx));
-
-		const Bool loadFb = (work.m_clearTileIndirectArgs.m_buffer != nullptr);
-
-		pass.setFramebufferInfo((loadFb) ? m_loadFbDescr : m_clearFbDescr, {}, m_runCtx.m_rt, {}, work.m_viewport[0], work.m_viewport[1],
-								work.m_viewport[2], work.m_viewport[3]);
-
-		pass.newBufferDependency(work.m_visOut.m_someBufferHandle, BufferUsageBit::kIndirectDraw);
-		pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavGeometryRead | BufferUsageBit::kUavFragmentRead);
-
-		pass.setWork(1, [this, passIdx](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
-
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			const ViewportWorkItem& work = m_runCtx.m_workItems[passIdx];
-
-			cmdb.setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-
-			if(work.m_clearTileIndirectArgs.m_buffer)
-			{
-				// Clear the depth buffer using a quad because it needs to be conditional
-
-				cmdb.bindShaderProgram(m_clearDepthGrProg.get());
-				cmdb.setDepthCompareOperation(CompareOperation::kAlways);
-
-				cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, work.m_clearTileIndirectArgs.m_offset, work.m_clearTileIndirectArgs.m_buffer);
-
-				cmdb.setDepthCompareOperation(CompareOperation::kLess);
-			}
-
-			// Set state
-			cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
-
-			RenderableDrawerArguments args;
-			args.m_renderingTechinuqe = RenderingTechnique::kDepth;
-			args.m_viewMatrix = work.m_viewMat;
-			args.m_cameraTransform = work.m_viewMat.getInverseTransformation();
-			args.m_viewProjectionMatrix = work.m_viewProjMat;
-			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
-			args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
-			args.m_viewport = UVec4(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-			args.fillMdi(work.m_visOut);
-
-			TextureViewPtr hzbView;
-			if(work.m_hzbRt.isValid())
-			{
-				hzbView = rgraphCtx.createTextureView(work.m_hzbRt);
-				args.m_hzbTexture = hzbView.get();
-			}
-
-			getRenderer().getSceneDrawer().drawMdi(args, cmdb);
-		});
-
-		++passIdx;
-	}
 }
 }
 
 
 void ShadowMapping::chooseDetail(const Vec3& cameraOrigin, const LightComponent& lightc, Vec2 lodDistances, U32& tileAllocatorHierarchy) const
 void ShadowMapping::chooseDetail(const Vec3& cameraOrigin, const LightComponent& lightc, Vec2 lodDistances, U32& tileAllocatorHierarchy) const
@@ -352,7 +276,6 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 {
 {
 	// Vars
 	// Vars
 	const Vec3 cameraOrigin = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 	const Vec3 cameraOrigin = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
-	DynamicArray<ViewportWorkItem, MemoryPoolPtrWrapper<StackMemoryPool>> workItems(&getRenderer().getFrameMemoryPool());
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	const CameraComponent& mainCam = SceneGraph::getSingleton().getActiveCameraNode().getFirstComponentOfType<CameraComponent>();
 	const CameraComponent& mainCam = SceneGraph::getSingleton().getActiveCameraNode().getFirstComponentOfType<CameraComponent>();
 
 
@@ -413,31 +336,27 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 
 
 		getRenderer().getHzbGenerator().populateRenderGraphDirectionalLight(hzbGenIn, rgraph);
 		getRenderer().getHzbGenerator().populateRenderGraphDirectionalLight(hzbGenIn, rgraph);
 
 
-		// Vis testing
+		// Create passes per cascade
 		for(U cascade = 0; cascade < cascadeCount; ++cascade)
 		for(U cascade = 0; cascade < cascadeCount; ++cascade)
 		{
 		{
-			ViewportWorkItem& work = *workItems.emplaceBack();
-			work.m_viewProjMat = cascadeViewProjMats[cascade];
-			work.m_viewMat = cascadeViewMats[cascade];
-			work.m_viewport = atlasViewports[cascade];
-			if(GrManager::getSingleton().getDeviceCapabilities().m_meshShaders)
-			{
-				work.m_hzbRt = hzbGenIn.m_cascades[cascade].m_hzbRt;
-			}
-
 			// Vis testing
 			// Vis testing
 			const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 			const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 			FrustumGpuVisibilityInput visIn;
 			FrustumGpuVisibilityInput visIn;
-			visIn.m_passesName = "Shadows dir light";
+			visIn.m_passesName = computeTempPassName("Shadows: Dir light cascade", cascade);
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_viewProjectionMatrix = cascadeViewProjMats[cascade];
 			visIn.m_viewProjectionMatrix = cascadeViewProjMats[cascade];
 			visIn.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 			visIn.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 			visIn.m_lodDistances = lodDistances;
 			visIn.m_lodDistances = lodDistances;
 			visIn.m_hzbRt = &hzbGenIn.m_cascades[cascade].m_hzbRt;
 			visIn.m_hzbRt = &hzbGenIn.m_cascades[cascade].m_hzbRt;
 			visIn.m_rgraph = &rgraph;
 			visIn.m_rgraph = &rgraph;
-			visIn.m_finalRenderTargetSize = work.m_viewport.zw();
+			visIn.m_finalRenderTargetSize = atlasViewports[cascade].zw();
+
+			GpuVisibilityOutput visOut;
+			getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 
 
-			getRenderer().getGpuVisibility().populateRenderGraph(visIn, work.m_visOut);
+			// Draw
+			createDrawShadowsPass(atlasViewports[cascade], cascadeViewProjMats[cascade], cascadeViewMats[cascade], visOut, {},
+								  hzbGenIn.m_cascades[cascade].m_hzbRt, computeTempPassName("Shadows: Dir light cascade", cascade), rgraph);
 
 
 			// Update the texture matrix to point to the correct region in the atlas
 			// Update the texture matrix to point to the correct region in the atlas
 			ctx.m_dirLightTextureMatrices[cascade] = createSpotLightTextureMatrix(atlasViewports[cascade]) * cascadeViewProjMats[cascade];
 			ctx.m_dirLightTextureMatrices[cascade] = createSpotLightTextureMatrix(atlasViewports[cascade]) * cascadeViewProjMats[cascade];
@@ -515,10 +434,11 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			BufferOffsetRange clearTileIndirectArgs;
 			BufferOffsetRange clearTileIndirectArgs;
 			if(!renderAllways)
 			if(!renderAllways)
 			{
 			{
-				clearTileIndirectArgs = vetVisibilityPass("Shadows visibility: Vet point light", *lightc, visOut, rgraph);
+				clearTileIndirectArgs = createVetVisibilityPass("Shadows: Vet point light", *lightc, visOut, rgraph);
 			}
 			}
 
 
-			// Add work
+			// Add the draw pass
+			Array<ViewportDraw, 6> dviewports;
 			for(U32 face = 0; face < 6; ++face)
 			for(U32 face = 0; face < 6; ++face)
 			{
 			{
 				Frustum frustum;
 				Frustum frustum;
@@ -527,13 +447,13 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 				frustum.setWorldTransform(Transform(lightc->getWorldPosition().xyz0(), Frustum::getOmnidirectionalFrustumRotations()[face], 1.0f));
 				frustum.setWorldTransform(Transform(lightc->getWorldPosition().xyz0(), Frustum::getOmnidirectionalFrustumRotations()[face], 1.0f));
 				frustum.update();
 				frustum.update();
 
 
-				ViewportWorkItem& work = *workItems.emplaceBack();
-				work.m_viewProjMat = frustum.getViewProjectionMatrix();
-				work.m_viewMat = frustum.getViewMatrix();
-				work.m_viewport = atlasViewports[face];
-				work.m_visOut = visOut;
-				work.m_clearTileIndirectArgs = clearTileIndirectArgs;
+				dviewports[face].m_viewport = atlasViewports[face];
+				dviewports[face].m_viewProjMat = frustum.getViewProjectionMatrix();
+				dviewports[face].m_viewMat = frustum.getViewMatrix();
+				dviewports[face].m_clearTileIndirectArgs = clearTileIndirectArgs;
 			}
 			}
+
+			createMultipleDrawShadowsPass(dviewports, visOut, "Shadows: Point light face", rgraph);
 		}
 		}
 		else
 		else
 		{
 		{
@@ -587,16 +507,12 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			BufferOffsetRange clearTileIndirectArgs;
 			BufferOffsetRange clearTileIndirectArgs;
 			if(!renderAllways)
 			if(!renderAllways)
 			{
 			{
-				clearTileIndirectArgs = vetVisibilityPass("Shadows visibility: Vet spot light", *lightc, visOut, rgraph);
+				clearTileIndirectArgs = createVetVisibilityPass("Shadows: Vet spot light", *lightc, visOut, rgraph);
 			}
 			}
 
 
-			// Add work
-			ViewportWorkItem& work = *workItems.emplaceBack();
-			work.m_viewProjMat = lightc->getSpotLightViewProjectionMatrix();
-			work.m_viewMat = lightc->getSpotLightViewMatrix();
-			work.m_viewport = atlasViewport;
-			work.m_visOut = visOut;
-			work.m_clearTileIndirectArgs = clearTileIndirectArgs;
+			// Add draw pass
+			createDrawShadowsPass(atlasViewport, lightc->getSpotLightViewProjectionMatrix(), lightc->getSpotLightViewMatrix(), visOut,
+								  clearTileIndirectArgs, {}, "Shadows: Spot light", rgraph);
 		}
 		}
 		else
 		else
 		{
 		{
@@ -604,78 +520,10 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			lightc->setShadowAtlasUvViewports({});
 			lightc->setShadowAtlasUvViewports({});
 		}
 		}
 	}
 	}
-
-	// Move the work to the context
-	if(workItems.getSize())
-	{
-		// All good, store the work items for the threads to pick up
-		workItems.moveAndReset(m_runCtx.m_workItems);
-	}
-	else
-	{
-		m_runCtx.m_workItems = {};
-	}
 }
 }
 
 
-void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
-{
-	ANKI_ASSERT(m_runCtx.m_workItems.getSize());
-	ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
-
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	// Clear the depth buffer
-	cmdb.bindShaderProgram(m_clearDepthGrProg.get());
-	cmdb.setDepthCompareOperation(CompareOperation::kAlways);
-
-	for(ViewportWorkItem& work : m_runCtx.m_workItems)
-	{
-		cmdb.setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-
-		if(work.m_clearTileIndirectArgs.m_buffer)
-		{
-			cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, work.m_clearTileIndirectArgs.m_offset, work.m_clearTileIndirectArgs.m_buffer);
-		}
-		else
-		{
-			cmdb.draw(PrimitiveTopology::kTriangles, 3, 1);
-		}
-	}
-
-	// Restore state
-	cmdb.setDepthCompareOperation(CompareOperation::kLess);
-
-	// Draw to tiles
-	cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
-	for(ViewportWorkItem& work : m_runCtx.m_workItems)
-	{
-		// Set state
-		cmdb.setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-		cmdb.setScissor(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-
-		RenderableDrawerArguments args;
-		args.m_renderingTechinuqe = RenderingTechnique::kDepth;
-		args.m_viewMatrix = work.m_viewMat;
-		args.m_cameraTransform = work.m_viewMat.getInverseTransformation();
-		args.m_viewProjectionMatrix = work.m_viewProjMat;
-		args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
-		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
-		args.m_viewport = UVec4(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
-		args.fillMdi(work.m_visOut);
-
-		TextureViewPtr hzbView;
-		if(work.m_hzbRt.isValid())
-		{
-			hzbView = rgraphCtx.createTextureView(work.m_hzbRt);
-			args.m_hzbTexture = hzbView.get();
-		}
-
-		getRenderer().getSceneDrawer().drawMdi(args, cmdb);
-	}
-}
-
-BufferOffsetRange ShadowMapping::vetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
-												   RenderGraphDescription& rgraph) const
+BufferOffsetRange ShadowMapping::createVetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
+														 RenderGraphDescription& rgraph) const
 {
 {
 	BufferOffsetRange clearTileIndirectArgs;
 	BufferOffsetRange clearTileIndirectArgs;
 
 
@@ -709,4 +557,106 @@ BufferOffsetRange ShadowMapping::vetVisibilityPass(CString passName, const Light
 	return clearTileIndirectArgs;
 	return clearTileIndirectArgs;
 }
 }
 
 
+void ShadowMapping::createMultipleDrawShadowsPass(ConstWeakArray<ViewportDraw> viewports, const GpuVisibilityOutput visOut, CString passName,
+												  RenderGraphDescription& rgraph)
+{ // Adds one graphics pass named passName to rgraph that draws every entry of viewports into m_runCtx.m_rt using the visibility results in visOut
+	ANKI_ASSERT(viewports.getSize() > 0); // viewports[0] is read unconditionally below
+
+	const Bool loadFb = (viewports[0].m_clearTileIndirectArgs.m_buffer != nullptr); // Selects m_loadFbDescr over m_clearFbDescr when the pass is created
+	for(const ViewportDraw& v : viewports)
+	{
+		[[maybe_unused]] const Bool loadFb2 = v.m_clearTileIndirectArgs.m_buffer != nullptr;
+		ANKI_ASSERT(loadFb == loadFb2 && "All draws should be the same for simplicity");
+	}
+
+	// Compute the aggregate viewport: the rectangle (min corner, size) that encloses all the per-draw viewports
+	UVec2 minViewportXy = viewports[0].m_viewport.xy();
+	UVec2 maxViewportXy = viewports[0].m_viewport.xy() + viewports[0].m_viewport.zw();
+	for(U32 i = 1; i < viewports.getSize(); ++i)
+	{
+		minViewportXy = minViewportXy.min(viewports[i].m_viewport.xy());
+		maxViewportXy = maxViewportXy.max(viewports[i].m_viewport.xy() + viewports[i].m_viewport.zw());
+	}
+	const UVec4 totalViewport(minViewportXy, maxViewportXy - minViewportXy);
+
+	// Copy the arguments into an array backed by the renderer's frame memory pool so the work callback doesn't reference the caller's array
+	DynamicArray<ViewportDraw, MemoryPoolPtrWrapper<StackMemoryPool>> dviewports(&getRenderer().getFrameMemoryPool());
+	dviewports.resize(viewports.getSize());
+	for(U32 i = 0; i < viewports.getSize(); ++i)
+	{
+		dviewports[i] = viewports[i];
+	}
+	WeakArray<ViewportDraw> dviewportsArr; // This is what the lambda below captures (under the name "viewports")
+	dviewports.moveAndReset(dviewportsArr);
+
+	// Create the pass. Its render area is the aggregate viewport computed above
+	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passName);
+	pass.setFramebufferInfo((loadFb) ? m_loadFbDescr : m_clearFbDescr, {}, m_runCtx.m_rt, {}, totalViewport[0], totalViewport[1], totalViewport[2],
+							totalViewport[3]);
+
+	pass.newBufferDependency(visOut.m_someBufferHandle, BufferUsageBit::kIndirectDraw);
+	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
+	// NOTE(review): the removed per-work-item pass also declared a dependency on getGpuSceneBufferHandle(); none is declared here - confirm that is intentional
+	pass.setWork(1, [this, visOut, viewports = dviewportsArr](RenderPassWorkContext& rgraphCtx) { // visOut and the array view are captured by value
+		ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
+
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+		for(U32 i = 0; i < viewports.getSize(); ++i)
+		{
+			const ViewportDraw& vp = viewports[i];
+
+			cmdb.setViewport(vp.m_viewport[0], vp.m_viewport[1], vp.m_viewport[2], vp.m_viewport[3]);
+
+			if(vp.m_clearTileIndirectArgs.m_buffer)
+			{
+				// Clear the depth buffer using a quad because it needs to be conditional
+
+				cmdb.bindShaderProgram(m_clearDepthGrProg.get());
+				cmdb.setDepthCompareOperation(CompareOperation::kAlways);
+
+				cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, vp.m_clearTileIndirectArgs.m_offset, vp.m_clearTileIndirectArgs.m_buffer);
+
+				cmdb.setDepthCompareOperation(CompareOperation::kLess); // Put the compare operation back to kLess for the shadow draws that follow
+			}
+
+			// Set the state and the drawer arguments for this viewport's depth-only draw
+			cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
+
+			RenderableDrawerArguments args;
+			args.m_renderingTechinuqe = RenderingTechnique::kDepth;
+			args.m_viewMatrix = vp.m_viewMat;
+			args.m_cameraTransform = vp.m_viewMat.getInverseTransformation();
+			args.m_viewProjectionMatrix = vp.m_viewProjMat;
+			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
+			args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
+			args.m_viewport = UVec4(vp.m_viewport[0], vp.m_viewport[1], vp.m_viewport[2], vp.m_viewport[3]);
+			args.fillMdi(visOut);
+
+			TextureViewPtr hzbView; // Scoped to the whole iteration, so it is still alive when drawMdi() is called with args.m_hzbTexture
+			if(vp.m_hzbRt.isValid())
+			{
+				hzbView = rgraphCtx.createTextureView(vp.m_hzbRt);
+				args.m_hzbTexture = hzbView.get();
+			}
+
+			getRenderer().getSceneDrawer().drawMdi(args, cmdb);
+		}
+	});
+}
+
+void ShadowMapping::createDrawShadowsPass(const UVec4& viewport, const Mat4& viewProjMat, const Mat3x4& viewMat, const GpuVisibilityOutput visOut,
+										  const BufferOffsetRange& clearTileIndirectArgs, const RenderTargetHandle hzbRt, CString passName,
+										  RenderGraphDescription& rgraph)
+{ // Single-viewport convenience wrapper: packs the arguments into one ViewportDraw and forwards to createMultipleDrawShadowsPass()
+	ViewportDraw vp;
+	vp.m_viewport = viewport;
+	vp.m_viewProjMat = viewProjMat;
+	vp.m_viewMat = viewMat;
+	vp.m_clearTileIndirectArgs = clearTileIndirectArgs;
+	vp.m_hzbRt = hzbRt;
+
+	createMultipleDrawShadowsPass({&vp, 1}, visOut, passName, rgraph); // Passes a 1-element view of the local vp; the callee copies the elements
+}
+
 } // end namespace anki
 } // end namespace anki

+ 17 - 5
AnKi/Renderer/ShadowMapping.h

@@ -34,7 +34,15 @@ public:
 	}
 	}
 
 
 private:
 private:
-	class ViewportWorkItem;
+	class ViewportDraw // Per-viewport arguments of a shadow draw, consumed by createMultipleDrawShadowsPass()
+	{
+	public:
+		UVec4 m_viewport; // Passed component-wise to setViewport(); xy + zw is treated as the opposite corner, so zw is the size
+		Mat4 m_viewProjMat; // Becomes RenderableDrawerArguments::m_viewProjectionMatrix
+		Mat3x4 m_viewMat; // Becomes m_viewMatrix; its inverse transformation becomes m_cameraTransform
+		RenderTargetHandle m_hzbRt; // Optional: a texture view is created from it only when the handle is valid
+		BufferOffsetRange m_clearTileIndirectArgs; // Optional: when m_buffer is set the depth is cleared with an indirect draw before drawing
+	};
 
 
 	TileAllocator m_tileAlloc;
 	TileAllocator m_tileAlloc;
 	static constexpr U32 kTileAllocHierarchyCount = 4;
 	static constexpr U32 kTileAllocHierarchyCount = 4;
@@ -62,7 +70,6 @@ private:
 	{
 	{
 	public:
 	public:
 		RenderTargetHandle m_rt;
 		RenderTargetHandle m_rt;
-		WeakArray<ViewportWorkItem> m_workItems;
 	} m_runCtx;
 	} m_runCtx;
 
 
 	Error initInternal();
 	Error initInternal();
@@ -75,10 +82,15 @@ private:
 
 
 	void chooseDetail(const Vec3& cameraOrigin, const LightComponent& lightc, Vec2 lodDistances, U32& tileAllocatorHierarchy) const;
 	void chooseDetail(const Vec3& cameraOrigin, const LightComponent& lightc, Vec2 lodDistances, U32& tileAllocatorHierarchy) const;
 
 
-	void runShadowMapping(RenderPassWorkContext& rgraphCtx);
+	BufferOffsetRange createVetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
+											  RenderGraphDescription& rgraph) const;
+
+	void createMultipleDrawShadowsPass(ConstWeakArray<ViewportDraw> viewports, const GpuVisibilityOutput visOut, CString passName,
+									   RenderGraphDescription& rgraph);
 
 
-	BufferOffsetRange vetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
-										RenderGraphDescription& rgraph) const;
+	void createDrawShadowsPass(const UVec4& viewport, const Mat4& viewProjMat, const Mat3x4& viewMat, const GpuVisibilityOutput visOut,
+							   const BufferOffsetRange& clearTileIndirectArgs, const RenderTargetHandle hzbRt, CString passName,
+							   RenderGraphDescription& rgraph);
 };
 };
 /// @}
 /// @}
 
 

+ 92 - 54
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -44,6 +44,44 @@ Error GpuVisibility::init()
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
+GpuVisibility::Counts GpuVisibility::countTechnique(RenderingTechnique t)
+{ // Gathers the AABB, render state bucket, user and meshlet group counts for rendering technique t
+	Counts out = {};
+
+	switch(t)
+	{
+	case RenderingTechnique::kGBuffer:
+		out.m_aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
+		break;
+	case RenderingTechnique::kDepth:
+		out.m_aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
+		break;
+	case RenderingTechnique::kForward:
+		out.m_aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
+		break;
+	default:
+		ANKI_ASSERT(0); // Only the three techniques above are handled
+	}
+
+	out.m_bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(t);
+
+	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount_) {
+		if(meshletGroupCount_) // Buckets that report meshlet groups count towards the modern geometry flow, all others towards the legacy one
+		{
+			out.m_modernGeometryFlowUserCount += userCount;
+			out.m_meshletGroupCount += min(meshletGroupCount_, kMaxMeshletGroupCountPerRenderStateBucket); // Per-bucket contribution is capped
+		}
+		else
+		{
+			out.m_legacyGeometryFlowUserCount += userCount;
+		}
+	});
+
+	out.m_allUserCount = out.m_legacyGeometryFlowUserCount + out.m_modernGeometryFlowUserCount;
+
+	return out;
+}
+
 void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
 void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
 {
 {
 	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);
 	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);
@@ -81,41 +119,9 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 		frustumTestData->m_finalRenderTargetSize = fin.m_finalRenderTargetSize;
 		frustumTestData->m_finalRenderTargetSize = fin.m_finalRenderTargetSize;
 	}
 	}
 
 
-	U32 aabbCount = 0;
-	switch(in.m_technique)
-	{
-	case RenderingTechnique::kGBuffer:
-		aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
-		break;
-	case RenderingTechnique::kDepth:
-		aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
-		break;
-	case RenderingTechnique::kForward:
-		aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
-		break;
-	default:
-		ANKI_ASSERT(0);
-	}
+	const Counts counts = countTechnique(in.m_technique);
 
 
-	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
-
-	U32 legacyGeometryFlowUserCount = 0;
-	U32 modernGeometryFlowUserCount = 0;
-	U32 meshletGroupCount = 0;
-	RenderStateBucketContainer::getSingleton().iterateBuckets(in.m_technique, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount_) {
-		if(meshletGroupCount_)
-		{
-			modernGeometryFlowUserCount += userCount;
-			meshletGroupCount += min(meshletGroupCount_, kMaxMeshletGroupCountPerRenderStateBucket);
-		}
-		else
-		{
-			legacyGeometryFlowUserCount += userCount;
-		}
-	});
-	const U32 allUserCount = legacyGeometryFlowUserCount + modernGeometryFlowUserCount;
-
-	if(allUserCount == 0) [[unlikely]]
+	if(counts.m_allUserCount == 0) [[unlikely]]
 	{
 	{
 		// Early exit
 		// Early exit
 
 
@@ -140,46 +146,70 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	}
 	}
 
 
 	// Allocate memory
 	// Allocate memory
-	out.m_drawIndexedIndirectArgsBuffer =
-		GpuVisibleTransientMemoryPool::getSingleton().allocate(max(1u, legacyGeometryFlowUserCount) * sizeof(DrawIndexedIndirectArgs));
-	out.m_instanceRateRenderablesBuffer =
-		GpuVisibleTransientMemoryPool::getSingleton().allocate(max(1u, legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableVertex));
-	out.m_mdiDrawCountsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(U32) * bucketCount);
+	const Bool firstFrame = m_runCtx.m_frameIdx != getRenderer().getFrameCount();
+	if(firstFrame)
+	{
+		// Allocate the big buffers once at the beginning of the frame
 
 
-	out.m_taskShaderIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DispatchIndirectArgs) * bucketCount);
-	out.m_taskShaderPayloadBuffer =
-		GpuVisibleTransientMemoryPool::getSingleton().allocate(max(1u, meshletGroupCount) * sizeof(GpuSceneTaskShaderPayload));
+		m_runCtx.m_frameIdx = getRenderer().getFrameCount();
 
 
-	if(in.m_gatherAabbIndices)
-	{
-		out.m_visibleAaabbIndicesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate((allUserCount + 1) * sizeof(U32));
+		// Find the max counts of all techniques
+		Counts maxCounts = {};
+		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
+		{
+			maxCounts = maxCounts.max((in.m_technique == t) ? counts : countTechnique(t));
+		}
+
+		m_runCtx.m_drawIndexedIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(
+			max(1u, maxCounts.m_legacyGeometryFlowUserCount) * sizeof(DrawIndexedIndirectArgs));
+		m_runCtx.m_instanceRateRenderablesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(
+			max(1u, maxCounts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableVertex));
+
+		m_runCtx.m_taskShaderPayloadBuffer =
+			GpuVisibleTransientMemoryPool::getSingleton().allocate(max(1u, maxCounts.m_meshletGroupCount) * sizeof(GpuSceneTaskShaderPayload));
 	}
 	}
 
 
+	out.m_drawIndexedIndirectArgsBuffer = m_runCtx.m_drawIndexedIndirectArgsBuffer;
+	out.m_drawIndexedIndirectArgsBuffer.m_range = max(1u, counts.m_legacyGeometryFlowUserCount) * sizeof(DrawIndexedIndirectArgs);
+
+	out.m_instanceRateRenderablesBuffer = m_runCtx.m_instanceRateRenderablesBuffer;
+	out.m_instanceRateRenderablesBuffer.m_range = max(1u, counts.m_legacyGeometryFlowUserCount) * sizeof(GpuSceneRenderableVertex);
+
+	out.m_taskShaderPayloadBuffer = m_runCtx.m_taskShaderPayloadBuffer;
+	out.m_taskShaderPayloadBuffer.m_range = max(1u, counts.m_meshletGroupCount) * sizeof(GpuSceneTaskShaderPayload);
+
+	out.m_taskShaderIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DispatchIndirectArgs) * counts.m_bucketCount);
+	out.m_mdiDrawCountsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(U32) * counts.m_bucketCount);
+
 	if(in.m_hashVisibles)
 	if(in.m_hashVisibles)
 	{
 	{
 		out.m_visiblesHashBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(GpuVisibilityHash));
 		out.m_visiblesHashBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(GpuVisibilityHash));
 	}
 	}
 
 
-	out.m_someBufferHandle = in.m_rgraph->importBuffer(BufferUsageBit::kNone, out.m_mdiDrawCountsBuffer);
+	if(in.m_gatherAabbIndices)
+	{
+		out.m_visibleAaabbIndicesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate((counts.m_allUserCount + 1) * sizeof(U32));
+	}
 
 
 	// Zero some stuff
 	// Zero some stuff
+	const BufferHandle zeroStuffDependency = in.m_rgraph->importBuffer(BufferUsageBit::kNone, out.m_mdiDrawCountsBuffer);
 	{
 	{
 		Array<Char, 128> passName;
 		Array<Char, 128> passName;
 		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
 		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
 
 
 		ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(passName.getBegin());
 		ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(passName.getBegin());
-		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kTransferDestination);
+		pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kTransferDestination);
 
 
 		pass.setWork([mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer, taskShaderIndirectArgsBuffer = out.m_taskShaderIndirectArgsBuffer,
 		pass.setWork([mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer, taskShaderIndirectArgsBuffer = out.m_taskShaderIndirectArgsBuffer,
-					  visibleAaabbIndicesBuffer = out.m_visibleAaabbIndicesBuffer,
-					  visiblesHashBuffer = out.m_visiblesHashBuffer](RenderPassWorkContext& rpass) {
+					  visiblesHashBuffer = out.m_visiblesHashBuffer,
+					  visibleAaabbIndicesBuffer = out.m_visibleAaabbIndicesBuffer](RenderPassWorkContext& rpass) {
 			CommandBuffer& cmdb = *rpass.m_commandBuffer;
 			CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 
 			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
 			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
 			cmdb.fillBuffer(mdiDrawCountsBuffer, 0);
 			cmdb.fillBuffer(mdiDrawCountsBuffer, 0);
 			cmdb.popDebugMarker();
 			cmdb.popDebugMarker();
 
 
-			cmdb.pushDebugMarker("Task shaders args", Vec3(1.0f, 1.0f, 1.0f));
+			cmdb.pushDebugMarker("Task shader indirect args", Vec3(1.0f, 1.0f, 1.0f));
 			cmdb.fillBuffer(taskShaderIndirectArgsBuffer, 0);
 			cmdb.fillBuffer(taskShaderIndirectArgsBuffer, 0);
 			cmdb.popDebugMarker();
 			cmdb.popDebugMarker();
 
 
@@ -199,6 +229,13 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 		});
 		});
 	}
 	}
 
 
+	// Set the out dependency. Use one of the big buffers.
+	if(firstFrame)
+	{
+		m_runCtx.m_bufferDepedency = in.m_rgraph->importBuffer(BufferUsageBit::kNone, m_runCtx.m_drawIndexedIndirectArgsBuffer);
+	}
+	out.m_someBufferHandle = m_runCtx.m_bufferDepedency;
+
 	// Create the renderpass
 	// Create the renderpass
 	Array<Char, 128> passName;
 	Array<Char, 128> passName;
 	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis: %s", in.m_passesName.cstr());
 	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis: %s", in.m_passesName.cstr());
@@ -206,6 +243,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(passName.getBegin());
 	ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(passName.getBegin());
 
 
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
+	pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kUavComputeWrite);
 	pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);
 	pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);
 
 
 	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
 	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
@@ -216,13 +254,13 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 
 	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
 	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
 				  technique = in.m_technique, mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer,
 				  technique = in.m_technique, mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer,
-				  instanceRateRenderables = out.m_instanceRateRenderablesBuffer, indirectArgs = out.m_drawIndexedIndirectArgsBuffer, aabbCount,
-				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer, hashBuffer = out.m_visiblesHashBuffer,
+				  instanceRateRenderables = out.m_instanceRateRenderablesBuffer, indirectArgs = out.m_drawIndexedIndirectArgsBuffer,
+				  aabbCount = counts.m_aabbCount, visibleAaabbIndicesBuffer = out.m_visibleAaabbIndicesBuffer, hashBuffer = out.m_visiblesHashBuffer,
 				  taskShaderIndirectArgsBuff = out.m_taskShaderIndirectArgsBuffer,
 				  taskShaderIndirectArgsBuff = out.m_taskShaderIndirectArgsBuffer,
 				  taskShaderPayloadBuffer = out.m_taskShaderPayloadBuffer](RenderPassWorkContext& rpass) {
 				  taskShaderPayloadBuffer = out.m_taskShaderPayloadBuffer](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 
-		const Bool gatherAabbIndices = visibleAabbsBuffer.m_buffer != nullptr;
+		const Bool gatherAabbIndices = visibleAaabbIndicesBuffer.m_buffer != nullptr;
 		const Bool genHash = hashBuffer.m_buffer != nullptr;
 		const Bool genHash = hashBuffer.m_buffer != nullptr;
 
 
 		if(frustumTestData)
 		if(frustumTestData)
@@ -329,7 +367,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 
 		if(gatherAabbIndices)
 		if(gatherAabbIndices)
 		{
 		{
-			cmdb.bindUavBuffer(0, 12, visibleAabbsBuffer);
+			cmdb.bindUavBuffer(0, 12, visibleAaabbIndicesBuffer);
 		}
 		}
 
 
 		if(genHash)
 		if(genHash)

+ 42 - 2
AnKi/Renderer/Utils/GpuVisibility.h

@@ -63,9 +63,9 @@ public:
 	BufferOffsetRange m_taskShaderIndirectArgsBuffer;
 	BufferOffsetRange m_taskShaderIndirectArgsBuffer;
 	BufferOffsetRange m_taskShaderPayloadBuffer; ///< The payloads of task shaders. One for each task shader threadgroup.
 	BufferOffsetRange m_taskShaderPayloadBuffer; ///< The payloads of task shaders. One for each task shader threadgroup.
 
 
-	BufferOffsetRange m_visibleAaabbIndicesBuffer; ///< Optional.
+	BufferOffsetRange m_visibleAaabbIndicesBuffer; ///< [Optional] Indices to the AABB buffer. The 1st element is the count.
 
 
-	BufferOffsetRange m_visiblesHashBuffer; ///< Optional.
+	BufferOffsetRange m_visiblesHashBuffer; ///< [Optional] A hash of the visible objects. Used to conditionally not perform shadow rendering.
 };
 };
 
 
 /// Performs GPU visibility for some pass.
 /// Performs GPU visibility for some pass.
@@ -95,6 +95,46 @@ private:
 	Array3d<ShaderProgramPtr, 2, 2, 2> m_frustumGrProgs;
 	Array3d<ShaderProgramPtr, 2, 2, 2> m_frustumGrProgs;
 	Array2d<ShaderProgramPtr, 2, 2> m_distGrProgs;
 	Array2d<ShaderProgramPtr, 2, 2> m_distGrProgs;
 
 
+	class
+	{
+	public:
+		U64 m_frameIdx = kMaxU64; ///< Renderer frame count recorded the last time the shared buffers below were allocated.
+
+		// The buffers below are quite large, which is why we want to reuse them multiple times in a single frame.
+		BufferOffsetRange m_drawIndexedIndirectArgsBuffer; ///< Sized for the max legacy geometry flow user count over all raster techniques.
+		BufferOffsetRange m_instanceRateRenderablesBuffer; ///< Sized for the max legacy geometry flow user count over all raster techniques.
+		BufferOffsetRange m_taskShaderPayloadBuffer; ///< Sized for the max meshlet group count over all raster techniques.
+
+		BufferHandle m_bufferDepedency; ///< Render graph handle imported from m_drawIndexedIndirectArgsBuffer; handed out as GpuVisibilityOutput::m_someBufferHandle.
+	} m_runCtx; ///< State shared by all populateRenderGraphInternal() calls of the same frame.
+
+	class Counts // Per-technique counts returned by countTechnique(). NOTE(review): member meanings are inferred from the inline code countTechnique() replaced; confirm against its definition.
+	{
+	public:
+		U32 m_aabbCount; ///< Presumably the number of renderable bounding volumes of the technique.
+		U32 m_bucketCount; ///< Presumably the number of render state buckets of the technique.
+		U32 m_legacyGeometryFlowUserCount; ///< Presumably the users of buckets without meshlet groups.
+		U32 m_modernGeometryFlowUserCount; ///< Presumably the users of buckets with meshlet groups.
+		U32 m_meshletGroupCount; ///< Presumably the sum of the per-bucket meshlet group counts.
+		U32 m_allUserCount; ///< Presumably legacy + modern user counts; zero triggers the early exit in populateRenderGraphInternal().
+
+		Counts max(const Counts& b) const // Returns the member-wise maximum of this and b.
+		{
+			Counts out;
+#define ANKI_MAX(member) out.member = anki::max(member, b.member)
+			ANKI_MAX(m_aabbCount);
+			ANKI_MAX(m_bucketCount);
+			ANKI_MAX(m_legacyGeometryFlowUserCount);
+			ANKI_MAX(m_modernGeometryFlowUserCount);
+			ANKI_MAX(m_meshletGroupCount);
+			ANKI_MAX(m_allUserCount);
+#undef ANKI_MAX
+			return out;
+		}
+	};
+
+	Counts countTechnique(RenderingTechnique t);
+
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 };
 };
 
 

+ 2 - 1
AnKi/Resource/RenderingKey.h

@@ -31,7 +31,8 @@ enum class RenderingTechniqueBit : U8
 	kForward = 1 << 2,
 	kForward = 1 << 2,
 	kRtShadow = 1 << 3,
 	kRtShadow = 1 << 3,
 
 
-	kAllRt = kRtShadow
+	kAllRt = kRtShadow,
+	kAllRaster = kGBuffer | kDepth | kForward
 };
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(RenderingTechniqueBit)
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(RenderingTechniqueBit)