Selaa lähdekoodia

Merge some RG passes into one

Panagiotis Christopoulos Charitos 2 vuotta sitten
vanhempi
sitoutus
0eba53a830
2 muutettua tiedostoa jossa 81 lisäystä ja 47 poistoa
  1. 70 45
      AnKi/Renderer/Utils/HzbGenerator.cpp
  2. 11 2
      AnKi/Renderer/Utils/HzbGenerator.h

+ 70 - 45
AnKi/Renderer/Utils/HzbGenerator.cpp

@@ -93,9 +93,11 @@ Error HzbGenerator::init()
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
-void HzbGenerator::populateRenderGraphInternal(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
-											   U32 counterBufferElement, RenderGraphDescription& rgraph, CString customName) const
+void HzbGenerator::populateRenderGraphInternal(ConstWeakArray<DispatchInput> dispatchInputs, U32 firstCounterBufferElement, CString customName,
+											   RenderGraphDescription& rgraph) const
 {
 {
+	const U32 dispatchCount = dispatchInputs.getSize();
+
 #if ANKI_ASSERTIONS_ENABLED
 #if ANKI_ASSERTIONS_ENABLED
 	if(m_crntFrame != getRenderer().getFrameCount())
 	if(m_crntFrame != getRenderer().getFrameCount())
 	{
 	{
@@ -103,72 +105,90 @@ void HzbGenerator::populateRenderGraphInternal(RenderTargetHandle srcDepthRt, UV
 		m_counterBufferElementUseMask = 0;
 		m_counterBufferElementUseMask = 0;
 	}
 	}
 
 
-	ANKI_ASSERT(!(m_counterBufferElementUseMask & (1 << counterBufferElement)));
-	m_counterBufferElementUseMask |= (1 << counterBufferElement);
+	for(U32 i = 0; i < dispatchCount; ++i)
+	{
+		ANKI_ASSERT(!(m_counterBufferElementUseMask & (1 << (firstCounterBufferElement + i))));
+		m_counterBufferElementUseMask |= (1 << (firstCounterBufferElement + i));
+	}
 #endif
 #endif
 
 
-	TextureSubresourceInfo firstMipSubresource;
-
-	const U32 hzbMipCount = min(kMaxSpdMips, computeMaxMipmapCount2d(dstHzbRtSize.x(), dstHzbRtSize.y()));
-
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass((customName.isEmpty()) ? "HZB generation" : customName);
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass((customName.isEmpty()) ? "HZB generation" : customName);
 
 
-	pass.newTextureDependency(srcDepthRt, TextureUsageBit::kSampledCompute, firstMipSubresource);
-	pass.newTextureDependency(dstHzbRt, TextureUsageBit::kImageComputeWrite);
+	Array<DispatchInput, kMaxShadowCascades> dispatchInputsCopy;
+	for(U32 i = 0; i < dispatchCount; ++i)
+	{
+		TextureSubresourceInfo firstMipSubresource;
+		pass.newTextureDependency(dispatchInputs[i].m_srcDepthRt, TextureUsageBit::kSampledCompute, firstMipSubresource);
+		pass.newTextureDependency(dispatchInputs[i].m_dstHzbRt, TextureUsageBit::kImageComputeWrite);
 
 
-	pass.setWork([this, hzbMipCount, srcDepthRt, srcDepthRtSize, dstHzbRt, dstHzbRtSize, counterBufferElement](RenderPassWorkContext& rgraphCtx) {
-		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+		dispatchInputsCopy[i] = dispatchInputs[i];
+	}
 
 
-		const U32 mipsToCompute = hzbMipCount;
+	pass.setWork([this, dispatchInputsCopy, dispatchCount, firstCounterBufferElement](RenderPassWorkContext& rgraphCtx) {
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
 		cmdb.bindShaderProgram(m_genPyramidGrProg.get());
 		cmdb.bindShaderProgram(m_genPyramidGrProg.get());
+		cmdb.bindSampler(0, 3, m_maxSampler.isCreated() ? m_maxSampler.get() : getRenderer().getSamplers().m_trilinearClamp.get());
 
 
-		varAU2(dispatchThreadGroupCountXY);
-		varAU2(workGroupOffset); // needed if Left and Top are not 0,0
-		varAU2(numWorkGroupsAndMips);
-		varAU4(rectInfo) = initAU4(0, 0, dstHzbRtSize.x() * 2, dstHzbRtSize.y() * 2);
-		SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
-
-		struct Uniforms
+		for(U32 dispatch = 0; dispatch < dispatchCount; ++dispatch)
 		{
 		{
-			Vec2 m_invSrcTexSize;
-			U32 m_threadGroupCount;
-			U32 m_mipmapCount;
-		} pc;
+			const DispatchInput& in = dispatchInputsCopy[dispatch];
 
 
-		pc.m_invSrcTexSize = 1.0f / Vec2(dstHzbRtSize * 2);
-		pc.m_threadGroupCount = numWorkGroupsAndMips[0];
-		pc.m_mipmapCount = numWorkGroupsAndMips[1];
+			const U32 hzbMipCount = min(kMaxSpdMips, computeMaxMipmapCount2d(in.m_dstHzbRtSize.x(), in.m_dstHzbRtSize.y()));
 
 
-		cmdb.setPushConstants(&pc, sizeof(pc));
+			const U32 mipsToCompute = hzbMipCount;
 
 
-		for(U32 mip = 0; mip < kMaxSpdMips; ++mip)
-		{
-			TextureSubresourceInfo subresource;
-			if(mip < mipsToCompute)
+			varAU2(dispatchThreadGroupCountXY);
+			varAU2(workGroupOffset); // needed if Left and Top are not 0,0
+			varAU2(numWorkGroupsAndMips);
+			varAU4(rectInfo) = initAU4(0, 0, in.m_dstHzbRtSize.x() * 2, in.m_dstHzbRtSize.y() * 2);
+			SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
+
+			struct Uniforms
 			{
 			{
-				subresource.m_firstMipmap = mip;
-			}
-			else
+				Vec2 m_invSrcTexSize;
+				U32 m_threadGroupCount;
+				U32 m_mipmapCount;
+			} pc;
+
+			pc.m_invSrcTexSize = 1.0f / Vec2(in.m_dstHzbRtSize * 2);
+			pc.m_threadGroupCount = numWorkGroupsAndMips[0];
+			pc.m_mipmapCount = numWorkGroupsAndMips[1];
+
+			cmdb.setPushConstants(&pc, sizeof(pc));
+
+			for(U32 mip = 0; mip < kMaxSpdMips; ++mip)
 			{
 			{
-				subresource.m_firstMipmap = 0; // Put something random
+				TextureSubresourceInfo subresource;
+				if(mip < mipsToCompute)
+				{
+					subresource.m_firstMipmap = mip;
+				}
+				else
+				{
+					subresource.m_firstMipmap = 0; // Put something random
+				}
+
+				rgraphCtx.bindImage(0, 0, in.m_dstHzbRt, subresource, mip);
 			}
 			}
 
 
-			rgraphCtx.bindImage(0, 0, dstHzbRt, subresource, mip);
-		}
+			cmdb.bindStorageBuffer(0, 1, m_counterBuffer.get(), (firstCounterBufferElement + dispatch) * m_counterBufferElementSize, sizeof(U32));
+			rgraphCtx.bindTexture(0, 2, in.m_srcDepthRt, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 
 
-		cmdb.bindStorageBuffer(0, 1, m_counterBuffer.get(), counterBufferElement * m_counterBufferElementSize, sizeof(U32));
-		rgraphCtx.bindTexture(0, 2, srcDepthRt, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-		cmdb.bindSampler(0, 3, m_maxSampler.isCreated() ? m_maxSampler.get() : getRenderer().getSamplers().m_trilinearClamp.get());
-
-		cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
+			cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
+		}
 	});
 	});
 }
 }
 
 
 void HzbGenerator::populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
 void HzbGenerator::populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
 									   RenderGraphDescription& rgraph, CString customName) const
 									   RenderGraphDescription& rgraph, CString customName) const
 {
 {
-	populateRenderGraphInternal(srcDepthRt, srcDepthRtSize, dstHzbRt, dstHzbRtSize, 0, rgraph, customName);
+	DispatchInput in;
+	in.m_dstHzbRt = dstHzbRt;
+	in.m_dstHzbRtSize = dstHzbRtSize;
+	in.m_srcDepthRt = srcDepthRt;
+	in.m_srcDepthRtSize = srcDepthRtSize;
+	populateRenderGraphInternal({&in, 1}, 0, customName, rgraph);
 }
 }
 
 
 void HzbGenerator::populateRenderGraphDirectionalLight(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize,
 void HzbGenerator::populateRenderGraphDirectionalLight(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize,
@@ -261,10 +281,15 @@ void HzbGenerator::populateRenderGraphDirectionalLight(RenderTargetHandle srcDep
 	}
 	}
 
 
 	// Generate the HZBs
 	// Generate the HZBs
+	Array<DispatchInput, kMaxShadowCascades> inputs;
 	for(U32 i = 0; i < cascadeCount; ++i)
 	for(U32 i = 0; i < cascadeCount; ++i)
 	{
 	{
-		populateRenderGraphInternal(depthRts[i], dstHzbSizes[i] * 2, dstHzbRts[i], dstHzbSizes[i], i + 1, rgraph, "HZB generation cascade");
+		inputs[i].m_dstHzbRt = dstHzbRts[i];
+		inputs[i].m_dstHzbRtSize = dstHzbSizes[i];
+		inputs[i].m_srcDepthRt = depthRts[i];
+		inputs[i].m_srcDepthRtSize = dstHzbSizes[i] * 2;
 	}
 	}
+	populateRenderGraphInternal({&inputs[0], cascadeCount}, 1, "HZB generation shadow cascades", rgraph);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 11 - 2
AnKi/Renderer/Utils/HzbGenerator.h

@@ -26,6 +26,15 @@ public:
 											 const Mat4& invViewProjMat, RenderGraphDescription& rgraph) const;
 											 const Mat4& invViewProjMat, RenderGraphDescription& rgraph) const;
 
 
 private:
 private:
+	class DispatchInput
+	{
+	public:
+		RenderTargetHandle m_srcDepthRt;
+		UVec2 m_srcDepthRtSize;
+		RenderTargetHandle m_dstHzbRt;
+		UVec2 m_dstHzbRtSize;
+	};
+
 	static constexpr U32 kMaxSpdMips = 12;
 	static constexpr U32 kMaxSpdMips = 12;
 
 
 	ShaderProgramResourcePtr m_genPyramidProg;
 	ShaderProgramResourcePtr m_genPyramidProg;
@@ -54,8 +63,8 @@ private:
 	mutable U8 m_counterBufferElementUseMask = 0;
 	mutable U8 m_counterBufferElementUseMask = 0;
 #endif
 #endif
 
 
-	void populateRenderGraphInternal(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
-									 U32 counterBufferElement, RenderGraphDescription& rgraph, CString customName) const;
+	void populateRenderGraphInternal(ConstWeakArray<DispatchInput> dispatchInputs, U32 firstCounterBufferElement, CString customName,
+									 RenderGraphDescription& rgraph) const;
 };
 };
 /// @}
 /// @}