Browse Source

Finalize the shadows HZBs

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
6bc349e534

+ 6 - 0
AnKi/Renderer/Common.h

@@ -162,6 +162,12 @@ U32 findBestCacheEntry(U64 uuid, Timestamp crntTimestamp, const TCacheEntryArray
 
 
 	return outCacheEntryIdx;
 	return outCacheEntryIdx;
 }
 }
+
+/// Map a directional light shadow cascade index to a detail level. Cascades 0 and 1 get level 0 (high detail), every other cascade gets level 1
+/// (lower detail).
+inline U32 chooseDirectionalLightShadowCascadeDetail(U32 cascade)
+{
+	if(cascade > 1)
+	{
+		return 1;
+	}
+
+	return 0;
+}
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 112 - 57
AnKi/Renderer/Hzb.cpp

@@ -51,8 +51,13 @@ Error Hzb::init()
 	m_clearHzb.m_grProg.reset(&variant->getProgram());
 	m_clearHzb.m_grProg.reset(&variant->getProgram());
 
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbGenPyramid.ankiprogbin", m_mipmapping.m_prog));
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbGenPyramid.ankiprogbin", m_mipmapping.m_prog));
-	m_mipmapping.m_prog->getOrCreateVariant(variant);
-	m_mipmapping.m_grProg.reset(&variant->getProgram());
+	for(U32 i = 0; i < 2; ++i)
+	{
+		ShaderProgramResourceVariantInitInfo variantInit(m_mipmapping.m_prog);
+		variantInit.addMutation("REDUCTION_TYPE", i);
+		m_mipmapping.m_prog->getOrCreateVariant(variantInit, variant);
+		m_mipmapping.m_grProgs[i].reset(&variant->getProgram());
+	}
 
 
 	m_hzbRtDescr = getRenderer().create2DRenderTargetDescription(ConfigSet::getSingleton().getRHzbWidth(), ConfigSet::getSingleton().getRHzbHeight(),
 	m_hzbRtDescr = getRenderer().create2DRenderTargetDescription(ConfigSet::getSingleton().getRHzbWidth(), ConfigSet::getSingleton().getRHzbHeight(),
 																 Format::kR32_Uint, "HZB U32");
 																 Format::kR32_Uint, "HZB U32");
@@ -63,17 +68,35 @@ Error Hzb::init()
 	{
 	{
 		RendererString name;
 		RendererString name;
 		name.sprintf("Shadow HZB U32 #%u", i);
 		name.sprintf("Shadow HZB U32 #%u", i);
-		m_hzbShadowRtDescrs[i] = getRenderer().create2DRenderTargetDescription(
-			ConfigSet::getSingleton().getRHzbShadowSize(), ConfigSet::getSingleton().getRHzbShadowSize(), Format::kR32_Uint, name);
+
+		// Calc the size of the cascade
+		UVec2 smSize(ConfigSet::getSingleton().getRShadowMappingTileResolution());
+		smSize >>= chooseDirectionalLightShadowCascadeDetail(i);
+
+		// No need for the HZB to have the same quality of the cascade
+		const UVec2 hzbSize = smSize >> 1;
+
+		m_hzbShadowRtDescrs[i] = getRenderer().create2DRenderTargetDescription(hzbSize.x(), hzbSize.y(), Format::kR32_Uint, name);
 		m_hzbShadowRtDescrs[i].m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height, 1));
 		m_hzbShadowRtDescrs[i].m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height, 1));
 		m_hzbShadowRtDescrs[i].bake();
 		m_hzbShadowRtDescrs[i].bake();
 	}
 	}
 
 
-	BufferInitInfo buffInit("HiZCounterBuffer");
+	BufferInitInfo buffInit("HzbCounterBuffer");
 	buffInit.m_size = sizeof(U32);
 	buffInit.m_size = sizeof(U32);
 	buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
 	buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
 	m_mipmapping.m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
 	m_mipmapping.m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
 
 
+	// Create one counter buffer per shadow cascade. Each shadow HZB mip generation pass binds its own counter buffer
+	for(U32 i = 0; i < kMaxShadowCascades; ++i)
+	{
+		// Put the cascade index in the debug name. The format string needs a %u, otherwise the index argument is ignored and all the
+		// buffers end up with the same name
+		RendererString name;
+		name.sprintf("ShadowHzbCounterBuffer #%u", i);
+
+		// Same size and usage as the primary HZB counter buffer. Named differently to avoid shadowing the outer buffInit
+		BufferInitInfo shadowBuffInit(name);
+		shadowBuffInit.m_size = sizeof(U32);
+		shadowBuffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
+		m_mipmapping.m_shadowCounterBuffers[i] = GrManager::getSingleton().newBuffer(shadowBuffInit);
+	}
+
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
@@ -90,7 +113,7 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 		m_runCtx.m_hzbShadowRts[i] = rgraph.newRenderTarget(m_hzbShadowRtDescrs[i]);
 		m_runCtx.m_hzbShadowRts[i] = rgraph.newRenderTarget(m_hzbShadowRtDescrs[i]);
 	}
 	}
 
 
-	// Clear main RT
+	// Clear primary HZB
 	{
 	{
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB clear");
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB clear");
 		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
 		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
@@ -115,7 +138,7 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 		});
 		});
 	}
 	}
 
 
-	// Clear SM RTs
+	// Clear SM HZBs
 	for(U32 i = 0; i < cascadeCount; ++i)
 	for(U32 i = 0; i < cascadeCount; ++i)
 	{
 	{
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB clear");
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB clear");
@@ -129,14 +152,19 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 			TextureSubresourceInfo firstMipSubresource;
 			TextureSubresourceInfo firstMipSubresource;
 			rctx.bindImage(0, 0, m_runCtx.m_hzbShadowRts[i], firstMipSubresource);
 			rctx.bindImage(0, 0, m_runCtx.m_hzbShadowRts[i], firstMipSubresource);
 
 
-			UVec4 clearColor(1u);
+			// See the comments in the class on what this -0 means
+			const F32 negativeZero = -0.0f;
+			U32 negativeZerou;
+			memcpy(&negativeZerou, &negativeZero, sizeof(U32));
+			ANKI_ASSERT(negativeZerou > 0);
+			UVec4 clearColor(negativeZerou);
 			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
 			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
 
 
 			dispatchPPCompute(cmdb, 8, 8, m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height);
 			dispatchPPCompute(cmdb, 8, 8, m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height);
 		});
 		});
 	}
 	}
 
 
-	// Reproject
+	// Reproject all HZBs
 	{
 	{
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB reprojection");
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB reprojection");
 
 
@@ -174,7 +202,7 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 		});
 		});
 	}
 	}
 
 
-	// Mipmap
+	// Mipmap primary HZB
 	{
 	{
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB mip gen");
 		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB mip gen");
 
 
@@ -188,66 +216,93 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 		}
 		}
 
 
 		pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
 		pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			runMipmaping(rgraphCtx, m_hzbRtDescr, m_runCtx.m_hzbRt, m_mipmapping.m_counterBufferZeroed, *m_mipmapping.m_counterBuffer,
+						 *m_mipmapping.m_grProgs[0]);
+		});
+	}
 
 
-			TextureSubresourceInfo firstMipSubresource;
-			const U32 mipsToCompute = m_hzbRtDescr.m_mipmapCount - 1;
+	// Mipmap shadow HZBs
+	for(U32 i = 0; i < cascadeCount; ++i)
+	{
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB mip gen");
 
 
-			// Zero the counter buffer once before everything else
-			if(!m_mipmapping.m_counterBufferZeroed) [[unlikely]]
-			{
-				m_mipmapping.m_counterBufferZeroed = true;
+		pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kSampledCompute, firstMipSubresource);
 
 
-				cmdb.fillBuffer(m_mipmapping.m_counterBuffer.get(), 0, kMaxPtrSize, 0);
+		for(U32 mip = 1; mip < m_hzbShadowRtDescrs[i].m_mipmapCount; ++mip)
+		{
+			TextureSubresourceInfo subresource;
+			subresource.m_firstMipmap = mip;
+			pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, subresource);
+		}
 
 
-				const BufferBarrierInfo barrier = {m_mipmapping.m_counterBuffer.get(), BufferUsageBit::kTransferDestination,
-												   BufferUsageBit::kStorageComputeWrite, 0, kMaxPtrSize};
-				cmdb.setPipelineBarrier({}, {&barrier, 1}, {});
-			}
+		pass.setWork([this, i](RenderPassWorkContext& rgraphCtx) {
+			runMipmaping(rgraphCtx, m_hzbShadowRtDescrs[i], m_runCtx.m_hzbShadowRts[i], m_mipmapping.m_shadowCounterBufferZeroed[i],
+						 *m_mipmapping.m_shadowCounterBuffers[i], *m_mipmapping.m_grProgs[1]);
+		});
+	}
+}
 
 
-			cmdb.bindShaderProgram(m_mipmapping.m_grProg.get());
+void Hzb::runMipmaping(RenderPassWorkContext& rgraphCtx, const RenderTargetDescription& rtDescr, RenderTargetHandle rtHandle,
+					   Bool& counterBufferZeroed, Buffer& counterBuffer, ShaderProgram& reductionProgram)
+{
+	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
-			varAU2(dispatchThreadGroupCountXY);
-			varAU2(workGroupOffset); // needed if Left and Top are not 0,0
-			varAU2(numWorkGroupsAndMips);
-			varAU4(rectInfo) = initAU4(0, 0, m_hzbRtDescr.m_width, m_hzbRtDescr.m_height);
-			SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
+	TextureSubresourceInfo firstMipSubresource;
+	const U32 mipsToCompute = rtDescr.m_mipmapCount - 1;
 
 
-			struct Uniforms
-			{
-				U32 m_threadGroupCount;
-				U32 m_mipmapCount;
-				U32 m_padding0;
-				U32 m_padding1;
-			} pc;
+	// Zero the counter buffer once before everything else
+	if(!counterBufferZeroed) [[unlikely]]
+	{
+		counterBufferZeroed = true;
 
 
-			pc.m_threadGroupCount = numWorkGroupsAndMips[0];
-			pc.m_mipmapCount = numWorkGroupsAndMips[1];
+		cmdb.fillBuffer(&counterBuffer, 0, kMaxPtrSize, 0);
 
 
-			cmdb.setPushConstants(&pc, sizeof(pc));
+		const BufferBarrierInfo barrier = {&counterBuffer, BufferUsageBit::kTransferDestination, BufferUsageBit::kStorageComputeWrite, 0,
+										   kMaxPtrSize};
+		cmdb.setPipelineBarrier({}, {&barrier, 1}, {});
+	}
 
 
-			constexpr U32 maxMipsSpdCanProduce = 12;
-			for(U32 mip = 0; mip < maxMipsSpdCanProduce; ++mip)
-			{
-				TextureSubresourceInfo subresource;
-				if(mip < mipsToCompute)
-				{
-					subresource.m_firstMipmap = mip + 1;
-				}
-				else
-				{
-					subresource.m_firstMipmap = 1;
-				}
-
-				rgraphCtx.bindImage(0, 0, m_runCtx.m_hzbRt, subresource, mip);
-			}
+	cmdb.bindShaderProgram(&reductionProgram);
 
 
-			cmdb.bindStorageBuffer(0, 1, m_mipmapping.m_counterBuffer.get(), 0, kMaxPtrSize);
-			rgraphCtx.bindTexture(0, 2, m_runCtx.m_hzbRt, firstMipSubresource);
+	varAU2(dispatchThreadGroupCountXY);
+	varAU2(workGroupOffset); // needed if Left and Top are not 0,0
+	varAU2(numWorkGroupsAndMips);
+	varAU4(rectInfo) = initAU4(0, 0, rtDescr.m_width, rtDescr.m_height);
+	SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
 
 
-			cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
-		});
+	struct Uniforms
+	{
+		U32 m_threadGroupCount;
+		U32 m_mipmapCount;
+		U32 m_padding0;
+		U32 m_padding1;
+	} pc;
+
+	pc.m_threadGroupCount = numWorkGroupsAndMips[0];
+	pc.m_mipmapCount = numWorkGroupsAndMips[1];
+
+	cmdb.setPushConstants(&pc, sizeof(pc));
+
+	constexpr U32 maxMipsSpdCanProduce = 12;
+	for(U32 mip = 0; mip < maxMipsSpdCanProduce; ++mip)
+	{
+		TextureSubresourceInfo subresource;
+		if(mip < mipsToCompute)
+		{
+			subresource.m_firstMipmap = mip + 1;
+		}
+		else
+		{
+			subresource.m_firstMipmap = 1;
+		}
+
+		rgraphCtx.bindImage(0, 0, rtHandle, subresource, mip);
 	}
 	}
+
+	cmdb.bindStorageBuffer(0, 1, &counterBuffer, 0, kMaxPtrSize);
+	rgraphCtx.bindTexture(0, 2, rtHandle, firstMipSubresource);
+
+	cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 12 - 1
AnKi/Renderer/Hzb.h

@@ -19,6 +19,10 @@ namespace anki {
 /// atomic min on integers and -0.0 > 1.0 > 0.0 if seen as U32. Downscaling also uses min because we want the farthest value. When testing against the
 /// atomic min on integers and -0.0 > 1.0 > 0.0 if seen as U32. Downscaling also uses min because we want the farthest value. When testing against the
 /// HZB we only need to bring depth back to normal which is 0.0 for near and 1.0 for far. So it's a plain 1.0-x. So far becomes 1.0, near is 0.0 and
 /// HZB we only need to bring depth back to normal which is 0.0 for near and 1.0 for far. So it's a plain 1.0-x. So far becomes 1.0, near is 0.0 and
 /// untouched becomes 1.0 which translates to far and it's what we want.
 /// untouched becomes 1.0 which translates to far and it's what we want.
+///
+/// Shadows are a little bit different. Clear clears to -0.0. Reprojection treats 0 as near and 1 as far and chooses the min value because we want to
+/// cull everything behind the shadow. HZB generation reinterprets the values and uses max. Upon testing we have 0 for near, 1 for far and -0.0 for
+/// clear which is what we want. If nothing reprojected (=clear color) then cull early.
 class Hzb : public RendererObject
 class Hzb : public RendererObject
 {
 {
 public:
 public:
@@ -53,9 +57,13 @@ private:
 	{
 	{
 	public:
 	public:
 		ShaderProgramResourcePtr m_prog;
 		ShaderProgramResourcePtr m_prog;
-		ShaderProgramPtr m_grProg;
+		Array<ShaderProgramPtr, 2> m_grProgs;
+
 		BufferPtr m_counterBuffer;
 		BufferPtr m_counterBuffer;
 		Bool m_counterBufferZeroed = false;
 		Bool m_counterBufferZeroed = false;
+
+		Array<BufferPtr, kMaxShadowCascades> m_shadowCounterBuffers;
+		Array<Bool, kMaxShadowCascades> m_shadowCounterBufferZeroed = {};
 	} m_mipmapping;
 	} m_mipmapping;
 
 
 	class
 	class
@@ -64,6 +72,9 @@ private:
 		RenderTargetHandle m_hzbRt;
 		RenderTargetHandle m_hzbRt;
 		Array<RenderTargetHandle, kMaxShadowCascades> m_hzbShadowRts;
 		Array<RenderTargetHandle, kMaxShadowCascades> m_hzbShadowRts;
 	} m_runCtx;
 	} m_runCtx;
+
+	static void runMipmaping(RenderPassWorkContext& rgraphCtx, const RenderTargetDescription& rtDescr, RenderTargetHandle rtHandle,
+							 Bool& counterBufferZeroed, Buffer& counterBuffer, ShaderProgram& reductionProgram);
 };
 };
 /// @}
 /// @}
 
 

+ 1 - 1
AnKi/Renderer/ShadowMapping.cpp

@@ -307,7 +307,7 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForPass
 				drawcallCounts[activeCascades] = 1; // Doesn't matter
 				drawcallCounts[activeCascades] = 1; // Doesn't matter
 
 
 				// Change the quality per cascade
 				// Change the quality per cascade
-				hierarchies[activeCascades] = (cascade <= 1) ? (kTileAllocHierarchyCount - 1) : (kTileAllocHierarchyCount - 2);
+				hierarchies[activeCascades] = kTileAllocHierarchyCount - 1 - chooseDirectionalLightShadowCascadeDetail(cascade);
 				renderQueueElementsLods[activeCascades] = (cascade == 0) ? 0 : (kMaxLodCount - 1);
 				renderQueueElementsLods[activeCascades] = (cascade == 0) ? 0 : (kMaxLodCount - 1);
 
 
 				++activeCascades;
 				++activeCascades;

+ 7 - 3
AnKi/Shaders/HzbGenPyramid.ankiprog

@@ -3,7 +3,7 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
-// The 1st reduction is average depth. The next reductions are max depth
+#pragma anki mutator REDUCTION_TYPE 0 1 // 0: min 1: max
 
 
 #pragma anki start comp
 #pragma anki start comp
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
@@ -79,8 +79,12 @@ void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value)
 
 
 AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
 AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
 {
 {
-	const F32 minValue = min(v0.x, min(v1.x, min(v2.x, v3.x)));
-	return AF4(minValue, 0.0, 0.0, 0.0);
+#if REDUCTION_TYPE == 0
+	const F32 value = min(v0.x, min(v1.x, min(v2.x, v3.x)));
+#else
+	const F32 value = max(v0.x, max(v1.x, max(v2.x, v3.x)));
+#endif
+	return AF4(value, 0.0, 0.0, 0.0);
 }
 }
 
 
 #include <ThirdParty/FidelityFX/ffx_spd.h>
 #include <ThirdParty/FidelityFX/ffx_spd.h>

+ 1 - 1
AnKi/Shaders/HzbReprojection.ankiprog

@@ -67,7 +67,7 @@
 
 
 		if(all(texCoordsf >= Vec2(0.0f, 0.0f)) && all(texCoordsf < hzbTexSize))
 		if(all(texCoordsf >= Vec2(0.0f, 0.0f)) && all(texCoordsf < hzbTexSize))
 		{
 		{
-			InterlockedMax(g_shadowsHzbUavTex[i][IVec2(texCoordsf)], asuint(smDepth));
+			InterlockedMin(g_shadowsHzbUavTex[i][IVec2(texCoordsf)], asuint(smDepth));
 		}
 		}
 	}
 	}
 #endif
 #endif