Browse Source

Add HZB reprojection for shadows

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
0a81e77bfd

+ 4 - 3
AnKi/Renderer/ConfigVars.defs.h

@@ -78,6 +78,7 @@ ANKI_CONFIG_VAR_U8(RFsrQuality, 1, 0, 2, "0: Use bilinear, 1: FSR low quality, 2
 ANKI_CONFIG_VAR_U8(RDlssQuality, 2, 0, 3, "0: Disabled, 1: Performance, 2: Balanced, 3: Quality")
 ANKI_CONFIG_VAR_F32(RSharpness, ((ANKI_PLATFORM_MOBILE) ? 0.0f : 0.8f), 0.0f, 1.0f, "Sharpen the image. It's a factor")
 
-// HiZ
-ANKI_CONFIG_VAR_U32(RHiZWidth, 512, 128, 4 * 1024, "HiZ map width")
-ANKI_CONFIG_VAR_U32(RHiZHeight, 256, 128, 4 * 1024, "HiZ map height")
+// HZB
+ANKI_CONFIG_VAR_U32(RHzbWidth, 512, 16, 4 * 1024, "HZB map width")
+ANKI_CONFIG_VAR_U32(RHzbHeight, 256, 16, 4 * 1024, "HZB map height")
+ANKI_CONFIG_VAR_U32(RHzbShadowSize, 128, 16, 4 * 1024, "Shadows HZB map width and height")

+ 78 - 18
AnKi/Renderer/Hzb.cpp

@@ -6,7 +6,9 @@
 #include <AnKi/Renderer/Hzb.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/GBuffer.h>
+#include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Core/ConfigSet.h>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
 
 #if ANKI_COMPILER_GCC_COMPATIBLE
 #	pragma GCC diagnostic push
@@ -32,27 +34,41 @@ Error Hzb::init()
 	registerDebugRenderTarget("Hzb");
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbReprojection.ankiprogbin", m_reproj.m_prog));
-
 	const ShaderProgramResourceVariant* variant;
-	m_reproj.m_prog->getOrCreateVariant(variant);
-	m_reproj.m_grProg.reset(&variant->getProgram());
+	ShaderProgramResourceVariantInitInfo variantInit(m_reproj.m_prog);
+	for(U32 i = 0; i < m_reproj.m_grProgs.getSize(); ++i)
+	{
+		variantInit.addMutation("SHADOW_TEXTURE_COUNT", i);
+		m_reproj.m_prog->getOrCreateVariant(variantInit, variant);
+		m_reproj.m_grProgs[i].reset(&variant->getProgram());
+	}
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/ClearTextureCompute.ankiprogbin", m_clearHzb.m_prog));
-	ShaderProgramResourceVariantInitInfo variantInit(m_clearHzb.m_prog);
-	variantInit.addMutation("TEXTURE_DIMENSIONS", 2);
-	variantInit.addMutation("COMPONENT_TYPE", 1);
-	m_clearHzb.m_prog->getOrCreateVariant(variantInit, variant);
+	ShaderProgramResourceVariantInitInfo variantInit2(m_clearHzb.m_prog);
+	variantInit2.addMutation("TEXTURE_DIMENSIONS", 2);
+	variantInit2.addMutation("COMPONENT_TYPE", 1);
+	m_clearHzb.m_prog->getOrCreateVariant(variantInit2, variant);
 	m_clearHzb.m_grProg.reset(&variant->getProgram());
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/HzbGenPyramid.ankiprogbin", m_mipmapping.m_prog));
 	m_mipmapping.m_prog->getOrCreateVariant(variant);
 	m_mipmapping.m_grProg.reset(&variant->getProgram());
 
-	m_hzbRtDescr = getRenderer().create2DRenderTargetDescription(ConfigSet::getSingleton().getRHiZWidth(), ConfigSet::getSingleton().getRHiZHeight(),
-																 Format::kR32_Uint, "Hzb U32");
+	m_hzbRtDescr = getRenderer().create2DRenderTargetDescription(ConfigSet::getSingleton().getRHzbWidth(), ConfigSet::getSingleton().getRHzbHeight(),
+																 Format::kR32_Uint, "HZB U32");
 	m_hzbRtDescr.m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbRtDescr.m_width, m_hzbRtDescr.m_height, 1));
 	m_hzbRtDescr.bake();
 
+	for(U32 i = 0; i < kMaxShadowCascades; ++i)
+	{
+		RendererString name;
+		name.sprintf("Shadow HZB U32 #%u", i);
+		m_hzbShadowRtDescrs[i] = getRenderer().create2DRenderTargetDescription(
+			ConfigSet::getSingleton().getRHzbShadowSize(), ConfigSet::getSingleton().getRHzbShadowSize(), Format::kR32_Uint, name);
+		m_hzbShadowRtDescrs[i].m_mipmapCount = U8(computeMaxMipmapCount2d(m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height, 1));
+		m_hzbShadowRtDescrs[i].bake();
+	}
+
 	BufferInitInfo buffInit("HiZCounterBuffer");
 	buffInit.m_size = sizeof(U32);
 	buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
@@ -64,13 +80,19 @@ Error Hzb::init()
 void Hzb::populateRenderGraph(RenderingContext& ctx)
 {
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+	const U32 cascadeCount = ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount;
+	TextureSubresourceInfo firstMipSubresource;
 
+	// Create RTs
 	m_runCtx.m_hzbRt = rgraph.newRenderTarget(m_hzbRtDescr);
-	TextureSubresourceInfo firstMipSubresource;
+	for(U32 i = 0; i < cascadeCount; ++i)
+	{
+		m_runCtx.m_hzbShadowRts[i] = rgraph.newRenderTarget(m_hzbShadowRtDescrs[i]);
+	}
 
-	// Clear RT
+	// Clear main RT
 	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Hzb clear");
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB clear");
 		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
 
 		pass.setWork([this](RenderPassWorkContext& rctx) {
@@ -84,34 +106,72 @@ void Hzb::populateRenderGraph(RenderingContext& ctx)
 			UVec4 clearColor(0u);
 			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
 
-			dispatchPPCompute(cmdb, 8, 8, 1, m_hzbRtDescr.m_width, m_hzbRtDescr.m_height, 1);
+			dispatchPPCompute(cmdb, 8, 8, m_hzbRtDescr.m_width, m_hzbRtDescr.m_height);
+		});
+	}
+
+	// Clear SM RTs
+	for(U32 i = 0; i < cascadeCount; ++i)
+	{
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Shadow HZB clear");
+		pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, firstMipSubresource);
+
+		pass.setWork([this, i](RenderPassWorkContext& rctx) {
+			CommandBuffer& cmdb = *rctx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_clearHzb.m_grProg.get());
+
+			TextureSubresourceInfo firstMipSubresource;
+			rctx.bindImage(0, 0, m_runCtx.m_hzbShadowRts[i], firstMipSubresource);
+
+			UVec4 clearColor(1u);
+			cmdb.setPushConstants(&clearColor, sizeof(clearColor));
+
+			dispatchPPCompute(cmdb, 8, 8, m_hzbShadowRtDescrs[i].m_width, m_hzbShadowRtDescrs[i].m_height);
 		});
 	}
 
 	// Reproject
 	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Hzb reprojection");
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB reprojection");
+
 		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kImageComputeWrite, firstMipSubresource);
+		for(U32 i = 0; i < cascadeCount; ++i)
+		{
+			pass.newTextureDependency(m_runCtx.m_hzbShadowRts[i], TextureUsageBit::kImageComputeWrite, firstMipSubresource);
+		}
 		pass.newTextureDependency(getRenderer().getGBuffer().getPreviousFrameDepthRt(), TextureUsageBit::kSampledCompute);
 
 		pass.setWork([this, &ctx](RenderPassWorkContext& rctx) {
+			const U32 cascadeCount = ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount;
 			CommandBuffer& cmdb = *rctx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_reproj.m_grProg.get());
+			cmdb.bindShaderProgram(m_reproj.m_grProgs[cascadeCount].get());
 
 			rctx.bindTexture(0, 0, getRenderer().getGBuffer().getPreviousFrameDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 			TextureSubresourceInfo firstMipSubresource;
 			rctx.bindImage(0, 1, m_runCtx.m_hzbRt, firstMipSubresource);
 
-			cmdb.setPushConstants(&ctx.m_matrices.m_reprojection, sizeof(Mat4));
+			HzbUniforms* unis = allocateAndBindUniforms<HzbUniforms*>(sizeof(*unis), cmdb, 0, 3);
+			unis->m_reprojectionMatrix = ctx.m_matrices.m_reprojection;
+			unis->m_invertedViewProjectionMatrix = ctx.m_matrices.m_invertedViewProjection;
+			for(U32 i = 0; i < ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount; ++i)
+			{
+				unis->m_shadowCascadeViewProjectionMatrices[i] = ctx.m_renderQueue->m_directionalLight.m_viewProjectionMatrices[i];
+			}
+
+			for(U32 i = 0; i < cascadeCount; ++i)
+			{
+				rctx.bindImage(0, 2, m_runCtx.m_hzbShadowRts[i], firstMipSubresource, i);
+			}
 
-			dispatchPPCompute(cmdb, 8, 8, 1, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
 		});
 	}
 
 	// Mipmap
 	{
-		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Hzb mip gen");
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HZB mip gen");
 
 		pass.newTextureDependency(m_runCtx.m_hzbRt, TextureUsageBit::kSampledCompute, firstMipSubresource);
 

+ 3 - 1
AnKi/Renderer/Hzb.h

@@ -27,6 +27,7 @@ public:
 
 private:
 	RenderTargetDescription m_hzbRtDescr;
+	Array<RenderTargetDescription, kMaxShadowCascades> m_hzbShadowRtDescrs;
 
 	class
 	{
@@ -39,7 +40,7 @@ private:
 	{
 	public:
 		ShaderProgramResourcePtr m_prog;
-		ShaderProgramPtr m_grProg;
+		Array<ShaderProgramPtr, kMaxShadowCascades + 1> m_grProgs;
 	} m_reproj;
 
 	class
@@ -55,6 +56,7 @@ private:
 	{
 	public:
 		RenderTargetHandle m_hzbRt;
+		Array<RenderTargetHandle, kMaxShadowCascades> m_hzbShadowRts;
 	} m_runCtx;
 };
 /// @}

+ 1 - 0
AnKi/Renderer/RenderQueue.h

@@ -166,6 +166,7 @@ class DirectionalLightQueueElement final
 {
 public:
 	Array<Mat4, kMaxShadowCascades> m_textureMatrices;
+	Array<Mat4, kMaxShadowCascades> m_viewProjectionMatrices;
 	Array<RenderQueue*, kMaxShadowCascades> m_shadowRenderQueues;
 	U64 m_uuid; ///< Zero means that there is no dir light
 	Vec3 m_diffuseColor;

+ 2 - 1
AnKi/Scene/Components/LightComponent.cpp

@@ -340,7 +340,8 @@ void LightComponent::setupDirectionalLightQueueElement(const Frustum& primaryFru
 
 			// Light matrix
 			const Mat4 biasMat4(0.5f, 0.0f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f);
-			el.m_textureMatrices[i] = biasMat4 * cascadeProjMat * cascadeViewMat;
+			el.m_viewProjectionMatrices[i] = cascadeProjMat * cascadeViewMat;
+			el.m_textureMatrices[i] = biasMat4 * el.m_viewProjectionMatrices[i];
 
 			// Fill the frustum with the fixed projection parameters from the fixed projection matrix
 			Plane plane;

+ 7 - 0
AnKi/Shaders/CollisionFunctions.hlsl

@@ -15,6 +15,10 @@ Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Boo
 	const Vec3 pvec = cross(rayDir, v0v2);
 	const F32 det = dot(v0v1, pvec);
 
+	t = 0.0f;
+	u = 0.0f;
+	v = 0.0f;
+
 	if((backfaceCulling && det < kEpsilonF32) || abs(det) < kEpsilonF32)
 	{
 		return false;
@@ -94,6 +98,9 @@ Bool testRayObb(Vec3 rayOrigin, Vec3 rayDir, Vec3 obbExtend, Mat4 obbTransformIn
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
 Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
 {
+	t0 = 0.0f;
+	t1 = 0.0f;
+
 	const Vec3 L = sphereCenter - rayOrigin;
 	const F32 tca = dot(L, rayDir);
 	const F32 d2 = dot(L, L) - tca * tca;

+ 40 - 11
AnKi/Shaders/HzbReprojection.ankiprog

@@ -3,14 +3,21 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#pragma anki mutator SHADOW_TEXTURE_COUNT 0 1 2 3 4
+
 #pragma anki start comp
 
 #include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
 
 [[vk::binding(0)]] Texture2D g_inputDepthTex;
-[[vk::binding(1), vk::image_format("r32ui")]] RWTexture2D<U32> g_hiZUavTex;
+[[vk::binding(1), vk::image_format("r32ui")]] RWTexture2D<U32> g_hzbUavTex;
+
+#if SHADOW_TEXTURE_COUNT > 0
+[[vk::binding(2), vk::image_format("r32ui")]] RWTexture2D<U32> g_shadowsHzbUavTex[SHADOW_TEXTURE_COUNT];
+#endif
 
-[[vk::push_constant]] ConstantBuffer<Mat4> g_reprojectionMat;
+[[vk::binding(3)]] ConstantBuffer<HzbUniforms> g_unis;
 
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
@@ -27,20 +34,42 @@
 	const Vec2 ndc = uvToNdc(uv);
 
 	// Reproject
-	const Vec4 v4 = mul(g_reprojectionMat, Vec4(ndc, depth, 1.0));
-	const Vec2 newNdc = v4.xy / v4.w;
-	const Vec2 newUv = ndcToUv(newNdc);
+	Vec4 v4 = mul(g_unis.m_reprojectionMatrix, Vec4(ndc, depth, 1.0));
+	Vec2 newNdc = v4.xy / v4.w;
+	Vec2 newUv = ndcToUv(newNdc);
 
 	// Store
-	Vec2 hiZTexSize;
-	g_hiZUavTex.GetDimensions(hiZTexSize.x, hiZTexSize.y);
-	const Vec2 texCoordsf = newUv * hiZTexSize;
-	if(any(texCoordsf < Vec2(0.0f, 0.0f)) || any(texCoordsf >= hiZTexSize))
+	Vec2 hzbTexSize;
+	g_hzbUavTex.GetDimensions(hzbTexSize.x, hzbTexSize.y);
+	Vec2 texCoordsf = newUv * hzbTexSize;
+	if(all(texCoordsf >= Vec2(0.0f, 0.0f)) && all(texCoordsf < hzbTexSize))
 	{
-		return;
+		InterlockedMax(g_hzbUavTex[IVec2(texCoordsf)], asuint(depth));
 	}
 
-	InterlockedMax(g_hiZUavTex[IVec2(texCoordsf)], asuint(depth));
+	// Now do the same for the shadow cascades
+#if SHADOW_TEXTURE_COUNT > 0
+	v4 = mul(g_unis.m_invertedViewProjectionMatrix, Vec4(ndc, depth, 1.0));
+	const Vec3 worldPos = v4.xyz / v4.w;
+
+	for(U32 i = 0; i < SHADOW_TEXTURE_COUNT; ++i)
+	{
+		v4 = mul(g_unis.m_shadowCascadeViewProjectionMatrices[i], Vec4(worldPos, 1.0f));
+
+		newNdc = v4.xy / v4.w;
+		newUv = ndcToUv(newNdc);
+
+		const F32 smDepth = saturate(v4.z / v4.w);
+
+		g_shadowsHzbUavTex[i].GetDimensions(hzbTexSize.x, hzbTexSize.y);
+		texCoordsf = newUv * hzbTexSize;
+
+		if(all(texCoordsf >= Vec2(0.0f, 0.0f)) && all(texCoordsf < hzbTexSize))
+		{
+			InterlockedMax(g_shadowsHzbUavTex[i][IVec2(texCoordsf)], asuint(smDepth));
+		}
+	}
+#endif
 }
 
 #pragma anki end

+ 8 - 0
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -162,4 +162,12 @@ struct GpuVisibilityUniforms
 	Mat4 m_viewProjectionMat;
 };
 
+struct HzbUniforms // Uniform block of the HZB reprojection compute shader (declared there as ConstantBuffer<HzbUniforms>).
+{
+	Mat4 m_reprojectionMatrix; ///< For the main camera.
+	Mat4 m_invertedViewProjectionMatrix; ///< NDC to world for the main camera.
+	Mat4 m_projectionMatrix; ///< NOTE(review): neither written by Hzb.cpp nor read by the reprojection shader in the visible hunks. Confirm it is needed.
+	Mat4 m_shadowCascadeViewProjectionMatrices[kMaxShadowCascades]; ///< Per shadow cascade: world to the cascade's clip space (projection * view, without the texture bias).
+};
+
 ANKI_END_NAMESPACE

BIN
ThirdParty/Bin/Windows64/dxc.exe


BIN
ThirdParty/Bin/Windows64/dxcompiler.dll