فهرست منبع

Remove the spiral from the shadow denoising

Panagiotis Christopoulos Charitos 4 سال پیش
والد
کامیت
0d2280b600

+ 54 - 24
AnKi/Renderer/RtShadows.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Renderer/ShadowMapping.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Renderer/MotionVectors.h>
+#include <AnKi/Renderer/DepthDownscale.h>
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Resource/ShaderProgramResourceSystem.h>
 #include <AnKi/Util/Tracer.h>
@@ -59,16 +60,18 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 	{
 		ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsDenoise.ankiprog", m_denoiseProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
-		variantInitInfo.addConstant("OUT_IMAGE_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
-		variantInitInfo.addConstant("SPIRAL_TURN_COUNT", 28u);
-		variantInitInfo.addConstant("MIN_SAMPLE_COUNT", 4u);
+		variantInitInfo.addConstant("OUT_IMAGE_SIZE", UVec2(m_r->getWidth() / 2, m_r->getHeight() / 2));
+		variantInitInfo.addConstant("MIN_SAMPLE_COUNT", 8u);
 		variantInitInfo.addConstant("MAX_SAMPLE_COUNT", 32u);
-		variantInitInfo.addConstant("MIN_PIXEL_RADIUS", 8u);
-		variantInitInfo.addConstant("MAX_PIXEL_RADIUS", 16u);
+		variantInitInfo.addMutation("BLUR_ORIENTATION", 0);
 
 		const ShaderProgramResourceVariant* variant;
 		m_denoiseProg->getOrCreateVariant(variantInitInfo, variant);
-		m_grDenoiseProg = variant->getProgram();
+		m_grDenoiseHorizontalProg = variant->getProgram();
+
+		variantInitInfo.addMutation("BLUR_ORIENTATION", 1);
+		m_denoiseProg->getOrCreateVariant(variantInitInfo, variant);
+		m_grDenoiseVerticalProg = variant->getProgram();
 	}
 
 	// SVGF variance program
@@ -95,7 +98,6 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 		m_svgfAtrousProg->getOrCreateVariant(variantInitInfo, variant);
 		m_svgfAtrousGrProg = variant->getProgram();
 
-		variantInitInfo.addConstant("FB_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
 		variantInitInfo.addMutation("LAST_PASS", 1);
 		m_svgfAtrousProg->getOrCreateVariant(variantInitInfo, variant);
 		m_svgfAtrousLastPassGrProg = variant->getProgram();
@@ -108,7 +110,7 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 	// Shadow RT
 	{
 		TextureInitInfo texinit =
-			m_r->create2DRenderTargetInitInfo(m_r->getWidth(), m_r->getHeight(), Format::R32G32_UINT,
+			m_r->create2DRenderTargetInitInfo(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R32G32_UINT,
 											  TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE
 												  | TextureUsageBit::IMAGE_COMPUTE_WRITE,
 											  "RtShadows");
@@ -198,6 +200,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		}
 
 		m_runCtx.m_intermediateShadowsRts[0] = rgraph.newRenderTarget(m_intermediateShadowsRtDescr);
+		m_runCtx.m_intermediateShadowsRts[1] = rgraph.newRenderTarget(m_intermediateShadowsRtDescr);
 
 		m_runCtx.m_currentMomentsRt = rgraph.importRenderTarget(m_momentsRts[!prevRtIdx], TextureUsageBit::NONE);
 		m_runCtx.m_currentHistoryLengthRt =
@@ -205,8 +208,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 
 		if(m_useSvgf)
 		{
-			m_runCtx.m_intermediateShadowsRts[1] = rgraph.newRenderTarget(m_intermediateShadowsRtDescr);
-
 			if(m_atrousPassCount > 1)
 			{
 				m_runCtx.m_varianceRts[0] = rgraph.newRenderTarget(m_varianceRtDescr);
@@ -215,6 +216,10 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		}
 	}
 
+	const RenderPassDependency depthDependency(m_r->getDepthDownscale().getHiZRt(),
+											   TextureUsageBit::SAMPLED_TRACE_RAYS | TextureUsageBit::SAMPLED_COMPUTE,
+											   HIZ_HALF_DEPTH);
+
 	// RT shadows pass
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows");
@@ -228,7 +233,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		rpass.newDependency(
 			RenderPassDependency(m_r->getAccelerationStructureBuilder().getAccelerationStructureHandle(),
 								 AccelerationStructureUsageBit::TRACE_RAYS_READ));
-		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_TRACE_RAYS));
+		rpass.newDependency(depthDependency);
 		rpass.newDependency(
 			RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), TextureUsageBit::SAMPLED_TRACE_RAYS));
 		rpass.newDependency(
@@ -247,10 +252,10 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		}
 	}
 
-	// Denoise pass
+	// Denoise pass horizontal
 	if(!m_useSvgf)
 	{
-		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Denoise");
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Denoise Horizontal");
 		rpass.setWork(
 			[](RenderPassWorkContext& rgraphCtx) {
 				static_cast<RtShadows*>(rgraphCtx.m_userData)->runDenoise(rgraphCtx);
@@ -259,7 +264,28 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 
 		rpass.newDependency(
 			RenderPassDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::SAMPLED_COMPUTE));
-		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE));
+		rpass.newDependency(depthDependency);
+		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE));
+		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::SAMPLED_COMPUTE));
+		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentHistoryLengthRt, TextureUsageBit::SAMPLED_COMPUTE));
+
+		rpass.newDependency(
+			RenderPassDependency(m_runCtx.m_intermediateShadowsRts[1], TextureUsageBit::IMAGE_COMPUTE_WRITE));
+	}
+
+	// Denoise pass vertical
+	if(!m_useSvgf)
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Denoise Vertical");
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<RtShadows*>(rgraphCtx.m_userData)->runDenoise(rgraphCtx);
+			},
+			this, 0);
+
+		rpass.newDependency(
+			RenderPassDependency(m_runCtx.m_intermediateShadowsRts[1], TextureUsageBit::SAMPLED_COMPUTE));
+		rpass.newDependency(depthDependency);
 		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE));
 		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::SAMPLED_COMPUTE));
 		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentHistoryLengthRt, TextureUsageBit::SAMPLED_COMPUTE));
@@ -281,7 +307,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			RenderPassDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::SAMPLED_COMPUTE));
 		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::SAMPLED_COMPUTE));
 		rpass.newDependency(RenderPassDependency(m_runCtx.m_currentHistoryLengthRt, TextureUsageBit::SAMPLED_COMPUTE));
-		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE));
+		rpass.newDependency(depthDependency);
 		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE));
 
 		rpass.newDependency(
@@ -306,7 +332,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 				},
 				this, 0);
 
-			rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE));
+			rpass.newDependency(depthDependency);
 			rpass.newDependency(
 				RenderPassDependency(m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE));
 			rpass.newDependency(
@@ -421,7 +447,7 @@ void RtShadows::run(RenderPassWorkContext& rgraphCtx)
 	rgraphCtx.bindColorTexture(0, 8, m_runCtx.m_historyAndFinalRt);
 	cmdb->bindSampler(0, 9, m_r->getSamplers().m_trilinearClamp);
 	cmdb->bindSampler(0, 10, m_r->getSamplers().m_nearestNearestClamp);
-	rgraphCtx.bindTexture(0, 11, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindColorTexture(0, 11, m_r->getDepthDownscale().getHiZRt());
 	rgraphCtx.bindColorTexture(0, 12, m_r->getMotionVectors().getMotionVectorsRt());
 	rgraphCtx.bindColorTexture(0, 13, m_r->getMotionVectors().getRejectionFactorRt());
 	rgraphCtx.bindColorTexture(0, 14, m_r->getGBuffer().getColorRt(2));
@@ -448,24 +474,28 @@ void RtShadows::runDenoise(RenderPassWorkContext& rgraphCtx)
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
-	cmdb->bindShaderProgram(m_grDenoiseProg);
+	cmdb->bindShaderProgram((m_runCtx.m_denoiseOrientation == 0) ? m_grDenoiseHorizontalProg : m_grDenoiseVerticalProg);
 
 	cmdb->bindSampler(0, 0, m_r->getSamplers().m_nearestNearestClamp);
 	cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
-	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_intermediateShadowsRts[0]);
-	rgraphCtx.bindTexture(0, 3, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_intermediateShadowsRts[m_runCtx.m_denoiseOrientation]);
+	rgraphCtx.bindColorTexture(0, 3, m_r->getDepthDownscale().getHiZRt());
 	rgraphCtx.bindColorTexture(0, 4, m_r->getGBuffer().getColorRt(2));
 	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_currentMomentsRt);
 	rgraphCtx.bindColorTexture(0, 6, m_runCtx.m_currentHistoryLengthRt);
 
-	rgraphCtx.bindImage(0, 7, m_runCtx.m_historyAndFinalRt);
+	rgraphCtx.bindImage(0, 7,
+						(m_runCtx.m_denoiseOrientation == 0) ? m_runCtx.m_intermediateShadowsRts[1]
+															 : m_runCtx.m_historyAndFinalRt);
 
 	RtShadowsDenoiseUniforms unis;
 	unis.invViewProjMat = m_runCtx.m_ctx->m_matrices.m_invertedViewProjectionJitter;
 	unis.time = F32(m_r->getGlobalTimestamp());
 	cmdb->setPushConstants(&unis, sizeof(unis));
 
-	dispatchPPCompute(cmdb, 8, 8, m_r->getWidth(), m_r->getHeight());
+	dispatchPPCompute(cmdb, 8, 8, m_r->getWidth() / 2, m_r->getHeight() / 2);
+
+	m_runCtx.m_denoiseOrientation = !m_runCtx.m_denoiseOrientation;
 }
 
 void RtShadows::runSvgfVariance(RenderPassWorkContext& rgraphCtx)
@@ -480,7 +510,7 @@ void RtShadows::runSvgfVariance(RenderPassWorkContext& rgraphCtx)
 	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_intermediateShadowsRts[0]);
 	rgraphCtx.bindColorTexture(0, 3, m_runCtx.m_currentMomentsRt);
 	rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_currentHistoryLengthRt);
-	rgraphCtx.bindTexture(0, 5, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindColorTexture(0, 5, m_r->getDepthDownscale().getHiZRt());
 	rgraphCtx.bindColorTexture(0, 6, m_r->getGBuffer().getColorRt(2));
 
 	rgraphCtx.bindImage(0, 7, m_runCtx.m_intermediateShadowsRts[1]);
@@ -511,7 +541,7 @@ void RtShadows::runSvgfAtrous(RenderPassWorkContext& rgraphCtx)
 	cmdb->bindSampler(0, 0, m_r->getSamplers().m_nearestNearestClamp);
 	cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
 
-	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindColorTexture(0, 2, m_r->getDepthDownscale().getHiZRt());
 	rgraphCtx.bindColorTexture(0, 3, m_r->getGBuffer().getColorRt(2));
 	rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_intermediateShadowsRts[readRtIdx]);
 	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_varianceRts[readRtIdx]);

+ 3 - 1
AnKi/Renderer/RtShadows.h

@@ -72,7 +72,8 @@ public:
 	U32 m_missShaderGroupIdx = MAX_U32;
 
 	ShaderProgramResourcePtr m_denoiseProg;
-	ShaderProgramPtr m_grDenoiseProg;
+	ShaderProgramPtr m_grDenoiseHorizontalProg;
+	ShaderProgramPtr m_grDenoiseVerticalProg;
 
 	ShaderProgramResourcePtr m_svgfVarianceProg;
 	ShaderProgramPtr m_svgfVarianceGrProg;
@@ -115,6 +116,7 @@ public:
 		BitSet<MAX_RT_SHADOW_LAYERS, U8> m_layersWithRejectedHistory = {false};
 
 		U8 m_atrousPassIdx = 0;
+		U8 m_denoiseOrientation = 0;
 	} m_runCtx;
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);

+ 8 - 0
AnKi/Shaders/Functions.glsl

@@ -555,3 +555,11 @@ UVec2 getOptimalGlobalInvocationId8x8Nvidia()
 	return swizzledGlobalId.xy;
 }
 #endif
+
+// Gaussian distribution function: zero-mean normal PDF with standard deviation s, evaluated at x
+F32 gaussianWeight(F32 s, F32 x)
+{
+	F32 p = 1.0 / (s * sqrt(2.0 * PI));
+	p *= exp((x * x) / (-2.0 * s * s));
+	return p;
+}

+ 20 - 27
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -3,18 +3,18 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#pragma anki mutator BLUR_ORIENTATION 0 1
+
 #pragma anki start comp
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(OUT_IMAGE_SIZE, 0, UVec2(1));
 ANKI_SPECIALIZATION_CONSTANT_U32(MIN_SAMPLE_COUNT, 2, 1);
 ANKI_SPECIALIZATION_CONSTANT_U32(MAX_SAMPLE_COUNT, 3, 1);
-ANKI_SPECIALIZATION_CONSTANT_U32(MIN_PIXEL_RADIUS, 4, 1);
-ANKI_SPECIALIZATION_CONSTANT_U32(MAX_PIXEL_RADIUS, 5, 1);
-ANKI_SPECIALIZATION_CONSTANT_U32(SPIRAL_TURN_COUNT, 6, 1);
 
 #include <AnKi/Shaders/BilateralFilter.glsl>
 #include <AnKi/Shaders/Pack.glsl>
 #include <AnKi/Shaders/RtShadows.glsl>
+#include <AnKi/Shaders/Functions.glsl>
 
 const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
@@ -60,16 +60,6 @@ F32 computeVarianceCenter(Vec2 uv)
 	return abs(sumMoments.y - sumMoments.x * sumMoments.x);
 }
 
-F32 computeShadowsLuma(F32 shadowLayers[MAX_RT_SHADOW_LAYERS])
-{
-	F32 l = 0.0;
-	ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
-	{
-		l += shadowLayers[i];
-	}
-	return min(1.0, l / 2.0);
-}
-
 void main()
 {
 	// Set UVs
@@ -100,18 +90,15 @@ void main()
 	const F32 historyLength =
 		textureLod(u_historyLengthTex, u_linearAnyClampSampler, uv, 0.0).x * RT_SHADOWS_MAX_HISTORY_LENGTH;
 
-	U32 pixelRadius;
 	U32 sampleCount;
 	if(historyLength < 2.0)
 	{
 		// Worst case
-		pixelRadius = MAX_PIXEL_RADIUS;
 		sampleCount = MAX_SAMPLE_COUNT;
 	}
 	else if(historyLength > 4.0 && varianceCenter < 0.0001)
 	{
 		// Best case
-		pixelRadius = MIN_PIXEL_RADIUS;
 		sampleCount = MIN_SAMPLE_COUNT;
 	}
 	else
@@ -121,31 +108,37 @@ void main()
 		F32 blur = varianceCenter * 100.0;
 		blur = min(1.0, blur);
 
-		const F32 pixelRadiusf = mix(F32(MIN_PIXEL_RADIUS), F32(MAX_PIXEL_RADIUS), blur);
 		const F32 sampleCountf = mix(F32(MIN_SAMPLE_COUNT), F32(MAX_SAMPLE_COUNT), blur);
 
-		pixelRadius = U32(pixelRadiusf);
 		sampleCount = U32(sampleCountf);
 	}
 
+	sampleCount = sampleCount / 2;
+
 	// Sample
-	SpatialBilateralContext ctx =
-		spatialBilateralInit(sampleCount, gl_GlobalInvocationID.xy, pixelRadius, SPIRAL_TURN_COUNT, u_unis.time);
 	F32 weight = 1.0;
 
-	for(U32 i = 0; i < sampleCount; ++i)
+	for(I32 i = -I32(sampleCount); i < I32(sampleCount); ++i)
 	{
-		const IVec2 unormalizedUvs = clamp(IVec2(spatialBilateralIterate(ctx, i)), IVec2(0), IVec2(OUT_IMAGE_SIZE - 1));
-		const Vec2 sampleUv = Vec2(unormalizedUvs) / Vec2(OUT_IMAGE_SIZE);
+		const Vec2 texelSize = 1.0 / Vec2(OUT_IMAGE_SIZE);
+#if BLUR_ORIENTATION == 0
+		const Vec2 sampleUv = Vec2(uv.x + F32(i) * texelSize.x, uv.y);
+#else
+		const Vec2 sampleUv = Vec2(uv.x, uv.y + F32(i) * texelSize.y);
+#endif
 
 		F32 localShadowFactors[MAX_RT_SHADOW_LAYERS];
-		unpackRtShadows(texelFetch(u_inTex, IVec2(unormalizedUvs / 2), 0), localShadowFactors);
+		unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
 
-		const F32 depthTap = texelFetch(u_depthTex, unormalizedUvs, 0).r;
+		const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
 		const Vec3 positionTap = unproject(UV_TO_NDC(sampleUv), depthTap);
-		const Vec3 normalTap = unpackNormalFromGBuffer(texelFetch(u_gbuffer2Tex, unormalizedUvs, 0));
+		const Vec3 normalTap =
+			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
+
+		// F32 w = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
+		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
 
-		const F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
+		w *= gaussianWeight(0.4, abs(F32(i)) / F32(sampleCount + 1));
 
 		ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 		{

+ 1 - 1
AnKi/Shaders/RtShadowsRayGen.ankiprog

@@ -170,7 +170,7 @@ void main()
 	imageStore(u_historyLengthImage, IVec2(gl_LaunchIDEXT.xy), Vec4(historyLength));
 
 	// Compute blend factor. Use nearest sampler because it's an integer texture
-	const F32 lowestBlendFactor = 0.05;
+	const F32 lowestBlendFactor = 0.1;
 	const F32 stableFrames = 4.0;
 	const F32 lerp = min(1.0, (historyLength * RT_SHADOWS_MAX_HISTORY_LENGTH - 1.0) / stableFrames);
 	const F32 blendFactor = mix(1.0, lowestBlendFactor, lerp);