Browse Source

Add denoising

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
279a552ab6

+ 6 - 1
AnKi/Renderer/ConfigDefs.h

@@ -21,7 +21,12 @@ ANKI_CONFIG_OPTION(r_ssrDepthLod, 2, 0, 1000)
 
 ANKI_CONFIG_OPTION(r_ssgiMaxSteps, 32, 1, 2048)
 ANKI_CONFIG_OPTION(r_ssgiDepthLod, 2, 0, 1000)
-ANKI_CONFIG_OPTION(r_ssgiStepIncrement, 32, 1, 512)
+
+ANKI_CONFIG_OPTION(r_indirectDiffuseSsgiMaxSteps, 32, 1, 2048)
+ANKI_CONFIG_OPTION(r_indirectDiffuseSsgiDepthLod, 2, 0, 1000)
+ANKI_CONFIG_OPTION(r_indirectDiffuseSsgiStepIncrement, 32, 1, 512)
+ANKI_CONFIG_OPTION(r_indirectDiffuseDenoiseMinSampleCount, 8, 1, 128)
+ANKI_CONFIG_OPTION(r_indirectDiffuseDenoiseMaxSampleCount, 32, 1, 128)
 
 ANKI_CONFIG_OPTION(r_shadowMappingTileResolution, 128, 16, 2048)
 ANKI_CONFIG_OPTION(r_shadowMappingTileCountPerRowOrColumn, 16, 1, 256)

+ 57 - 4
AnKi/Renderer/IndirectDiffuse.cpp

@@ -54,9 +54,10 @@ Error IndirectDiffuse::initInternal(const ConfigSet& cfg)
 
 	// Init SSGI+probes pass
 	{
-		m_main.m_maxSteps = cfg.getNumberU32("r_ssgiMaxSteps");
-		m_main.m_depthLod = min(cfg.getNumberU32("r_ssgiDepthLod"), m_r->getDepthDownscale().getMipmapCount() - 1);
-		m_main.m_stepIncrement = cfg.getNumberU32("r_ssgiStepIncrement");
+		m_main.m_maxSteps = cfg.getNumberU32("r_indirectDiffuseSsgiMaxSteps");
+		m_main.m_depthLod =
+			min(cfg.getNumberU32("r_indirectDiffuseSsgiDepthLod"), m_r->getDepthDownscale().getMipmapCount() - 1);
+		m_main.m_stepIncrement = cfg.getNumberU32("r_indirectDiffuseSsgiStepIncrement");
 
 		ANKI_CHECK(getResourceManager().loadResource("Shaders/IndirectDiffuse.ankiprog", m_main.m_prog));
 
@@ -67,9 +68,13 @@ Error IndirectDiffuse::initInternal(const ConfigSet& cfg)
 
 	// Init denoise
 	{
+		m_denoise.m_minSampleCount = F32(cfg.getNumberU32("r_indirectDiffuseDenoiseMinSampleCount"));
+		m_denoise.m_maxSampleCount =
+			max(F32(cfg.getNumberU32("r_indirectDiffuseDenoiseMaxSampleCount")), m_denoise.m_minSampleCount);
+
 		ANKI_CHECK(getResourceManager().loadResource("Shaders/IndirectDiffuseDenoise.ankiprog", m_denoise.m_prog));
 
-		ShaderProgramResourceVariantInitInfo variantInit;
+		ShaderProgramResourceVariantInitInfo variantInit(m_denoise.m_prog);
 		variantInit.addMutation("BLUR_ORIENTATION", 0);
 		const ShaderProgramResourceVariant* variant;
 		m_denoise.m_prog->getOrCreateVariant(variantInit, variant);
@@ -169,6 +174,54 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 			dispatchPPCompute(cmdb, 8, 8, m_r->getInternalResolution().x() / 2, m_r->getInternalResolution().y() / 2);
 		});
 	}
+
+	// Denoise
+	for(U32 dir = 0; dir < 2; ++dir)
+	{
+		ComputeRenderPassDescription& rpass =
+			rgraph.newComputeRenderPass((dir == 0) ? "IndirectDiffuseDenoiseH" : "IndirectDiffuseDenoiseV");
+
+		const TextureUsageBit readUsage = TextureUsageBit::SAMPLED_COMPUTE;
+		const U32 readIdx = (dir == 0) ? WRITE : READ;
+
+		rpass.newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[readIdx], readUsage));
+
+		TextureSubresourceInfo hizSubresource;
+		hizSubresource.m_firstMipmap = 0;
+		rpass.newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
+
+		rpass.newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), readUsage));
+		rpass.newDependency(RenderPassDependency(m_runCtx.m_momentsAndHistoryLengthHandles[readIdx], readUsage));
+
+		rpass.newDependency(
+			RenderPassDependency(m_runCtx.m_mainRtHandles[!readIdx], TextureUsageBit::IMAGE_COMPUTE_WRITE));
+
+		rpass.setWork([this, &ctx, dir, readIdx](RenderPassWorkContext& rgraphCtx) {
+			CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+			cmdb->bindShaderProgram(m_denoise.m_grProgs[dir]);
+
+			cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+			rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_mainRtHandles[readIdx]);
+			TextureSubresourceInfo hizSubresource;
+			hizSubresource.m_firstMipmap = 0;
+			rgraphCtx.bindTexture(0, 2, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
+			rgraphCtx.bindColorTexture(0, 3, m_r->getGBuffer().getColorRt(2));
+			rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_momentsAndHistoryLengthHandles[readIdx]);
+			rgraphCtx.bindImage(0, 5, m_runCtx.m_mainRtHandles[!readIdx]);
+
+			IndirectDiffuseDenoiseUniforms unis;
+			unis.m_invertedViewProjectionJitterMat = ctx.m_matrices.m_invertedViewProjectionJitter;
+			unis.m_viewportSize = m_r->getInternalResolution() / 2u;
+			unis.m_viewportSizef = Vec2(unis.m_viewportSize);
+			unis.m_minSampleCount = m_denoise.m_minSampleCount;
+			unis.m_maxSampleCount = m_denoise.m_maxSampleCount;
+
+			cmdb->setPushConstants(&unis, sizeof(unis));
+
+			// Dispatch
+			dispatchPPCompute(cmdb, 8, 8, m_r->getInternalResolution().x() / 2, m_r->getInternalResolution().y() / 2);
+		});
+	}
 }
 
 } // end namespace anki

+ 2 - 0
AnKi/Renderer/IndirectDiffuse.h

@@ -61,6 +61,8 @@ private:
 	public:
 		ShaderProgramResourcePtr m_prog;
 		Array<ShaderProgramPtr, 2> m_grProgs;
+		F32 m_minSampleCount = 1.0f;
+		F32 m_maxSampleCount = 1.0f;
 	} m_denoise;
 
 	class

+ 3 - 3
AnKi/Shaders/Include/IndirectDiffuseTypes.h

@@ -23,9 +23,9 @@ struct IndirectDiffuseDenoiseUniforms
 	Mat4 m_invertedViewProjectionJitterMat;
 	UVec2 m_viewportSize;
 	Vec2 m_viewportSizef;
-	U32 m_minSampleCount;
-	U32 m_maxSampleCount;
-	U32 m_padding[2u];
+	F32 m_minSampleCount;
+	F32 m_maxSampleCount;
+	F32 m_padding[2u];
 };
 
 ANKI_END_NAMESPACE

+ 6 - 6
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -34,7 +34,7 @@ layout(set = 0, binding = 8) uniform texture2D u_depthRt;
 layout(set = 0, binding = 9) uniform texture2D u_lightBufferRt;
 layout(set = 0, binding = 10) uniform texture2D u_historyTex;
 layout(set = 0, binding = 11) uniform texture2D u_motionVectorsTex;
-layout(set = 0, binding = 12) uniform texture2D u_motionVectorRejectionTex;
+layout(set = 0, binding = 12) uniform texture2D u_motionVectorsRejectionTex;
 layout(set = 0, binding = 13) uniform texture2D u_prevMomentsAndHistoryLengthTex;
 
 layout(push_constant, std430) uniform b_pc
@@ -183,10 +183,10 @@ void main()
 	}
 
 	// Compute history length
-	const Vec2 historyUv = uv + textureLod(u_motionVectorsRt, u_linearAnyClampSampler, uv, 0.0).xy;
-	const F32 historyRejectionFactor = textureLod(u_motionVectorsRejectionRt, u_linearAnyClampSampler, uv, 0.0).x;
+	const Vec2 historyUv = uv + textureLod(u_motionVectorsTex, u_trilinearClampSampler, uv, 0.0).xy;
+	const F32 historyRejectionFactor = textureLod(u_motionVectorsRejectionTex, u_trilinearClampSampler, uv, 0.0).x;
 	const Vec3 prevMomentsAndHistoryLength =
-		textureLod(u_momentsAndHistoryLengthTex, u_linearAnyClampSampler, historyUv, 0.0).xyz;
+		textureLod(u_prevMomentsAndHistoryLengthTex, u_trilinearClampSampler, historyUv, 0.0).xyz;
 	F32 historyLength;
 	if(historyRejectionFactor >= 0.5)
 	{
@@ -196,7 +196,7 @@ void main()
 	else
 	{
 		// Sample seems stable, increment its temporal history
-		historyLength = prevMomentsAndHistoryLength.z + 1.0;
+		historyLength = min(prevMomentsAndHistoryLength.z + 1.0, 128.0);
 	}
 
 	// Blend color with history
@@ -221,7 +221,7 @@ void main()
 	moments.y = moments.x * moments.x;
 
 	// Blend the moments with history
-	const F32 momentsBlendFactor = 0.2;
+	F32 momentsBlendFactor = 0.2;
 	momentsBlendFactor = mix(momentsBlendFactor, 1.0, historyRejectionFactor);
 	moments = mix(prevMomentsAndHistoryLength.xy, moments, momentsBlendFactor);
 

+ 109 - 4
AnKi/Shaders/IndirectDiffuseDenoise.ankiprog

@@ -3,11 +3,14 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki mutator BLUR_ORIENTATION 0 1
+#pragma anki mutator BLUR_ORIENTATION 0 1 // 0: in X axis, 1: in Y axis
 
 #pragma anki start comp
 
 #include <AnKi/Shaders/Include/IndirectDiffuseTypes.h>
+#include <AnKi/Shaders/PackFunctions.glsl>
+#include <AnKi/Shaders/Functions.glsl>
+#include <AnKi/Shaders/BilateralFilter.glsl>
 
 const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y) in;
@@ -17,15 +20,117 @@ layout(set = 0, binding = 1) uniform texture2D u_toDenoiseTex;
 layout(set = 0, binding = 2) uniform texture2D u_depthTex;
 layout(set = 0, binding = 3) uniform texture2D u_gbuffer2Tex;
 layout(set = 0, binding = 4) uniform texture2D u_momentsAndHistoryLengthTex;
-layout(set = 0, binding = 5) writeonly uniform uimage2D u_outImg;
+layout(set = 0, binding = 5) writeonly uniform image2D u_outImg;
 
-layout(push_constant, std140, row_major) uniform b_pc
+layout(push_constant, std430, row_major) uniform b_pc
 {
-	IndirectDiffuseDenoiseUniforms m_unis;
+	IndirectDiffuseDenoiseUniforms u_unis;
 };
 
+// Reconstructs a position from NDC xy coordinates and a depth value. The point (ndc, depth, 1) is transformed by
+// u_unis.m_invertedViewProjectionJitterMat and the result is divided by its w component (perspective divide).
+// The callers below treat the returned value as a world-space position.
+Vec3 unproject(Vec2 ndc, F32 depth)
+{
+	const Vec4 worldPos4 = u_unis.m_invertedViewProjectionJitterMat * Vec4(ndc, depth, 1.0);
+	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+	return worldPos;
+}
+
+// Estimates the variance around uv from a 3x3 neighbourhood of u_momentsAndHistoryLengthTex.
+// The .xy channels (the two moments) of the 9 taps are accumulated with the weights in kernel[][] and the result is
+// |sum.y - sum.x * sum.x|, i.e. second moment minus the squared first moment, made non-negative with abs().
+F32 computeSpatialVariance(Vec2 uv)
+{
+	// Weights indexed by |offset|: centre 1/4, edge neighbours 1/8, corner neighbours 1/16. Over the 3x3 window they
+	// add up to 1 (1/4 + 4 * 1/8 + 4 * 1/16)
+	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
+	const I32 radius = 1;
+	const Vec2 texelSize = 1.0 / u_unis.m_viewportSizef;
+	Vec2 sumMoments = Vec2(0.0);
+
+	for(I32 yy = -radius; yy <= radius; yy++)
+	{
+		for(I32 xx = -radius; xx <= radius; xx++)
+		{
+			// Step one texel at a time around the centre
+			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
+			const F32 k = kernel[abs(xx)][abs(yy)];
+			sumMoments += textureLod(u_momentsAndHistoryLengthTex, u_linearAnyClampSampler, newUv, 0.0).xy * k;
+		}
+	}
+
+	return abs(sumMoments.y - sumMoments.x * sumMoments.x);
+}
+
 void main()
 {
+	// One pass of a separable bilateral blur over u_toDenoiseTex. BLUR_ORIENTATION selects the axis (0: X, 1: Y).
+	// The number of taps adapts per pixel to the temporal history length and to the local variance of the moments,
+	// within [u_unis.m_minSampleCount, u_unis.m_maxSampleCount].
+
+	// Threads that fall outside the viewport have nothing to do
+	if(skipOutOfBoundsInvocations(WORKGROUP_SIZE, u_unis.m_viewportSize))
+	{
+		return;
+	}
+
+	// UV of the texel center handled by this invocation
+	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / u_unis.m_viewportSizef;
+
+	// Reference
+	const F32 depthCenter = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	if(depthCenter == 1.0)
+	{
+		// Depth of exactly 1.0 is written out as black and skipped (presumably the far plane/sky, nothing to denoise)
+		imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), Vec4(0.0));
+		return;
+	}
+
+	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
+	const Vec3 normalCenter = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
+
+	// Decide the amount of blurring
+	const F32 varianceCenter = computeSpatialVariance(uv);
+	const F32 historyLength = textureLod(u_momentsAndHistoryLengthTex, u_linearAnyClampSampler, uv, 0.0).z;
+
+	F32 sampleCount;
+	if(historyLength < 2.0)
+	{
+		// Worst case
+		sampleCount = u_unis.m_maxSampleCount;
+	}
+	else if(historyLength > 4.0 && varianceCenter < 0.0001)
+	{
+		// Best case
+		sampleCount = u_unis.m_minSampleCount;
+	}
+	else
+	{
+		// Every other case
+
+		// Scale the variance into [0, 1] and use it to interpolate between the min and max sample counts
+		F32 blur = varianceCenter * 100.0;
+		blur = min(1.0, blur);
+
+		sampleCount = mix(u_unis.m_minSampleCount, u_unis.m_maxSampleCount, blur);
+	}
+
+	// The loop below walks [-sampleCount, +sampleCount] around the center, so use half of the count per side.
+	// Note: the adaptive count is used as-is. A hard-coded "sampleCount = 32.0" override used to sit here and made
+	// the min/max sample count uniforms (and the config options feeding them) have no effect
+	sampleCount = sampleCount / 2.0;
+	sampleCount = floor(sampleCount);
+
+	// Sample
+	// Start the weight at EPSILON so the final normalization does not divide by zero if every tap gets rejected
+	F32 weight = EPSILON;
+	Vec3 color = Vec3(0.0);
+
+	for(F32 i = -sampleCount; i <= sampleCount; i += 1.0)
+	{
+		const Vec2 texelSize = 1.0 / u_unis.m_viewportSizef;
+#if BLUR_ORIENTATION == 0
+		const Vec2 sampleUv = Vec2(uv.x + i * texelSize.x, uv.y);
+#else
+		const Vec2 sampleUv = Vec2(uv.x, uv.y + i * texelSize.y);
+#endif
+
+		const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
+		const Vec3 positionTap = unproject(UV_TO_NDC(sampleUv), depthTap);
+		const Vec3 normalTap =
+			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
+
+		// Weight the tap by how well its position/normal agree with the center's
+		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
+		// w *= gaussianWeight(0.4, abs(F32(i)) / (sampleCount + 1.0));
+		weight += w;
+
+		color += textureLod(u_toDenoiseTex, u_linearAnyClampSampler, sampleUv, 0.0).xyz * w;
+	}
+
+	// Normalize and store
+	color /= weight;
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), Vec4(color, 0.0));
 }
 
 #pragma anki end