Browse Source

Add some more SVGF code

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
987cabe2db

+ 89 - 22
AnKi/Renderer/RtShadows.cpp

@@ -33,10 +33,10 @@ Error RtShadows::init(const ConfigSet& cfg)
 
 Error RtShadows::initInternal(const ConfigSet& cfg)
 {
-	// Ray gen prog
+	// Ray gen program
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsRayGen.ankiprog", m_rayGenProg));
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_rayGenProg);
-	variantInitInfo.addMutation("SVGF", 0);
+	variantInitInfo.addMutation("SVGF", m_useSvgf);
 	const ShaderProgramResourceVariant* variant;
 	m_rayGenProg->getOrCreateVariant(variantInitInfo, variant);
 	m_rtLibraryGrProg = variant->getProgram();
@@ -48,33 +48,100 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 	m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
 
 	// Denoise program
-	ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsDenoise.ankiprog", m_denoiseProg));
-	ShaderProgramResourceVariantInitInfo variantInitInfo2(m_denoiseProg);
-	variantInitInfo2.addConstant("OUT_IMAGE_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
-	variantInitInfo2.addConstant("SAMPLE_COUNT", 8u);
-	variantInitInfo2.addConstant("SPIRAL_TURN_COUNT", 27u);
-	variantInitInfo2.addConstant("PIXEL_RADIUS", 12u);
+	if(!m_useSvgf)
+	{
+		ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsDenoise.ankiprog", m_denoiseProg));
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
+		variantInitInfo.addConstant("OUT_IMAGE_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
+		variantInitInfo.addConstant("SAMPLE_COUNT", 8u);
+		variantInitInfo.addConstant("SPIRAL_TURN_COUNT", 27u);
+		variantInitInfo.addConstant("PIXEL_RADIUS", 12u);
+
+		m_denoiseProg->getOrCreateVariant(variantInitInfo, variant);
+		m_grDenoiseProg = variant->getProgram();
+	}
 
-	m_denoiseProg->getOrCreateVariant(variantInitInfo2, variant);
-	m_grDenoiseProg = variant->getProgram();
+	// SVGF variance program
+	if(m_useSvgf)
+	{
+		ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsSvgfVariance.ankiprog", m_svgfVarianceProg));
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_svgfVarianceProg);
+		variantInitInfo.addConstant("FB_SIZE", UVec2(m_r->getWidth() / 2, m_r->getHeight() / 2));
 
-	// RTs
-	TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(
-		m_r->getWidth(), m_r->getHeight(), Format::R32G32_UINT,
-		TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE | TextureUsageBit::IMAGE_COMPUTE_WRITE,
-		"RtShadows");
-	texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
-	m_historyAndFinalRt = m_r->createAndClearRenderTarget(texinit);
+		m_svgfVarianceProg->getOrCreateVariant(variantInitInfo, variant);
+		m_svgfVarianceGrProg = variant->getProgram();
+	}
 
-	m_renderRt = m_r->create2DRenderTargetDescription(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R32G32_UINT,
-													  "RtShadowsTmp");
-	m_renderRt.bake();
+	// SVGF atrous program
+	if(m_useSvgf)
+	{
+		ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsSvgfAtrous.ankiprog", m_svgfAtrousProg));
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_svgfAtrousProg);
+		variantInitInfo.addConstant("FB_SIZE", UVec2(m_r->getWidth() / 2, m_r->getHeight() / 2));
 
-	// Misc
-	m_sbtRecordSize = getAlignedRoundUp(getGrManager().getDeviceCapabilities().m_sbtRecordAlignment, m_sbtRecordSize);
+		m_svgfAtrousProg->getOrCreateVariant(variantInitInfo, variant);
+		m_svgfAtrousGrProg = variant->getProgram();
+	}
+
+	// Debug program
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsVisualizeRenderTarget.ankiprog",
 												 m_visualizeRenderTargetsProg));
 
+	// Shadow RT
+	{
+		TextureInitInfo texinit =
+			m_r->create2DRenderTargetInitInfo(m_r->getWidth(), m_r->getHeight(), Format::R32G32_UINT,
+											  TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE
+												  | TextureUsageBit::IMAGE_COMPUTE_WRITE,
+											  "RtShadows");
+		texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
+		m_historyAndFinalRt = m_r->createAndClearRenderTarget(texinit);
+	}
+
+	// Render RT
+	{
+		m_renderRt = m_r->create2DRenderTargetDescription(m_r->getWidth() / 2, m_r->getHeight() / 2,
+														  Format::R32G32_UINT, "RtShadows Tmp");
+		m_renderRt.bake();
+	}
+
+	// Moments RT
+	if(m_useSvgf)
+	{
+		TextureInitInfo texinit =
+			m_r->create2DRenderTargetInitInfo(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R32G32_SFLOAT,
+											  TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE
+												  | TextureUsageBit::IMAGE_COMPUTE_WRITE,
+											  "RtShadows Moments");
+		texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
+		m_momentsRts[0] = m_r->createAndClearRenderTarget(texinit);
+		m_momentsRts[1] = m_r->createAndClearRenderTarget(texinit);
+	}
+
+	// History len RT
+	if(m_useSvgf)
+	{
+		TextureInitInfo texinit =
+			m_r->create2DRenderTargetInitInfo(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R8_UNORM,
+											  TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE
+												  | TextureUsageBit::IMAGE_COMPUTE_WRITE,
+											  "RtShadows History Length");
+		texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
+		m_historyLengthRts[0] = m_r->createAndClearRenderTarget(texinit);
+		m_historyLengthRts[1] = m_r->createAndClearRenderTarget(texinit);
+	}
+
+	// Variance RT
+	if(m_useSvgf)
+	{
+		m_varianceRt = m_r->create2DRenderTargetDescription(m_r->getWidth() / 2, m_r->getHeight() / 2,
+															Format::R32_SFLOAT, "RtShadows Variance");
+		m_varianceRt.bake();
+	}
+
+	// Misc
+	m_sbtRecordSize = getAlignedRoundUp(getGrManager().getDeviceCapabilities().m_sbtRecordAlignment, m_sbtRecordSize);
+
 	return Error::NONE;
 }
 

+ 20 - 3
AnKi/Renderer/RtShadows.h

@@ -51,9 +51,19 @@ public:
 		U64 m_frameLastUsed = MAX_U64;
 	};
 
+	/// @name Render targets
+	/// @{
 	TexturePtr m_historyAndFinalRt;
 	RenderTargetDescription m_renderRt;
 
+	Array<TexturePtr, 2> m_momentsRts;
+	Array<TexturePtr, 2> m_historyLengthRts;
+
+	RenderTargetDescription m_varianceRt;
+	/// @}
+
+	/// @name Programs
+	/// @{
 	ShaderProgramResourcePtr m_rayGenProg;
 	ShaderProgramPtr m_rtLibraryGrProg;
 	U32 m_rayGenShaderGroupIdx = MAX_U32;
@@ -64,15 +74,22 @@ public:
 	ShaderProgramResourcePtr m_denoiseProg;
 	ShaderProgramPtr m_grDenoiseProg;
 
-	U32 m_sbtRecordSize = 256;
+	ShaderProgramResourcePtr m_svgfVarianceProg;
+	ShaderProgramPtr m_svgfVarianceGrProg;
+
+	ShaderProgramResourcePtr m_svgfAtrousProg;
+	ShaderProgramPtr m_svgfAtrousGrProg;
+
+	ShaderProgramResourcePtr m_visualizeRenderTargetsProg;
+	/// @}
 
 	Array<ShadowLayer, MAX_RT_SHADOW_LAYERS> m_shadowLayers;
 
+	U32 m_sbtRecordSize = 256;
+
 	Bool m_historyAndFinalRtImportedOnce = false;
 	Bool m_useSvgf = false;
 
-	ShaderProgramResourcePtr m_visualizeRenderTargetsProg;
-
 	class
 	{
 	public:

+ 15 - 8
AnKi/Shaders/Functions.glsl

@@ -408,16 +408,23 @@ Bool aabbsOverlap(const Vec3 aMin, const Vec3 aMax, const Vec3 bMin, const Vec3
 	return all(lessThan(aMin, bMax)) && all(lessThan(bMin, aMax));
 }
 
-// A convenience macro to skip out of bounds invocations on post-process compute shaders.
-#define SKIP_OUT_OF_BOUNDS_INVOCATIONS() \
-	if((FB_SIZE.x % WORKGROUP_SIZE.x) != 0u || (FB_SIZE.y % WORKGROUP_SIZE.y) != 0u) \
-	{ \
-		if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y) \
-		{ \
-			return; \
-		} \
+// Returns true when the current compute invocation falls outside globalInvocationCount, so that the caller can return
+// early. Both arguments should be compile-time constants; the divisibility test below depends only on them.
+#if defined(ANKI_COMPUTE_SHADER)
+Bool skipOutOfBoundsInvocations(UVec2 workgroupSize, UVec2 globalInvocationCount)
+{
+	if((globalInvocationCount.x % workgroupSize.x) != 0u || (globalInvocationCount.y % workgroupSize.y) != 0u)
+	{
+		if(gl_GlobalInvocationID.x >= globalInvocationCount.x || gl_GlobalInvocationID.y >= globalInvocationCount.y)
+		{
+			return true; // This invocation is past the end on x or y
+		}
 	}

+	return false; // In bounds, or the count is an exact multiple of the workgroup size
+}
+#endif
+
 // Create a matrix from some direction.
 Mat3 rotationFromDirection(Vec3 zAxis)
 {

+ 149 - 0
AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog

@@ -0,0 +1,149 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 0, UVec2(1));
+
+#pragma anki start comp
+
+#include <AnKi/Shaders/RtShadows.glsl>
+#include <AnKi/Shaders/BilateralFilter.glsl>
+#include <AnKi/Shaders/Functions.glsl>
+
+const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+// Samplers
+layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
+layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
+
+// Input textures
+layout(set = 0, binding = 2) uniform texture2D u_depthTex;
+layout(set = 0, binding = 3) uniform texture2D u_gbuffer2Tex;
+layout(set = 0, binding = 4) uniform utexture2D u_shadowsTex; // Packed shadow layers, read with unpackRtShadows()
+// The variance needs a binding of its own. Sharing binding 4 with u_shadowsTex would make both names refer to the
+// same descriptor.
+layout(set = 0, binding = 5) uniform texture2D u_varianceTex;
+
+// Output images, written with imageStore(). The bindings moved by one to make room for u_varianceTex, the code that
+// binds the resources needs to use the same indices.
+layout(set = 0, binding = 6) uniform uimage2D u_shadowsImage;
+layout(set = 0, binding = 7) uniform image2D u_varianceImage;
+
+layout(std430, push_constant, row_major) uniform b_pc
+{
+	Mat4 u_invViewProjMat;
+};
+
+const I32 CONVOLUTION_RADIUS = 2;
+const F32 KERNEL_WEIGHTS[CONVOLUTION_RADIUS + 1] = F32[3](1.0, 2.0 / 3.0, 1.0 / 6.0);
+
+// Builds a position from a UV coordinate and a depth value: transforms the NDC point with u_invViewProjMat and then
+// performs the perspective divide.
+Vec3 unproject(Vec2 uv, F32 depth)
+{
+	const Vec4 ndc = Vec4(UV_TO_NDC(uv), depth, 1.0);
+	const Vec4 homogeneousPos = u_invViewProjMat * ndc;
+	return homogeneousPos.xyz / homogeneousPos.w;
+}
+
+// Adds up all the shadow layers into a single scalar. The filter uses that scalar as the "luma" of a pixel.
+F32 computeShadowsLuma(F32 shadowLayers[MAX_RT_SHADOW_LAYERS])
+{
+	F32 layerSum = 0.0;
+
+	ANKI_UNROLL for(U32 layerIdx = 0; layerIdx < MAX_RT_SHADOW_LAYERS; layerIdx++)
+	{
+		layerSum += shadowLayers[layerIdx];
+	}
+
+	return layerSum;
+}
+
+// Filters the packed shadow layers and their variance with a convolution that is weighted by the kernel, the luma
+// difference and the bilateral term. Stores the filtered shadow layers and the filtered variance.
+void main()
+{
+	if(skipOutOfBoundsInvocations(WORKGROUP_SIZE, FB_SIZE))
+	{
+		return;
+	}
+
+	const IVec2 outCoord = IVec2(gl_GlobalInvocationID);
+	const Vec2 centerUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
+
+	// A depth of exactly 1.0 is treated as sky: nothing to filter, just write zeros
+	const F32 centerDepth = textureLod(u_depthTex, u_linearAnyClampSampler, centerUv, 0.0).r;
+	if(centerDepth == 1.0)
+	{
+		imageStore(u_shadowsImage, outCoord, UVec4(0));
+		imageStore(u_varianceImage, outCoord, Vec4(0.0));
+		return;
+	}
+
+	// The center pixel is the reference of the bilateral weight
+	BilateralSample center;
+	center.m_depth = centerDepth;
+	center.m_position = unproject(centerUv, center.m_depth);
+	center.m_normal = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, centerUv);
+
+	// Luma of the center pixel
+	F32 centerLayers[MAX_RT_SHADOW_LAYERS];
+	unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, centerUv, 0.0), centerLayers);
+	const F32 centerLuma = computeShadowsLuma(centerLayers);
+
+	// Accumulators
+	F32 layerSums[MAX_RT_SHADOW_LAYERS];
+	zeroRtShadowLayers(layerSums);
+	F32 varianceSum = 0.0;
+	F32 weightSum = 0.0;
+
+	// Visit the (2 * CONVOLUTION_RADIUS + 1)^2 neighbourhood around the center
+	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_shadowsTex, 0).xy);
+	for(I32 dx = -CONVOLUTION_RADIUS; dx <= CONVOLUTION_RADIUS; ++dx)
+	{
+		for(I32 dy = -CONVOLUTION_RADIUS; dy <= CONVOLUTION_RADIUS; ++dy)
+		{
+			const Vec2 tapUv = centerUv + Vec2(dx, dy) * texelSize;
+
+			// Shadow layers and luma of this tap
+			F32 tapLayers[MAX_RT_SHADOW_LAYERS];
+			unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, tapUv, 0.0), tapLayers);
+			const F32 tapLuma = computeShadowsLuma(tapLayers);
+
+			// Luma weight: drops with the luma difference, relaxed by the variance of the tap
+			const F32 tapVariance = textureLod(u_varianceTex, u_nearestAnyClampSampler, tapUv, 0.0).x;
+			const F32 sigmaLuma = 4.0;
+			const F32 lumaDelta = abs(tapLuma - centerLuma);
+			const F32 lumaWeight = min(1.0, exp(-lumaDelta / (sigmaLuma * sqrt(tapVariance + 0.001) + EPSILON)));
+
+			// Geometry of this tap
+			BilateralSample tap;
+			tap.m_depth = textureLod(u_depthTex, u_linearAnyClampSampler, tapUv, 0.0).r;
+			tap.m_position = unproject(tapUv, tap.m_depth);
+			tap.m_normal = unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, tapUv, 0.0));
+
+			// Bilateral weight. The depth and normal weights are zero, the plane weight is one
+			BilateralConfig bilateralCfg;
+			initConfig(bilateralCfg);
+			const Vec3 bilateralWeights = normalize(Vec3(0.0, 0.0, 1.0));
+			bilateralCfg.m_depthWeight = bilateralWeights.x;
+			bilateralCfg.m_normalWeight = bilateralWeights.y;
+			bilateralCfg.m_planeWeight = bilateralWeights.z;
+			F32 weight = calculateBilateralWeight(tap, center, bilateralCfg);
+
+			// Fold in the luma weight and the kernel weight
+			weight *= lumaWeight;
+			weight *= KERNEL_WEIGHTS[abs(dx)] * KERNEL_WEIGHTS[abs(dy)];
+
+			// Accumulate
+			ANKI_UNROLL for(U32 layerIdx = 0; layerIdx < MAX_RT_SHADOW_LAYERS; layerIdx++)
+			{
+				layerSums[layerIdx] += tapLayers[layerIdx] * weight;
+			}
+
+			weightSum += weight;
+			varianceSum += weight * weight * tapVariance;
+		}
+	}
+
+	// Normalize. The max() guards against dividing by a zero total weight
+	weightSum = max(EPSILON, weightSum);
+
+	ANKI_UNROLL for(U32 layerIdx = 0; layerIdx < MAX_RT_SHADOW_LAYERS; layerIdx++)
+	{
+		layerSums[layerIdx] /= weightSum;
+	}
+
+	varianceSum /= (weightSum * weightSum);
+
+	// Write the results
+	imageStore(u_shadowsImage, outCoord, packRtShadows(layerSums));
+	imageStore(u_varianceImage, outCoord, Vec4(varianceSum, 0.0, 0.0, 0.0));
+}
+#pragma anki end

+ 20 - 7
AnKi/Shaders/RtShadowsSvgfVariance.ankiprog

@@ -9,6 +9,10 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 0, UVec2(1));
 
 #include <AnKi/Shaders/RtShadows.glsl>
 #include <AnKi/Shaders/BilateralFilter.glsl>
+#include <AnKi/Shaders/Functions.glsl>
+
+const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
@@ -26,6 +30,8 @@ layout(std430, push_constant, row_major) uniform b_pc
 	Mat4 u_invViewProjMat;
 };
 
+const I32 CONVOLUTION_RADIUS = 1;
+
 Vec3 unproject(Vec2 uv, F32 depth)
 {
 	const Vec4 worldPos4 = u_invViewProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
@@ -35,14 +41,22 @@ Vec3 unproject(Vec2 uv, F32 depth)
 
 void main()
 {
-	ANKI_BRANCH if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
+	if(skipOutOfBoundsInvocations(WORKGROUP_SIZE, FB_SIZE))
 	{
-		// Out of bounds
 		return;
 	}
 
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
 
+	const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	if(depth == 1.0)
+	{
+		// Sky
+		imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
+		imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
+		return;
+	}
+
 	const F32 historyLength = textureLod(u_historyLengthTex, u_nearestAnyClampSampler, uv, 0.0).r;
 
 	UVec4 outPackedShadowLayers;
@@ -52,12 +66,11 @@ void main()
 	{
 		// It's been stable less than 4 frames, need to do some work
 
-		const I32 radius = 1;
 		const Vec2 texelSize = 1.0 / Vec2(FB_SIZE);
 
 		// Set the reference sample
 		BilateralSample ref;
-		ref.m_depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+		ref.m_depth = depth;
 		ref.m_position = unproject(uv, ref.m_depth);
 		ref.m_normal = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
 
@@ -67,10 +80,10 @@ void main()
 		F32 sumShadowLayers[MAX_RT_SHADOW_LAYERS];
 		zeroRtShadowLayers(sumShadowLayers);
 
-		// Iterate
-		for(I32 offsetx = -radius; offsetx <= radius; offsetx++)
+		// Convolve
+		for(I32 offsetx = -CONVOLUTION_RADIUS; offsetx <= CONVOLUTION_RADIUS; offsetx++)
 		{
-			for(I32 offsety = -radius; offsety <= radius; offsety++)
+			for(I32 offsety = -CONVOLUTION_RADIUS; offsety <= CONVOLUTION_RADIUS; offsety++)
 			{
 				const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 

+ 4 - 1
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -24,7 +24,10 @@ layout(set = 0, binding = 2) uniform texture2D u_depthRt;
 
 void main()
 {
-	SKIP_OUT_OF_BOUNDS_INVOCATIONS();
+	if(skipOutOfBoundsInvocations(WORKGROUP_SIZE, FB_SIZE))
+	{
+		return;
+	}
 
 	// World position
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);