hai 1 ano · d09e8bd8b0
--- a/AnKi/Renderer/Reflections.cpp
+++ b/AnKi/Renderer/Reflections.cpp
@@ -149,6 +149,11 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 	const RenderTargetHandle hitPosAndDepthRt = rgraph.newRenderTarget(m_hitPosAndDepthRtDesc);
			
 
				 	const RenderTargetHandle hitPosRt = rgraph.newRenderTarget(m_hitPosRtDesc);
			
 
				 
			
 
				+	ReflectionConstants consts;
			
 
				+	consts.m_ssrStepIncrement = g_ssrStepIncrementCVar;
			
 
				+	consts.m_ssrMaxIterations = g_ssrMaxIterationsCVar;
			
 
				+	consts.m_roughnessCutoffToGiEdges = Vec2(g_roughnessCutoffToGiEdge0, g_roughnessCutoffToGiEdge1);
			
 
				+
			
 
				 	// SSR
			
 
				 	BufferView pixelsFailedSsrBuff;
			
 
				 	BufferHandle indirectArgsHandle;
			
@@ -167,12 +172,13 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
			
 
				 		rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
			
 
				 		rpass.newTextureDependency(getRenderer().getBloom().getPyramidRt(), TextureUsageBit::kSrvCompute);
			
 
				+		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
			
 
				 
			
 
				 		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavCompute);
			
 
				 		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavCompute);
			
 
				 		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kUavCompute);
			
 
				 
			
 
				-		rpass.setWork([this, transientRt1, hitPosAndDepthRt, &ctx, pixelsFailedSsrBuff](RenderPassWorkContext& rgraphCtx) {
			
 
				+		rpass.setWork([this, transientRt1, hitPosAndDepthRt, &ctx, pixelsFailedSsrBuff, consts](RenderPassWorkContext& rgraphCtx) {
			
 
				 			ANKI_TRACE_SCOPED_EVENT(Reflections);
			
 
				 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
			
 
				 
			
@@ -185,6 +191,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 			rgraphCtx.bindSrv(2, 0, getRenderer().getDepthDownscale().getRt());
			
 
				 			rgraphCtx.bindSrv(3, 0, getRenderer().getGBuffer().getDepthRt());
			
 
				 			rgraphCtx.bindSrv(4, 0, getRenderer().getBloom().getPyramidRt());
			
 
				+			cmdb.bindSrv(5, 0, getRenderer().getClusterBinning().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe));
			
 
				+			cmdb.bindSrv(6, 0, getRenderer().getClusterBinning().getClustersBuffer());
			
 
				 
			
 
				 			rgraphCtx.bindUav(0, 0, transientRt1);
			
 
				 			rgraphCtx.bindUav(1, 0, hitPosAndDepthRt);
			
@@ -193,11 +201,6 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 
			
 
				 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
			
 
				 
			
 
				-			SsrConstants2 consts;
			
 
				-			consts.m_stepIncrement = g_ssrStepIncrementCVar;
			
 
				-			consts.m_maxIterations = g_ssrMaxIterationsCVar;
			
 
				-			consts.m_projMat00_11_22_23 = Vec4(ctx.m_matrices.m_projection(0, 0), ctx.m_matrices.m_projection(1, 1),
			
 
				-											   ctx.m_matrices.m_projection(2, 2), ctx.m_matrices.m_projection(2, 3));
			
 
				 			cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				 
			
 
				 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y());
			
@@ -407,7 +410,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 		rpass.newTextureDependency(transientRt2, TextureUsageBit::kUavCompute);
			
 
				 		rpass.newTextureDependency(hitPosRt, TextureUsageBit::kUavCompute);
			
 
				 
			
 
				-		rpass.setWork([this, &ctx, transientRt1, transientRt2, hitPosAndDepthRt, hitPosRt](RenderPassWorkContext& rgraphCtx) {
			
 
				+		rpass.setWork([this, &ctx, transientRt1, transientRt2, hitPosAndDepthRt, hitPosRt, consts](RenderPassWorkContext& rgraphCtx) {
			
 
				 			ANKI_TRACE_SCOPED_EVENT(Reflections);
			
 
				 
			
 
				 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
			
@@ -425,6 +428,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 
			
 
				 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
			
 
				 
			
 
				+			cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				+
			
 
				 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
			
 
				 		});
			
 
				 	}
			
@@ -476,7 +481,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 
			
 
				 		rpass.newTextureDependency(transientRt2, TextureUsageBit::kUavCompute);
			
 
				 
			
 
				-		rpass.setWork([this, &ctx, transientRt1, transientRt2, writeMomentsRt](RenderPassWorkContext& rgraphCtx) {
			
 
				+		rpass.setWork([this, &ctx, transientRt1, transientRt2, writeMomentsRt, consts](RenderPassWorkContext& rgraphCtx) {
			
 
				 			ANKI_TRACE_SCOPED_EVENT(Reflections);
			
 
				 
			
 
				 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
			
@@ -489,6 +494,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
				 
			
 
				 			rgraphCtx.bindUav(0, 0, transientRt2);
			
 
				 
			
 
				+			cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				+
			
 
				 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
			
 
				 		});
			
 
				 	}
			
--- a/AnKi/Renderer/Reflections.h
+++ b/AnKi/Renderer/Reflections.h
@@ -18,6 +18,11 @@ inline NumericCVar<F32> g_rtReflectionsMaxRayDistanceCVar("R", "RtReflectionsMax
 
				 inline NumericCVar<U32> g_ssrStepIncrementCVar("R", "SsrStepIncrement", 32, 1, 256, "The number of steps for each loop");
			
 
				 inline NumericCVar<U32> g_ssrMaxIterationsCVar("R", "SsrMaxIterations", 64, 1, 256, "Max SSR raymarching loop iterations");
			
 
				 
			
 
				+// Roughness range that controls when reflections fall back to the GI probes. Both values are packed into
				+// ReflectionConstants::m_roughnessCutoffToGiEdges and the reflections shader feeds them, together with the
				+// surface roughness, to smoothstep(edge0, edge1, roughness) to decide (stochastically) whether a pixel samples
				+// the GI probes instead of doing a screen-space trace.
				+// NOTE(review): the numeric arguments are presumably (default, min, max) like the CVars above -- confirm.
				+inline NumericCVar<F32> g_roughnessCutoffToGiEdge0("R", "RoughnessCutoffToGiEdge0", 0.7f, 0.0f, 1.0f,
			
 
				+												   "Before this roughness the reflections will never sample the GI probes");
			
 
				+// Upper edge of the same range: the denoising passes also skip filtering for pixels at or above this roughness.
				+inline NumericCVar<F32> g_roughnessCutoffToGiEdge1("R", "RoughnessCutoffToGiEdge1", 0.9f, 0.0f, 1.0f,
			
 
				+												   "After this roughness the reflections will sample the GI probes");
			
 
				+
			
 
				 class Reflections : public RendererObject
			
 
				 {
			
 
				 public:
			
--- a/AnKi/Renderer/Renderer.cpp
+++ b/AnKi/Renderer/Renderer.cpp
@@ -752,6 +752,9 @@ Error Renderer::render(Texture* presentTex)
 
				 
			
 
				 	ctx.m_matrices.m_unprojectionParameters = ctx.m_matrices.m_projection.extractPerspectiveUnprojectionParams();
			
 
				 
			
 
				+	ctx.m_matrices.m_projMat00_11_22_23 = Vec4(ctx.m_matrices.m_projection(0, 0), ctx.m_matrices.m_projection(1, 1),
			
 
				+											   ctx.m_matrices.m_projection(2, 2), ctx.m_matrices.m_projection(2, 3));
			
 
				+
			
 
				 	ctx.m_cameraNear = cam.getNear();
			
 
				 	ctx.m_cameraFar = cam.getFar();
			
 
				 
			
--- a/AnKi/Shaders/Include/MiscRendererTypes.h
+++ b/AnKi/Shaders/Include/MiscRendererTypes.h
@@ -57,6 +57,9 @@ struct CommonMatrices
 
				 	/// @endcode
			
 
				 	Vec4 m_unprojectionParameters;
			
 
				 
			
 
				+	/// Elements (0,0), (1,1), (2,2) and (2,3) of the perspective projection matrix. Used in cheapPerspectiveProjection
			
 
				+	Vec4 m_projMat00_11_22_23;
			
 
				+
			
 
				 	Vec2 m_jitterOffsetNdc;
			
 
				 	Vec2 m_padding;
			
 
				 };
			
@@ -134,33 +137,11 @@ struct DepthDownscaleConstants
 
				 	U32 m_mipmapCount;
			
 
				 };
			
 
				 
			
 
				-// Screen space reflections uniforms
			
 
				-struct SsrConstants
			
 
				-{
			
 
				-	Vec2 m_viewportSizef;
			
 
				-	U32 m_frameCount;
			
 
				-	U32 m_maxIterations;
			
 
				-
			
 
				-	UVec2 m_padding;
			
 
				-	F32 m_roughnessCutoff;
			
 
				-	U32 m_stepIncrement;
			
 
				-
			
 
				-	Vec4 m_projMat00_11_22_23;
			
 
				-
			
 
				-	Vec4 m_unprojectionParameters;
			
 
				-
			
 
				-	Mat4 m_prevViewProjMatMulInvViewProjMat;
			
 
				-	Mat3x4 m_normalMat;
			
 
				-};
			
 
				-
			
 
				-struct SsrConstants2
			
 
				+struct ReflectionConstants
			
 
				 {
			
 
				-	U32 m_maxIterations;
			
 
				-	U32 m_stepIncrement;
			
 
				-	U32 m_padding1;
			
 
				-	U32 m_padding2;
			
 
				-
			
 
				-	Vec4 m_projMat00_11_22_23;
			
 
				+	U32 m_ssrMaxIterations;
			
 
				+	U32 m_ssrStepIncrement;
			
 
				+	Vec2 m_roughnessCutoffToGiEdges;
			
 
				 };
			
 
				 
			
 
				 struct PixelFailedSsr
			
--- a/AnKi/Shaders/Reflections.ankiprog
+++ b/AnKi/Shaders/Reflections.ankiprog
@@ -20,14 +20,14 @@
 
				 #include <AnKi/Shaders/SsRaymarching.hlsl>
			
 
				 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
			
 
				 
			
 
				-// Config
			
 
				+// Config & debug
			
 
				 constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
			
 
				 #define SPATIAL_UPSCALING_POISON_KERNEL kPoissonDisk8
			
 
				 constexpr F32 kMaxBilateralSamples = 5.0;
			
 
				 constexpr F32 kGaussianSigma = 0.55;
			
 
				 #define STOCHASTIC_REFLECTIONS 1
			
 
				 #define TRY_SHADOWMAP_FIRST 1
			
 
				-#define DISABLE_DENOISING 0
			
 
				+constexpr Bool kDisableDenoising = false;
			
 
				 
			
 
				 // Functions
			
 
				 Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProbes, Vec3 worldPos, Vec3 worldNormal,
			
@@ -55,6 +55,32 @@ Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProb
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// Packs a color, a depth and the 4 low bits of a sample count into one Vec4.
				+// Bit i of sampleCount is stored as the sign of component i (bit set -> positive, bit clear -> negative).
				+// Every component is biased by 0.01 before the sign is applied so that a value of zero still carries a sign.
				+// decodeColorDepthAndSampleCount() is the inverse. Inputs below -0.01 would corrupt the encoded bits.
				+Vec4 encodeColorDepthAndSampleCount(Vec3 color, F32 depth, U32 sampleCount)
				+{
				+	const Vec4 biased = Vec4(color, depth) + 0.01; // Add 0.01 to make sure that the sign sticks
				+
				+	Vec4 bitSigns;
				+	[unroll] for(U32 bit = 0; bit < 4; ++bit)
				+	{
				+		const Bool bitSet = (sampleCount & (1u << bit)) != 0u;
				+		bitSigns[bit] = bitSet ? 1.0 : -1.0;
				+	}
				+
				+	return biased * bitSigns;
				+}
			
 
				+
			
 
				+// Inverse of encodeColorDepthAndSampleCount().
				+// @param rgba The encoded value: xyz hold the color, w the depth, the component signs hold the sample count.
				+// @param[out] color The color with the 0.01 bias removed.
				+// @param[out] depth The depth with the 0.01 bias removed.
				+// @param[out] sampleCount Rebuilt from the signs: bit i is set when component i is positive.
				+void decodeColorDepthAndSampleCount(Vec4 rgba, out Vec3 color, out F32 depth, out U32 sampleCount)
				+{
				+	U32 bits = 0u;
				+	[unroll] for(U32 bit = 0; bit < 4; ++bit)
				+	{
				+		if(sign(rgba[bit]) > 0.0)
				+		{
				+			bits |= 1u << bit;
				+		}
				+	}
				+	sampleCount = bits;
				+
				+	// Drop the signs first and then the bias that the encoder added
				+	const Vec4 unbiased = abs(rgba) - 0.01;
				+	color = unbiased.xyz;
				+	depth = unbiased.w;
				+}
			
 
				+
			
 
				 // ===========================================================================
			
 
				 // SSR                                                                       =
			
 
				 // ===========================================================================
			
@@ -69,6 +95,8 @@ Texture2D<Vec4> g_gbufferRt2 : register(t1);
 
				 Texture2D<Vec4> g_downscaledDepthTex : register(t2);
			
 
				 Texture2D<Vec4> g_depthTex : register(t3);
			
 
				 Texture2D<Vec4> g_lightBufferRt : register(t4);
			
 
				+StructuredBuffer<GlobalIlluminationProbe> g_giProbes : register(t5);
			
 
				+StructuredBuffer<Cluster> g_clusters : register(t6);
			
 
				 
			
 
				 RWTexture2D<Vec4> g_colorAndPdfTex : register(u0);
			
 
				 RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
			
@@ -77,66 +105,30 @@ RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u3);
 
				 
			
 
				 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
			
 
				 
			
 
				-ANKI_FAST_CONSTANTS(SsrConstants2, g_consts)
			
 
				+ANKI_FAST_CONSTANTS(ReflectionConstants, g_consts)
			
 
				 
			
 
				 #	define NUM_THREADS_SQRT 8
			
 
				 
			
 
				 groupshared Vec4 g_viewHitPointAndAttenuation[NUM_THREADS_SQRT][NUM_THREADS_SQRT];
			
 
				 groupshared Vec4 g_viewPosAndDepth[NUM_THREADS_SQRT][NUM_THREADS_SQRT];
			
 
				 
			
 
				-void doWork(UVec2 realCoord, UVec2 logicalCoord, Vec2 uv, out F32 attenuation, out Vec3 outColor, out F32 pdf, out F32 roughness, out F32 depth,
			
 
				-			out Vec3 viewReflDir, out Vec3 viewHitPoint, out Vec3 viewPos, out Vec3 viewNormal)
			
 
				+void doSsr(UVec2 realCoord, UVec2 logicalCoord, Vec2 uv, Vec3 viewReflDir, Vec3 viewPos, F32 depth, F32 randFactor, out F32 attenuation,
			
 
				+		   out Vec3 outColor, out Vec3 viewHitPoint)
			
 
				 {
			
 
				 	attenuation = 0.0;
			
 
				 	outColor = 0.0;
			
 
				-	pdf = 0.0;
			
 
				-	roughness = 0.0;
			
 
				-	viewReflDir = 0.0;
			
 
				 	viewHitPoint = 0.0;
			
 
				-	viewPos = 0.0;
			
 
				-	viewNormal = 0.0;
			
 
				-
			
 
				-	depth = g_depthTex[logicalCoord].x;
			
 
				-	if(depth == 1.0)
			
 
				-	{
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	const Vec4 rt1 = g_gbufferRt1[logicalCoord];
			
 
				-	const Vec4 rt2 = g_gbufferRt2[logicalCoord];
			
 
				-
			
 
				-	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
			
 
				-	viewNormal = mul(g_globalRendererConstants.m_matrices.m_view, Vec4(worldNormal, 0.0));
			
 
				-	roughness = unpackRoughnessFromGBuffer(rt1);
			
 
				-
			
 
				-	const Vec2 ndc = uvToNdc(uv);
			
 
				-	viewPos = cheapPerspectiveUnprojection(g_globalRendererConstants.m_matrices.m_unprojectionParameters, ndc, depth);
			
 
				-
			
 
				-	// Noise
			
 
				-	const UVec3 seed = rand3DPCG16(UVec3(logicalCoord, g_globalRendererConstants.m_frame % 8u));
			
 
				-	const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
			
 
				-
			
 
				-	// Compute refl
			
 
				-	const Vec3 viewDir = -normalize(viewPos);
			
 
				-#	if STOCHASTIC_REFLECTIONS
			
 
				-	viewReflDir = sampleReflectionVectorIsotropic(viewDir, viewNormal, roughness, randFactors, 4, pdf);
			
 
				-#	else
			
 
				-	ANKI_MAYBE_UNUSED(roughness);
			
 
				-	ANKI_MAYBE_UNUSED(randFactors);
			
 
				-	viewReflDir = reflect(-viewDir, viewNormal);
			
 
				-	pdf = 1.0;
			
 
				-#	endif
			
 
				 
			
 
				 	// Trace
			
 
				 	Vec3 hitPoint;
			
 
				 	{
			
 
				 		const U32 lod = 8u; // Use the max LOD for ray marching
			
 
				-		const U32 stepIncrement = g_consts.m_stepIncrement;
			
 
				+		const U32 stepIncrement = g_consts.m_ssrStepIncrement;
			
 
				 		const F32 stepIncrementf = F32(stepIncrement);
			
 
				 		const F32 minStepf = min(4.0f, stepIncrementf);
			
 
				-		const U32 initialStepIncrement = U32(lerp(minStepf, stepIncrementf, randFactors.x));
			
 
				-		raymarchGroundTruth(viewPos, viewReflDir, uv, depth, g_consts.m_projMat00_11_22_23, g_consts.m_maxIterations, g_downscaledDepthTex,
			
 
				-							g_trilinearClampSampler, F32(lod), stepIncrement, initialStepIncrement, hitPoint, attenuation);
			
 
				+		const U32 initialStepIncrement = U32(lerp(minStepf, stepIncrementf, randFactor));
			
 
				+		raymarchGroundTruth(viewPos, viewReflDir, uv, depth, g_globalRendererConstants.m_matrices.m_projMat00_11_22_23, g_consts.m_ssrMaxIterations,
			
 
				+							g_downscaledDepthTex, g_trilinearClampSampler, F32(lod), stepIncrement, initialStepIncrement, hitPoint, attenuation);
			
 
				 
			
 
				 		if(attenuation < kLowAttenuation)
			
 
				 		{
			
@@ -202,12 +194,13 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				+	candidateCount += 1.0;
			
 
				+
			
 
				 	const F32 weight = calculateBilateralWeightDepth(depth, g_viewPosAndDepth[svGroupThreadId2.x][svGroupThreadId2.y].w, 1.0);
			
 
				 	if(weight > depthWeight)
			
 
				 	{
			
 
				 		depthWeight = weight;
			
 
				 		neighbourOffset = svGroupThreadId2 - svGroupThreadId;
			
 
				-		candidateCount += 1.0;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -222,17 +215,67 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 	const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
			
 
				 	const Vec2 uv = (Vec2(logicalCoord) + 0.5) / Vec2(halfViewportSize.x * 2u, halfViewportSize.y);
			
 
				 
			
 
				-	F32 ssrAttenuation;
			
 
				+	// Read stuff
			
 
				+	const F32 depth = g_depthTex[logicalCoord].x;
			
 
				+	const Vec4 rt1 = g_gbufferRt1[logicalCoord];
			
 
				+	const Vec4 rt2 = g_gbufferRt2[logicalCoord];
			
 
				+	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
			
 
				+	const Vec3 viewNormal = mul(g_globalRendererConstants.m_matrices.m_view, Vec4(worldNormal, 0.0));
			
 
				+	const F32 roughness = unpackRoughnessFromGBuffer(rt1);
			
 
				+
			
 
				+	const Vec2 ndc = uvToNdc(uv);
			
 
				+	const Vec3 viewPos = cheapPerspectiveUnprojection(g_globalRendererConstants.m_matrices.m_unprojectionParameters, ndc, depth);
			
 
				+
			
 
				+	// Rand
			
 
				+	const UVec3 seed = rand3DPCG16(UVec3(logicalCoord, g_globalRendererConstants.m_frame % 8u));
			
 
				+	const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
			
 
				+
			
 
				+	// Compute refl vector
			
 
				+	const Vec3 viewDir = -normalize(viewPos);
			
 
				+
			
 
				+	// Sample GI probes factor
			
 
				+	const F32 sampleGiProbesLerp = smoothstep(g_consts.m_roughnessCutoffToGiEdges.x, g_consts.m_roughnessCutoffToGiEdges.y, roughness);
			
 
				+	const Bool bSampleGiProbes = (sampleGiProbesLerp > randFactors.x); // Choose stochastically
			
 
				+
			
 
				+	// Sample the GI probes or do a screen-space trace
			
 
				 	Vec3 outColor;
			
 
				-	F32 pdf;
			
 
				-	F32 roughness;
			
 
				-	F32 depth;
			
 
				 	Vec3 viewReflDir;
			
 
				 	Vec3 viewHitPoint;
			
 
				-	Vec3 viewPos;
			
 
				-	Vec3 viewNormal;
			
 
				-	doWork(realCoord, logicalCoord, uv, ssrAttenuation, outColor, pdf, roughness, depth, viewReflDir, viewHitPoint, viewPos, viewNormal);
			
 
				+	F32 pdf;
			
 
				+	F32 ssrAttenuation;
			
 
				+	if(bSampleGiProbes)
			
 
				+	{
			
 
				+		viewReflDir = reflect(-viewDir, viewNormal);
			
 
				+
			
 
				+		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				+
			
 
				+		const Vec3 woldReflDir = mul(g_globalRendererConstants.m_matrices.m_cameraTransform, Vec4(viewReflDir, 0.0));
			
 
				+
			
 
				+		Vec4 worldPos = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjection, Vec4(uvToNdc(uv), depth, 1.0));
			
 
				+		worldPos.xyz /= worldPos.w;
			
 
				+
			
 
				+		outColor = sampleGiProbes<F32>(cluster, g_giProbes, woldReflDir, worldPos.xyz, g_trilinearClampSampler);
			
 
				+
			
 
				+		viewHitPoint = viewPos + viewReflDir * 1.0;
			
 
				+		pdf = 1.0;
			
 
				+		ssrAttenuation = 1.0;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		// SS trace
			
 
				+		if(STOCHASTIC_REFLECTIONS)
				+		{
				+			// Pick a roughness dependent reflection direction. pdf is passed as the last argument and is presumably
				+			// written by the call
				+			viewReflDir = sampleReflectionVectorIsotropic(viewDir, viewNormal, roughness, randFactors, 4, pdf);
				+		}
				+		else
				+		{
				+			// Perfect mirror reflection. pdf must be written on this path as well, otherwise it is read
				+			// uninitialized further down when STOCHASTIC_REFLECTIONS is switched off
				+			viewReflDir = reflect(-viewDir, viewNormal);
				+			pdf = 1.0;
				+		}
			
 
				 
			
 
				+		doSsr(realCoord, logicalCoord, uv, viewReflDir, viewPos, depth, randFactors.x, ssrAttenuation, outColor, viewHitPoint);
			
 
				+	}
			
 
				+
			
 
				+	// Stash to groupshared
			
 
				 	g_viewHitPointAndAttenuation[svGroupThreadId.x][svGroupThreadId.y] = Vec4(viewHitPoint, ssrAttenuation);
			
 
				 	g_viewPosAndDepth[svGroupThreadId.x][svGroupThreadId.y] = Vec4(viewPos, depth);
			
 
				 	GroupMemoryBarrierWithGroupSync();
			
@@ -246,7 +289,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 	}
			
 
				 
			
 
				 	// Hallucinate if needed
			
 
				-	if(ssrAttenuation <= kLowAttenuation && false)
			
 
				+	if(ssrAttenuation <= kLowAttenuation)
			
 
				 	{
			
 
				 		IVec2 neighbourOffset = -100;
			
 
				 		F32 depthWeight = 0.0;
			
@@ -257,7 +300,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 		bestCandidateToHallucinate(svGroupThreadId, IVec2(1, -1), depth, neighbourOffset, depthWeight, candidateCount);
			
 
				 		bestCandidateToHallucinate(svGroupThreadId, IVec2(1, 1), depth, neighbourOffset, depthWeight, candidateCount);
			
 
				 
			
 
				-		if(neighbourOffset.x != -100)
			
 
				+		if(neighbourOffset.x != -100 && candidateCount == 4.0)
			
 
				 		{
			
 
				 			// Found something
			
 
				 
			
@@ -275,7 +318,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 
			
 
				 			ssrAttenuation = g_viewHitPointAndAttenuation[neighbourSvGroupThreadId.x][neighbourSvGroupThreadId.y].w;
			
 
				 
			
 
				-			const Vec4 v4 = cheapPerspectiveProjection(g_consts.m_projMat00_11_22_23, Vec4(viewHitPoint, 1.0));
			
 
				+			const Vec4 v4 = cheapPerspectiveProjection(g_globalRendererConstants.m_matrices.m_projMat00_11_22_23, Vec4(viewHitPoint, 1.0));
			
 
				 			const Vec2 uv = ndcToUv(v4.xy / v4.w);
			
 
				 			outColor = g_lightBufferRt.SampleLevel(g_trilinearClampSampler, uv, 0.0).rgb;
			
 
				 		}
			
@@ -555,8 +598,10 @@ RWTexture2D<Vec4> g_hitPosTex : register(u1);
 
				 
			
 
				 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
			
 
				 
			
 
				-groupshared Vec4 g_colorAndPdf[4][8];
			
 
				-groupshared Vec4 g_hitPosAndDepth[4][8];
			
 
				+ANKI_FAST_CONSTANTS(ReflectionConstants, g_consts)
			
 
				+
			
 
				+groupshared Vec4 g_colorAndPdf[8][8];
			
 
				+groupshared Vec4 g_hitPosAndDepth[8][8];
			
 
				 
			
 
				 // Return true if the coord contains a pixel that was populated by the previous passes
			
 
				 Bool isCheckerboardWhite(UVec2 coord)
			
@@ -571,8 +616,6 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	svGroupThreadId /= 2;
			
 
				-
			
 
				 	const F32 weight = calculateBilateralWeightDepth(refDepth, g_hitPosAndDepth[svGroupThreadId.x][svGroupThreadId.y], 1.0);
			
 
				 
			
 
				 	color += g_colorAndPdf[svGroupThreadId.x][svGroupThreadId.y].xyz * weight;
			
@@ -603,10 +646,10 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 
				 		const Vec4 rgba = g_colorAndPdfTex[checkerboardCoord];
			
 
				 		refColor = rgba.xyz;
			
 
				 		refPdf = rgba.w;
			
 
				-		g_colorAndPdf[svGroupThreadId.x / 2u][svGroupThreadId.y] = rgba;
			
 
				+		g_colorAndPdf[svGroupThreadId.x][svGroupThreadId.y] = rgba;
			
 
				 
			
 
				 		refHitPos = g_hitPosAndDepthTex[checkerboardCoord].xyz;
			
 
				-		g_hitPosAndDepth[svGroupThreadId.x / 2u][svGroupThreadId.y] = Vec4(refHitPos, refDepth);
			
 
				+		g_hitPosAndDepth[svGroupThreadId.x][svGroupThreadId.y] = Vec4(refHitPos, refDepth);
			
 
				 	}
			
 
				 
			
 
				 	GroupMemoryBarrierWithGroupSync();
			
@@ -634,7 +677,11 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	if(DISABLE_DENOISING)
			
 
				+	const Vec4 rt1 = g_gbufferRt1[coord];
			
 
				+	const F32 roughness = unpackRoughnessFromGBuffer(rt1);
			
 
				+	const F32 alpha = pow2(roughness);
			
 
				+
			
 
				+	if(kDisableDenoising || roughness >= g_consts.m_roughnessCutoffToGiEdges.y)
			
 
				 	{
			
 
				 		g_denoisedTex[coord] = Vec4(refColor, 1.0 - refDepth); // Store depth in reverse for better precision
			
 
				 		g_hitPosTex[coord] = Vec4(refHitPos - g_globalRendererConstants.m_cameraPosition, 0.0);
			
@@ -647,10 +694,6 @@ void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec
 
				 
			
 
				 	const Vec3 viewDir = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
			
 
				 
			
 
				-	const Vec4 rt1 = g_gbufferRt1[coord];
			
 
				-	const F32 roughness = unpackRoughnessFromGBuffer(rt1);
			
 
				-	const F32 alpha = pow2(roughness);
			
 
				-
			
 
				 	Vec3 outColor = 0.0;
			
 
				 	Vec3 newHitPos = 0.0;
			
 
				 
			
@@ -868,7 +911,7 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 
				 	const UVec2 coord = min(svDispatchThreadId, textureSize - 1);
			
 
				 	const Vec2 uv = (Vec2(coord) + 0.5f) / textureSize;
			
 
				 
			
 
				-	if(DISABLE_DENOISING)
			
 
				+	if(kDisableDenoising)
			
 
				 	{
			
 
				 		g_outTex[coord] = g_colorAndDepth[coord];
			
 
				 		g_momentsTex[coord] = 0.0;
			
@@ -934,6 +977,8 @@ Texture2D<Vec4> g_gbufferRt1 : register(t2);
 
				 
			
 
				 RWTexture2D<Vec4> g_outTex : register(u0);
			
 
				 
			
 
				+ANKI_FAST_CONSTANTS(ReflectionConstants, g_consts)
			
 
				+
			
 
				 F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
			
 
				 {
			
 
				 #	if 1
			
@@ -970,18 +1015,18 @@ F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
 
				 	const F32 refDepth = rgba.w;
			
 
				 	const Vec3 centerColor = rgba.xyz;
			
 
				 
			
 
				-	if(DISABLE_DENOISING)
			
 
				+	const Vec4 rt1 = g_gbufferRt1[coord];
			
 
				+	const F32 roughness = unpackRoughnessFromGBuffer<F32>(rt1, 0.0);
			
 
				+	const F32 sqRoughness = sqrt(roughness);
			
 
				+
			
 
				+	if(kDisableDenoising || roughness >= g_consts.m_roughnessCutoffToGiEdges.y)
			
 
				 	{
			
 
				-		g_outTex[coord] = Vec4(centerColor, refDepth);
			
 
				+		g_outTex[coord] = encodeColorDepthAndSampleCount(centerColor, refDepth, 0u);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	const F32 variance = sqrt(computeVarianceCenter(coord, outSize)) * 100.0;
			
 
				 
			
 
				-	const Vec4 rt1 = g_gbufferRt1[coord];
			
 
				-	const F32 roughness = unpackRoughnessFromGBuffer<F32>(rt1, 0.0);
			
 
				-	const F32 sqRoughness = sqrt(roughness);
			
 
				-
			
 
				 	const F32 lerpFactor = sqRoughness * min(1.0, max(sqRoughness, variance));
			
 
				 
			
 
				 	const F32 sampleCount = round(lerp(0, kMaxBilateralSamples, lerpFactor));
			
@@ -1012,15 +1057,7 @@ F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
 
				 
			
 
				 	colorSum /= weightSum;
			
 
				 
			
 
				-	// Encode the step count in the signs of the out color
			
 
				-	const U32 sampleCountu = sampleCount;
			
 
				-	Vec4 signs;
			
 
				-	[unroll] for(U32 i = 0; i < 4; i++)
			
 
				-	{
			
 
				-		signs[i] = (sampleCountu & (1u << i)) ? 1.0 : -1.0;
			
 
				-	}
			
 
				-
			
 
				-	g_outTex[coord] = Vec4(colorSum, refDepth) * signs;
			
 
				+	g_outTex[coord] = encodeColorDepthAndSampleCount(colorSum, refDepth, sampleCount);
			
 
				 }
			
 
				 #endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_BilateralDenoiseHorizontal
			
 
				 
			
@@ -1033,17 +1070,6 @@ Texture2D<Vec4> g_colorAndDepthAndSampleCount : register(t0);
 
				 RWTexture2D<Vec4> g_outTex : register(u0);
			
 
				 RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u1);
			
 
				 
			
 
				-F32 decodeSampleCount(Vec4 rgba)
			
 
				-{
			
 
				-	U32 sampleCountu = 0;
			
 
				-	[unroll] for(U32 i = 0; i < 4; ++i)
			
 
				-	{
			
 
				-		sampleCountu |= (sign(rgba[i]) > 0.0) ? (1u << i) : 0u;
			
 
				-	}
			
 
				-
			
 
				-	return sampleCountu;
			
 
				-}
			
 
				-
			
 
				 [NumThreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DispatchThreadID)
			
 
				 {
			
 
				 	UVec2 outSize;
			
@@ -1058,13 +1084,13 @@ F32 decodeSampleCount(Vec4 rgba)
 
				 		g_indirectArgs[1].m_threadGroupCountX = 0;
			
 
				 	}
			
 
				 
			
 
				-	Vec4 rgba = g_colorAndDepthAndSampleCount[coord];
			
 
				-	const F32 sampleCount = decodeSampleCount(rgba);
			
 
				-	rgba = abs(rgba);
			
 
				-	const F32 refDepth = rgba.w;
			
 
				-	const Vec3 refColor = rgba.xyz;
			
 
				+	U32 sampleCountu;
			
 
				+	F32 refDepth;
			
 
				+	Vec3 refColor;
			
 
				+	decodeColorDepthAndSampleCount(g_colorAndDepthAndSampleCount[coord], refColor, refDepth, sampleCountu);
			
 
				+	const F32 sampleCount = sampleCountu;
			
 
				 
			
 
				-	if(DISABLE_DENOISING)
			
 
				+	if(kDisableDenoising)
			
 
				 	{
			
 
				 		g_outTex[coord] = Vec4(refColor, 1.0);
			
 
				 		return;
			
@@ -1082,7 +1108,7 @@ F32 decodeSampleCount(Vec4 rgba)
 
				 		IVec2 newCoord = coord + IVec2(0.0, y);
			
 
				 		newCoord.y = clamp(newCoord.y, 0, outSize.y - 1);
			
 
				 
			
 
				-		rgba = abs(g_colorAndDepthAndSampleCount[newCoord]);
			
 
				+		// The texture holds values written by encodeColorDepthAndSampleCount(): drop the sign bits and the 0.01 bias
				+		// so the neighbour is comparable with refColor/refDepth, which were decoded the same way
				+		const Vec4 rgba = abs(g_colorAndDepthAndSampleCount[newCoord]) - 0.01;
			
 
				 		const F32 sampleDepth = rgba.w;
			
 
				 		const Vec3 sampleColor = rgba.xyz;