3 săptămâni în urmă · ee83ac1a72
--- a/AnKi/Config.h.cmake
+++ b/AnKi/Config.h.cmake
@@ -7,9 +7,6 @@
 
				 
			
 
				 #pragma once
			
 
				 
			
 
				-/// @addtogroup config
			
 
				-/// @{
			
 
				-
			
 
				 #define _ANKI_STR_HELPER(x) #x
			
 
				 #define _ANKI_STR(x) _ANKI_STR_HELPER(x)
			
 
				 
			
@@ -350,4 +347,3 @@ void cleanupGetAndroidCommandLineArguments(void* ptr);
 
				 		return exitCode; \
			
 
				 	}
			
 
				 #endif
			
 
				-/// @}
			
--- a/AnKi/Renderer/ClusterBinning.cpp
+++ b/AnKi/Renderer/ClusterBinning.cpp
@@ -38,6 +38,8 @@ Error ClusterBinning::init()
 
				 									 m_packingGrProgs[type], "PackVisibles"));
			
 
				 	}
			
 
				 
			
 
				+	m_tileCounts = (getRenderer().getInternalResolution() + kClusteredShadingTileSize - 1) / kClusteredShadingTileSize;
			
 
				+
			
 
				 	return Error::kNone;
			
 
				 }
			
 
				 
			
@@ -49,8 +51,10 @@ void ClusterBinning::populateRenderGraph()
 
				 
			
 
				 	// Allocate the clusters buffer
			
 
				 	{
			
 
				-		const U32 clusterCount = getRenderer().getTileCounts().x * getRenderer().getTileCounts().y + getRenderer().getZSplitCount();
			
 
				-		m_runCtx.m_clustersBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<Cluster>(clusterCount);
			
 
				+		const U32 clusterCount = m_tileCounts.x * m_tileCounts.y + g_cvarRenderClustererZSplitCount;
			
 
				+
			
 
				+		// Allocate +1 which is used as a zero cluster when a point is outside the clusterer
			
 
				+		m_runCtx.m_clustersBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<Cluster>(clusterCount + 1);
			
 
				 		m_runCtx.m_dep = rgraph.importBuffer(m_runCtx.m_clustersBuffer, BufferUsageBit::kNone);
			
 
				 	}
			
 
				 
			
@@ -78,7 +82,7 @@ void ClusterBinning::populateRenderGraph()
 
				 
			
 
				 			cmdb.bindShaderProgram(m_jobSetupGrProg.get());
			
 
				 
			
 
				-			const UVec4 consts(getRenderer().getTileCounts().x * getRenderer().getTileCounts().y);
			
 
				+			const UVec4 consts(m_tileCounts.x * m_tileCounts.y);
			
 
				 			cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				 
			
 
				 			for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
			
@@ -162,7 +166,7 @@ void ClusterBinning::populateRenderGraph()
 
				 				struct ClusterBinningConstants
			
 
				 				{
			
 
				 					Vec3 m_cameraOrigin;
			
 
				-					F32 m_zSplitCountOverFrustumLength;
			
 
				+					F32 m_zSplitCountOverClustererLength;
			
 
				 
			
 
				 					Vec2 m_renderingSize;
			
 
				 					U32 m_tileCountX;
			
@@ -180,16 +184,16 @@ void ClusterBinning::populateRenderGraph()
 
				 
			
 
				 				RenderingContext& ctx = getRenderingContext();
			
 
				 				consts.m_cameraOrigin = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz;
			
 
				-				consts.m_zSplitCountOverFrustumLength = F32(getRenderer().getZSplitCount()) / (ctx.m_matrices.m_far - ctx.m_matrices.m_near);
			
 
				+				consts.m_zSplitCountOverClustererLength = F32(g_cvarRenderClustererZSplitCount) / (computeClustererFar() - ctx.m_matrices.m_near);
			
 
				 				consts.m_renderingSize = Vec2(getRenderer().getInternalResolution());
			
 
				-				consts.m_tileCountX = getRenderer().getTileCounts().x;
			
 
				-				consts.m_tileCount = getRenderer().getTileCounts().x * getRenderer().getTileCounts().y;
			
 
				+				consts.m_tileCountX = m_tileCounts.x;
			
 
				+				consts.m_tileCount = m_tileCounts.x * m_tileCounts.y;
			
 
				 
			
 
				 				Plane nearPlane;
			
 
				 				extractClipPlane(ctx.m_matrices.m_viewProjection, FrustumPlaneType::kNear, nearPlane);
			
 
				 				consts.m_nearPlaneWorld = Vec4(nearPlane.getNormal().xyz, nearPlane.getOffset());
			
 
				 
			
 
				-				consts.m_zSplitCountMinusOne = getRenderer().getZSplitCount() - 1;
			
 
				+				consts.m_zSplitCountMinusOne = g_cvarRenderClustererZSplitCount - 1;
			
 
				 
			
 
				 				consts.m_invertedViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
			
 
				 
			
@@ -280,4 +284,19 @@ void ClusterBinning::populateRenderGraph()
 
				 	}
			
 
				 }
			
 
				 
			
 
				+void ClusterBinning::fillClustererConstants(ClustererConstants& consts) const
			
 
				+{
			
 
				+	const F32 clustererFar = computeClustererFar();
			
 
				+	const F32 zSplitCount = F32(g_cvarRenderClustererZSplitCount);
			
 
				+	const F32 n = getRenderingContext().m_matrices.m_near;
			
 
				+	const F32 f = getRenderingContext().m_matrices.m_far;
			
 
				+
			
 
				+	consts.m_zSplitMagic.x = (clustererFar - n) / (-n * zSplitCount);
			
 
				+	consts.m_zSplitMagic.y = f * (clustererFar - n) / (n * (f - n) * zSplitCount);
			
 
				+	consts.m_tileCounts = m_tileCounts;
			
 
				+	consts.m_clustererFar = clustererFar;
			
 
				+	consts.m_clusterCount = consts.m_tileCounts.x * consts.m_tileCounts.y * U32(zSplitCount);
			
 
				+	consts.m_zSplitCount = U32(zSplitCount);
			
 
				+}
			
 
				+
			
 
				 } // end namespace anki
			
--- a/AnKi/Renderer/ClusterBinning.h
+++ b/AnKi/Renderer/ClusterBinning.h
@@ -9,6 +9,9 @@
 
				 
			
 
				 namespace anki {
			
 
				 
			
 
				+ANKI_CVAR2(NumericCVar<U32>, Render, Clusterer, ZSplitCount, 64, 8, kMaxZsplitCount, "Clusterer number of Z splits")
			
 
				+ANKI_CVAR2(NumericCVar<F32>, Render, Clusterer, Far, 512.0f, 32.0f, 10.0f * 1000.0f, "The extend of the clusterer in meters")
			
 
				+
			
 
				 // Bins clusterer objects to the clusterer.
			
 
				 class ClusterBinning : public RendererObject
			
 
				 {
			
@@ -36,12 +39,27 @@ public:
 
				 		return m_runCtx.m_dep;
			
 
				 	}
			
 
				 
			
 
				+	// Returns the far distance of the clusterer frustum. It's less than or equal to the camera's far.
			
 
				+	F32 computeClustererFar() const
			
 
				+	{
			
 
				+		return min<F32>(getRenderingContext().m_matrices.m_far, g_cvarRenderClustererFar);
			
 
				+	}
			
 
				+
			
 
				+	const UVec2& getTileCounts() const
			
 
				+	{
			
 
				+		return m_tileCounts;
			
 
				+	}
			
 
				+
			
 
				+	void fillClustererConstants(ClustererConstants& consts) const;
			
 
				+
			
 
				 private:
			
 
				 	ShaderProgramResourcePtr m_prog;
			
 
				 	ShaderProgramPtr m_jobSetupGrProg;
			
 
				 	Array<ShaderProgramPtr, U32(GpuSceneNonRenderableObjectType::kCount)> m_binningGrProgs;
			
 
				 	Array<ShaderProgramPtr, U32(GpuSceneNonRenderableObjectType::kCount)> m_packingGrProgs;
			
 
				 
			
 
				+	UVec2 m_tileCounts = UVec2(0u);
			
 
				+
			
 
				 	class
			
 
				 	{
			
 
				 	public:
			
--- a/AnKi/Renderer/LightShading.cpp
+++ b/AnKi/Renderer/LightShading.cpp
@@ -176,26 +176,12 @@ void LightShading::run(RenderPassWorkContext& rgraphCtx)
 
				 		cmdb.bindShaderProgram(m_applyFog.m_grProg.get());
			
 
				 
			
 
				 		// Bind all
			
 
				-		cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
			
 
				-		cmdb.bindSampler(1, 0, getRenderer().getSamplers().m_trilinearClamp.get());
			
 
				+		cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
			
 
				 
			
 
				 		rgraphCtx.bindSrv(0, 0, getGBuffer().getDepthRt());
			
 
				 		rgraphCtx.bindSrv(1, 0, getRenderer().getVolumetricFog().getRt());
			
 
				 
			
 
				-		class Consts
			
 
				-		{
			
 
				-		public:
			
 
				-			F32 m_zSplitCount;
			
 
				-			F32 m_finalZSplit;
			
 
				-			F32 m_near;
			
 
				-			F32 m_far;
			
 
				-		} consts;
			
 
				-		consts.m_zSplitCount = F32(getRenderer().getZSplitCount());
			
 
				-		consts.m_finalZSplit = F32(getRenderer().getVolumetricFog().getFinalClusterInZ());
			
 
				-		consts.m_near = getRenderingContext().m_matrices.m_near;
			
 
				-		consts.m_far = getRenderingContext().m_matrices.m_far;
			
 
				-
			
 
				-		cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				+		cmdb.bindConstantBuffer(0, 0, getRenderingContext().m_globalRenderingConstantsBuffer);
			
 
				 
			
 
				 		// finalPixelColor = pixelWithoutFog * transmitance + inScattering (see the shader)
			
 
				 		cmdb.setBlendFactors(0, BlendFactor::kOne, BlendFactor::kSrcAlpha);
			
--- a/AnKi/Renderer/Renderer.cpp
+++ b/AnKi/Renderer/Renderer.cpp
@@ -163,10 +163,6 @@ Error Renderer::initInternal(const RendererInitInfo& inf)
 
				 	ANKI_R_LOGI("Initializing offscreen renderer. Resolution %ux%u. Internal resolution %ux%u", m_postProcessResolution.x, m_postProcessResolution.y,
			
 
				 				m_internalResolution.x, m_internalResolution.y);
			
 
				 
			
 
				-	m_tileCounts.x = (m_internalResolution.x + kClusteredShadingTileSize - 1) / kClusteredShadingTileSize;
			
 
				-	m_tileCounts.y = (m_internalResolution.y + kClusteredShadingTileSize - 1) / kClusteredShadingTileSize;
			
 
				-	m_zSplitCount = g_cvarRenderZSplitCount;
			
 
				-
			
 
				 	if(g_cvarCoreMeshletRendering && !GrManager::getSingleton().getDeviceCapabilities().m_meshShaders)
			
 
				 	{
			
 
				 		m_meshletRenderingType = MeshletRenderingType::kSoftware;
			
@@ -342,7 +338,7 @@ Error Renderer::populateRenderGraph()
 
				 	m_historyLength->populateRenderGraph();
			
 
				 	m_depthDownscale->populateRenderGraph();
			
 
				 	m_shadowMapping->populateRenderGraph();
			
 
				-	m_clusterBinning2->populateRenderGraph();
			
 
				+	m_clusterBinning->populateRenderGraph();
			
 
				 	m_generatedSky->populateRenderGraph();
			
 
				 	if(m_indirectDiffuseProbes)
			
 
				 	{
			
@@ -392,7 +388,7 @@ void Renderer::writeGlobalRendererConstants(GlobalRendererConstants& outConsts)
 
				 
			
 
				 	RenderingContext& ctx = getRenderingContext();
			
 
				 	GlobalRendererConstants consts;
			
 
				-	memset(&consts, 0, sizeof(consts));
			
 
				+	zeroMemory(consts);
			
 
				 
			
 
				 	consts.m_renderingSize = Vec2(F32(m_internalResolution.x), F32(m_internalResolution.y));
			
 
				 
			
@@ -404,12 +400,8 @@ void Renderer::writeGlobalRendererConstants(GlobalRendererConstants& outConsts)
 
				 	consts.m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz, nearPlane.getOffset());
			
 
				 	consts.m_cameraPosition = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz;
			
 
				 
			
 
				-	consts.m_tileCounts = m_tileCounts;
			
 
				-	consts.m_zSplitCount = m_zSplitCount;
			
 
				-	consts.m_zSplitCountOverFrustumLength = F32(m_zSplitCount) / (ctx.m_matrices.m_far - ctx.m_matrices.m_near);
			
 
				-	consts.m_zSplitMagic.x = (ctx.m_matrices.m_near - ctx.m_matrices.m_far) / (ctx.m_matrices.m_near * F32(m_zSplitCount));
			
 
				-	consts.m_zSplitMagic.y = ctx.m_matrices.m_far / (ctx.m_matrices.m_near * F32(m_zSplitCount));
			
 
				-	consts.m_lightVolumeLastZSplit = min(g_cvarRenderVolumetricLightingAccumulationFinalZSplit - 1, m_zSplitCount);
			
 
				+	m_clusterBinning->fillClustererConstants(consts.m_clusterer);
			
 
				+	m_volumetricLightingAccumulation->fillClustererConstants(consts.m_clusterer);
			
 
				 
			
 
				 	consts.m_reflectionProbesMipCount = F32(m_probeReflections->getReflectionTextureMipmapCount());
			
 
				 
			
--- a/AnKi/Renderer/Renderer.h
+++ b/AnKi/Renderer/Renderer.h
@@ -25,7 +25,6 @@ ANKI_CVAR(
 
				 		return (value > 0.1f && value <= 8.0f) || value == 540.0f || value == 720.0f || value == 1080.0f || value == 1440.0f || value == 2160.0f;
			
 
				 	},
			
 
				 	"A factor over the requested swapchain resolution. Applies to post-processing and UI")
			
 
				-ANKI_CVAR(NumericCVar<U32>, Render, ZSplitCount, 64, 8, kMaxZsplitCount, "Clusterer number of Z splits")
			
 
				 ANKI_CVAR(NumericCVar<U8>, Render, TextureAnisotropy, (ANKI_PLATFORM_MOBILE) ? 1 : 16, 1, 16, "Texture anisotropy for the main passes")
			
 
				 ANKI_CVAR(BoolCVar, Render, PreferCompute, !ANKI_PLATFORM_MOBILE, "Prefer compute shaders")
			
 
				 ANKI_CVAR(BoolCVar, Render, HighQualityHdr, !ANKI_PLATFORM_MOBILE, "If true use R16G16B16 for HDR images. Alternatively use B10G11R11")
			
@@ -153,16 +152,6 @@ public:
 
				 		return m_samplers;
			
 
				 	}
			
 
				 
			
 
				-	const UVec2& getTileCounts() const
			
 
				-	{
			
 
				-		return m_tileCounts;
			
 
				-	}
			
 
				-
			
 
				-	U32 getZSplitCount() const
			
 
				-	{
			
 
				-		return m_zSplitCount;
			
 
				-	}
			
 
				-
			
 
				 	Format getHdrFormat() const;
			
 
				 	Format getDepthNoStencilFormat() const;
			
 
				 
			
@@ -275,9 +264,6 @@ private:
 
				 
			
 
				 	RenderGraphPtr m_rgraph;
			
 
				 
			
 
				-	UVec2 m_tileCounts = UVec2(0u);
			
 
				-	U32 m_zSplitCount = 0;
			
 
				-
			
 
				 	class
			
 
				 	{
			
 
				 	public:
			
--- a/AnKi/Renderer/RendererObject.def.h
+++ b/AnKi/Renderer/RendererObject.def.h
@@ -15,6 +15,7 @@ ANKI_RENDERER_OBJECT_DEF(Tonemapping, tonemapping, 1)
 
				 ANKI_RENDERER_OBJECT_DEF(FinalComposite, finalComposite, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(Dbg, dbg, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(ProbeReflections, probeReflections, 1)
			
 
				+ANKI_RENDERER_OBJECT_DEF(ClusterBinning, clusterBinning, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(VolumetricFog, volumetricFog, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(DepthDownscale, depthDownscale, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(TemporalAA, temporalAA, 1)
			
@@ -34,7 +35,6 @@ ANKI_RENDERER_OBJECT_DEF(MotionVectors, motionVectors, 1)
 
				 ANKI_RENDERER_OBJECT_DEF(TemporalUpscaler, temporalUpscaler, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(VrsSriGeneration, vrsSriGeneration, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(PrimaryNonRenderableVisibility, primaryNonRenderableVisibility, 1)
			
 
				-ANKI_RENDERER_OBJECT_DEF(ClusterBinning, clusterBinning2, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(Ssao, ssao, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(GeneratedSky, generatedSky, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(MotionBlur, motionBlur, 1)
			
--- a/AnKi/Renderer/VolumetricFog.cpp
+++ b/AnKi/Renderer/VolumetricFog.cpp
@@ -8,6 +8,7 @@
 
				 #include <AnKi/Renderer/DepthDownscale.h>
			
 
				 #include <AnKi/Renderer/ShadowMapping.h>
			
 
				 #include <AnKi/Renderer/LightShading.h>
			
 
				+#include <AnKi/Renderer/ClusterBinning.h>
			
 
				 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
			
 
				 #include <AnKi/Util/CVarSet.h>
			
 
				 #include <AnKi/Scene/Components/SkyboxComponent.h>
			
@@ -18,20 +19,17 @@ namespace anki {
 
				 Error VolumetricFog::init()
			
 
				 {
			
 
				 	// Misc
			
 
				-	const F32 qualityXY = g_cvarRenderVolumetricLightingAccumulationQualityXY;
			
 
				-	const F32 qualityZ = g_cvarRenderVolumetricLightingAccumulationQualityZ;
			
 
				-	m_finalZSplit = min<U32>(getRenderer().getZSplitCount() - 1, g_cvarRenderVolumetricLightingAccumulationFinalZSplit);
			
 
				+	const U32 zSplitCount = min<U32>(g_cvarRenderClustererZSplitCount, g_cvarRenderVolumetricLightingAccumulationFinalZSplit + 1);
			
 
				 
			
 
				-	m_volumeSize[0] = U32(F32(getRenderer().getTileCounts().x) * qualityXY);
			
 
				-	m_volumeSize[1] = U32(F32(getRenderer().getTileCounts().y) * qualityXY);
			
 
				-	m_volumeSize[2] = U32(F32(m_finalZSplit + 1) * qualityZ);
			
 
				+	m_volumeSize.xy = getClusterBinning().getTileCounts() << g_cvarRenderVolumetricLightingAccumulationSubdivisionXY;
			
 
				+	m_volumeSize.z = zSplitCount << g_cvarRenderVolumetricLightingAccumulationSubdivisionZ;
			
 
				 
			
 
				 	// Shaders
			
 
				 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/VolumetricFogAccumulation.ankiprogbin", m_prog, m_grProg));
			
 
				 
			
 
				 	// RT descr
			
 
				-	m_rtDescr = getRenderer().create2DRenderTargetDescription(m_volumeSize[0], m_volumeSize[1], Format::kR16G16B16A16_Sfloat, "Fog");
			
 
				-	m_rtDescr.m_depth = m_volumeSize[2];
			
 
				+	m_rtDescr = getRenderer().create2DRenderTargetDescription(m_volumeSize.x, m_volumeSize.y, Format::kR16G16B16A16_Sfloat, "Fog");
			
 
				+	m_rtDescr.m_depth = m_volumeSize.z;
			
 
				 	m_rtDescr.m_type = TextureType::k3D;
			
 
				 	m_rtDescr.bake();
			
 
				 
			
@@ -67,15 +65,13 @@ void VolumetricFog::populateRenderGraph()
 
				 		consts.m_fogDiffuse = (sky) ? sky->getFogDiffuseColor() : Vec3(0.0f);
			
 
				 		consts.m_fogScatteringCoeff = (sky) ? sky->getFogScatteringCoefficient() : 0.0f;
			
 
				 		consts.m_fogAbsorptionCoeff = (sky) ? sky->getFogAbsorptionCoefficient() : 0.0f;
			
 
				-		consts.m_near = getRenderingContext().m_matrices.m_near;
			
 
				-		consts.m_far = getRenderingContext().m_matrices.m_far;
			
 
				-		consts.m_zSplitCountf = F32(getRenderer().getZSplitCount());
			
 
				-		consts.m_volumeSize = UVec3(m_volumeSize);
			
 
				-		consts.m_maxZSplitsToProcessf = F32(m_finalZSplit + 1);
			
 
				+		consts.m_zSplitThickness = (getClusterBinning().computeClustererFar() - getRenderingContext().m_matrices.m_near)
			
 
				+								   / F32(g_cvarRenderClustererZSplitCount << g_cvarRenderVolumetricLightingAccumulationSubdivisionZ);
			
 
				+		consts.m_volumeSize = m_volumeSize;
			
 
				 
			
 
				 		cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				 
			
 
				-		dispatchPPCompute(cmdb, 8, 8, m_volumeSize[0], m_volumeSize[1]);
			
 
				+		dispatchPPCompute(cmdb, 8, 8, m_volumeSize.x, m_volumeSize.y);
			
 
				 	});
			
 
				 }
			
 
				 
			
--- a/AnKi/Renderer/VolumetricFog.h
+++ b/AnKi/Renderer/VolumetricFog.h
@@ -22,26 +22,13 @@ public:
 
				 		return m_runCtx.m_rt;
			
 
				 	}
			
 
				 
			
 
				-	const Array<U32, 3>& getVolumeSize() const
			
 
				-	{
			
 
				-		return m_volumeSize;
			
 
				-	}
			
 
				-
			
 
				-	// Get the last cluster split in Z axis that will be affected by lighting.
			
 
				-	U32 getFinalClusterInZ() const
			
 
				-	{
			
 
				-		return m_finalZSplit;
			
 
				-	}
			
 
				-
			
 
				 private:
			
 
				 	ShaderProgramResourcePtr m_prog;
			
 
				 	ShaderProgramPtr m_grProg;
			
 
				 
			
 
				 	RenderTargetDesc m_rtDescr;
			
 
				 
			
 
				-	U32 m_finalZSplit = 0;
			
 
				-
			
 
				-	Array<U32, 3> m_volumeSize;
			
 
				+	UVec3 m_volumeSize;
			
 
				 
			
 
				 	class
			
 
				 	{
			
--- a/AnKi/Renderer/VolumetricLightingAccumulation.cpp
+++ b/AnKi/Renderer/VolumetricLightingAccumulation.cpp
@@ -7,6 +7,7 @@
 
				 #include <AnKi/Renderer/ShadowMapping.h>
			
 
				 #include <AnKi/Renderer/IndirectDiffuseProbes.h>
			
 
				 #include <AnKi/Renderer/IndirectDiffuseClipmaps.h>
			
 
				+#include <AnKi/Renderer/GBuffer.h>
			
 
				 #include <AnKi/Renderer/Renderer.h>
			
 
				 #include <AnKi/Renderer/ClusterBinning.h>
			
 
				 #include <AnKi/Resource/ImageResource.h>
			
@@ -19,16 +20,13 @@ namespace anki {
 
				 Error VolumetricLightingAccumulation::init()
			
 
				 {
			
 
				 	// Misc
			
 
				-	const F32 qualityXY = g_cvarRenderVolumetricLightingAccumulationQualityXY;
			
 
				-	const F32 qualityZ = g_cvarRenderVolumetricLightingAccumulationQualityZ;
			
 
				-	const U32 finalZSplit = min<U32>(getRenderer().getZSplitCount() - 1, g_cvarRenderVolumetricLightingAccumulationFinalZSplit);
			
 
				+	const U32 finalZSplit = min<U32>(g_cvarRenderClustererZSplitCount - 1, g_cvarRenderVolumetricLightingAccumulationFinalZSplit);
			
 
				 
			
 
				-	m_volumeSize[0] = U32(F32(getRenderer().getTileCounts().x) * qualityXY);
			
 
				-	m_volumeSize[1] = U32(F32(getRenderer().getTileCounts().y) * qualityXY);
			
 
				-	m_volumeSize[2] = U32(F32(finalZSplit + 1) * qualityZ);
			
 
				+	m_volumeSize.xy = getClusterBinning().getTileCounts() << g_cvarRenderVolumetricLightingAccumulationSubdivisionXY;
			
 
				+	m_volumeSize.z = (finalZSplit + 1) << g_cvarRenderVolumetricLightingAccumulationSubdivisionZ;
			
 
				 
			
 
				-	if(!isAligned(getRenderer().getTileCounts().x, m_volumeSize[0]) || !isAligned(getRenderer().getTileCounts().y, m_volumeSize[1])
			
 
				-	   || m_volumeSize[0] == 0 || m_volumeSize[1] == 0 || m_volumeSize[2] == 0)
			
 
				+	if(!isAligned(getClusterBinning().getTileCounts().x, m_volumeSize.x) || !isAligned(getClusterBinning().getTileCounts().y, m_volumeSize.y)
			
 
				+	   || m_volumeSize.x == 0 || m_volumeSize.y == 0 || m_volumeSize.z == 0)
			
 
				 	{
			
 
				 		ANKI_R_LOGE("Wrong input");
			
 
				 		return Error::kUserData;
			
@@ -37,18 +35,25 @@ Error VolumetricLightingAccumulation::init()
 
				 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
			
 
				 
			
 
				 	// Shaders
			
 
				-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/VolumetricLightingAccumulation.ankiprogbin",
			
 
				-								 {{"ENABLE_SHADOWS", 1}, {"CLIPMAP_DIFFUSE_INDIRECT", isIndirectDiffuseClipmapsEnabled()}}, m_prog, m_grProg));
			
 
				+	const Array<SubMutation, 2> mutation = {{{"ENABLE_SHADOWS", 1}, {"CLIPMAP_DIFFUSE_INDIRECT", isIndirectDiffuseClipmapsEnabled()}}};
			
 
				+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/VolumetricLightingAccumulation.ankiprogbin", mutation, m_prog, m_grProg, "Accumulate"));
			
 
				+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/VolumetricLightingAccumulation.ankiprogbin", mutation, m_prog, m_debugGrProg, "Debug"));
			
 
				 
			
 
				 	// Create RTs
			
 
				 	TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
			
 
				-		m_volumeSize[0], m_volumeSize[1], Format::kR16G16B16A16_Sfloat,
			
 
				+		m_volumeSize.x, m_volumeSize.y, Format::kR16G16B16A16_Sfloat,
			
 
				 		TextureUsageBit::kUavCompute | TextureUsageBit::kSrvPixel | TextureUsageBit::kSrvCompute, "VolLight");
			
 
				-	texinit.m_depth = m_volumeSize[2];
			
 
				+	texinit.m_depth = m_volumeSize.z;
			
 
				 	texinit.m_type = TextureType::k3D;
			
 
				 	m_rtTextures[0] = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSrvPixel);
			
 
				 	m_rtTextures[1] = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSrvPixel);
			
 
				 
			
 
				+	m_debugRtDesc = getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x, getRenderer().getInternalResolution().y,
			
 
				+																  Format::kR16G16B16A16_Sfloat);
			
 
				+	m_debugRtDesc.bake();
			
 
				+
			
 
				+	m_debugResult = g_cvarRenderVolumetricLightingAccumulationDebug;
			
 
				+
			
 
				 	return Error::kNone;
			
 
				 }
			
 
				 
			
@@ -80,6 +85,8 @@ void VolumetricLightingAccumulation::populateRenderGraph()
 
				 		getIndirectDiffuseClipmaps().setDependencies(pass, TextureUsageBit::kSrvCompute);
			
 
				 	}
			
 
				 
			
 
				+	pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
			
 
				+
			
 
				 	pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
			
 
				 		ANKI_TRACE_SCOPED_EVENT(VolumetricLightingAccumulation);
			
 
				 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
			
@@ -103,6 +110,7 @@ void VolumetricLightingAccumulation::populateRenderGraph()
 
				 		rgraphCtx.bindSrv(srv++, 0, getShadowMapping().getShadowmapRt());
			
 
				 		cmdb.bindSrv(srv++, 0, getClusterBinning().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kFogDensityVolume));
			
 
				 		cmdb.bindSrv(srv++, 0, getClusterBinning().getClustersBuffer());
			
 
				+		rgraphCtx.bindSrv(srv++, 0, getGBuffer().getDepthRt());
			
 
				 
			
 
				 		if(isIndirectDiffuseProbesEnabled())
			
 
				 		{
			
@@ -133,15 +141,76 @@ void VolumetricLightingAccumulation::populateRenderGraph()
 
				 			consts.m_densityAtMinHeight = sky->getMaxFogDensity();
			
 
				 			consts.m_densityAtMaxHeight = sky->getMinFogDensity();
			
 
				 		}
			
 
				-		consts.m_volumeSize = UVec3(m_volumeSize);
			
 
				-
			
 
				-		const U32 finalZSplit = min<U32>(getRenderer().getZSplitCount() - 1, g_cvarRenderVolumetricLightingAccumulationFinalZSplit);
			
 
				-		consts.m_maxZSplitsToProcessf = F32(finalZSplit + 1);
			
 
				+		consts.m_volumeSize = m_volumeSize;
			
 
				+		consts.m_subZSplitThickness = (getClusterBinning().computeClustererFar() - getRenderingContext().m_matrices.m_near)
			
 
				+									  / F32(g_cvarRenderClustererZSplitCount << g_cvarRenderVolumetricLightingAccumulationSubdivisionZ);
			
 
				+		consts.m_clusterSubdivision =
			
 
				+			UVec3(g_cvarRenderVolumetricLightingAccumulationSubdivisionXY, g_cvarRenderVolumetricLightingAccumulationSubdivisionXY,
			
 
				+				  g_cvarRenderVolumetricLightingAccumulationSubdivisionZ);
			
 
				 
			
 
				 		cmdb.setFastConstants(&consts, sizeof(consts));
			
 
				 
			
 
				-		dispatchPPCompute(cmdb, 8, 8, 8, m_volumeSize[0], m_volumeSize[1], m_volumeSize[2]);
			
 
				+		dispatchPPCompute(cmdb, 8, 8, 8, m_volumeSize.x, m_volumeSize.y, m_volumeSize.z);
			
 
				 	});
			
 
				+
			
 
				+	if(m_debugResult)
			
 
				+	{
			
 
				+		m_runCtx.m_debugRt = rgraph.newRenderTarget(m_debugRtDesc);
			
 
				+
			
 
				+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("Vol debug");
			
 
				+
			
 
				+		pass.newTextureDependency(getRt(), TextureUsageBit::kSrvCompute);
			
 
				+		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
			
 
				+		pass.newTextureDependency(m_runCtx.m_debugRt, TextureUsageBit::kUavCompute);
			
 
				+
			
 
				+		pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
			
 
				+			ANKI_TRACE_SCOPED_EVENT(VolumetricLightingAccumulationDebug);
			
 
				+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
			
 
				+
			
 
				+			cmdb.bindShaderProgram(m_debugGrProg.get());
			
 
				+
			
 
				+			rgraphCtx.bindSrv(0, 0, getRt());
			
 
				+			rgraphCtx.bindSrv(1, 0, getGBuffer().getDepthRt());
			
 
				+
			
 
				+			rgraphCtx.bindUav(0, 0, m_runCtx.m_debugRt);
			
 
				+
			
 
				+			cmdb.bindConstantBuffer(0, 0, getRenderingContext().m_globalRenderingConstantsBuffer);
			
 
				+
			
 
				+			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
			
 
				+
			
 
				+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x, getRenderer().getInternalResolution().y);
			
 
				+		});
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		m_runCtx.m_debugRt = {};
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void VolumetricLightingAccumulation::fillClustererConstants(ClustererConstants& consts)
			
 
				+{
			
 
				+	const U32 lightZSplitCount = min<U32>(g_cvarRenderClustererZSplitCount, g_cvarRenderVolumetricLightingAccumulationFinalZSplit + 1);
			
 
				+	const F32 clustererFar = getClusterBinning().computeClustererFar() / F32(g_cvarRenderClustererZSplitCount) * F32(lightZSplitCount);
			
 
				+	const F32 n = getRenderingContext().m_matrices.m_near;
			
 
				+	const F32 f = getRenderingContext().m_matrices.m_far;
			
 
				+
			
 
				+	consts.m_lightVolumeWMagic.x = (clustererFar - n) / (-n);
			
 
				+	consts.m_lightVolumeWMagic.y = f * (clustererFar - n) / (n * (f - n));
			
 
				+}
			
 
				+
			
 
				+void VolumetricLightingAccumulation::getDebugRenderTarget([[maybe_unused]] CString rtName,
			
 
				+														  Array<RenderTargetHandle, U32(DebugRenderTargetRegister::kCount)>& handles,
			
 
				+														  DebugRenderTargetDrawStyle& drawStyle) const
			
 
				+{
			
 
				+	if(m_runCtx.m_debugRt.isValid())
			
 
				+	{
			
 
				+		handles[0] = m_runCtx.m_debugRt;
			
 
				+		drawStyle = DebugRenderTargetDrawStyle::kTonemap;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		ANKI_R_LOGW("Need to enable debug drawing of volumetrics else nothing will happen");
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 } // end namespace anki
			
--- a/AnKi/Renderer/VolumetricLightingAccumulation.h
+++ b/AnKi/Renderer/VolumetricLightingAccumulation.h
@@ -9,15 +9,22 @@
 
				 
			
 
				 namespace anki {
			
 
				 
			
 
				-ANKI_CVAR2(NumericCVar<F32>, Render, VolumetricLightingAccumulation, QualityXY, 4.0f, 1.0f, 16.0f, "Quality of XY dimensions of volumetric lights")
			
 
				-ANKI_CVAR2(NumericCVar<F32>, Render, VolumetricLightingAccumulation, QualityZ, 4.0f, 1.0f, 16.0f, "Quality of Z dimension of volumetric lights")
			
 
				+ANKI_CVAR2(NumericCVar<U32>, Render, VolumetricLightingAccumulation, SubdivisionXY, 2u, 1u, 16u,
			
 
				+		   "The original clusters will be split using this CVar")
			
 
				+ANKI_CVAR2(NumericCVar<U32>, Render, VolumetricLightingAccumulation, SubdivisionZ, 2u, 1u, 16u, "The original clusters will be split using this CVar")
			
 
				 ANKI_CVAR2(NumericCVar<U32>, Render, VolumetricLightingAccumulation, FinalZSplit, 26, 1, 256,
			
 
				 		   "Final cluster split that will recieve volumetric lights")
			
 
				+ANKI_CVAR2(BoolCVar, Render, VolumetricLightingAccumulation, Debug, false, "Enable debugging of volumetrics")
			
 
				 
			
 
				 // Volumetric lighting. It accumulates lighting in a volume texture.
			
 
				 class VolumetricLightingAccumulation : public RendererObject
			
 
				 {
			
 
				 public:
			
 
				+	VolumetricLightingAccumulation()
			
 
				+	{
			
 
				+		registerDebugRenderTarget("Volumetric Lighting");
			
 
				+	}
			
 
				+
			
 
				 	Error init();
			
 
				 
			
 
				 	void populateRenderGraph();
			
@@ -27,19 +34,39 @@ public:
 
				 		return m_runCtx.m_rts[1];
			
 
				 	}
			
 
				 
			
 
				+	void fillClustererConstants(ClustererConstants& consts);
			
 
				+
			
 
				+	void setEnableDebuggingView(Bool enable)
			
 
				+	{
			
 
				+		m_debugResult = enable;
			
 
				+	}
			
 
				+
			
 
				+	Bool getDebuggingView() const
			
 
				+	{
			
 
				+		return m_debugResult;
			
 
				+	}
			
 
				+
			
 
				+	void getDebugRenderTarget(CString rtName, Array<RenderTargetHandle, U32(DebugRenderTargetRegister::kCount)>& handles,
			
 
				+							  DebugRenderTargetDrawStyle& drawStyle) const override;
			
 
				+
			
 
				 private:
			
 
				 	ShaderProgramResourcePtr m_prog;
			
 
				 	ShaderProgramPtr m_grProg;
			
 
				+	ShaderProgramPtr m_debugGrProg;
			
 
				 
			
 
				 	Array<TexturePtr, 2> m_rtTextures;
			
 
				 	ImageResourcePtr m_noiseImage;
			
 
				 
			
 
				-	Array<U32, 3> m_volumeSize;
			
 
				+	RenderTargetDesc m_debugRtDesc;
			
 
				+
			
 
				+	UVec3 m_volumeSize;
			
 
				+	Bool m_debugResult = false;
			
 
				 
			
 
				 	class
			
 
				 	{
			
 
				 	public:
			
 
				 		Array<RenderTargetHandle, 2> m_rts;
			
 
				+		RenderTargetHandle m_debugRt;
			
 
				 	} m_runCtx; // Runtime context.
			
 
				 };
			
 
				 
			
--- a/AnKi/Shaders/ClusterBinning.ankiprog
+++ b/AnKi/Shaders/ClusterBinning.ankiprog
@@ -102,7 +102,7 @@ constexpr U32 kPackVisiblesThreadgroupSize = 64;
 
				 struct ClusterBinningConstants
			
 
				 {
			
 
				 	Vec3 m_cameraOrigin;
			
 
				-	F32 m_zSplitCountOverFrustumLength;
			
 
				+	F32 m_zSplitCountOverClustererLength; // = zSplitCount / clustererLength, where clustererLength = clustererFar - near
			
 
				 
			
 
				 	Vec2 m_renderingSize;
			
 
				 	U32 m_tileCountX;
			
@@ -233,20 +233,20 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 
				 #	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
			
 
				 		if(obj.m_isPointLight)
			
 
				 		{
			
 
				-			InterlockedOr(g_clusters[tileIdx].m_pointLightsMask[maskArrayIdx], mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, tileIdx).m_pointLightsMask[maskArrayIdx], mask);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			InterlockedOr(g_clusters[tileIdx].m_spotLightsMask[maskArrayIdx], mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, tileIdx).m_spotLightsMask[maskArrayIdx], mask);
			
 
				 		}
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
			
 
				-		InterlockedOr(g_clusters[tileIdx].m_decalsMask[maskArrayIdx], mask);
			
 
				+		InterlockedOr(SBUFF(g_clusters, tileIdx).m_decalsMask[maskArrayIdx], mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
			
 
				-		InterlockedOr(g_clusters[tileIdx].m_fogDensityVolumesMask, mask);
			
 
				+		InterlockedOr(SBUFF(g_clusters, tileIdx).m_fogDensityVolumesMask, mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
			
 
				-		InterlockedOr(g_clusters[tileIdx].m_reflectionProbesMask, mask);
			
 
				+		InterlockedOr(SBUFF(g_clusters, tileIdx).m_reflectionProbesMask, mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
			
 
				-		InterlockedOr(g_clusters[tileIdx].m_giProbesMask, mask);
			
 
				+		InterlockedOr(SBUFF(g_clusters, tileIdx).m_giProbesMask, mask);
			
 
				 #	else
			
 
				 #		error See file
			
 
				 #	endif
			
@@ -270,27 +270,27 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 
				 			maxDistFromNearPlane = distFromNearPlaneA;
			
 
				 		}
			
 
				 
			
 
				-		const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
			
 
				-		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
			
 
				+		const I32 startZSplit = max(I32(floor(minDistFromNearPlane * g_consts.m_zSplitCountOverClustererLength)), 0);
			
 
				+		const I32 endZSplit = clamp(I32(ceil(maxDistFromNearPlane * g_consts.m_zSplitCountOverClustererLength)), 0, g_consts.m_zSplitCountMinusOne);
			
 
				 		for(I32 i = startZSplit; i <= endZSplit; ++i)
			
 
				 		{
			
 
				 #	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
			
 
				 			if(obj.m_isPointLight)
			
 
				 			{
			
 
				-				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_pointLightsMask[maskArrayIdx], mask);
			
 
				+				InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_pointLightsMask[maskArrayIdx], mask);
			
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_spotLightsMask[maskArrayIdx], mask);
			
 
				+				InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_spotLightsMask[maskArrayIdx], mask);
			
 
				 			}
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
			
 
				-			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_decalsMask[maskArrayIdx], mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_decalsMask[maskArrayIdx], mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
			
 
				-			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_fogDensityVolumesMask, mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_fogDensityVolumesMask, mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
			
 
				-			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_reflectionProbesMask, mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_reflectionProbesMask, mask);
			
 
				 #	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
			
 
				-			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_giProbesMask, mask);
			
 
				+			InterlockedOr(SBUFF(g_clusters, g_consts.m_tileCount + i).m_giProbesMask, mask);
			
 
				 #	else
			
 
				 #		error See file
			
 
				 #	endif
			
@@ -319,6 +319,6 @@ StructuredBuffer<U32> g_visibles : register(t1);
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	g_outBuffer[idxOut] = g_inBuffer[g_visibles[idxOut + 1]];
			
 
				+	SBUFF(g_outBuffer, idxOut) = SBUFF(g_inBuffer, SBUFF(g_visibles, idxOut + 1));
			
 
				 }
			
 
				 #endif
			
--- a/AnKi/Shaders/ClusteredShadingFunctions.hlsl
+++ b/AnKi/Shaders/ClusteredShadingFunctions.hlsl
@@ -53,22 +53,32 @@ Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask, U32 maxObjectOverride =
 
				 	return heatmap(factor);
			
 
				 }
			
 
				 
			
 
				-/// Returns the index of the zSplit or linearizeDepth(n, f, depth)*zSplitCount
			
 
				-/// Simplifying this equation is 1/(a+b/depth) where a=(n-f)/(n*zSplitCount) and b=f/(n*zSplitCount)
			
 
				-U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
			
 
				+// Returns the index of the zSplit. Calculated as linearizeDepth(depth, n, f)*(f-n)/(clustererFar-n)*zSplitCount
			
 
				+// Simplifying this equation is 1/(a+b/depth) where a=(clustererFar-n)/(-n*zSplitCount) and b=f*(clustererFar-n)/(n*(f-n)*zSplitCount)
			
 
				+// If the depth is outside the clusterer's range then the return value will be greater than or equal to zSplitCount
			
 
				+U32 computeZSplitClusterIndex(F32 depth, F32 a, F32 b)
			
 
				 {
			
 
				-	const F32 fSplitIdx = 1.0 / (a + b / depth);
			
 
				-	return min(zSplitCount - 1u, (U32)fSplitIdx);
			
 
				+	const F32 splitIdxf = 1.0 / (a + b / depth); // It's fine if depth is zero. The splitIdxf will become 0.0
			
 
				+	return (U32)splitIdxf;
			
 
				 }
			
 
				 
			
 
				-/// Return the tile index.
			
 
				+// It's similar to computeZSplitClusterIndex but instead of an index it returns a tex coordinate for the w coord of a 3D texture that covers the
			
 
				+// clusterer. Calculated as linearizeDepth(depth, n, f)*(f-n)/(clustererFar-n).
			
 
				+// Simplifying this equation is 1/(a+b/depth) where a=(clustererFar-n)/(-n) and b=f*(clustererFar-n)/(n*(f-n))
			
 
				+// If the depth is outside the clusterer's range then the return value will be greater than or equal to 1.0
			
 
				+F32 computeVolumeWTexCoord(F32 depth, F32 a, F32 b)
			
 
				+{
			
 
				+	return 1.0 / (a + b / depth); // It's fine if depth is zero. The expression will become 0.0
			
 
				+}
			
 
				+
			
 
				+// Return the tile index.
			
 
				 U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileCountX)
			
 
				 {
			
 
				 	const UVec2 tileXY = UVec2(fragCoord / F32(kClusteredShadingTileSize));
			
 
				 	return tileXY.y * tileCountX + tileXY.x;
			
 
				 }
			
 
				 
			
 
				-/// Merge the tiles with z splits into a single cluster.
			
 
				+// Merge the tiles with z splits into a single cluster.
			
 
				 template<Bool kDynamicallyUniform = false>
			
 
				 Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
			
 
				 {
			
@@ -78,18 +88,18 @@ Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 
				 	{
			
 
				 		[unroll] for(U32 i = 0; i < kMaxVisibleLights / 32; ++i)
			
 
				 		{
			
 
				-			outCluster.m_pointLightsMask[i] = WaveActiveBitOr(tileCluster.m_pointLightsMask[i] & zCluster.m_pointLightsMask[i]);
			
 
				-			outCluster.m_spotLightsMask[i] = WaveActiveBitOr(tileCluster.m_spotLightsMask[i] & zCluster.m_spotLightsMask[i]);
			
 
				+			outCluster.m_pointLightsMask[i] = WaveActiveBitAnd(tileCluster.m_pointLightsMask[i] & zCluster.m_pointLightsMask[i]);
			
 
				+			outCluster.m_spotLightsMask[i] = WaveActiveBitAnd(tileCluster.m_spotLightsMask[i] & zCluster.m_spotLightsMask[i]);
			
 
				 		}
			
 
				 
			
 
				 		[unroll] for(U32 i = 0; i < kMaxVisibleDecals / 32; ++i)
			
 
				 		{
			
 
				-			outCluster.m_decalsMask[i] = WaveActiveBitOr(tileCluster.m_decalsMask[i] & zCluster.m_decalsMask[i]);
			
 
				+			outCluster.m_decalsMask[i] = WaveActiveBitAnd(tileCluster.m_decalsMask[i] & zCluster.m_decalsMask[i]);
			
 
				 		}
			
 
				 
			
 
				-		outCluster.m_fogDensityVolumesMask = WaveActiveBitOr(tileCluster.m_fogDensityVolumesMask & zCluster.m_fogDensityVolumesMask);
			
 
				-		outCluster.m_reflectionProbesMask = WaveActiveBitOr(tileCluster.m_reflectionProbesMask & zCluster.m_reflectionProbesMask);
			
 
				-		outCluster.m_giProbesMask = WaveActiveBitOr(tileCluster.m_giProbesMask & zCluster.m_giProbesMask);
			
 
				+		outCluster.m_fogDensityVolumesMask = WaveActiveBitAnd(tileCluster.m_fogDensityVolumesMask & zCluster.m_fogDensityVolumesMask);
			
 
				+		outCluster.m_reflectionProbesMask = WaveActiveBitAnd(tileCluster.m_reflectionProbesMask & zCluster.m_reflectionProbesMask);
			
 
				+		outCluster.m_giProbesMask = WaveActiveBitAnd(tileCluster.m_giProbesMask & zCluster.m_giProbesMask);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
@@ -112,13 +122,18 @@ Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
 
				 	return outCluster;
			
 
				 }
			
 
				 
			
 
				-/// Get the final cluster after ORing and ANDing the masks.
			
 
				+// Get the final cluster after ORing and ANDing the masks.
			
 
				 template<Bool kDynamicallyUniform = false>
			
 
				-Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, GlobalRendererConstants consts, Vec3 fragCoord)
			
 
				+Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, ClustererConstants consts, Vec3 fragCoord)
			
 
				 {
			
 
				-	const Cluster tileCluster = clusters[computeTileClusterIndexFragCoord(fragCoord.xy, consts.m_tileCounts.x)];
			
 
				-	const Cluster zCluster = clusters[computeZSplitClusterIndex(fragCoord.z, consts.m_zSplitCount, consts.m_zSplitMagic.x, consts.m_zSplitMagic.y)
			
 
				-									  + consts.m_tileCounts.x * consts.m_tileCounts.y];
			
 
				+	U32 idx = computeTileClusterIndexFragCoord(fragCoord.xy, consts.m_tileCounts.x);
			
 
				+	const Cluster tileCluster = SBUFF(clusters, idx);
			
 
				+
			
 
				+	idx = computeZSplitClusterIndex(fragCoord.z, consts.m_zSplitMagic.x, consts.m_zSplitMagic.y);
			
 
				+	idx += consts.m_tileCounts.x * consts.m_tileCounts.y;
			
 
				+	idx = min(idx, consts.m_clusterCount); // The "consts.m_clusterCount" is intentional. There is a hidden cluster at the end that is all zeroes
			
 
				+	const Cluster zCluster = SBUFF(clusters, idx);
			
 
				+
			
 
				 	return mergeClusters<kDynamicallyUniform>(tileCluster, zCluster);
			
 
				 }
			
 
				 
			
--- a/AnKi/Shaders/ForwardShadingCommon.hlsl
+++ b/AnKi/Shaders/ForwardShadingCommon.hlsl
@@ -46,7 +46,7 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
 
				 	Vec3 outColor = Vec3(0.0, 0.0, 0.0);
			
 
				 
			
 
				 	// Find the cluster and then the light counts
			
 
				-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, svPosition.xyz);
			
 
				+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants.m_clusterer, svPosition.xyz);
			
 
				 
			
 
				 	// Point lights
			
 
				 	U32 idx = 0;
			
@@ -97,12 +97,10 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
 
				 // Just read the light color from the vol texture
			
 
				 Vec3 computeLightColorLow(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
			
 
				 {
			
 
				-	ANKI_MAYBE_UNUSED(worldPos);
			
 
				-
			
 
				-	const Vec2 uv = svPosition.xy / g_globalRendererConstants.m_renderingSize;
			
 
				-	const F32 linearDepth = linearizeDepth(svPosition.z, g_globalRendererConstants.m_matrices.m_near, g_globalRendererConstants.m_matrices.m_far);
			
 
				-	const F32 w = linearDepth * (F32(g_globalRendererConstants.m_zSplitCount) / F32(g_globalRendererConstants.m_lightVolumeLastZSplit + 1u));
			
 
				-	const Vec3 uvw = Vec3(uv, w);
			
 
				+	Vec3 uvw;
			
 
				+	uvw.xy = svPosition.xy / g_globalRendererConstants.m_renderingSize;
			
 
				+	uvw.z = computeVolumeWTexCoord(svPosition.z, g_globalRendererConstants.m_clusterer.m_lightVolumeWMagic.x,
			
 
				+								   g_globalRendererConstants.m_clusterer.m_lightVolumeWMagic.y);
			
 
				 
			
 
				 	const Vec3 light = g_lightVol.SampleLevel(g_trilinearClampSampler, uvw, 0.0).rgb;
			
 
				 	return diffuseLobe(diffCol) * light;
			
--- a/AnKi/Shaders/GBufferPost.ankiprog
+++ b/AnKi/Shaders/GBufferPost.ankiprog
@@ -38,7 +38,7 @@ SamplerState g_linearAnyClampSampler : register(s0);
 
				 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
			
 
				 
			
 
				 	// Get the cluster. Make sure it's dynamically uniform because we are accessing bindless textures later on
			
 
				-	Cluster cluster = getClusterFragCoord<true>(g_clusters, g_globalConstants, Vec3(svDispatchThreadId, depth));
			
 
				+	Cluster cluster = getClusterFragCoord<true>(g_clusters, g_globalConstants.m_clusterer, Vec3(svDispatchThreadId, depth));
			
 
				 
			
 
				 	// Make the decalsMask uniform across the wave because we are accessing bindless textures later on
			
 
				 	U32 decalsMask = cluster.m_decalsMask[0];
			
--- a/AnKi/Shaders/Include/MiscRendererTypes.h
+++ b/AnKi/Shaders/Include/MiscRendererTypes.h
@@ -124,6 +124,18 @@ struct LocalLightsGridConstants
 
				 	F32 m_padding4;
			
 
				 };
			
 
				 
			
 
				+struct ClustererConstants
			
 
				+{
			
 
				+	Vec2 m_zSplitMagic; // It's the "a" and "b" of computeZSplitClusterIndex(). See there for details.
			
 
				+	UVec2 m_tileCounts;
			
 
				+
			
 
				+	Vec2 m_lightVolumeWMagic; // the "a" and "b" of computeVolumeWTexCoord(). See there for details.
			
 
				+	F32 m_clustererFar;
			
 
				+	U32 m_clusterCount : 16;
			
 
				+	U32 m_zSplitCount : 16;
			
 
				+};
			
 
				+static_assert(sizeof(ClustererConstants) % sizeof(Vec4) == 0);
			
 
				+
			
 
				 // Common constants for all passes.
			
 
				 struct GlobalRendererConstants
			
 
				 {
			
@@ -136,13 +148,7 @@ struct GlobalRendererConstants
 
				 	Vec3 m_cameraPosition;
			
 
				 	F32 m_reflectionProbesMipCount;
			
 
				 
			
 
				-	UVec2 m_tileCounts;
			
 
				-	U32 m_zSplitCount;
			
 
				-	F32 m_zSplitCountOverFrustumLength; ///< m_zSplitCount/(far-near)
			
 
				-
			
 
				-	Vec2 m_zSplitMagic; ///< It's the "a" and "b" of computeZSplitClusterIndex(). See there for details.
			
 
				-	U32 m_lightVolumeLastZSplit;
			
 
				-	U32 m_padding1;
			
 
				+	ClustererConstants m_clusterer;
			
 
				 
			
 
				 	DirectionalLight m_directionalLight;
			
 
				 
			
@@ -200,13 +206,13 @@ struct VolumetricFogConstants
 
				 	Vec3 m_fogDiffuse;
			
 
				 	F32 m_fogScatteringCoeff;
			
 
				 
			
 
				+	UVec3 m_volumeSize;
			
 
				 	F32 m_fogAbsorptionCoeff;
			
 
				-	F32 m_near;
			
 
				-	F32 m_far;
			
 
				-	F32 m_zSplitCountf;
			
 
				 
			
 
				-	UVec3 m_volumeSize;
			
 
				-	F32 m_maxZSplitsToProcessf;
			
 
				+	F32 m_zSplitThickness;
			
 
				+	U32 m_padding1;
			
 
				+	U32 m_padding2;
			
 
				+	U32 m_padding3;
			
 
				 };
			
 
				 
			
 
				 // Vol lighting
			
@@ -218,7 +224,10 @@ struct VolumetricLightingConstants
 
				 	F32 m_oneOverMaxMinusMinHeight; // 1 / (maxHeight / minHeight)
			
 
				 
			
 
				 	UVec3 m_volumeSize;
			
 
				-	F32 m_maxZSplitsToProcessf;
			
 
				+	F32 m_subZSplitThickness;
			
 
				+
			
 
				+	UVec3 m_clusterSubdivision;
			
 
				+	F32 m_padding;
			
 
				 };
			
 
				 
			
 
				 // SSAO
			
--- a/AnKi/Shaders/LightShading.ankiprog
+++ b/AnKi/Shaders/LightShading.ankiprog
@@ -63,7 +63,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 
				 	const HVec3 viewDir = normalize(g_globalConstants.m_cameraPosition - worldPos);
			
 
				 
			
 
				 	// Get the cluster
			
 
				-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalConstants, Vec3(input.m_svPosition.xy, depth));
			
 
				+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalConstants.m_clusterer, Vec3(input.m_svPosition.xy, depth));
			
 
				 
			
 
				 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
			
 
				 
			
--- a/AnKi/Shaders/LightShadingApplyFog.ankiprog
+++ b/AnKi/Shaders/LightShadingApplyFog.ankiprog
@@ -9,33 +9,35 @@
 
				 
			
 
				 #if ANKI_PIXEL_SHADER
			
 
				 #	include <AnKi/Shaders/Functions.hlsl>
			
 
				+#	include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
			
 
				+#	include <AnKi/Shaders/Include/MiscRendererTypes.h>
			
 
				+#	include <AnKi/Shaders/ImportanceSampling.hlsl>
			
 
				+
			
 
				+SamplerState g_linearAnyClampSampler : register(s0);
			
 
				 
			
 
				-SamplerState g_nearestAnyClampSampler : register(s0);
			
 
				-SamplerState g_linearAnyClampSampler : register(s1);
			
 
				 Texture2D g_depthRt : register(t0);
			
 
				 Texture3D<Vec4> g_fogVolume : register(t1);
			
 
				 
			
 
				-struct Constants
			
 
				-{
			
 
				-	F32 m_zSplitCount;
			
 
				-	F32 m_finalZSplit;
			
 
				-	F32 m_near;
			
 
				-	F32 m_far;
			
 
				-};
			
 
				-ANKI_FAST_CONSTANTS(Constants, g_consts)
			
 
				+ConstantBuffer<GlobalRendererConstants> g_consts : register(b0);
			
 
				 
			
 
				 Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				-	const Vec2 uv = input.m_uv;
			
 
				-	Vec3 uvw;
			
 
				+	Vec3 texSize;
			
 
				+	g_fogVolume.GetDimensions(texSize.x, texSize.y, texSize.z);
			
 
				+	const Vec3 texelSize = 1.0 / texSize;
			
 
				 
			
 
				-	// Compute W coordinate
			
 
				-	const F32 depth = g_depthRt.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
			
 
				-	const F32 linearDepth = linearizeDepth(depth, g_consts.m_near, g_consts.m_far);
			
 
				-	uvw.z = linearDepth * (g_consts.m_zSplitCount / (g_consts.m_finalZSplit + 1.0f));
			
 
				+	// Random
			
 
				+	const UVec3 seed = rand3DPCG16(UVec3(input.m_svPosition.xy, g_consts.m_frame % 8u));
			
 
				+	const Vec2 random = hammersleyRandom16(g_consts.m_frame % 16, 16, seed);
			
 
				+	const Vec2 urandom = random * 2.0 - 1.0;
			
 
				 
			
 
				 	// Compute UV coordinates
			
 
				-	uvw.xy = uv;
			
 
				+	const F32 depth = TEX(g_depthRt, input.m_svPosition.xy).r;
			
 
				+	Vec3 uvw;
			
 
				+	uvw.xy = input.m_uv;
			
 
				+	uvw.xy += texelSize * urandom;
			
 
				+	uvw.z = computeVolumeWTexCoord(depth, g_consts.m_clusterer.m_lightVolumeWMagic.x, g_consts.m_clusterer.m_lightVolumeWMagic.y);
			
 
				+	uvw.z += texelSize / 2.0 * urandom.x;
			
 
				 
			
 
				 	// Read the volume
			
 
				 	const Vec4 fogVals = g_fogVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
			
--- a/AnKi/Shaders/Reflections.ankiprog
+++ b/AnKi/Shaders/Reflections.ankiprog
@@ -220,7 +220,7 @@ Vec3 doLightShading(Vec3 worldPos, Vec3 viewPos, UVec2 coord, F32 depth)
 
				 
			
 
				 	Vec3 outColor = gbuffer.m_emission;
			
 
				 
			
 
				-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(coord.xy + 0.5, depth));
			
 
				+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants.m_clusterer, Vec3(coord.xy + 0.5, depth));
			
 
				 
			
 
				 	// GI
			
 
				 #	if INDIRECT_DIFFUSE_CLIPMAPS
			
@@ -416,7 +416,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 		const Vec3 col = sampleClipmapRadiance(worldPos, reflDir, g_globalRendererConstants.m_cameraPosition,
			
 
				 											   g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, kSampleClipmapFlags);
			
 
				 #	else
			
 
				-		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				+		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants.m_clusterer, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				 		const Vec3 col = sampleGiProbes<F32>(cluster, g_giProbes, reflDir, worldPos.xyz, g_trilinearClampSampler);
			
 
				 #	endif
			
 
				 
			
@@ -457,7 +457,7 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 
				 	{
			
 
				 		viewReflDir = reflect(-viewDir, viewNormal);
			
 
				 
			
 
				-		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				+		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants.m_clusterer, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				 
			
 
				 		const Vec3 woldReflDir = mul(g_globalRendererConstants.m_matrices.m_cameraTransform, Vec4(viewReflDir, 0.0));
			
 
				 
			
@@ -628,7 +628,7 @@ RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
 
				 	const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
			
 
				 	const Vec3 worldPos = v4.xyz / v4.w;
			
 
				 
			
 
				-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants.m_clusterer, Vec3(logicalCoord.xy + 0.5, depth));
			
 
				 
			
 
				 	const F32 reflLod = (g_globalRendererConstants.m_reflectionProbesMipCount - 1.0f) * roughness;
			
 
				 	Vec3 probeColor = sampleReflectionProbes<F32>(cluster, g_reflectionProbes, reflDir, worldPos, reflLod, g_trilinearClampSampler);
			
--- a/AnKi/Shaders/ShadowmapsResolve.ankiprog
+++ b/AnKi/Shaders/ShadowmapsResolve.ankiprog
@@ -102,7 +102,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 
				 
			
 
				 	// Cluster
			
 
				 	const Vec2 fragCoord = uv * g_globalConstants.m_renderingSize;
			
 
				-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalConstants, Vec3(fragCoord, depth));
			
 
				+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalConstants.m_clusterer, Vec3(fragCoord, depth));
			
 
				 
			
 
				 	// Layers
			
 
				 	U32 shadowCasterCountPerFragment = 0u;
			
--- a/AnKi/Shaders/VolumetricFogAccumulation.ankiprog
+++ b/AnKi/Shaders/VolumetricFogAccumulation.ankiprog
@@ -26,29 +26,13 @@ ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
 
				 	Vec4 colorAndDensityFront = 0.0;
			
 
				 	[loop] for(U32 i = 0u; i < g_consts.m_volumeSize.z; ++i)
			
 
				 	{
			
 
				-		const F32 fi = F32(i);
			
 
				-
			
 
				-		// Compute the linear depth
			
 
				-		const F32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / g_consts.m_zSplitCountf;
			
 
				-		const F32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
			
 
				-		const F32 linearDepthNear = fi * linearDepthFraction;
			
 
				-		const F32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
			
 
				-
			
 
				-		// Compute the min and max Z in view space if this cluster fragment
			
 
				-		const F32 zVSpaceNear = -linearDepthNear * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				-		const F32 zVSpaceFar = -linearDepthFar * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				-
			
 
				-		// Compute the thikness of this fragment
			
 
				-		const F32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
			
 
				-
			
 
				 		// Read the light value and the fog density from the fog volumes
			
 
				-		const F32 w = (fi + 0.5) / F32(g_consts.m_volumeSize.z);
			
 
				-		Vec4 lightAndFogDensity = g_lightVolume.SampleLevel(g_linearAnyClampSampler, Vec3(uv, w), 0.0);
			
 
				+		Vec4 lightAndFogDensity = TEX(g_lightVolume, UVec3(svDispatchThreadId.xy, i));
			
 
				 		lightAndFogDensity.xyz *= g_consts.m_fogDiffuse / kPi;
			
 
				 
			
 
				 		// Scattering & absorption
			
 
				-		const F32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * layerThinkness;
			
 
				-		const F32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * layerThinkness;
			
 
				+		const F32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * g_consts.m_zSplitThickness;
			
 
				+		const F32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * g_consts.m_zSplitThickness;
			
 
				 
			
 
				 		// Integrate
			
 
				 		const Vec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
			
@@ -58,6 +42,6 @@ ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
 
				 
			
 
				 		// Write the value
			
 
				 		const Vec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
			
 
				-		g_fogVolume[UVec3(svDispatchThreadId.xy, i)] = valToWrite;
			
 
				+		TEX(g_fogVolume, UVec3(svDispatchThreadId.xy, i)) = valToWrite;
			
 
				 	}
			
 
				 }
			
--- a/AnKi/Shaders/VolumetricLightingAccumulation.ankiprog
+++ b/AnKi/Shaders/VolumetricLightingAccumulation.ankiprog
@@ -8,13 +8,19 @@
 
				 #pragma anki mutator ENABLE_SHADOWS 0 1
			
 
				 #pragma anki mutator CLIPMAP_DIFFUSE_INDIRECT 0 1
			
 
				 
			
 
				-#pragma anki technique comp
			
 
				+#pragma anki technique Accumulate comp
			
 
				+#pragma anki technique Debug comp
			
 
				 
			
 
				 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
			
 
				 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
			
 
				 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
			
 
				 #include <AnKi/Shaders/ImportanceSampling.hlsl>
			
 
				 
			
 
				+// ===========================================================================
			
 
				+// Accumulate                                                                =
			
 
				+// ===========================================================================
			
 
				+#if ANKI_TECHNIQUE_Accumulate
			
 
				+
			
 
				 constexpr F32 kPhaseFunctionAnisotropy = 0.3;
			
 
				 
			
 
				 SamplerState g_linearAnyRepeatSampler : register(s0);
			
@@ -29,46 +35,15 @@ StructuredBuffer<GpuSceneLight> g_lights : register(t2);
 
				 Texture2D<Vec4> g_shadowAtlasTex : register(t3);
			
 
				 StructuredBuffer<GpuSceneFogDensityVolume> g_fogDensityVolumes : register(t4);
			
 
				 StructuredBuffer<Cluster> g_clusters : register(t5);
			
 
				-#if !CLIPMAP_DIFFUSE_INDIRECT
			
 
				-StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t6);
			
 
				-#endif
			
 
				+Texture2D g_depthMap : register(t6);
			
 
				+#	if !CLIPMAP_DIFFUSE_INDIRECT
			
 
				+StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t7);
			
 
				+#	endif
			
 
				 
			
 
				 ConstantBuffer<GlobalRendererConstants> g_globalConstants : register(b0);
			
 
				 
			
 
				 ANKI_FAST_CONSTANTS(VolumetricLightingConstants, g_consts)
			
 
				 
			
 
				-Vec3 worldPosInsideClusterAndZViewSpace(Vec3 relativePos, Vec3 clusterIdf, out F32 negativeZViewSpace, out Vec3 uvw)
			
 
				-{
			
 
				-	// XY UV
			
 
				-	uvw.xy = lerp(clusterIdf.xy, clusterIdf.xy + 1.0, relativePos.xy) / Vec2(g_consts.m_volumeSize.xy);
			
 
				-
			
 
				-	// Compute the linear depth
			
 
				-	const F32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / F32(g_globalConstants.m_zSplitCount);
			
 
				-	const F32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
			
 
				-	const F32 linearDepthNear = clusterIdf.z * linearDepthFraction;
			
 
				-	const F32 linearDepthFar = (clusterIdf.z + 1.0) * linearDepthFraction;
			
 
				-	const F32 linearDepth = lerp(linearDepthNear, linearDepthFar, relativePos.z);
			
 
				-	uvw.z = linearDepth;
			
 
				-
			
 
				-	// View space
			
 
				-	negativeZViewSpace =
			
 
				-		linearDepth * (g_globalConstants.m_matrices.m_far - g_globalConstants.m_matrices.m_near) + g_globalConstants.m_matrices.m_near;
			
 
				-	const F32 zViewSpace = -negativeZViewSpace;
			
 
				-	const Vec2 xyViewSpace = uvToNdc(uvw.xy) * g_globalConstants.m_matrices.m_unprojectionParameters.xy * zViewSpace;
			
 
				-
			
 
				-	// Get the final world pos
			
 
				-	const Vec3 worldPos = mul(g_globalConstants.m_matrices.m_cameraTransform, Vec4(xyViewSpace, zViewSpace, 1.0));
			
 
				-
			
 
				-	return worldPos;
			
 
				-}
			
 
				-
			
 
				-Vec3 worldPosInsideCluster(Vec3 relativePos, Vec3 clusterIdf)
			
 
				-{
			
 
				-	F32 unused;
			
 
				-	Vec3 unused1;
			
 
				-	return worldPosInsideClusterAndZViewSpace(relativePos, clusterIdf, unused, unused1);
			
 
				-}
			
 
				-
			
 
				 // https://developer.nvidia.com/gpugems/GPUGems2/gpugems2_chapter16.html
			
 
				 F32 phaseFunction2(Vec3 pont2CameraDir, Vec3 point2LightDir, F32 g)
			
 
				 {
			
@@ -101,7 +76,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 	{
			
 
				 		F32 factor = phaseFunction(viewDir, -dirLight.m_direction, kPhaseFunctionAnisotropy);
			
 
				 
			
 
				-#if ENABLE_SHADOWS
			
 
				+#	if ENABLE_SHADOWS
			
 
				 		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
			
 
				 
			
 
				 		if(shadowCascadeCount > 0u && negativeZViewSpace < dirLight.m_shadowCascadeDistances[shadowCascadeCount - 1u])
			
@@ -110,7 +85,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 
			
 
				 			factor *= computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 		}
			
 
				-#endif
			
 
				+#	endif
			
 
				 
			
 
				 		color += dirLight.m_diffuseColor * factor;
			
 
				 	}
			
@@ -126,12 +101,12 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 
			
 
				 		factor *= phaseFunction(viewDir, normalize(frag2Light), kPhaseFunctionAnisotropy);
			
 
				 
			
 
				-#if ENABLE_SHADOWS
			
 
				+#	if ENABLE_SHADOWS
			
 
				 		if(light.m_shadow)
			
 
				 		{
			
 
				 			factor *= computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 		}
			
 
				-#endif
			
 
				+#	endif
			
 
				 
			
 
				 		color += light.m_diffuseColor * factor;
			
 
				 	}
			
@@ -150,18 +125,18 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 
			
 
				 		factor *= phaseFunction(viewDir, -light.m_direction, kPhaseFunctionAnisotropy);
			
 
				 
			
 
				-#if ENABLE_SHADOWS
			
 
				+#	if ENABLE_SHADOWS
			
 
				 		if(light.m_shadow)
			
 
				 		{
			
 
				 			factor *= computeShadowFactorSpotLight<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 		}
			
 
				-#endif
			
 
				+#	endif
			
 
				 
			
 
				 		color += light.m_diffuseColor * factor;
			
 
				 	}
			
 
				 
			
 
				 	// Indirect diffuse GI
			
 
				-#if CLIPMAP_DIFFUSE_INDIRECT
			
 
				+#	if CLIPMAP_DIFFUSE_INDIRECT
			
 
				 	{
			
 
				 		const SampleClipmapFlag flags = kSampleClipmapFlagNone;
			
 
				 		const Vec3 irradiance = sampleClipmapAvgIrradiance(worldPos, 0.0, g_globalConstants.m_cameraPosition,
			
@@ -169,14 +144,14 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 
			
 
				 		color += irradiance * kPi; // Not sure why the multiplication with Pi but it looks more correct
			
 
				 	}
			
 
				-#else
			
 
				+#	else
			
 
				 	{
			
 
				 		Vec3 diffIndirect = sampleGiProbes<F32>(cluster, g_giProbes, viewDir, worldPos, g_linearAnyClampSampler);
			
 
				 		diffIndirect *= kPi; // Irradiance is pre-divided with PI so fix it
			
 
				 
			
 
				 		color += diffIndirect;
			
 
				 	}
			
 
				-#endif
			
 
				+#	endif
			
 
				 
			
 
				 	// Fog density
			
 
				 	F32 fogDensity = 0.0;
			
@@ -214,52 +189,130 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
				 
			
 
				 [numthreads(8, 8, 8)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 {
			
 
				-	const UVec3 clusterId = svDispatchThreadId;
			
 
				-	if(any(clusterId >= g_consts.m_volumeSize))
			
 
				+	const UVec3 subClusterId = svDispatchThreadId;
			
 
				+	if(any(subClusterId >= g_consts.m_volumeSize))
			
 
				 	{
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				+	const ClustererConstants clustererConsts = g_globalConstants.m_clusterer;
			
 
				+
			
 
				 	// Find a random pos inside the cluster
			
 
				-	Vec3 random = TEX(g_noiseTex, (clusterId.xy + clusterId.z) % 64).rgb;
			
 
				-	random = animateBlueNoise(random, g_globalConstants.m_frame);
			
 
				+#	if 1
			
 
				+	Vec3 randomFactor = TEX(g_noiseTex, subClusterId.xy % 64).rgb;
			
 
				+	randomFactor = animateBlueNoise(randomFactor, g_globalConstants.m_frame + subClusterId.z);
			
 
				+#	else
			
 
				+	const UVec3 seed = rand3DPCG16(UVec3(subClusterId.xy, (g_globalConstants.m_frame + subClusterId.z) % 8u));
			
 
				+	const Vec2 random2 = hammersleyRandom16(g_globalConstants.m_frame % 16, 16, seed);
			
 
				+	Vec3 randomFactor = random2.xyx;
			
 
				+#	endif
			
 
				+
			
 
				+	const Vec3 subClusterIdf = subClusterId;
			
 
				+	const Vec3 uvw = (subClusterIdf + randomFactor) / Vec3(g_consts.m_volumeSize);
			
 
				 
			
 
				-	F32 negativeZViewSpace;
			
 
				-	Vec3 uvw;
			
 
				-	const Vec3 worldPos = worldPosInsideClusterAndZViewSpace(random, clusterId, negativeZViewSpace, uvw);
			
 
				+	// View space
			
 
				+	const F32 negativeZViewSpace = g_consts.m_subZSplitThickness * (subClusterIdf.z + randomFactor.z) + g_globalConstants.m_matrices.m_near;
			
 
				+	const Vec3 viewPos = Vec3(uvToNdc(uvw.xy) * g_globalConstants.m_matrices.m_unprojectionParameters.xy * -negativeZViewSpace, -negativeZViewSpace);
			
 
				+
			
 
				+	// Compute depth of sample point
			
 
				+	const Vec4 v4 = mul(g_globalConstants.m_matrices.m_projection, Vec4(viewPos, 1.0));
			
 
				+	const F32 depthSample = v4.z / v4.w;
			
 
				+	const F32 depth = g_depthMap.SampleLevel(g_linearAnyClampSampler, uvw.xy, 0.0).x;
			
 
				+
			
 
				+	Bool validSample;
			
 
				+	Vec4 lightAndFog;
			
 
				+	if(depthSample < depth)
			
 
				+	{
			
 
				+		// Valid sample, do lighting
			
 
				+
			
 
				+		validSample = true;
			
 
				+
			
 
				+		// Get the final world pos
			
 
				+		const Vec3 worldPos = mul(g_globalConstants.m_matrices.m_cameraTransform, Vec4(viewPos, 1.0));
			
 
				 
			
 
				-	// Get the cluster
			
 
				-	const UVec2 tileIdxXY = UVec2(uvw.xy * Vec2(g_globalConstants.m_tileCounts));
			
 
				-	const U32 tileIdx = tileIdxXY.y * g_globalConstants.m_tileCounts.x + tileIdxXY.x;
			
 
				-	Cluster cluster = g_clusters[tileIdx];
			
 
				+		// Get the cluster
			
 
				+		const UVec3 clusterId = subClusterId >> g_consts.m_clusterSubdivision;
			
 
				+		const U32 tileIdx = clusterId.y * clustererConsts.m_tileCounts.x + clusterId.x;
			
 
				+		Cluster cluster = SBUFF(g_clusters, tileIdx);
			
 
				 
			
 
				-	const U32 zSplitIdx = U32(uvw.z * F32(g_globalConstants.m_zSplitCount));
			
 
				-	const Cluster split = g_clusters[g_globalConstants.m_tileCounts.x * g_globalConstants.m_tileCounts.y + zSplitIdx];
			
 
				+		const Cluster split = SBUFF(g_clusters, clustererConsts.m_tileCounts.x * clustererConsts.m_tileCounts.y + clusterId.z);
			
 
				 
			
 
				-	cluster = mergeClusters(cluster, split);
			
 
				+		cluster = mergeClusters(cluster, split);
			
 
				 
			
 
				-	// Get lighting
			
 
				-	Vec4 lightAndFog = accumulateLightsAndFog(cluster, worldPos, negativeZViewSpace, random.x);
			
 
				+		// Do lighting
			
 
				+		lightAndFog = accumulateLightsAndFog(cluster, worldPos, negativeZViewSpace, randomFactor.x);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		// Invalid sample, reject it
			
 
				+
			
 
				+		validSample = false;
			
 
				+		lightAndFog = 0.0;
			
 
				+	}
			
 
				 
			
 
				 	// Read the prev result
			
 
				 	{
			
 
				 		// Better get a new world pos in the center of the cluster. Using worldPos creates noisy results
			
 
				-		const Vec3 midWPos = worldPosInsideCluster(Vec3(0.5, 0.5, 0.5), clusterId);
			
 
				+		const Vec2 uv = (subClusterIdf.xy + 0.5) / g_consts.m_volumeSize.xy;
			
 
				+		const F32 negativeZViewSpace = g_consts.m_subZSplitThickness * (subClusterIdf.z + 0.5) + g_globalConstants.m_matrices.m_near;
			
 
				+		const Vec3 viewPos = Vec3(uvToNdc(uv) * g_globalConstants.m_matrices.m_unprojectionParameters.xy * -negativeZViewSpace, -negativeZViewSpace);
			
 
				+		const Vec3 midWPos = mul(g_globalConstants.m_matrices.m_cameraTransform, Vec4(viewPos, 1.0));
			
 
				 
			
 
				 		// Project
			
 
				 		const Vec4 prevClipPos4 = mul(g_globalConstants.m_previousMatrices.m_viewProjection, Vec4(midWPos, 1.0));
			
 
				 		const Vec3 prevClipPos = prevClipPos4.xyz / prevClipPos4.w;
			
 
				 
			
 
				 		// Read prev
			
 
				-		if(all(prevClipPos.xy > -1.0) && all(prevClipPos.xy < 1.0))
			
 
				+		if(all(abs(prevClipPos.xy) <= 1.0) && prevClipPos.z > 0.0)
			
 
				 		{
			
 
				-			const F32 linearDepth = linearizeDepth(prevClipPos.z, g_globalConstants.m_matrices.m_near, g_globalConstants.m_matrices.m_far);
			
 
				-			const Vec3 uvw = Vec3(ndcToUv(prevClipPos.xy), linearDepth * (F32(g_globalConstants.m_zSplitCount) / g_consts.m_maxZSplitsToProcessf));
			
 
				+			Vec3 uvw;
			
 
				+			uvw.xy = ndcToUv(prevClipPos.xy);
			
 
				+			uvw.z = computeVolumeWTexCoord(prevClipPos.z, clustererConsts.m_lightVolumeWMagic.x, clustererConsts.m_lightVolumeWMagic.y);
			
 
				+
			
 
				 			const Vec4 history = g_prevVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
			
 
				-			lightAndFog = lerp(history, lightAndFog, 1.0 / 16.0);
			
 
				+			lightAndFog = lerp(history, lightAndFog, (validSample) ? 1.0 / 16.0 : 0.0);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	// Write result
			
 
				-	TEX(g_volume, clusterId) = lightAndFog;
			
 
				+	TEX(g_volume, subClusterId) = lightAndFog;
			
 
				 }
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// Debug                                                                     =
			
 
				+// ===========================================================================
			
 
				+#if ANKI_TECHNIQUE_Debug
			
 
				+
			
 
				+Texture3D g_lightVolume : register(t0);
			
 
				+Texture2D g_depthBuffer : register(t1);
			
 
				+
			
 
				+RWTexture2D<Vec4> g_outTex : register(u0);
			
 
				+
			
 
				+ConstantBuffer<GlobalRendererConstants> g_consts : register(b0);
			
 
				+
			
 
				+SamplerState g_linearAnyClampSampler : register(s0);
			
 
				+
			
 
				+[numthreads(8, 8, 1)] void main(COMPUTE_ARGS)
			
 
				+{
			
 
				+	UVec2 outTexSize;
			
 
				+	g_outTex.GetDimensions(outTexSize.x, outTexSize.y);
			
 
				+
			
 
				+	const UVec2 coord = svDispatchThreadId.xy;
			
 
				+	if(any(coord >= outTexSize))
			
 
				+	{
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	const F32 depth = TEX(g_depthBuffer, coord).x;
			
 
				+
			
 
				+	Vec3 uvw;
			
 
				+	uvw.xy = (coord + 0.5) / outTexSize;
			
 
				+	uvw.z = computeVolumeWTexCoord(depth, g_consts.m_clusterer.m_lightVolumeWMagic.x, g_consts.m_clusterer.m_lightVolumeWMagic.y);
			
 
				+
			
 
				+	const Vec3 light = g_lightVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0).rgb;
			
 
				+
			
 
				+	TEX(g_outTex, coord) = Vec4(light, 0.0);
			
 
				+}
			
 
				+#endif