Browse Source

Add probe occlusion

Panagiotis Christopoulos Charitos 7 months ago
parent
commit
a4b48fb0cd

+ 50 - 27
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -47,24 +47,25 @@ Error IndirectDiffuseClipmaps::init()
 
 	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 	{
-		m_probeValidityRtDescs[i] = getRenderer().create2DRenderTargetDescription(
-			m_clipmapInfo[i].m_probeCounts.x(), m_clipmapInfo[i].m_probeCounts.z(), Format::kR8_Unorm, generateTempPassName("Probe validity #%u", i));
+		m_probeValidityRtDescs[i] =
+			getRenderer().create2DRenderTargetDescription(m_clipmapInfo[i].m_probeCounts.x(), m_clipmapInfo[i].m_probeCounts.z(), Format::kR8_Unorm,
+														  generateTempPassName("IndirectDiffuseClipmap: Probe validity #%u", i));
 		m_probeValidityRtDescs[i].m_depth = m_clipmapInfo[i].m_probeCounts.y();
 		m_probeValidityRtDescs[i].m_type = TextureType::k3D;
 		m_probeValidityRtDescs[i].bake();
 	}
 
 	// Create the lighting result texture
-	m_radianceDesc = getRenderer().create2DRenderTargetDescription(probesPerClipmap, kRaysPerProbePerFrame, Format::kB10G11R11_Ufloat_Pack32,
-																   "IndirectDiffuse light result");
-	m_radianceDesc.bake();
+	m_rtResultRtDesc = getRenderer().create2DRenderTargetDescription(probesPerClipmap, kRaysPerProbePerFrame, Format::kR16G16B16A16_Sfloat,
+																	 "IndirectDiffuseClipmap: RT result");
+	m_rtResultRtDesc.bake();
 
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
 		TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
 			m_clipmapInfo[clipmap].m_probeCounts.x() * (g_indirectDiffuseClipmapRadianceCacheProbeSize + 2),
 			m_clipmapInfo[clipmap].m_probeCounts.z() * (g_indirectDiffuseClipmapRadianceCacheProbeSize + 2), Format::kB10G11R11_Ufloat_Pack32,
-			TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmapRadiance #%u", clipmap));
+			TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap: Radiance #%u", clipmap));
 		volumeInit.m_depth = m_clipmapInfo[clipmap].m_probeCounts.y();
 		volumeInit.m_type = TextureType::k3D;
 
@@ -76,13 +77,25 @@ Error IndirectDiffuseClipmaps::init()
 		TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
 			m_clipmapInfo[clipmap].m_probeCounts.x() * (g_indirectDiffuseClipmapIrradianceProbeSize + 2),
 			m_clipmapInfo[clipmap].m_probeCounts.z() * (g_indirectDiffuseClipmapIrradianceProbeSize + 2), Format::kB10G11R11_Ufloat_Pack32,
-			TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmapIrradiance #%u", clipmap));
+			TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap: Irradiance #%u", clipmap));
 		volumeInit.m_depth = m_clipmapInfo[clipmap].m_probeCounts.y();
 		volumeInit.m_type = TextureType::k3D;
 
 		m_irradianceVolumes[clipmap] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
 	}
 
+	// Create the distance moments volumes: one 3D texture per clipmap. In XY every probe gets a square tile of
+	// (g_indirectDiffuseClipmapDistancesProbeSize + 2) texels (the shaders treat the extra 2 as the octahedron border) and the
+	// probe count in Y becomes the depth. The textures are cleared on creation
+	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+	{
+		TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
+			m_clipmapInfo[clipmap].m_probeCounts.x() * (g_indirectDiffuseClipmapDistancesProbeSize + 2),
+			m_clipmapInfo[clipmap].m_probeCounts.z() * (g_indirectDiffuseClipmapDistancesProbeSize + 2), Format::kR16G16_Sfloat,
+			TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap: Dist moments #%u", clipmap));
+		volumeInit.m_depth = m_clipmapInfo[clipmap].m_probeCounts.y();
+		volumeInit.m_type = TextureType::k3D;
+
+		m_distanceMomentsVolumes[clipmap] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
+	}
+
 	Array<SubMutation, 1> mutation = {"RAYS_PER_PROBE_PER_FRAME", kRaysPerProbePerFrame};
 
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_tmpVisGrProg, "Test"));
@@ -130,22 +143,25 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
-	const RenderTargetHandle radianceRt = rgraph.newRenderTarget(m_radianceDesc);
+	const RenderTargetHandle rtResultHandle = rgraph.newRenderTarget(m_rtResultRtDesc);
 	m_runCtx.m_tmpRt = rgraph.newRenderTarget(m_tmpRtDesc);
 
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount> radianceVolumes;
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount> irradianceVolumes;
+	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount> distanceMomentsVolumes;
 	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 	{
 		if(m_texturesImportedOnce)
 		{
 			radianceVolumes[i] = rgraph.importRenderTarget(m_radianceVolumes[i].get());
 			irradianceVolumes[i] = rgraph.importRenderTarget(m_irradianceVolumes[i].get());
+			distanceMomentsVolumes[i] = rgraph.importRenderTarget(m_distanceMomentsVolumes[i].get());
 		}
 		else
 		{
 			radianceVolumes[i] = rgraph.importRenderTarget(m_radianceVolumes[i].get(), TextureUsageBit::kSrvCompute);
 			irradianceVolumes[i] = rgraph.importRenderTarget(m_irradianceVolumes[i].get(), TextureUsageBit::kSrvCompute);
+			distanceMomentsVolumes[i] = rgraph.importRenderTarget(m_distanceMomentsVolumes[i].get(), TextureUsageBit::kSrvCompute);
 		}
 	}
 
@@ -214,7 +230,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps");
 
-		pass.newTextureDependency(radianceRt, TextureUsageBit::kUavCompute);
+		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
 		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
@@ -224,7 +240,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
 												AccelerationStructureUsageBit::kTraceRaysSrv);
 
-		pass.setWork([this, radianceRt, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, rtResultHandle, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_libraryGrProg.get());
@@ -274,7 +290,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 
-			rgraphCtx.bindUav(0, 2, radianceRt);
+			rgraphCtx.bindUav(0, 2, rtResultHandle);
 			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
 
 			const UVec4 consts(0, kRaysPerProbePerFrame, 0, 0); // TODO
@@ -292,22 +308,25 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps populate caches");
 
-		pass.newTextureDependency(radianceRt, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
 		pass.newTextureDependency(probeValidityRts[clipmap], TextureUsageBit::kUavCompute);
+		// The work below also binds the distance moments volume as a UAV, so it has to be declared to the render graph like the
+		// other two written volumes
+		pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, &ctx, clipmap, radianceRt, radianceVolume = radianceVolumes[clipmap],
-					  validityVolume = probeValidityRts[clipmap]](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, &ctx, clipmap, rtResultHandle, radianceVolume = radianceVolumes[clipmap], validityVolume = probeValidityRts[clipmap],
+					  distanceMomentsVolume = distanceMomentsVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_populateCachesGrProg.get());
 
-			rgraphCtx.bindSrv(0, 0, radianceRt);
+			rgraphCtx.bindSrv(0, 0, rtResultHandle);
+
 			rgraphCtx.bindUav(0, 0, radianceVolume);
-			rgraphCtx.bindUav(1, 0, validityVolume);
+			rgraphCtx.bindUav(1, 0, distanceMomentsVolume);
+			rgraphCtx.bindUav(2, 0, validityVolume);
+
 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
-			const UVec4 consts(clipmap, g_indirectDiffuseClipmapRadianceCacheProbeSize, 0, 0);
+			const UVec4 consts(clipmap, g_indirectDiffuseClipmapRadianceCacheProbeSize, g_indirectDiffuseClipmapDistancesProbeSize, 0);
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 			cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCounts.x(), m_clipmapInfo[clipmap].m_probeCounts.y(),
@@ -352,10 +371,11 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		{
 			pass.newTextureDependency(irradianceVolumes[i], TextureUsageBit::kSrvCompute);
 			pass.newTextureDependency(probeValidityRts[i], TextureUsageBit::kSrvCompute);
+			pass.newTextureDependency(distanceMomentsVolumes[i], TextureUsageBit::kSrvCompute);
 		}
 		pass.newTextureDependency(m_runCtx.m_tmpRt, TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, &ctx, irradianceVolumes, probeValidityRts](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, &ctx, irradianceVolumes, probeValidityRts, distanceMomentsVolumes](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_tmpVisGrProg.get());
@@ -364,14 +384,20 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindSrv(1, 0, getRenderer().getGBuffer().getColorRt(2));
 			cmdb.bindSrv(2, 0, TextureView(&m_blueNoiseImg->getTexture(), TextureSubresourceDesc::firstSurface()));
 
+			U32 srvReg = 3;
+			for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
+			{
+				rgraphCtx.bindSrv(srvReg++, 0, irradianceVolumes[i]);
+			}
+
 			for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 			{
-				rgraphCtx.bindSrv(3 + i, 0, irradianceVolumes[i]);
+				rgraphCtx.bindSrv(srvReg++, 0, probeValidityRts[i]);
 			}
 
 			for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 			{
-				rgraphCtx.bindSrv(3 + kIndirectDiffuseClipmapCount + i, 0, probeValidityRts[i]);
+				rgraphCtx.bindSrv(srvReg++, 0, distanceMomentsVolumes[i]);
 			}
 
 			rgraphCtx.bindUav(0, 0, m_runCtx.m_tmpRt);
@@ -393,16 +419,13 @@ void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, Rende
 
 	cmdb.bindShaderProgram(m_visProbesGrProg.get());
 
-	const Bool visualizeIrradiance = true;
-
-	const UVec4 consts(0u, (visualizeIrradiance) ? g_indirectDiffuseClipmapIrradianceProbeSize : g_indirectDiffuseClipmapRadianceCacheProbeSize, 0,
-					   0);
+	const UVec4 consts(clipmap);
 	cmdb.setFastConstants(&consts, sizeof(consts));
 
 	cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
-	cmdb.bindSrv(
-		0, 0,
-		TextureView((visualizeIrradiance) ? m_irradianceVolumes[clipmap].get() : m_radianceVolumes[clipmap].get(), TextureSubresourceDesc::all()));
+
+	Texture* visVolume = m_irradianceVolumes[clipmap].get();
+	cmdb.bindSrv(0, 0, TextureView(visVolume, TextureSubresourceDesc::all()));
 	rgraphCtx.bindSrv(1, 0, m_runCtx.m_probeValidityRts[clipmap]);
 	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 

+ 5 - 5
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -66,9 +66,13 @@ public:
 private:
 	static constexpr U32 kRaysPerProbePerFrame = 32;
 
-	RenderTargetDesc m_radianceDesc;
 	Array<TexturePtr, kIndirectDiffuseClipmapCount> m_radianceVolumes;
 	Array<TexturePtr, kIndirectDiffuseClipmapCount> m_irradianceVolumes;
+	Array<TexturePtr, kIndirectDiffuseClipmapCount> m_distanceMomentsVolumes;
+
+	RenderTargetDesc m_rtResultRtDesc;
+	Array<RenderTargetDesc, kIndirectDiffuseClipmapCount> m_probeValidityRtDescs;
+	RenderTargetDesc m_tmpRtDesc; // TODO rm
 
 	Array<Clipmap, kIndirectDiffuseClipmapCount> m_clipmapInfo;
 
@@ -82,10 +86,6 @@ private:
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_visProbesGrProg;
 
-	Array<RenderTargetDesc, kIndirectDiffuseClipmapCount> m_probeValidityRtDescs;
-
-	RenderTargetDesc m_tmpRtDesc; // TODO rm
-
 	ImageResourcePtr m_blueNoiseImg;
 
 	U32 m_sbtRecordSize = 0;

+ 179 - 56
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -173,7 +173,8 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	}
 
 	// Store result
-	g_lightResultTex[UVec2(probeIdx, sampleIdx)] = HVec4(radiance, 0.0);
+	const F16 kMaxDist = sqrt(kMaxF16 - kEpsilonF16); // Make sure the square root doesn't overflow F16
+	g_lightResultTex[UVec2(probeIdx, sampleIdx)] = HVec4(radiance, min(rayT, kMaxDist));
 }
 #endif // ANKI_RAY_GEN_SHADER
 
@@ -181,10 +182,11 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 // PopulateCaches                                                            =
 // ===========================================================================
 #if NOT_ZERO(ANKI_TECHNIQUE_PopulateCaches)
-Texture2D<Vec4> g_lightResultTex : register(t0);
+Texture2D<Vec4> g_rtResultTex : register(t0);
 
 RWTexture3D<Vec4> g_radianceVolume : register(u0);
-RWTexture3D<Vec4> g_probeValidiryVolume : register(u1);
+RWTexture3D<Vec4> g_distanceMomentsVolume : register(u1);
+RWTexture3D<Vec4> g_probeValidiryVolume : register(u2);
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 
@@ -192,32 +194,46 @@ struct Consts
 {
 	U32 m_clipmapIdx;
 	U32 m_radianceProbeSize; // Size without border
-	F32 m_padding1;
+	U32 m_distanceMomentsProbeSize;
 	F32 m_padding2;
 };
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 
-groupshared U32 g_octCoordValueSet[128]; // TODO
+constexpr U32 kMaxValues = 128; // Capacity of the groupshared arrays below. main() asserts that both octahedron texel counts fit
+groupshared U32 g_octCoordValueSet[kMaxValues]; // TODO
+groupshared U32 g_octCoordValueSet2[kMaxValues]; // Counterpart of g_octCoordValueSet, indexed by distance moments octahedron texel
 groupshared U32 g_invalideRayCount; // Incremented for every ray whose RT result radiance equals the (1, 0, 1) marker
 
 [NumThreads(RAYS_PER_PROBE_PER_FRAME, 1, 1)] void main(COMPUTE_ARGS)
 {
 	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
 
-	const U32 octPixelCount = g_consts.m_radianceProbeSize * g_consts.m_radianceProbeSize;
-	ANKI_ASSERT(octPixelCount <= 128);
+	const U32 radianceOctPixelCount = g_consts.m_radianceProbeSize * g_consts.m_radianceProbeSize;
+	ANKI_ASSERT(radianceOctPixelCount <= kMaxValues);
+	const U32 distanceMomentsOctPixelCount = g_consts.m_distanceMomentsProbeSize * g_consts.m_distanceMomentsProbeSize;
+	ANKI_ASSERT(distanceMomentsOctPixelCount <= kMaxValues);
 
 	// Zero groupshared
-	const U32 octPixelsPerThread = (octPixelCount + RAYS_PER_PROBE_PER_FRAME - 1) / RAYS_PER_PROBE_PER_FRAME;
-	for(U32 i = 0; i < octPixelsPerThread; ++i)
+	const U32 radianceOctPixelsPerThread = (radianceOctPixelCount + RAYS_PER_PROBE_PER_FRAME - 1) / RAYS_PER_PROBE_PER_FRAME;
+	for(U32 i = 0; i < radianceOctPixelsPerThread; ++i)
 	{
-		const U32 octCoordIdx = svGroupIndex * octPixelsPerThread + i;
-		if(octCoordIdx < octPixelCount)
+		const U32 octCoordIdx = svGroupIndex * radianceOctPixelsPerThread + i;
+		if(octCoordIdx < radianceOctPixelCount)
 		{
 			g_octCoordValueSet[octCoordIdx] = 0;
 		}
 	}
 
+	const U32 distanceMomentsOctPixelsPerThread = (distanceMomentsOctPixelCount + RAYS_PER_PROBE_PER_FRAME - 1) / RAYS_PER_PROBE_PER_FRAME;
+	for(U32 i = 0; i < distanceMomentsOctPixelsPerThread; ++i)
+	{
+		const U32 octCoordIdx = svGroupIndex * distanceMomentsOctPixelsPerThread + i;
+		if(octCoordIdx < distanceMomentsOctPixelCount)
+		{
+			g_octCoordValueSet2[octCoordIdx] = 0;
+		}
+	}
+
 	if(svGroupIndex == 0)
 	{
 		g_invalideRayCount = 0;
@@ -243,69 +259,134 @@ groupshared U32 g_invalideRayCount;
 
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 probeWorldPos = svGroupId * probeSize + probeSize * 0.5 + clipmapAabbMin;
+	const Bool blendWithHistory = all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMax);
 
-	UVec3 volumeTexCoord = frac(probeWorldPos.xzy / clipmap.m_size.xzy) * clipmap.m_probeCounts.xzy;
-	volumeTexCoord = min(volumeTexCoord, clipmap.m_probeCounts.xzy - 1u);
+	UVec3 volumeTexCoord = frac(probeWorldPos / clipmap.m_size) * clipmap.m_probeCounts;
+	volumeTexCoord = min(volumeTexCoord, clipmap.m_probeCounts - 1u);
+	volumeTexCoord = volumeTexCoord.xzy;
 
-	HVec3 radiance = TEX(g_lightResultTex, UVec2(probeIdx, sampleIdx));
+	// Read the result from RT
+	const HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, sampleIdx));
+	HVec3 radiance = comp.xyz;
+	const HVec2 moments = HVec2(comp.w, square(comp.w));
 	if(all(radiance == HVec3(1.0, 0.0, 1.0)))
 	{
 		InterlockedAdd(g_invalideRayCount, 1);
 		radiance = 0.0;
 	}
 
-	const Vec2 octUv = generateRandomUv(sampleIdx, U32(RAYS_PER_PROBE_PER_FRAME), g_globalRendererConstants.m_frame);
-	const UVec2 octCoord = min(octUv * g_consts.m_radianceProbeSize, g_consts.m_radianceProbeSize - 1);
-	const U32 octCoordIdx = octCoord.y * g_consts.m_radianceProbeSize + octCoord.x;
-	ANKI_ASSERT(octCoordIdx < octPixelCount);
-
-	const Bool blendWithHistory = all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMax);
-	HVec3 avgRadiance = 0.0;
-	U32 iterationCount = 0;
-	do
+	// Update the radiance volume
 	{
-		U32 origValue;
-		InterlockedCompareExchange(g_octCoordValueSet[octCoordIdx], iterationCount, iterationCount + 1u, origValue);
+		const Vec2 octUv = generateRandomUv(sampleIdx, U32(RAYS_PER_PROBE_PER_FRAME), g_globalRendererConstants.m_frame);
+		const UVec2 octCoord = min(octUv * g_consts.m_radianceProbeSize, g_consts.m_radianceProbeSize - 1);
+		const U32 octCoordIdx = octCoord.y * g_consts.m_radianceProbeSize + octCoord.x;
+		ANKI_ASSERT(octCoordIdx < radianceOctPixelCount);
 
-		if(origValue == iterationCount)
+		HVec3 avgRadiance = 0.0;
+		U32 iterationCount = 0;
+		do
 		{
-			UVec3 actualVolumeTexCoord;
-			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
-			actualVolumeTexCoord.z = volumeTexCoord.z;
+			// A trick to only have one thread write to the same octahedron texel
+			U32 origValue;
+			InterlockedCompareExchange(g_octCoordValueSet[octCoordIdx], iterationCount, iterationCount + 1u, origValue);
 
-			if(blendWithHistory)
+			if(origValue == iterationCount)
 			{
-				const HVec3 prevValue = TEX(g_radianceVolume, actualVolumeTexCoord).xyz;
-				avgRadiance = lerp(prevValue, radiance, 0.1);
+				UVec3 actualVolumeTexCoord;
+				actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
+				actualVolumeTexCoord.z = volumeTexCoord.z;
+
+				if(blendWithHistory)
+				{
+					const HVec3 prevValue = TEX(g_radianceVolume, actualVolumeTexCoord).xyz;
+					avgRadiance = lerp(prevValue, radiance, 0.1);
+				}
+				else
+				{
+					avgRadiance = radiance;
+				}
+
+				TEX(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+
+				iterationCount = kMaxU32;
 			}
 			else
 			{
-				avgRadiance = radiance;
+				++iterationCount;
 			}
 
-			TEX(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+			AllMemoryBarrierWithGroupSync();
+		} while(iterationCount < kMaxU32);
 
-			iterationCount = kMaxU32;
-		}
-		else
+		// Set oct borders
+		IVec2 borders[3];
+		const U32 borderCount = octahedronBorder(g_consts.m_radianceProbeSize, octCoord, borders);
+		for(U32 i = 0; i < borderCount; ++i)
 		{
-			++iterationCount;
-		}
+			IVec3 actualVolumeTexCoord;
+			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
+			actualVolumeTexCoord.xy += borders[i];
+			actualVolumeTexCoord.z = volumeTexCoord.z;
 
-		AllMemoryBarrierWithGroupSync();
-	} while(iterationCount < kMaxU32);
+			TEX(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+		}
+	}
 
-	// Set oct borders
-	IVec2 borders[3];
-	const U32 borderCount = octahedronBorder(g_consts.m_radianceProbeSize, octCoord, borders);
-	for(U32 i = 0; i < borderCount; ++i)
+	// Do the same for the distance
 	{
-		IVec3 actualVolumeTexCoord;
-		actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
-		actualVolumeTexCoord.xy += borders[i];
-		actualVolumeTexCoord.z = volumeTexCoord.z;
+		const Vec2 octUv = generateRandomUv(sampleIdx, U32(RAYS_PER_PROBE_PER_FRAME), g_globalRendererConstants.m_frame);
+		const UVec2 octCoord = min(octUv * g_consts.m_distanceMomentsProbeSize, g_consts.m_distanceMomentsProbeSize - 1);
+		const U32 octCoordIdx = octCoord.y * g_consts.m_distanceMomentsProbeSize + octCoord.x;
+		ANKI_ASSERT(octCoordIdx < distanceMomentsOctPixelCount);
 
-		TEX(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+		HVec2 avgMoments = 0.0;
+		U32 iterationCount = 0;
+		do
+		{
+			// A trick to only have one thread write to the same octahedron texel
+			U32 origValue;
+			InterlockedCompareExchange(g_octCoordValueSet2[octCoordIdx], iterationCount, iterationCount + 1u, origValue);
+
+			if(origValue == iterationCount)
+			{
+				UVec3 actualVolumeTexCoord;
+				actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_distanceMomentsProbeSize + 2) + 1;
+				actualVolumeTexCoord.z = volumeTexCoord.z;
+
+				if(blendWithHistory)
+				{
+					const HVec2 prevValue = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
+					avgMoments = lerp(prevValue, moments, 0.1);
+				}
+				else
+				{
+					avgMoments = moments;
+				}
+
+				TEX(g_distanceMomentsVolume, actualVolumeTexCoord) = HVec4(avgMoments, 0.0, 0.0);
+
+				iterationCount = kMaxU32;
+			}
+			else
+			{
+				++iterationCount;
+			}
+
+			AllMemoryBarrierWithGroupSync();
+		} while(iterationCount < kMaxU32);
+
+		// Set oct borders
+		IVec2 borders[3];
+		const U32 borderCount = octahedronBorder(g_consts.m_distanceMomentsProbeSize, octCoord, borders);
+		for(U32 i = 0; i < borderCount; ++i)
+		{
+			IVec3 actualVolumeTexCoord;
+			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_distanceMomentsProbeSize + 2) + 1;
+			actualVolumeTexCoord.xy += borders[i];
+			actualVolumeTexCoord.z = volumeTexCoord.z;
+
+			TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy = avgMoments;
+		}
 	}
 
 	// Update probe validity
@@ -331,6 +412,7 @@ Texture2D<Vec4> g_blueNoiseTex : register(t2);
 
 Texture3D<Vec4> g_clipmapVolumes[kIndirectDiffuseClipmapCount] : register(t3);
 Texture3D<Vec4> g_probeValidityVolumes[kIndirectDiffuseClipmapCount] : register(t6); // WARNING: Adjust if kIndirectDiffuseClipmapCount changed
+Texture3D<Vec4> g_distanceMomentsVolumes[kIndirectDiffuseClipmapCount] : register(t9); // WARNING: Adjust if kIndirectDiffuseClipmapCount changed
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 
@@ -360,7 +442,7 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 	{
 		const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[0];
 		const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
-		worldPos += normal * min3(probeSize) * 0.5;
+		worldPos += normal * min3(probeSize) * 0.1;
 	}
 
 	// Rand
@@ -427,6 +509,15 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		octahedronSize -= 2.0; // The border
 	};
 
+	F32 distMomentsOctSize = 0.0;
+	Vec3 distMomentsRealVolumeSize;
+	{
+		g_distanceMomentsVolumes[0].GetDimensions(distMomentsRealVolumeSize.x, distMomentsRealVolumeSize.y, distMomentsRealVolumeSize.z);
+
+		distMomentsOctSize = distMomentsRealVolumeSize.x / clipmap.m_probeCounts.x;
+		distMomentsOctSize -= 2.0; // The border
+	}
+
 	const Vec3 samplePointUvw = frac(worldPos / clipmap.m_size);
 	const Vec3 icoord = floor(samplePointUvw * fakeVolumeSize - 0.5);
 	const Vec3 fcoord = frac(samplePointUvw * fakeVolumeSize - 0.5);
@@ -454,12 +545,40 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		const Vec3 w3 = select(xyz == 0.0, 1.0 - fcoord, fcoord);
 		F32 w = w3.x * w3.y * w3.z;
 
-		// Probe dir weight
+		// Normal weight
 		const Vec3 probePosition = firstProbePosition + xyz * probeSize;
 		const Vec3 dir = normalize(probePosition - worldPos);
 		const F32 wNormal = (dot(dir, normal) + 1.0) * 0.5;
 		w *= (wNormal * wNormal) + 0.2;
 
+		// Chebyshev occlusion test. Lowers the weight of this probe when the shading point is farther from the probe than the
+		// distance stored in the probe's distance moments. The if(1) is a hard-coded switch, the test always runs
+		if(1)
+		{
+			// Build the lookup position inside this probe's tile: scale by the tile size (octahedron + 2 border texels), skip the
+			// first border texel, then offset by the octahedron-encoded direction. Finally normalize by the real texture size
+			// NOTE(review): the lookup direction is the surface normal, not the probe-to-point direction. Confirm this is intended
+			Vec3 uvw = coords.xzy;
+			uvw.xy *= distMomentsOctSize + 2.0;
+			uvw.xy += 1.0;
+			uvw.xy += octahedronEncode(normal) * distMomentsOctSize;
+			uvw.z += 0.5;
+			uvw /= distMomentsRealVolumeSize;
+			// PopulateCaches writes the blended ray distance to x and the blended squared ray distance to y
+			const HVec2 distMoments = g_distanceMomentsVolumes[NonUniformResourceIndex(clipmapIdx)].SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
+
+			// |x^2 - y| of the two moments
+			const F32 variance = abs(distMoments.x * distMoments.x - distMoments.y);
+
+			const F32 posToProbeDist = length(worldPos - probePosition);
+			F32 chebyshevWeight = 1.0; // Stays 1 (no attenuation) when the point is not farther than the stored distance
+			if(posToProbeDist > distMoments.x) // occluded
+			{
+				// v must be greater than 0, which is guaranteed by the if condition above.
+				const F32 v = posToProbeDist - distMoments.x;
+				chebyshevWeight = variance / (variance + (v * v));
+
+				// Increase the contrast in the weight
+				chebyshevWeight = max((chebyshevWeight * chebyshevWeight * chebyshevWeight), 0.0);
+			}
+
+			w *= chebyshevWeight;
+		}
+
 		// Compute the actual coords
 		Vec3 uvw = coords.xzy;
 		uvw.xy *= octahedronSize + 2.0;
@@ -668,7 +787,7 @@ struct FragOut
 struct Consts
 {
 	U32 m_clipmapIdx;
-	U32 m_octProbeSize; // Size without border
+	U32 m_padding1;
 	U32 m_padding2;
 	U32 m_padding3;
 };
@@ -752,12 +871,16 @@ FragOut main(VertOut input)
 
 	const Vec3 normal = normalize(collisionPoint - input.m_probeCenter);
 
+	UVec3 texSize;
+	g_volume.GetDimensions(texSize.x, texSize.y, texSize.z);
+	const U32 octProbeSize = texSize.x / clipmap.m_probeCounts.x - 2;
+
 	Vec3 uvw = frac(input.m_probeCenter.xzy / clipmap.m_size.xzy);
 	const UVec3 texelCoord = uvw * clipmap.m_probeCounts.xzy;
 
-	uvw.xy = texelCoord.xy * (g_consts.m_octProbeSize + 2);
-	uvw.xy += octahedronEncode(normal) * g_consts.m_octProbeSize + 1.0;
-	uvw.xy /= clipmap.m_probeCounts.xz * (g_consts.m_octProbeSize + 2);
+	uvw.xy = texelCoord.xy * (octProbeSize + 2);
+	uvw.xy += octahedronEncode(normal) * octProbeSize + 1.0;
+	uvw.xy /= clipmap.m_probeCounts.xz * (octProbeSize + 2);
 
 	uvw.z = (texelCoord.z + 0.5) / clipmap.m_probeCounts.y;