Panagiotis Christopoulos Charitos преди 7 месеца
родител
ревизия
3277da46aa
променени са 4 файла, в които са добавени 283 реда и са изтрити 274 реда
  1. 107 106
      AnKi/Renderer/IndirectDiffuseClipmaps.cpp
  2. 13 0
      AnKi/Shaders/Functions.hlsl
  3. 1 1
      AnKi/Shaders/Include/MiscRendererTypes.h
  4. 162 167
      AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

+ 107 - 106
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -35,6 +35,7 @@ Error IndirectDiffuseClipmaps::init()
 	{
 		const U32 count = m_clipmapInfo[i].m_probeCounts.x() * m_clipmapInfo[i].m_probeCounts.y() * m_clipmapInfo[i].m_probeCounts.z();
 		m_clipmapInfo[i].m_probeCountsTotal = count;
+		m_clipmapInfo[i].m_index = i;
 		if(i == 0)
 		{
 			probesPerClipmap = count;
@@ -232,30 +233,32 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		});
 	}
 
-	// Do ray tracing around the probes
+	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps");
-
-		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
-		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		if(getRenderer().getGeneratedSky().isEnabled())
+		// Do ray tracing around the probes
 		{
-			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
-		}
-		pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
-		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-												AccelerationStructureUsageBit::kTraceRaysSrv);
+			NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: RT #%u", clipmap));
 
-		pass.setWork([this, rtResultHandle, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
+			pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+			if(getRenderer().getGeneratedSky().isEnabled())
+			{
+				pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+			}
+			pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+			pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
+													AccelerationStructureUsageBit::kTraceRaysSrv);
 
-			cmdb.bindShaderProgram(m_libraryGrProg.get());
+			pass.setWork([this, rtResultHandle, &ctx, sbtBuffer, clipmap](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			// More globals
-			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
+				cmdb.bindShaderProgram(m_libraryGrProg.get());
+
+				// More globals
+				cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
@@ -265,111 +268,109 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		Format::k##fmt);
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
 
-			cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
-
-			rgraphCtx.bindSrv(0, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
-			cmdb.bindSrv(1, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(2, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(3, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-
-			const LightComponent* dirLight = SceneGraph::getSingleton().getDirectionalLight();
-			const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
-			const Bool bSkySolidColor =
-				(!sky || sky->getSkyboxType() == SkyboxType::kSolidColor || (!dirLight && sky->getSkyboxType() == SkyboxType::kGenerated));
-			if(bSkySolidColor)
-			{
-				cmdb.bindSrv(4, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			}
-			else if(sky->getSkyboxType() == SkyboxType::kImage2D)
-			{
-				cmdb.bindSrv(4, 2, TextureView(&sky->getImageResource().getTexture(), TextureSubresourceDesc::all()));
-			}
-			else
-			{
-				rgraphCtx.bindSrv(4, 2, getRenderer().getGeneratedSky().getEnvironmentMapRt());
-			}
-
-			cmdb.bindSrv(5, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
-			cmdb.bindSrv(6, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
-			rgraphCtx.bindSrv(7, 2, getRenderer().getShadowMapping().getShadowmapRt());
-
-			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
-			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
-
-			rgraphCtx.bindUav(0, 2, rtResultHandle);
-			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
-
-			const UVec4 consts(0, kRaysPerProbePerFrame, 0, 0); // TODO
-			cmdb.setFastConstants(&consts, sizeof(consts));
-
-			const U32 probeCount = U32(m_clipmapInfo[0].m_probeCountsTotal);
-			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   probeCount * kRaysPerProbePerFrame, 1, 1);
-		});
-	}
-
-	// Populate caches
-	{
-		const U32 clipmap = 0; // TODO
-
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps populate caches");
+				cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
+
+				rgraphCtx.bindSrv(0, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+				cmdb.bindSrv(1, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+				cmdb.bindSrv(2, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+				cmdb.bindSrv(3, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+
+				const LightComponent* dirLight = SceneGraph::getSingleton().getDirectionalLight();
+				const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+				const Bool bSkySolidColor =
+					(!sky || sky->getSkyboxType() == SkyboxType::kSolidColor || (!dirLight && sky->getSkyboxType() == SkyboxType::kGenerated));
+				if(bSkySolidColor)
+				{
+					cmdb.bindSrv(4, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+				}
+				else if(sky->getSkyboxType() == SkyboxType::kImage2D)
+				{
+					cmdb.bindSrv(4, 2, TextureView(&sky->getImageResource().getTexture(), TextureSubresourceDesc::all()));
+				}
+				else
+				{
+					rgraphCtx.bindSrv(4, 2, getRenderer().getGeneratedSky().getEnvironmentMapRt());
+				}
+
+				cmdb.bindSrv(5, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+				cmdb.bindSrv(6, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+				rgraphCtx.bindSrv(7, 2, getRenderer().getShadowMapping().getShadowmapRt());
+
+				cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
+				cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
+
+				rgraphCtx.bindUav(0, 2, rtResultHandle);
+				cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
+
+				const UVec4 consts(clipmap, kRaysPerProbePerFrame, 0, 0);
+				cmdb.setFastConstants(&consts, sizeof(consts));
+
+				const U32 probeCount = U32(m_clipmapInfo[0].m_probeCountsTotal);
+				cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							   probeCount * kRaysPerProbePerFrame, 1, 1);
+			});
+		}
 
-		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kSrvCompute);
-		pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
-		pass.newTextureDependency(probeValidityRts[clipmap], TextureUsageBit::kUavCompute);
+		// Populate caches
+		{
+			NonGraphicsRenderPass& pass =
+				rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: Populate caches #%u", clipmap));
 
-		pass.setWork([this, &ctx, clipmap, rtResultHandle, radianceVolume = radianceVolumes[clipmap], validityVolume = probeValidityRts[clipmap],
-					  distanceMomentsVolume = distanceMomentsVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			pass.newTextureDependency(rtResultHandle, TextureUsageBit::kSrvCompute);
+			pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
+			pass.newTextureDependency(probeValidityRts[clipmap], TextureUsageBit::kUavCompute);
 
-			cmdb.bindShaderProgram(m_populateCachesGrProg.get());
+			pass.setWork([this, &ctx, clipmap, rtResultHandle, radianceVolume = radianceVolumes[clipmap], validityVolume = probeValidityRts[clipmap],
+						  distanceMomentsVolume = distanceMomentsVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			rgraphCtx.bindSrv(0, 0, rtResultHandle);
+				cmdb.bindShaderProgram(m_populateCachesGrProg.get());
 
-			rgraphCtx.bindUav(0, 0, radianceVolume);
-			rgraphCtx.bindUav(1, 0, distanceMomentsVolume);
-			rgraphCtx.bindUav(2, 0, validityVolume);
+				rgraphCtx.bindSrv(0, 0, rtResultHandle);
 
-			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+				rgraphCtx.bindUav(0, 0, radianceVolume);
+				rgraphCtx.bindUav(1, 0, distanceMomentsVolume);
+				rgraphCtx.bindUav(2, 0, validityVolume);
 
-			const UVec4 consts(clipmap);
-			cmdb.setFastConstants(&consts, sizeof(consts));
+				cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
-			cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCounts.x(), m_clipmapInfo[clipmap].m_probeCounts.y(),
-								 m_clipmapInfo[clipmap].m_probeCounts.z());
-		});
-	}
+				const UVec4 consts(clipmap);
+				cmdb.setFastConstants(&consts, sizeof(consts));
 
-	// Compute irradiance
-	{
-		const U32 clipmap = 0; // TODO
+				cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCounts.x(), m_clipmapInfo[clipmap].m_probeCounts.y(),
+									 m_clipmapInfo[clipmap].m_probeCounts.z());
+			});
+		}
 
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps irradiance");
+		// Compute irradiance
+		{
+			NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: Irradiance #%u", clipmap));
 
-		pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kSrvCompute);
-		pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kUavCompute);
+			pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kSrvCompute);
+			pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, &ctx, clipmap, radianceVolume = radianceVolumes[clipmap],
-					  irradianceVolume = irradianceVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			pass.setWork([this, &ctx, clipmap, radianceVolume = radianceVolumes[clipmap],
+						  irradianceVolume = irradianceVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_computeIrradianceGrProg.get());
+				cmdb.bindShaderProgram(m_computeIrradianceGrProg.get());
 
-			rgraphCtx.bindSrv(0, 0, radianceVolume);
-			rgraphCtx.bindUav(0, 0, irradianceVolume);
-			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+				rgraphCtx.bindSrv(0, 0, radianceVolume);
+				rgraphCtx.bindUav(0, 0, irradianceVolume);
+				cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
-			const UVec4 consts(clipmap);
-			cmdb.setFastConstants(&consts, sizeof(consts));
+				const UVec4 consts(clipmap);
+				cmdb.setFastConstants(&consts, sizeof(consts));
 
-			cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCountsTotal, g_indirectDiffuseClipmapIrradianceOctMapSize,
-								 g_indirectDiffuseClipmapIrradianceOctMapSize);
-		});
+				cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCountsTotal, g_indirectDiffuseClipmapIrradianceOctMapSize,
+									 g_indirectDiffuseClipmapIrradianceOctMapSize);
+			});
+		}
 	}
 
 	// Test
 	{
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps test");
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps composite");
 
 		pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
@@ -421,7 +422,7 @@ void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, Rende
 {
 	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-	const U32 clipmap = 0;
+	const U32 clipmap = 1;
 
 	cmdb.bindShaderProgram(m_visProbesGrProg.get());
 
@@ -430,7 +431,7 @@ void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, Rende
 
 	cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
-	Texture* visVolume = m_irradianceVolumes[clipmap].get();
+	Texture* visVolume = m_distanceMomentsVolumes[clipmap].get();
 	cmdb.bindSrv(0, 0, TextureView(visVolume, TextureSubresourceDesc::all()));
 	rgraphCtx.bindSrv(1, 0, m_runCtx.m_probeValidityRts[clipmap]);
 	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());

+ 13 - 0
AnKi/Shaders/Functions.hlsl

@@ -922,3 +922,16 @@ IVec2 generateMsaa16x(U32 sample)
 
 	return pattern;
 }
+
+/// Given some UVs (can be 2D or 3D) and the texture size, return adjusted UVs meant to be fed to a linear-filtered sample. The fractional position between texels is remapped with the smoothstep polynomial before going back to UV space.
+/// Code taken from https://www.shadertoy.com/view/XsfGDn
+template<typename TUv>
+TUv improvedLinearTextureFiltering(TUv uv, TUv texSize)
+{
+	uv = uv * texSize + 0.5; // Scale by the texture size and shift by half a unit (presumably texSize is in texels)
+	const TUv iuv = floor(uv); // Integer part
+	const TUv fuv = frac(uv); // Fractional part
+	uv = iuv + fuv * fuv * (3.0 - 2.0 * fuv); // Swap the linear fraction f for 3f^2 - 2f^3 (the smoothstep curve)
+	uv = (uv - 0.5) / texSize; // Undo the half-unit shift and the scale
+	return uv;
+}

+ 1 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -76,7 +76,7 @@ struct Clipmap
 	U32 m_probeCountsTotal;
 
 	Vec3 m_size;
-	F32 m_padding2;
+	U32 m_index;
 };
 
 /// Common constants for all passes.

+ 162 - 167
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -7,7 +7,7 @@
 
 #pragma anki mutator RAYS_PER_PROBE_PER_FRAME 32 64
 #pragma anki mutator GPU_WAVE_SIZE 16 32 64
-#pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 10
+#pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 10 11 12
 #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
 
 #pragma anki technique RtMaterialFetch rgen mutators
@@ -31,11 +31,11 @@
 #define CLIPMAP_VOLUME 1
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
-constexpr Vec3 kIndirectDiffuseClipmapForwardOffset = Vec3(10.0, 5.0, 10.0); // In meters
+constexpr Vec3 kIndirectDiffuseClipmapForwardOffset = Vec3(20.0, 2.0, 20.0); // In meters
 
 void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, out Vec3 aabbMin, out Vec3 aabbMax)
 {
-	const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
+	const Vec3 offset = normalize(lookDir) * kIndirectDiffuseClipmapForwardOffset * (clipmap.m_index + 1);
 	cameraPos += offset;
 
 	const Vec3 halfSize = clipmap.m_size * 0.5;
@@ -47,7 +47,7 @@ void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, out Vec
 
 F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos)
 {
-	const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
+	const Vec3 offset = normalize(lookDir) * kIndirectDiffuseClipmapForwardOffset * (clipmap.m_index + 1);
 
 	cameraPos += offset;
 
@@ -176,7 +176,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	}
 
 	// Store result
-	const F16 kMaxDist = sqrt(kMaxF16 - kEpsilonF16); // Make sure the square root doesn't overflow F16
+	const F32 kMaxDist = 1000.0; // Chose something small and make sure its square doesn't overflow F16
 	g_lightResultTex[UVec2(probeIdx, sampleIdx)] = HVec4(radiance, min(rayT, kMaxDist));
 }
 #endif // ANKI_RAY_GEN_SHADER
@@ -252,7 +252,7 @@ groupshared U32 g_invalideRayCount;
 	// Read the result from RT
 	const HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, sampleIdx));
 	HVec3 radiance = comp.xyz;
-	const HVec2 moments = HVec2(comp.w, square(comp.w));
+	const Vec2 moments = Vec2(comp.w, square(comp.w));
 	if(all(radiance == HVec3(1.0, 0.0, 1.0)))
 	{
 		InterlockedAdd(g_invalideRayCount, 1);
@@ -265,7 +265,7 @@ groupshared U32 g_invalideRayCount;
 		const UVec2 octCoord = min(octUv * RADIANCE_OCTAHEDRON_MAP_SIZE, RADIANCE_OCTAHEDRON_MAP_SIZE - 1);
 
 		HVec3 avgRadiance = 0.0;
-		HVec2 avgMoments = 0.0;
+		Vec2 avgMoments = 0.0;
 		U32 iterationCount = 0;
 		do
 		{
@@ -286,7 +286,7 @@ groupshared U32 g_invalideRayCount;
 					const HVec3 prevValue = TEX(g_radianceVolume, actualVolumeTexCoord).xyz;
 					avgRadiance = lerp(prevValue, radiance, blendFactor);
 
-					const HVec2 prevValue2 = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
+					const Vec2 prevValue2 = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
 					avgMoments = lerp(prevValue2, moments, blendFactor);
 				}
 				else
@@ -348,7 +348,7 @@ groupshared U32 g_invalideRayCount;
 
 			// Search the nearby texels
 			HVec3 otherRadiance = HVec3(1.0, 0.0, 1.0);
-			HVec2 otherMoments = HVec2(0.0, 0.0);
+			Vec2 otherMoments = 0.0;
 			for(I32 y = -1; y <= 1; ++y)
 			{
 				for(I32 x = -1; x <= 1; ++x)
@@ -405,6 +405,137 @@ groupshared U32 g_invalideRayCount;
 }
 #endif
 
+// ===========================================================================
+// ComputeIrradiance                                                         =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
+
+struct Consts // Fast constants of the ComputeIrradiance technique
+{
+	U32 m_clipmapIdx; // Index into g_globalRendererConstants.m_indirectDiffuseClipmaps selecting the clipmap to process
+	U32 m_padding1; // Not read by this technique's main()
+	U32 m_padding2; // Not read by this technique's main()
+	U32 m_padding3; // Not read by this technique's main()
+};
+ANKI_FAST_CONSTANTS(Consts, g_consts)
+
+constexpr U32 kThreadCount = GPU_WAVE_SIZE;
+
+Texture3D<Vec4> g_radianceVolume : register(t0);
+
+RWTexture3D<Vec4> g_irradianceVolume : register(u0);
+
+ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
+
+groupshared Vec3 g_irradianceResults[kThreadCount];
+
+[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS) // Each thread group produces one irradiance texel of one probe by summing that probe's radiance texels
+{
+	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
+	const U32 probeIdx = svGroupId.x; // Group X selects the probe
+	const UVec2 irradianceTexel = svGroupId.yz; // Group Y/Z select the texel inside the probe's irradiance octahedron map
+
+	// Compute the first texel of this probe's radiance tile inside the radiance volume
+	UVec3 radianceTexelCoordStart;
+	unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, radianceTexelCoordStart.z,
+						  radianceTexelCoordStart.y, radianceTexelCoordStart.x);
+	radianceTexelCoordStart = radianceTexelCoordStart.xzy; // Swap y and z to match the volume layout
+	radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2; // Tiles are the octahedron map size plus 2 texels (presumably a 1 texel border per side)
+	radianceTexelCoordStart.xy += 1; // Step over the leading border texel
+
+	// Compute irradiance. First find the direction that the output texel's center represents
+	Vec2 octUv = Vec2(irradianceTexel);
+	octUv += 0.5;
+	octUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
+	const Vec3 dir = octahedronDecode(octUv);
+
+	const U32 radianceTexelCount = RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE;
+	const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount; // Ceil division, the last threads may get fewer or no texels
+
+	Vec3 irradiance = 0.0;
+	for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
+	{
+		Vec2 octUv = Vec2(pixel % RADIANCE_OCTAHEDRON_MAP_SIZE, pixel / RADIANCE_OCTAHEDRON_MAP_SIZE); // Shadows the outer octUv. This one is the radiance texel
+		octUv += 0.5;
+		octUv /= RADIANCE_OCTAHEDRON_MAP_SIZE;
+
+		const Vec3 sampleDir = octahedronDecode(octUv);
+
+		const F32 lambert = dot(dir, sampleDir);
+		if(lambert <= kEpsilonF32) // Skip radiance texels that are perpendicular to or facing away from dir
+		{
+			continue;
+		}
+
+		UVec3 coord = radianceTexelCoordStart;
+		coord.x += pixel % RADIANCE_OCTAHEDRON_MAP_SIZE + 1; // NOTE(review): radianceTexelCoordStart already had +1 added above, this +1 looks like a double border offset. Confirm
+		coord.y += pixel / RADIANCE_OCTAHEDRON_MAP_SIZE + 1; // NOTE(review): same as the line above
+
+		const Vec3 radiance = TEX(g_radianceVolume, coord).xyz;
+
+		const F32 sampleCount = square(F32(RADIANCE_OCTAHEDRON_MAP_SIZE)) / 2.0; // Half of the texel count, presumably because only about half of the directions pass the lambert test. Confirm
+		irradiance += radiance * lambert / sampleCount;
+	}
+
+	g_irradianceResults[svGroupIndex] = irradiance; // Publish this thread's partial sum
+
+	GroupMemoryBarrierWithGroupSync();
+
+	[loop] for(U32 s = kThreadCount / 2u; s > 0u; s >>= 1u) // Pairwise reduction of the partial sums, the stride halves every iteration
+	{
+		if(svGroupIndex < s)
+		{
+			g_irradianceResults[svGroupIndex] += g_irradianceResults[svGroupIndex + s];
+		}
+
+#	if ANKI_PLATFORM_MOBILE
+		if(s > WaveGetLaneCount()) // Barrier only while the stride is larger than the wave, presumably relying on the remaining threads sharing a wave. Confirm
+		{
+			GroupMemoryBarrierWithGroupSync();
+		}
+#	else
+		GroupMemoryBarrierWithGroupSync();
+#	endif
+	}
+
+	if(svGroupIndex == 0) // Only the first thread writes the result
+	{
+		irradiance = g_irradianceResults[0] * k2Pi; // Final sum of all threads scaled by k2Pi
+
+		// Compute the texel coord to write the output: probe tile start, plus 1 for the border, plus the texel inside the tile
+		UVec3 irradianceTexelCoord;
+		unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, irradianceTexelCoord.z,
+							  irradianceTexelCoord.y, irradianceTexelCoord.x);
+		irradianceTexelCoord = irradianceTexelCoord.xzy;
+		irradianceTexelCoord.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
+		irradianceTexelCoord.xy += 1;
+		irradianceTexelCoord.x += irradianceTexel.x;
+		irradianceTexelCoord.y += irradianceTexel.y;
+
+		TEX(g_irradianceVolume, irradianceTexelCoord).xyz = irradiance;
+
+		// Write the borders: copy the same value to every border texel that octahedronBorder() reports for this texel
+		UVec3 volumeTexCoord;
+		unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, volumeTexCoord.z, volumeTexCoord.y,
+							  volumeTexCoord.x);
+		volumeTexCoord = volumeTexCoord.xzy;
+
+		IVec2 borders[3]; // Offsets filled by octahedronBorder(), at most 3 entries
+		const IVec2 octCoord = IVec2(irradianceTexel);
+		const U32 borderCount = octahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoord, borders);
+		for(U32 i = 0; i < borderCount; ++i)
+		{
+			IVec3 actualVolumeTexCoord;
+			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1; // Same texel as irradianceTexelCoord.xy
+			actualVolumeTexCoord.xy += borders[i]; // Move to the border texel
+			actualVolumeTexCoord.z = volumeTexCoord.z;
+
+			TEX(g_irradianceVolume, actualVolumeTexCoord).xyz = irradiance;
+		}
+	}
+}
+#endif
+
 // ===========================================================================
 // Test                                                                      =
 // ===========================================================================
@@ -424,7 +555,7 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
 
 SamplerState g_linearAnyRepeatSampler : register(s0);
 
-[NumThreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DispatchThreadID)
+[NumThreads(8, 8, 1)] void main(COMPUTE_ARGS)
 {
 	UVec2 viewportSize;
 	g_outTex.GetDimensions(viewportSize.x, viewportSize.y);
@@ -434,21 +565,14 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		return;
 	}
 
-	const Vec3 normal = unpackNormalFromGBuffer(g_gbufferRt2[svDispatchThreadId]);
+	const Vec3 normal = unpackNormalFromGBuffer(g_gbufferRt2[svDispatchThreadId.xy]);
 
-	const F32 depth = g_depthTex[svDispatchThreadId].r;
-	const Vec2 uv = Vec2(svDispatchThreadId) / Vec2(viewportSize);
+	const F32 depth = g_depthTex[svDispatchThreadId.xy].r;
+	const Vec2 uv = Vec2(svDispatchThreadId.xy) / Vec2(viewportSize);
 	const Vec2 ndc = uvToNdc(uv);
 	const Vec4 worldPos4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
 	Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
-	if(1)
-	{
-		const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[0];
-		const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
-		worldPos += normal * min3(probeSize) * 0.1;
-	}
-
 	// Rand
 	UVec2 noiseTexSize;
 	g_blueNoiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
@@ -458,11 +582,11 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 
 	const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
 	const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
-#	if 0
+#	if 1
 	const U16 clipmapIdx = findClipmapOnPosition(g_globalRendererConstants.m_indirectDiffuseClipmaps, g_globalRendererConstants.m_cameraPosition,
 												 lookDir, worldPos, noise);
 #	else
-	U16 clipmapIdx = 0;
+	U16 clipmapIdx = 1;
 	const F32 fade = computeClipmapFade2(g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx], g_globalRendererConstants.m_cameraPosition,
 										 lookDir, worldPos);
 	if(fade < 1.0)
@@ -474,19 +598,19 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 #	if 0
 	if(clipmapIdx == 0)
 	{
-		g_outTex[svDispatchThreadId] = Vec4(1, 0, 0, 0);
+		g_outTex[svDispatchThreadId.xy] = Vec4(1, 0, 0, 0);
 	}
 	else if(clipmapIdx == 1)
 	{
-		g_outTex[svDispatchThreadId] = Vec4(0, 1, 0, 0);
+		g_outTex[svDispatchThreadId.xy] = Vec4(0, 1, 0, 0);
 	}
 	else if(clipmapIdx == 2)
 	{
-		g_outTex[svDispatchThreadId] = Vec4(0, 0, 1, 0);
+		g_outTex[svDispatchThreadId.xy] = Vec4(0, 0, 1, 0);
 	}
 	else
 	{
-		g_outTex[svDispatchThreadId] = Vec4(1, 0, 1, 0);
+		g_outTex[svDispatchThreadId.xy] = Vec4(1, 0, 1, 0);
 	}
 
 	return;
@@ -494,16 +618,18 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 
 	if(clipmapIdx >= kIndirectDiffuseClipmapCount)
 	{
-		g_outTex[svDispatchThreadId] = 0.0;
+		g_outTex[svDispatchThreadId.xy] = 0.0;
 		return;
 	}
 
 	// Some calculations are in the real volume size and some in some fake one that doesn't include the octahedron and the y and z are swapped
 
-	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx]; // TODO: Dynamically indexing cbuffer
+	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx];
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 fakeVolumeSize = clipmap.m_probeCounts; // Volume size without the octahedron
 
+	const Vec3 biasedWorldPos = worldPos + normal * min3(probeSize) * 0.1;
+
 	F32 octahedronSize = 0.0;
 	Vec3 realVolumeSize;
 	{
@@ -522,11 +648,11 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		distMomentsOctSize -= 2.0; // The border
 	}
 
-	const Vec3 samplePointUvw = frac(worldPos / clipmap.m_size);
+	const Vec3 samplePointUvw = frac(biasedWorldPos / clipmap.m_size);
 	const Vec3 icoord = floor(samplePointUvw * fakeVolumeSize - 0.5);
 	const Vec3 fcoord = frac(samplePointUvw * fakeVolumeSize - 0.5);
 
-	const Vec3 firstProbePosition = floor((worldPos - probeSize / 2.0) / probeSize) * probeSize + probeSize / 2.0;
+	const Vec3 firstProbePosition = floor((biasedWorldPos - probeSize / 2.0) / probeSize) * probeSize + probeSize / 2.0;
 
 	F32 weightSum = 0.0;
 	Vec3 irradiance = 0.0;
@@ -561,14 +687,14 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 			Vec3 uvw = coords.xzy;
 			uvw.xy *= distMomentsOctSize + 2.0;
 			uvw.xy += 1.0;
-			uvw.xy += octahedronEncode(normalize(worldPos - probePosition)) * distMomentsOctSize;
+			uvw.xy += octahedronEncode(normalize(biasedWorldPos - probePosition)) * distMomentsOctSize;
 			uvw.z += 0.5;
 			uvw /= distMomentsRealVolumeSize;
-			const HVec2 distMoments = g_distanceMomentsVolumes[NonUniformResourceIndex(clipmapIdx)].SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
+			const Vec2 distMoments = g_distanceMomentsVolumes[NonUniformResourceIndex(clipmapIdx)].SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
 
 			const F32 variance = abs(distMoments.x * distMoments.x - distMoments.y);
 
-			const F32 posToProbeDist = length(worldPos - probePosition);
+			const F32 posToProbeDist = length(biasedWorldPos - probePosition);
 			F32 chebyshevWeight = 1.0;
 			if(posToProbeDist > distMoments.x) // occluded
 			{
@@ -577,10 +703,10 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 				chebyshevWeight = variance / (variance + (v * v));
 
 				// Increase the contrast in the weight
-				chebyshevWeight = max((chebyshevWeight * chebyshevWeight * chebyshevWeight), 0.0);
+				chebyshevWeight = max((chebyshevWeight * chebyshevWeight * chebyshevWeight), 0.05);
 			}
 
-			w *= max(0.05, chebyshevWeight);
+			w *= chebyshevWeight;
 		}
 
 		// Compute the actual coords
@@ -605,138 +731,7 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		irradiance = 0.0;
 	}
 
-	g_outTex[svDispatchThreadId] = Vec4(irradiance, 0.0);
-}
-#endif
-
-// ===========================================================================
-// ComputeIrradiance                                                         =
-// ===========================================================================
-#if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
-
-struct Consts
-{
-	U32 m_clipmapIdx;
-	U32 m_padding1;
-	U32 m_padding2;
-	U32 m_padding3;
-};
-ANKI_FAST_CONSTANTS(Consts, g_consts)
-
-constexpr U32 kThreadCount = GPU_WAVE_SIZE;
-
-Texture3D<Vec4> g_radianceVolume : register(t0);
-
-RWTexture3D<Vec4> g_irradianceVolume : register(u0);
-
-ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
-
-groupshared Vec3 g_irradianceResults[kThreadCount];
-
-[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
-{
-	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
-	const U32 probeIdx = svGroupId.x;
-	const UVec2 irradianceTexel = svGroupId.yz;
-
-	// Compute input radiance coordinates
-	UVec3 radianceTexelCoordStart;
-	unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, radianceTexelCoordStart.z,
-						  radianceTexelCoordStart.y, radianceTexelCoordStart.x);
-	radianceTexelCoordStart = radianceTexelCoordStart.xzy;
-	radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
-	radianceTexelCoordStart.xy += 1;
-
-	// Compute irradiance
-	Vec2 octUv = Vec2(irradianceTexel);
-	octUv += 0.5;
-	octUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
-	const Vec3 dir = octahedronDecode(octUv);
-
-	const U32 radianceTexelCount = RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE;
-	const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount;
-
-	Vec3 irradiance = 0.0;
-	for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
-	{
-		Vec2 octUv = Vec2(pixel % RADIANCE_OCTAHEDRON_MAP_SIZE, pixel / RADIANCE_OCTAHEDRON_MAP_SIZE);
-		octUv += 0.5;
-		octUv /= RADIANCE_OCTAHEDRON_MAP_SIZE;
-
-		const Vec3 sampleDir = octahedronDecode(octUv);
-
-		const F32 lambert = dot(dir, sampleDir);
-		if(lambert <= kEpsilonF32)
-		{
-			continue;
-		}
-
-		UVec3 coord = radianceTexelCoordStart;
-		coord.x += pixel % RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
-		coord.y += pixel / RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
-
-		const Vec3 radiance = TEX(g_radianceVolume, coord).xyz;
-
-		const F32 sampleCount = square(F32(RADIANCE_OCTAHEDRON_MAP_SIZE)) / 2.0;
-		irradiance += radiance * lambert / sampleCount;
-	}
-
-	g_irradianceResults[svGroupIndex] = irradiance;
-
-	GroupMemoryBarrierWithGroupSync();
-
-	[loop] for(U32 s = kThreadCount / 2u; s > 0u; s >>= 1u)
-	{
-		if(svGroupIndex < s)
-		{
-			g_irradianceResults[svGroupIndex] += g_irradianceResults[svGroupIndex + s];
-		}
-
-#	if ANKI_PLATFORM_MOBILE
-		if(s > WaveGetLaneCount())
-		{
-			GroupMemoryBarrierWithGroupSync();
-		}
-#	else
-		GroupMemoryBarrierWithGroupSync();
-#	endif
-	}
-
-	if(svGroupIndex == 0)
-	{
-		irradiance = g_irradianceResults[0] * k2Pi;
-
-		// Compute the texel coord to write the output
-		UVec3 irradianceTexelCoord;
-		unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, irradianceTexelCoord.z,
-							  irradianceTexelCoord.y, irradianceTexelCoord.x);
-		irradianceTexelCoord = irradianceTexelCoord.xzy;
-		irradianceTexelCoord.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
-		irradianceTexelCoord.xy += 1;
-		irradianceTexelCoord.x += irradianceTexel.x;
-		irradianceTexelCoord.y += irradianceTexel.y;
-
-		TEX(g_irradianceVolume, irradianceTexelCoord).xyz = irradiance;
-
-		// Write the borders
-		UVec3 volumeTexCoord;
-		unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, volumeTexCoord.z, volumeTexCoord.y,
-							  volumeTexCoord.x);
-		volumeTexCoord = volumeTexCoord.xzy;
-
-		IVec2 borders[3];
-		const IVec2 octCoord = IVec2(irradianceTexel);
-		const U32 borderCount = octahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoord, borders);
-		for(U32 i = 0; i < borderCount; ++i)
-		{
-			IVec3 actualVolumeTexCoord;
-			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1;
-			actualVolumeTexCoord.xy += borders[i];
-			actualVolumeTexCoord.z = volumeTexCoord.z;
-
-			TEX(g_irradianceVolume, actualVolumeTexCoord).xyz = irradiance;
-		}
-	}
+	g_outTex[svDispatchThreadId.xy] = Vec4(irradiance, 0.0);
 }
 #endif