浏览代码

Refactor the indirect diffuse probes

Panagiotis Christopoulos Charitos 8 月之前
父节点
当前提交
e25f1e50f7

+ 1 - 1
AnKi/Renderer/AccelerationStructureBuilder.h

@@ -13,7 +13,7 @@ namespace anki {
 /// @{
 /// @{
 
 
 inline NumericCVar<F32>
 inline NumericCVar<F32>
-	g_rayTracingExtendedFrustumDistanceCVar("R", "RayTracingExtendedFrustumDistance", 100.0f, 10.0f, 10000.0f,
+	g_rayTracingExtendedFrustumDistanceCVar("R", "RayTracingExtendedFrustumDistance", 200.0f, 10.0f, 10000.0f,
 											"Every object that its distance from the camera is bellow that value will take part in ray tracing");
 											"Every object that its distance from the camera is bellow that value will take part in ray tracing");
 
 
 /// Build acceleration structures.
 /// Build acceleration structures.

+ 89 - 76
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -22,29 +22,54 @@ Error IndirectDiffuseClipmaps::init()
 																Format::kR8G8B8A8_Unorm, "Test");
 																Format::kR8G8B8A8_Unorm, "Test");
 	m_tmpRtDesc.bake();
 	m_tmpRtDesc.bake();
 
 
-	m_clipmapInfo[0].m_probeCounts = Vec3(F32(g_indirectDiffuseClipmap0ProbesPerDimCVar));
-	m_clipmapInfo[1].m_probeCounts = Vec3(F32(g_indirectDiffuseClipmap1ProbesPerDimCVar));
-	m_clipmapInfo[2].m_probeCounts = Vec3(F32(g_indirectDiffuseClipmap2ProbesPerDimCVar));
-	m_clipmapInfo[0].m_size = Vec3(g_indirectDiffuseClipmap0SizeCVar);
-	m_clipmapInfo[1].m_size = Vec3(g_indirectDiffuseClipmap1SizeCVar);
-	m_clipmapInfo[2].m_size = Vec3(g_indirectDiffuseClipmap2SizeCVar);
+	m_clipmapInfo[0].m_probeCounts =
+		UVec3(g_indirectDiffuseClipmapProbesXZCVar, g_indirectDiffuseClipmapProbesYCVar, g_indirectDiffuseClipmapProbesXZCVar);
+	m_clipmapInfo[1].m_probeCounts = m_clipmapInfo[0].m_probeCounts;
+	m_clipmapInfo[2].m_probeCounts = m_clipmapInfo[0].m_probeCounts;
+	m_clipmapInfo[0].m_size = Vec3(g_indirectDiffuseClipmap0XZSizeCVar, g_indirectDiffuseClipmap0YSizeCVar, g_indirectDiffuseClipmap0XZSizeCVar);
+	m_clipmapInfo[1].m_size = Vec3(g_indirectDiffuseClipmap1XZSizeCVar, g_indirectDiffuseClipmap1YSizeCVar, g_indirectDiffuseClipmap1XZSizeCVar);
+	m_clipmapInfo[2].m_size = Vec3(g_indirectDiffuseClipmap2XZSizeCVar, g_indirectDiffuseClipmap2YSizeCVar, g_indirectDiffuseClipmap2XZSizeCVar);
+
+	U32 probesPerClipmap = 0;
+	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
+	{
+		const U32 count = m_clipmapInfo[i].m_probeCounts.x() * m_clipmapInfo[i].m_probeCounts.y() * m_clipmapInfo[i].m_probeCounts.z();
+		m_clipmapInfo[i].m_probeCountsTotal = count;
+		if(i == 0)
+		{
+			probesPerClipmap = count;
+		}
+		else
+		{
+			ANKI_ASSERT(probesPerClipmap == count);
+		}
+	}
+
+	// Create the lighting result texture
+	m_radianceDesc = getRenderer().create2DRenderTargetDescription(probesPerClipmap, kRaysPerProbePerFrame, Format::kB10G11R11_Ufloat_Pack32,
+																   "IndirectDiffuse light result");
+	m_radianceDesc.bake();
 
 
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
 	{
 		for(U32 dir = 0; dir < 6; ++dir)
 		for(U32 dir = 0; dir < 6; ++dir)
 		{
 		{
 			TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
 			TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
-				U32(m_clipmapInfo[clipmap].m_probeCounts.x()), U32(m_clipmapInfo[clipmap].m_probeCounts.y()), Format::kB10G11R11_Ufloat_Pack32,
-				TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap #%u dir #%u", clipmap, dir));
-			volumeInit.m_depth = U32(m_clipmapInfo[clipmap].m_probeCounts.z());
+				m_clipmapInfo[clipmap].m_probeCounts.x() * (g_indirectDiffuseClipmapRadianceCacheProbeSize + 2),
+				m_clipmapInfo[clipmap].m_probeCounts.z() * (g_indirectDiffuseClipmapRadianceCacheProbeSize + 2), Format::kB10G11R11_Ufloat_Pack32,
+				TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmapRadiance #%u", clipmap));
+			volumeInit.m_depth = m_clipmapInfo[clipmap].m_probeCounts.y();
 			volumeInit.m_type = TextureType::k3D;
 			volumeInit.m_type = TextureType::k3D;
 
 
-			m_clipmapVolumes[clipmap].m_directions[dir] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
+			m_radianceVolumes[clipmap] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
 		}
 		}
 	}
 	}
 
 
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", {}, m_prog, m_tmpVisGrProg, "Test"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", {}, m_prog, m_visProbesGrProg, "VisualizeProbes"));
+	Array<SubMutation, 1> mutation = {"RAYS_PER_PROBE_PER_FRAME", kRaysPerProbePerFrame};
+
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_tmpVisGrProg, "Test"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_visProbesGrProg, "VisualizeProbes"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_populateCachesGrProg, "PopulateCaches"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 
 
 	{
 	{
@@ -54,6 +79,7 @@ Error IndirectDiffuseClipmaps::init()
 
 
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
 		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
+		variantInitInfo.addMutation("RAYS_PER_PROBE_PER_FRAME", kRaysPerProbePerFrame);
 		const ShaderProgramResourceVariant* variant;
 		const ShaderProgramResourceVariant* variant;
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
 		m_libraryGrProg.reset(&variant->getProgram());
 		m_libraryGrProg.reset(&variant->getProgram());
@@ -84,24 +110,21 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
 
-	Array2d<RenderTargetHandle, kIndirectDiffuseClipmapCount, 6> volumeRts;
-	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+	const RenderTargetHandle radianceRt = rgraph.newRenderTarget(m_radianceDesc);
+	m_runCtx.m_tmpRt = rgraph.newRenderTarget(m_tmpRtDesc);
+
+	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount> radianceVolumes;
+	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 	{
 	{
-		for(U32 dir = 0; dir < 6; ++dir)
+		if(m_texturesImportedOnce)
 		{
 		{
-			if(!m_clipmapsImportedOnce)
-			{
-				volumeRts[clipmap][dir] = rgraph.importRenderTarget(m_clipmapVolumes[clipmap].m_directions[dir].get(), TextureUsageBit::kSrvCompute);
-			}
-			else
-			{
-				volumeRts[clipmap][dir] = rgraph.importRenderTarget(m_clipmapVolumes[clipmap].m_directions[dir].get());
-			}
+			radianceVolumes[i] = rgraph.importRenderTarget(m_radianceVolumes[i].get());
+		}
+		else
+		{
+			radianceVolumes[i] = rgraph.importRenderTarget(m_radianceVolumes[i].get(), TextureUsageBit::kSrvCompute);
 		}
 		}
 	}
 	}
-	m_clipmapsImportedOnce = true;
-
-	m_runCtx.m_tmpRt = rgraph.newRenderTarget(m_tmpRtDesc);
 
 
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
@@ -161,13 +184,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	{
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps");
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps");
 
 
-		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-		{
-			for(U32 dir = 0; dir < 6; ++dir)
-			{
-				pass.newTextureDependency(volumeRts[clipmap][dir], TextureUsageBit::kUavCompute);
-			}
-		}
+		pass.newTextureDependency(radianceRt, TextureUsageBit::kUavCompute);
 		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
 		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
 		if(getRenderer().getGeneratedSky().isEnabled())
 		if(getRenderer().getGeneratedSky().isEnabled())
 		{
 		{
@@ -177,7 +194,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
 		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
 												AccelerationStructureUsageBit::kTraceRaysSrv);
 												AccelerationStructureUsageBit::kTraceRaysSrv);
 
 
-		pass.setWork([this, volumeRts, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, radianceRt, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
 			cmdb.bindShaderProgram(m_libraryGrProg.get());
 			cmdb.bindShaderProgram(m_libraryGrProg.get());
@@ -227,41 +244,53 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 
 
-			cmdb.bindUav(7, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
-			cmdb.bindUav(8, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
+			rgraphCtx.bindUav(0, 2, radianceRt);
+			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
 
 
-			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-			{
-				for(U32 dir = 0; dir < 6; ++dir)
-				{
-					rgraphCtx.bindUav(dir, 2, volumeRts[clipmap][dir]);
-				}
+			const UVec4 consts(0, kRaysPerProbePerFrame, 0, 0); // TODO
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			const U32 probeCount = U32(m_clipmapInfo[0].m_probeCountsTotal);
+			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+						   probeCount * kRaysPerProbePerFrame, 1, 1);
+		});
+	}
 
 
-				const UVec4 consts(clipmap);
-				cmdb.setFastConstants(&consts, sizeof(consts));
+	// Update caches
+	{
+		const U32 clipmap = 0;
 
 
-				const U32 probeCount = m_clipmapVolumes[clipmap].m_directions[0]->getWidth();
-				cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-							   probeCount, probeCount, probeCount);
-			}
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps populate caches");
+
+		pass.newTextureDependency(radianceRt, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
+
+		pass.setWork([this, &ctx, clipmap, radianceRt, radianceVolume = radianceVolumes[clipmap]](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_populateCachesGrProg.get());
+
+			rgraphCtx.bindSrv(0, 0, radianceRt);
+			rgraphCtx.bindUav(0, 0, radianceVolume);
+			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+
+			const UVec4 consts(clipmap, g_indirectDiffuseClipmapRadianceCacheProbeSize, 0, 0);
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			cmdb.dispatchCompute(m_clipmapInfo[clipmap].m_probeCounts.x(), m_clipmapInfo[clipmap].m_probeCounts.y(),
+								 m_clipmapInfo[clipmap].m_probeCounts.z());
 		});
 		});
 	}
 	}
 
 
 	{
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps test");
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps test");
 
 
-		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-		{
-			for(U32 dir = 0; dir < 6; ++dir)
-			{
-				pass.newTextureDependency(volumeRts[clipmap][dir], TextureUsageBit::kSrvCompute);
-			}
-		}
+		// TODO
 		pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(m_runCtx.m_tmpRt, TextureUsageBit::kUavCompute);
 		pass.newTextureDependency(m_runCtx.m_tmpRt, TextureUsageBit::kUavCompute);
 
 
-		pass.setWork([this, volumeRts, &ctx](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
 			cmdb.bindShaderProgram(m_tmpVisGrProg.get());
 			cmdb.bindShaderProgram(m_tmpVisGrProg.get());
@@ -270,14 +299,6 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindSrv(1, 0, getRenderer().getGBuffer().getColorRt(2));
 			rgraphCtx.bindSrv(1, 0, getRenderer().getGBuffer().getColorRt(2));
 			cmdb.bindSrv(2, 0, TextureView(&m_blueNoiseImg->getTexture(), TextureSubresourceDesc::firstSurface()));
 			cmdb.bindSrv(2, 0, TextureView(&m_blueNoiseImg->getTexture(), TextureSubresourceDesc::firstSurface()));
 
 
-			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-			{
-				for(U32 dir = 0; dir < 6; ++dir)
-				{
-					rgraphCtx.bindSrv(clipmap * 6 + dir + 3, 0, volumeRts[clipmap][dir]);
-				}
-			}
-
 			rgraphCtx.bindUav(0, 0, m_runCtx.m_tmpRt);
 			rgraphCtx.bindUav(0, 0, m_runCtx.m_tmpRt);
 
 
 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
@@ -293,22 +314,14 @@ void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, Comma
 {
 {
 	cmdb.bindShaderProgram(m_visProbesGrProg.get());
 	cmdb.bindShaderProgram(m_visProbesGrProg.get());
 
 
-	const UVec4 consts(0u);
+	const UVec4 consts(0u, g_indirectDiffuseClipmapRadianceCacheProbeSize, 0, 0);
 	cmdb.setFastConstants(&consts, sizeof(consts));
 	cmdb.setFastConstants(&consts, sizeof(consts));
 
 
 	cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 	cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+	cmdb.bindSrv(0, 0, TextureView(m_radianceVolumes[0].get(), TextureSubresourceDesc::all()));
+	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 
 
-	const U32 probeCount = U32(m_clipmapInfo[0].m_probeCounts.x() * m_clipmapInfo[0].m_probeCounts.y() * m_clipmapInfo[0].m_probeCounts.z());
-
-	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-	{
-		for(U32 dir = 0; dir < 6; ++dir)
-		{
-			cmdb.bindSrv(clipmap * 6 + dir, 0, TextureView(m_clipmapVolumes[clipmap].m_directions[dir].get(), TextureSubresourceDesc::all()));
-		}
-	}
-
-	cmdb.draw(PrimitiveTopology::kTriangles, 36, probeCount);
+	cmdb.draw(PrimitiveTopology::kTriangles, 36, m_clipmapInfo[0].m_probeCountsTotal);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 28 - 20
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -16,18 +16,28 @@ namespace anki {
 
 
 inline BoolCVar g_rtIndirectDiffuseClipmapsCVar("R", "RtIndirectDiffuseClipmaps", false);
 inline BoolCVar g_rtIndirectDiffuseClipmapsCVar("R", "RtIndirectDiffuseClipmaps", false);
 
 
-inline NumericCVar<U32> g_indirectDiffuseClipmap0ProbesPerDimCVar("R", "IndirectDiffuseClipmap0ProbesPerDim", 32, 10, 100,
-																  "The cell count of each dimension of 1st clipmap");
-inline NumericCVar<U32> g_indirectDiffuseClipmap1ProbesPerDimCVar("R", "IndirectDiffuseClipmap1ProbesPerDim", 32, 10, 100,
-																  "The cell count of each dimension of 2nd clipmap");
-inline NumericCVar<U32> g_indirectDiffuseClipmap2ProbesPerDimCVar("R", "IndirectDiffuseClipmap2ProbesPerDim", 32, 10, 100,
-																  "The cell count of each dimension of 3rd clipmap");
-
-inline NumericCVar<F32> g_indirectDiffuseClipmap0SizeCVar("R", "IndirectDiffuseClipmap0Size", 32.0, 10.0, 1000.0, "The clipmap size in meters");
-inline NumericCVar<F32> g_indirectDiffuseClipmap1SizeCVar("R", "IndirectDiffuseClipmap1Size", 64.0, 10.0, 1000.0, "The clipmap size in meters");
-inline NumericCVar<F32> g_indirectDiffuseClipmap2SizeCVar("R", "IndirectDiffuseClipmap2Size", 128.0, 10.0, 1000.0, "The clipmap size in meters");
-
-/// Ambient global illumination passes.
+inline NumericCVar<U32> g_indirectDiffuseClipmapProbesXZCVar("R", "IndirectDiffuseClipmapProbesXZ", 32, 10, 100,
+															 "The probe count along the X and Z dimensions of every clipmap");
+inline NumericCVar<U32> g_indirectDiffuseClipmapProbesYCVar("R", "IndirectDiffuseClipmapProbesY", 8, 4, 100,
+															"The probe count along the Y dimension of every clipmap");
+
+inline NumericCVar<F32> g_indirectDiffuseClipmap0XZSizeCVar("R", "IndirectDiffuseClipmap0XZSize", 48.0, 10.0, 1000.0, "The clipmap size in meters");
+inline NumericCVar<F32> g_indirectDiffuseClipmap0YSizeCVar("R", "IndirectDiffuseClipmap0YSize", 12.0, 10.0, 1000.0, "The clipmap size in meters");
+
+inline NumericCVar<F32> g_indirectDiffuseClipmap1XZSizeCVar("R", "IndirectDiffuseClipmap1XZSize", 96.0, 10.0, 1000.0, "The clipmap size in meters");
+inline NumericCVar<F32> g_indirectDiffuseClipmap1YSizeCVar("R", "IndirectDiffuseClipmap1YSize", 24.0, 10.0, 1000.0, "The clipmap size in meters");
+
+inline NumericCVar<F32> g_indirectDiffuseClipmap2XZSizeCVar("R", "IndirectDiffuseClipmap2XZSize", 192.0, 10.0, 1000.0, "The clipmap size in meters");
+inline NumericCVar<F32> g_indirectDiffuseClipmap2YSizeCVar("R", "IndirectDiffuseClipmap2YSize", 48.0, 10.0, 1000.0, "The clipmap size in meters");
+
+inline NumericCVar<U32> g_indirectDiffuseClipmapRadianceCacheProbeSize("R", "IndirectDiffuseClipmapLightCacheSize", 10, 5, 30,
+																	   "Size of the octahedral for the light cache");
+inline NumericCVar<U32> g_indirectDiffuseClipmapDistancesProbeSize("R", "IndirectDiffuseClipmapDistanceSize", 10, 5, 22,
+																   "Size of the octahedral for the probe distances");
+inline NumericCVar<U32> g_indirectDiffuseClipmapIrradianceProbeSize("R", "IndirectDiffuseClipmapIrradianceSize", 6, 4, 22,
+																	"Size of the octahedral for the irradiance");
+
+/// Indirect diffuse based on clipmaps of probes.
 class IndirectDiffuseClipmaps : public RendererObject
 class IndirectDiffuseClipmaps : public RendererObject
 {
 {
 public:
 public:
@@ -54,13 +64,10 @@ public:
 	void drawDebugProbes(const RenderingContext& ctx, CommandBuffer& cmdb) const;
 	void drawDebugProbes(const RenderingContext& ctx, CommandBuffer& cmdb) const;
 
 
 private:
 private:
-	class ClipmapVolumes
-	{
-	public:
-		Array<TexturePtr, 6> m_directions;
-	};
+	static constexpr U32 kRaysPerProbePerFrame = 32;
 
 
-	Array<ClipmapVolumes, kIndirectDiffuseClipmapCount> m_clipmapVolumes;
+	RenderTargetDesc m_radianceDesc;
+	Array<TexturePtr, kIndirectDiffuseClipmapCount> m_radianceVolumes;
 
 
 	Array<Clipmap, kIndirectDiffuseClipmapCount> m_clipmapInfo;
 	Array<Clipmap, kIndirectDiffuseClipmapCount> m_clipmapInfo;
 
 
@@ -68,11 +75,12 @@ private:
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_libraryGrProg;
+	ShaderProgramPtr m_populateCachesGrProg;
 	ShaderProgramPtr m_tmpVisGrProg;
 	ShaderProgramPtr m_tmpVisGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_visProbesGrProg;
 	ShaderProgramPtr m_visProbesGrProg;
 
 
-	RenderTargetDesc m_tmpRtDesc;
+	RenderTargetDesc m_tmpRtDesc; // TODO rm
 
 
 	ImageResourcePtr m_blueNoiseImg;
 	ImageResourcePtr m_blueNoiseImg;
 
 
@@ -80,7 +88,7 @@ private:
 	U32 m_rayGenShaderGroupIdx = kMaxU32;
 	U32 m_rayGenShaderGroupIdx = kMaxU32;
 	U32 m_missShaderGroupIdx = kMaxU32;
 	U32 m_missShaderGroupIdx = kMaxU32;
 
 
-	Bool m_clipmapsImportedOnce = false;
+	Bool m_texturesImportedOnce = false;
 
 
 	class
 	class
 	{
 	{

+ 2 - 7
AnKi/Renderer/Reflections.cpp

@@ -381,13 +381,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(6, 2, pixelsFailedSsrBuff);
 			cmdb.bindSrv(6, 2, pixelsFailedSsrBuff);
 			rgraphCtx.bindSrv(7, 2, getRenderer().getShadowMapping().getShadowmapRt());
 			rgraphCtx.bindSrv(7, 2, getRenderer().getShadowMapping().getShadowmapRt());
 
 
-			for(U32 i = 0; i < 6; ++i)
-			{
-				cmdb.bindUav(i, 2, TextureView(getDummyGpuResources().m_texture3DUav.get(), TextureSubresourceDesc::firstSurface()));
-			}
-
-			rgraphCtx.bindUav(7, 2, transientRt1);
-			rgraphCtx.bindUav(8, 2, hitPosAndDepthRt);
+			rgraphCtx.bindUav(0, 2, transientRt1);
+			rgraphCtx.bindUav(1, 2, hitPosAndDepthRt);
 
 
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());

+ 1 - 1
AnKi/Renderer/RendererObject.cpp

@@ -19,7 +19,7 @@ void RendererObject::registerDebugRenderTarget(CString rtName)
 	getRenderer().registerDebugRenderTarget(this, rtName);
 	getRenderer().registerDebugRenderTarget(this, rtName);
 }
 }
 
 
-Error RendererObject::loadShaderProgram(CString filename, std::initializer_list<SubMutation> mutators, ShaderProgramResourcePtr& rsrc,
+Error RendererObject::loadShaderProgram(CString filename, ConstWeakArray<SubMutation> mutators, ShaderProgramResourcePtr& rsrc,
 										ShaderProgramPtr& grProg, CString technique, ShaderTypeBit shaderTypes)
 										ShaderProgramPtr& grProg, CString technique, ShaderTypeBit shaderTypes)
 {
 {
 	if(!rsrc.isCreated())
 	if(!rsrc.isCreated())

+ 7 - 1
AnKi/Renderer/RendererObject.h

@@ -106,7 +106,13 @@ protected:
 	};
 	};
 
 
 	static Error loadShaderProgram(CString filename, std::initializer_list<SubMutation> mutators, ShaderProgramResourcePtr& rsrc,
 	static Error loadShaderProgram(CString filename, std::initializer_list<SubMutation> mutators, ShaderProgramResourcePtr& rsrc,
-								   ShaderProgramPtr& grProg, CString technique = {}, ShaderTypeBit shaderTypes = ShaderTypeBit::kNone);
+								   ShaderProgramPtr& grProg, CString technique = {}, ShaderTypeBit shaderTypes = ShaderTypeBit::kNone)
+	{
+		return loadShaderProgram(filename, ConstWeakArray<SubMutation>(mutators.begin(), U32(mutators.size())), rsrc, grProg, technique, shaderTypes);
+	}
+
+	static Error loadShaderProgram(CString filename, ConstWeakArray<SubMutation> mutators, ShaderProgramResourcePtr& rsrc, ShaderProgramPtr& grProg,
+								   CString technique = {}, ShaderTypeBit shaderTypes = ShaderTypeBit::kNone);
 
 
 	static void zeroBuffer(Buffer* buff);
 	static void zeroBuffer(Buffer* buff);
 
 

+ 38 - 0
AnKi/Shaders/Common.hlsl

@@ -158,6 +158,44 @@ U32 checkStructuredBuffer(T buff, U32 idx)
 // Safely access a structured buffer. Throw an assertion if it's out of bounds
 // Safely access a structured buffer. Throw an assertion if it's out of bounds
 #define SBUFF(buff, idx) buff[checkStructuredBuffer(buff, idx)]
 #define SBUFF(buff, idx) buff[checkStructuredBuffer(buff, idx)]
 
 
+UVec3 checkUavTexture(RWTexture3D<Vec4> tex, UVec3 coords)
+{
+	UVec3 size;
+	tex.GetDimensions(size.x, size.y, size.z);
+	ANKI_ASSERT(coords.x < size.x && coords.y < size.y && coords.z < size.z);
+	return coords;
+}
+
+UVec2 checkUavTexture(RWTexture2D<Vec4> tex, UVec2 coords)
+{
+	UVec2 size;
+	tex.GetDimensions(size.x, size.y);
+	ANKI_ASSERT(coords.x < size.x && coords.y < size.y);
+	return coords;
+}
+
+/// Safely access a UAV texture. Throw an assertion if it's out of bounds
+#define UAV_TEXTURE(tex, coords) tex[checkUavTexture(tex, coords)]
+
+UVec3 checkSrvTexture(Texture3D<Vec4> tex, UVec3 coords)
+{
+	UVec3 size;
+	tex.GetDimensions(size.x, size.y, size.z);
+	ANKI_ASSERT(coords.x < size.x && coords.y < size.y && coords.z < size.z);
+	return coords;
+}
+
+UVec2 checkSrvTexture(Texture2D<Vec4> tex, UVec2 coords)
+{
+	UVec2 size;
+	tex.GetDimensions(size.x, size.y);
+	ANKI_ASSERT(coords.x < size.x && coords.y < size.y);
+	return coords;
+}
+
+/// Safely access a SRV texture. Throw an assertion if it's out of bounds
+#define SRV_TEXTURE(tex, coords) tex[checkSrvTexture(tex, coords)]
+
 // Need extra decoration for per-primitive stuff in Vulkan. Remove when https://github.com/microsoft/DirectXShaderCompiler/issues/6862 is fixed
 // Need extra decoration for per-primitive stuff in Vulkan. Remove when https://github.com/microsoft/DirectXShaderCompiler/issues/6862 is fixed
 #if ANKI_GR_BACKEND_VULKAN
 #if ANKI_GR_BACKEND_VULKAN
 #	define SpvCapabilityMeshShadingEXT 5283
 #	define SpvCapabilityMeshShadingEXT 5283

+ 52 - 0
AnKi/Shaders/Functions.hlsl

@@ -594,6 +594,15 @@ I32 firstbitlow2(U32 v)
 	return firstbitlow(v);
 	return firstbitlow(v);
 }
 }
 
 
+#if ANKI_SUPPORTS_64BIT_TYPES
+/// Count the set bits of a 64bit value by calling countbits on the low and the high 32bit halves and summing the two results.
+/// NOTE(review): presumably exists because the countbits builtin only handles 32bit input (as with firstbitlow2) - confirm.
+U32 countbits2(U64 v)
+{
+	return countbits(U32(v)) + countbits(U32(v >> 32ul));
+}
+#endif
+
 /// Encode the shading rate to be stored in an SRI. The rates should be power of two, can't be zero and can't exceed 4.
 /// Encode the shading rate to be stored in an SRI. The rates should be power of two, can't be zero and can't exceed 4.
 /// So the possible values are 1,2,4
 /// So the possible values are 1,2,4
 U32 encodeVrsRate(UVec2 rateXY)
 U32 encodeVrsRate(UVec2 rateXY)
@@ -807,6 +816,49 @@ Vec3 octahedronDecode(Vec2 f)
 	return normalize(n);
 	return normalize(n);
 }
 }
 
 
+/// Given the size of an octahedron texture and a texel that belongs to it, write to borderTexOffsets the offsets (relative to that texel) of the
+/// border texels associated with it and return how many were written (0 to 3). Both texSize and texCoord are expressed without the border.
+U32 octahedronBorder(IVec2 texSize, IVec2 texCoord, out IVec2 borderTexOffsets[3])
+{
+	U32 borderCount = 0;
+	if(all(texCoord == 0)) // Corner texels get one extra, diagonal, border offset of +-texSize per axis
+	{
+		borderTexOffsets[borderCount++] = texSize;
+	}
+	else if(texCoord.x == 0 && texCoord.y == texSize.y - 1)
+	{
+		borderTexOffsets[borderCount++] = IVec2(texSize.x, -texSize.y);
+	}
+	else if(all(texCoord == texSize - 1))
+	{
+		borderTexOffsets[borderCount++] = -texSize;
+	}
+	else if(texCoord.x == texSize.x - 1 && texCoord.y == 0)
+	{
+		borderTexOffsets[borderCount++] = IVec2(-texSize.x, texSize.y);
+	}
+
+	if(texCoord.y == 0) // First row: border texel is one row outside (y - 1), at the horizontally mirrored x
+	{
+		borderTexOffsets[borderCount++] = IVec2((texSize.x - 1) - 2 * texCoord.x, -1);
+	}
+	else if(texCoord.y == texSize.y - 1) // Last row: same but one row outside on the other side (y + 1)
+	{
+		borderTexOffsets[borderCount++] = IVec2((texSize.x - 1) - 2 * texCoord.x, 1);
+	}
+
+	if(texCoord.x == 0) // First column: border texel is one column outside (x - 1), at the vertically mirrored y
+	{
+		borderTexOffsets[borderCount++] = IVec2(-1, (texSize.y - 1) - 2 * texCoord.y);
+	}
+	else if(texCoord.x == texSize.x - 1) // Last column: same but one column outside on the other side (x + 1)
+	{
+		borderTexOffsets[borderCount++] = IVec2(1, (texSize.y - 1) - 2 * texCoord.y);
+	}
+
+	return borderCount;
+}
+
 /// Manual texture sampling of a 3D texture.
 /// Manual texture sampling of a 3D texture.
 template<typename T, U32 kComp>
 template<typename T, U32 kComp>
 vector<T, kComp> linearTextureSampling(Texture3D<Vec4> sam, Vec3 uv)
 vector<T, kComp> linearTextureSampling(Texture3D<Vec4> sam, Vec3 uv)

+ 3 - 1
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -755,7 +755,9 @@ GBufferPixelOut main(
 	payload.m_worldNormal = normal;
 	payload.m_worldNormal = normal;
 
 
 	// Ray T
 	// Ray T
-	payload.m_rayT = RayTCurrent();
+	const Bool backfacing = HitKind() == HIT_KIND_TRIANGLE_FRONT_FACE;
+
+	payload.m_rayT = RayTCurrent() * (backfacing ? -1.0 : 1.0);
 }
 }
 #	endif
 #	endif
 
 

+ 2 - 2
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -72,8 +72,8 @@ struct Sky
 
 
 struct Clipmap
 struct Clipmap
 {
 {
-	Vec3 m_probeCounts;
-	U32 m_padding1;
+	UVec3 m_probeCounts;
+	U32 m_probeCountsTotal;
 
 
 	Vec3 m_size;
 	Vec3 m_size;
 	F32 m_padding2;
 	F32 m_padding2;

+ 225 - 133
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -5,9 +5,14 @@
 
 
 #pragma anki 16bit
 #pragma anki 16bit
 
 
-#pragma anki technique RtMaterialFetch rgen
-#pragma anki technique Test comp
-#pragma anki technique VisualizeProbes vert pixel
+#pragma anki mutator RAYS_PER_PROBE_PER_FRAME 32 64
+
+#pragma anki technique RtMaterialFetch rgen mutators
+#pragma anki technique PopulateCaches comp mutators RAYS_PER_PROBE_PER_FRAME
+#pragma anki technique Test comp mutators
+#pragma anki technique VisualizeProbes vert pixel mutators
+
+#define ANKI_ASSERTIONS_ENABLED 1
 
 
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
@@ -22,20 +27,13 @@
 #define CLIPMAP_VOLUME 1
 #define CLIPMAP_VOLUME 1
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 
-Vec3 worldPosToVolumeUvw(Clipmap clipmap, Vec3 worldPos)
-{
-	const Vec3 uvw = frac(worldPos / clipmap.m_size);
-	return uvw;
-}
+constexpr Vec3 kIndirectDiffuseClipmapForwardOffset = Vec3(10.0, 5.0, 10.0); // In meters
 
 
-UVec3 worldPosToVolumeTexel(Clipmap clipmap, Vec3 worldPos)
+void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, out Vec3 aabbMin, out Vec3 aabbMax)
 {
 {
-	const Vec3 uvw = worldPosToVolumeUvw(clipmap, worldPos);
-	return uvw * clipmap.m_probeCounts;
-}
+	const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
+	cameraPos += offset;
 
 
-void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, out Vec3 aabbMin, out Vec3 aabbMax)
-{
 	const Vec3 halfSize = clipmap.m_size * 0.5;
 	const Vec3 halfSize = clipmap.m_size * 0.5;
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 roundedPos = round(cameraPos / probeSize) * probeSize;
 	const Vec3 roundedPos = round(cameraPos / probeSize) * probeSize;
@@ -43,17 +41,12 @@ void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, out Vec3 aabbMin, out
 	aabbMax = roundedPos + halfSize;
 	aabbMax = roundedPos + halfSize;
 }
 }
 
 
-void computeClipmapBoundsConservative(Clipmap clipmap, Vec3 cameraPos, out Vec3 aabbMin, out Vec3 aabbMax)
+F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos)
 {
 {
-	const Vec3 halfSize = clipmap.m_size * 0.5;
-	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
-	const Vec3 roundedPos = round(cameraPos / probeSize) * probeSize;
-	aabbMin = roundedPos - halfSize + probeSize * 0.5;
-	aabbMax = roundedPos + halfSize - probeSize * 0.5;
-}
+	const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
+
+	cameraPos += offset;
 
 
-F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 worldPos)
-{
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 halfSize = clipmap.m_size * 0.5;
 	const Vec3 halfSize = clipmap.m_size * 0.5;
 	const Vec3 aabbMin = cameraPos - halfSize + probeSize;
 	const Vec3 aabbMin = cameraPos - halfSize + probeSize;
@@ -72,21 +65,38 @@ F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 worldPos)
 	return fade;
 	return fade;
 }
 }
 
 
-U16 findClipmapOnPosition(Clipmap clipmaps[kIndirectDiffuseClipmapCount], Vec3 cameraPos, Vec3 worldPos, F32 randFactor)
+F32 computeClipmapFade2(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos)
 {
 {
-	F32 fade = computeClipmapFade(clipmaps[0], cameraPos, worldPos);
+	Vec3 aabbMin, aabbMax;
+	computeClipmapBounds(clipmap, cameraPos, lookDir, aabbMin, aabbMax);
+
+	if(all(worldPos < aabbMax) && all(worldPos > aabbMin))
+	{
+		return 1.0;
+	}
+	else
+	{
+		return 0.0;
+	}
+}
+
+U16 findClipmapOnPosition(Clipmap clipmaps[kIndirectDiffuseClipmapCount], Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos, F32 randFactor)
+{
+	randFactor = pow2(randFactor);
+
+	F32 fade = computeClipmapFade(clipmaps[0], cameraPos, lookDir, worldPos);
 	if(fade > randFactor)
 	if(fade > randFactor)
 	{
 	{
 		return 0;
 		return 0;
 	}
 	}
 
 
-	fade = computeClipmapFade(clipmaps[1], cameraPos, worldPos);
+	fade = computeClipmapFade(clipmaps[1], cameraPos, lookDir, worldPos);
 	if(fade > randFactor)
 	if(fade > randFactor)
 	{
 	{
 		return 1;
 		return 1;
 	}
 	}
 
 
-	fade = computeClipmapFade(clipmaps[2], cameraPos, worldPos);
+	fade = computeClipmapFade(clipmaps[2], cameraPos, lookDir, worldPos);
 	if(fade > randFactor)
 	if(fade > randFactor)
 	{
 	{
 		return 2;
 		return 2;
@@ -95,130 +105,197 @@ U16 findClipmapOnPosition(Clipmap clipmaps[kIndirectDiffuseClipmapCount], Vec3 c
 	return 3;
 	return 3;
 }
 }
 
 
-SHL1<F16> readClipmap(Clipmap clipmaps[kIndirectDiffuseClipmapCount], Texture3D<Vec4> volumes[3 * kIndirectDiffuseClipmapCount],
-					  SamplerState linearAnyRepeatSampler, Vec3 cameraPos, Vec3 worldPos)
+Vec2 generateRandomUv(U32 sampleIdx, U32 sampleCount, U32 frame)
 {
 {
-	Vec3 clipmapAabbMin, clipmapAabbMax;
-	computeClipmapBoundsConservative(clipmaps[0], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
-	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[0], worldPos);
-		return loadSH<F16>(volumes[0], volumes[1], volumes[2], linearAnyRepeatSampler, uvw);
-	}
+	const UVec3 r = rand3DPCG16(UVec3(frame % 16u, frame % 4u, frame % 32u));
+	return hammersleyRandom16(sampleIdx, sampleCount, r);
+}
 
 
-	computeClipmapBoundsConservative(clipmaps[1], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
-	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[1], worldPos);
-		return loadSH<F16>(volumes[3], volumes[4], volumes[5], linearAnyRepeatSampler, uvw);
-	}
+HVec3 generateRandomPointInSphere(U32 sampleIdx, U32 sampleCount, U32 frame)
+{
+	return octahedronDecode(generateRandomUv(sampleIdx, sampleCount, frame));
+}
 
 
-	computeClipmapBoundsConservative(clipmaps[2], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
-	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[2], worldPos);
-		return loadSH<F16>(volumes[6], volumes[7], volumes[8], linearAnyRepeatSampler, uvw);
-	}
+// ===========================================================================
+// RayGen                                                                    =
+// ===========================================================================
+#if ANKI_RAY_GEN_SHADER
 
 
-	SHL1<F16> sh = (SHL1<F16>)0;
-	return sh;
-}
+struct Consts
+{
+	U32 m_clipmapIdx;
+	U32 m_raysPerProbeCount;
+	F32 m_padding1;
+	F32 m_padding2;
+};
+ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 
-IrradianceDice<F16> readClipmap(Clipmap clipmaps[kIndirectDiffuseClipmapCount], Texture3D<Vec4> volumes[6 * kIndirectDiffuseClipmapCount],
-								SamplerState linearAnyRepeatSampler, Vec3 cameraPos, Vec3 worldPos)
+[Shader("raygeneration")] void main()
 {
 {
+	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
+
+	// Compute clipmap bounds
+	const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
+	const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
 	Vec3 clipmapAabbMin, clipmapAabbMax;
 	Vec3 clipmapAabbMin, clipmapAabbMax;
-	computeClipmapBoundsConservative(clipmaps[0], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
-	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[0], worldPos);
-		return loadIrradianceDice<F16>(volumes, linearAnyRepeatSampler, uvw, 0);
-	}
+	computeClipmapBounds(clipmap, g_globalRendererConstants.m_cameraPosition, lookDir, clipmapAabbMin, clipmapAabbMax);
 
 
-	computeClipmapBoundsConservative(clipmaps[1], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
+	// Compute probe info. Make sure you shoot coherent rays as much as possible by using the same direction on a specific wave
+	const U32 sampleIdx = DispatchRaysIndex().x / clipmap.m_probeCountsTotal;
+	const U32 probeIdx = DispatchRaysIndex().x % clipmap.m_probeCountsTotal;
+
+	UVec3 probe3dIdx;
+	unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, probe3dIdx.z, probe3dIdx.y,
+						  probe3dIdx.x);
+
+	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
+	const Vec3 cellWorldPos = probe3dIdx * probeSize + probeSize * 0.5 + clipmapAabbMin;
+
+	// Trace
+	const HVec3 dir = generateRandomPointInSphere(sampleIdx, g_consts.m_raysPerProbeCount, g_globalRendererConstants.m_frame);
+	const F32 tMax = 1000.0; // TODO
+	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
+	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
+	F32 rayT = 0.0;
+	Bool backfacing = false;
+	const Bool hit = materialRayTrace<F16>(cellWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
+
+	HVec3 radiance;
+	if(backfacing)
 	{
 	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[1], worldPos);
-		return loadIrradianceDice<F16>(volumes, linearAnyRepeatSampler, uvw, 6);
+		radiance = HVec3(1.0, 0.0, 1.0);
 	}
 	}
-
-	computeClipmapBoundsConservative(clipmaps[2], cameraPos, clipmapAabbMin, clipmapAabbMax);
-	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
+	else
 	{
 	{
-		const Vec3 uvw = worldPosToVolumeUvw(clipmaps[2], worldPos);
-		return loadIrradianceDice<F16>(volumes, linearAnyRepeatSampler, uvw, 12);
+		const Vec3 hitPos = cellWorldPos + dir * (rayT - 0.01);
+		radiance = directLighting<F16>(gbuffer, hitPos, !hit, false, tMax, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
 	}
 	}
 
 
-	IrradianceDice<F16> dice = (IrradianceDice<F16>)0;
-	return dice;
+	// Store result
+	g_lightResultTex[UVec2(probeIdx, sampleIdx)] = HVec4(radiance, 0.0);
 }
 }
+#endif // ANKI_RAY_GEN_SHADER
 
 
 // ===========================================================================
 // ===========================================================================
-// RayGen                                                                    =
+// PopulateCaches                                                            =
 // ===========================================================================
 // ===========================================================================
-#if ANKI_RAY_GEN_SHADER
+#if NOT_ZERO(ANKI_TECHNIQUE_PopulateCaches)
+Texture2D<Vec4> g_lightResultTex : register(t0);
+
+RWTexture3D<Vec4> g_radianceVolume : register(u0);
+
+ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 
 
 struct Consts
 struct Consts
 {
 {
 	U32 m_clipmapIdx;
 	U32 m_clipmapIdx;
-	F32 m_padding0;
+	U32 m_radianceProbeSize; // Size without border
 	F32 m_padding1;
 	F32 m_padding1;
 	F32 m_padding2;
 	F32 m_padding2;
 };
 };
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 
-[Shader("raygeneration")] void main()
+groupshared U32 g_octCoordValueSet[128]; // TODO
+
+[NumThreads(RAYS_PER_PROBE_PER_FRAME, 1, 1)] void main(U32 svGroupIndex : SV_GroupIndex, UVec3 svGroupId : SV_GroupID)
 {
 {
 	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
 	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
 
 
-	// Compute clipmap bounds
-	Vec3 clipmapAabbMin, clipmapAabbMax;
-	computeClipmapBounds(clipmap, g_globalRendererConstants.m_cameraPosition, clipmapAabbMin, clipmapAabbMax);
-
-	const Vec3 prevCameraPos = g_globalRendererConstants.m_previousMatrices.m_cameraTransform.getTranslationPart();
-	Vec3 prevClipmapAabbMin, prevClipmapAabbMax;
-	computeClipmapBounds(clipmap, prevCameraPos, prevClipmapAabbMin, prevClipmapAabbMax);
+	const U32 octPixelCount = g_consts.m_radianceProbeSize * g_consts.m_radianceProbeSize;
+	ANKI_ASSERT(octPixelCount <= 128);
 
 
-	// Compute probe info
-	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
-	const Vec3 cellWorldPos = DispatchRaysIndex().xyz * probeSize + probeSize * 0.5 + clipmapAabbMin;
+	// Zero groupshared
+	const U32 octPixelsPerThread = (octPixelCount + RAYS_PER_PROBE_PER_FRAME - 1) / RAYS_PER_PROBE_PER_FRAME;
+	for(U32 i = 0; i < octPixelsPerThread; ++i)
+	{
+		const U32 octCoordIdx = svGroupIndex * octPixelsPerThread + i;
+		if(octCoordIdx < octPixelCount)
+		{
+			g_octCoordValueSet[octCoordIdx] = 0;
+		}
+	}
 
 
-	const UVec3 probeTexelCoord = worldPosToVolumeTexel(clipmap, cellWorldPos);
-	ANKI_ASSERT(all(probeTexelCoord < clipmap.m_probeCounts));
+	GroupMemoryBarrierWithGroupSync();
 
 
-	// Integrate to build the SH
-	IrradianceDice<F16> dice = (IrradianceDice<F16>)0;
-	const U16 sampleCount = 32u;
+	const U32 sampleIdx = svGroupIndex;
+	const U32 probeIdx = svGroupId.z * clipmap.m_probeCounts.x * clipmap.m_probeCounts.y + svGroupId.y * clipmap.m_probeCounts.x + svGroupId.x;
 
 
-	for(U16 i = 0; i < sampleCount; ++i)
-	{
-		HVec3 dir = generateUniformPointOnSphere<F16>(i, sampleCount, g_globalRendererConstants.m_frame);
+	// Compute clipmap bounds
+	Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
+	Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
+	Vec3 clipmapAabbMin, clipmapAabbMax;
+	computeClipmapBounds(clipmap, g_globalRendererConstants.m_cameraPosition, lookDir, clipmapAabbMin, clipmapAabbMax);
 
 
-		const F32 tMax = 1000.0; // TODO
+	// Compute previous frame clipmap bounds
+	cameraTrf = g_globalRendererConstants.m_previousMatrices.m_cameraTransform;
+	lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
+	Vec3 prevClipmapAabbMin, prevClipmapAabbMax;
+	computeClipmapBounds(clipmap, g_globalRendererConstants.m_previousMatrices.m_cameraTransform.getTranslationPart().xyz, lookDir,
+						 prevClipmapAabbMin, prevClipmapAabbMax);
 
 
-		constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
+	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
+	const Vec3 probeWorldPos = svGroupId * probeSize + probeSize * 0.5 + clipmapAabbMin;
 
 
-		GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
-		F32 rayT = 0.0;
-		const Bool hit = materialRayTrace<F16>(cellWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, traceFlags);
+	UVec3 volumeTexCoord = frac(probeWorldPos.xzy / clipmap.m_size.xzy) * clipmap.m_probeCounts.xzy;
+	volumeTexCoord = min(volumeTexCoord, clipmap.m_probeCounts.xzy - 1u);
 
 
-		const Vec3 hitPos = cellWorldPos + dir * rayT;
-		const HVec3 radiance = directLighting<F16>(gbuffer, hitPos, !hit, false, tMax, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
+	const HVec3 radiance = SRV_TEXTURE(g_lightResultTex, UVec2(probeIdx, sampleIdx));
 
 
-		dice = appendIrradianceDice(dice, dir, radiance, sampleCount);
-	}
+	const Vec2 octUv = generateRandomUv(sampleIdx, U32(RAYS_PER_PROBE_PER_FRAME), g_globalRendererConstants.m_frame);
+	const UVec2 octCoord = min(octUv * g_consts.m_radianceProbeSize, g_consts.m_radianceProbeSize - 1);
+	const U32 octCoordIdx = octCoord.y * g_consts.m_radianceProbeSize + octCoord.x;
+	ANKI_ASSERT(octCoordIdx < octPixelCount);
 
 
-	// Store the SH
-	const Bool blendWithHistory = all(cellWorldPos > prevClipmapAabbMin) && all(cellWorldPos < prevClipmapAabbMax);
-	if(blendWithHistory)
+	const Bool blendWithHistory = all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMax);
+	HVec3 avgRadiance = 0.0;
+	U32 iterationCount = 0;
+	do
 	{
 	{
-		const IrradianceDice<F16> historyDice = loadIrradianceDice<F16>(g_clipmapVolumes, probeTexelCoord);
-		dice = lerpIrradianceDice<F16>(historyDice, dice, 0.01);
-	}
+		U32 origValue;
+		InterlockedCompareExchange(g_octCoordValueSet[octCoordIdx], iterationCount, iterationCount + 1u, origValue);
+
+		if(origValue == iterationCount)
+		{
+			UVec3 actualVolumeTexCoord;
+			actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
+			actualVolumeTexCoord.z = volumeTexCoord.z;
+
+			if(blendWithHistory)
+			{
+				const HVec3 prevValue = UAV_TEXTURE(g_radianceVolume, actualVolumeTexCoord).xyz;
+				avgRadiance = lerp(prevValue, radiance, 0.1);
+			}
+			else
+			{
+				avgRadiance = radiance;
+			}
+
+			UAV_TEXTURE(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+
+			iterationCount = kMaxU32;
+		}
+		else
+		{
+			++iterationCount;
+		}
+
+		AllMemoryBarrierWithGroupSync();
+	} while(iterationCount < kMaxU32);
+
+	// Set oct borders
+	IVec2 borders[3];
+	const U32 borderCount = octahedronBorder(g_consts.m_radianceProbeSize, octCoord, borders);
+	for(U32 i = 0; i < borderCount; ++i)
+	{
+		IVec3 actualVolumeTexCoord;
+		actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (g_consts.m_radianceProbeSize + 2) + 1;
+		actualVolumeTexCoord.xy += borders[i];
+		actualVolumeTexCoord.z = volumeTexCoord.z;
 
 
-	storeIrradianceDice(dice, g_clipmapVolumes, probeTexelCoord);
+		UAV_TEXTURE(g_radianceVolume, actualVolumeTexCoord).xyz = avgRadiance;
+	}
 }
 }
-#endif // ANKI_RAY_GEN_SHADER
+#endif
 
 
 // ===========================================================================
 // ===========================================================================
 // Test                                                                      =
 // Test                                                                      =
@@ -229,8 +306,6 @@ Texture2D<Vec4> g_gbufferRt2 : register(t1);
 
 
 Texture2D<Vec4> g_blueNoiseTex : register(t2);
 Texture2D<Vec4> g_blueNoiseTex : register(t2);
 
 
-Texture3D<Vec4> g_clipmapVolumes[6 * kIndirectDiffuseClipmapCount] : register(t3);
-
 RWTexture2D<Vec4> g_outTex : register(u0);
 RWTexture2D<Vec4> g_outTex : register(u0);
 
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
@@ -263,9 +338,12 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 
 
 	F32 noise = noise3.x;
 	F32 noise = noise3.x;
 
 
-	const U16 clipmapIdx =
-		findClipmapOnPosition(g_globalRendererConstants.m_indirectDiffuseClipmaps, g_globalRendererConstants.m_cameraPosition, worldPos, noise);
-	/*if(clipmapIdx == 0)
+	const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
+	const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
+
+	const U16 clipmapIdx = findClipmapOnPosition(g_globalRendererConstants.m_indirectDiffuseClipmaps, g_globalRendererConstants.m_cameraPosition,
+												 lookDir, worldPos, noise);
+	if(clipmapIdx == 0)
 	{
 	{
 		g_outTex[svDispatchThreadId] = Vec4(1, 0, 0, 0);
 		g_outTex[svDispatchThreadId] = Vec4(1, 0, 0, 0);
 	}
 	}
@@ -281,8 +359,10 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 	{
 	{
 		g_outTex[svDispatchThreadId] = Vec4(1, 0, 1, 0);
 		g_outTex[svDispatchThreadId] = Vec4(1, 0, 1, 0);
 	}
 	}
-	return;*/
 
 
+	return;
+
+#	if 0
 	if(clipmapIdx >= kIndirectDiffuseClipmapCount)
 	if(clipmapIdx >= kIndirectDiffuseClipmapCount)
 	{
 	{
 		g_outTex[svDispatchThreadId] = 0.0;
 		g_outTex[svDispatchThreadId] = 0.0;
@@ -295,6 +375,7 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 	const HVec3 irradiance = evaluateIrradianceDice<F16>(dice, normal);
 	const HVec3 irradiance = evaluateIrradianceDice<F16>(dice, normal);
 
 
 	g_outTex[svDispatchThreadId] = Vec4(irradiance, 0.0);
 	g_outTex[svDispatchThreadId] = Vec4(irradiance, 0.0);
+#	endif
 }
 }
 #endif
 #endif
 
 
@@ -303,18 +384,17 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 // ===========================================================================
 // ===========================================================================
 #if NOT_ZERO(ANKI_TECHNIQUE_VisualizeProbes)
 #if NOT_ZERO(ANKI_TECHNIQUE_VisualizeProbes)
 
 
+struct VertIn
+{
+	U32 m_svVertexId : SV_VertexID;
+	U32 m_svInstanceId : SV_InstanceID;
+};
+
 struct VertOut
 struct VertOut
 {
 {
 	Vec4 m_svPosition : SV_Position;
 	Vec4 m_svPosition : SV_Position;
 
 
 	Vec3 m_probeCenter : PROBE_CENTER;
 	Vec3 m_probeCenter : PROBE_CENTER;
-	UVec3 m_probeTexel : PROBE_TEXEL;
-};
-
-struct VertIn
-{
-	U32 m_svVertexId : SV_VertexID;
-	U32 m_svInstanceId : SV_InstanceID;
 };
 };
 
 
 struct FragOut
 struct FragOut
@@ -326,7 +406,7 @@ struct FragOut
 struct Consts
 struct Consts
 {
 {
 	U32 m_clipmapIdx;
 	U32 m_clipmapIdx;
-	U32 m_padding1;
+	U32 m_radianceProbeSize; // Size without border
 	U32 m_padding2;
 	U32 m_padding2;
 	U32 m_padding3;
 	U32 m_padding3;
 };
 };
@@ -334,7 +414,9 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 
 
-Texture3D<Vec4> g_clipmapVolumes[6 * kIndirectDiffuseClipmapCount] : register(t0);
+Texture3D<Vec4> g_radianceVolume : register(t0);
+
+SamplerState g_linearAnyRepeatSampler : register(s0);
 
 
 constexpr F32 kSphereRadius = 0.05;
 constexpr F32 kSphereRadius = 0.05;
 
 
@@ -352,11 +434,14 @@ VertOut main(VertIn input)
 	const Vec3 camPos = g_globalRendererConstants.m_cameraPosition;
 	const Vec3 camPos = g_globalRendererConstants.m_cameraPosition;
 
 
 	UVec3 cellCoord;
 	UVec3 cellCoord;
-	unflatten3dArrayIndex(clipmap.m_probeCounts.x, clipmap.m_probeCounts.y, clipmap.m_probeCounts.z, input.m_svInstanceId, cellCoord.x, cellCoord.y,
-						  cellCoord.z);
+	unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, input.m_svInstanceId, cellCoord.z, cellCoord.y,
+						  cellCoord.x);
+
+	const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
+	const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
 
 
 	Vec3 clipmapAabbMin, clipmapAabbMax;
 	Vec3 clipmapAabbMin, clipmapAabbMax;
-	computeClipmapBounds(clipmap, camPos, clipmapAabbMin, clipmapAabbMax);
+	computeClipmapBounds(clipmap, camPos, lookDir, clipmapAabbMin, clipmapAabbMax);
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
 	const Vec3 cellWorldPos = cellCoord * probeSize + probeSize * 0.5 + clipmapAabbMin;
 	const Vec3 cellWorldPos = cellCoord * probeSize + probeSize * 0.5 + clipmapAabbMin;
 
 
@@ -370,9 +455,6 @@ VertOut main(VertIn input)
 	output.m_svPosition = mul(g_globalRendererConstants.m_matrices.m_viewProjectionJitter, Vec4(vertPos, 1.0));
 	output.m_svPosition = mul(g_globalRendererConstants.m_matrices.m_viewProjectionJitter, Vec4(vertPos, 1.0));
 	output.m_probeCenter = cellWorldPos;
 	output.m_probeCenter = cellWorldPos;
 
 
-	const UVec3 probeTexelCoord = worldPosToVolumeTexel(clipmap, cellWorldPos);
-	output.m_probeTexel = probeTexelCoord;
-
 	return output;
 	return output;
 }
 }
 #	endif // ANKI_VERTEX_SHADER
 #	endif // ANKI_VERTEX_SHADER
@@ -380,6 +462,8 @@ VertOut main(VertIn input)
 #	if ANKI_PIXEL_SHADER
 #	if ANKI_PIXEL_SHADER
 FragOut main(VertOut input)
 FragOut main(VertOut input)
 {
 {
+	const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
+
 	FragOut output;
 	FragOut output;
 
 
 	// Compute the far point
 	// Compute the far point
@@ -405,10 +489,18 @@ FragOut main(VertOut input)
 
 
 	const Vec3 normal = normalize(collisionPoint - input.m_probeCenter);
 	const Vec3 normal = normalize(collisionPoint - input.m_probeCenter);
 
 
-	const IrradianceDice<F16> dice = loadIrradianceDice<F16>(g_clipmapVolumes, input.m_probeTexel, g_consts.m_clipmapIdx * 6);
-	const HVec3 irradiance = evaluateIrradianceDice<F16>(dice, normal);
+	Vec3 uvw = frac(input.m_probeCenter.xzy / clipmap.m_size.xzy);
+	const UVec3 texelCoord = uvw * clipmap.m_probeCounts.xzy;
+
+	uvw.xy = texelCoord.xy * (g_consts.m_radianceProbeSize + 2);
+	uvw.xy += octahedronEncode(normal) * g_consts.m_radianceProbeSize + 1.0;
+	uvw.xy /= clipmap.m_probeCounts.xz * (g_consts.m_radianceProbeSize + 2);
+
+	uvw.z = (texelCoord.z + 0.5) / clipmap.m_probeCounts.y;
+
+	Vec3 radiance = g_radianceVolume.SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0).xyz;
 
 
-	output.m_color = Vec4(irradiance, 0.0);
+	output.m_color = Vec4(radiance, 0.0);
 	return output;
 	return output;
 }
 }
 #	endif // ANKI_PIXEL_SHADER
 #	endif // ANKI_PIXEL_SHADER

+ 3 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -277,6 +277,9 @@ void InterlockedMax(T dest, T value);
 template<typename T>
 template<typename T>
 void InterlockedExchange(T dest, T value, T& originalValue);
 void InterlockedExchange(T dest, T value, T& originalValue);
 
 
+template<typename T>
+void InterlockedCompareExchange(T dest, T compareValue, T value, T& originalValue);
+
 template<typename T>
 template<typename T>
 void InterlockedOr(T dest, T value, T& originalValue);
 void InterlockedOr(T dest, T value, T& originalValue);
 
 

+ 2 - 1
AnKi/Shaders/Reflections.ankiprog

@@ -681,7 +681,8 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	// Trace
 	// Trace
 	GBufferLight<F16> gbuffer;
 	GBufferLight<F16> gbuffer;
 	F32 rayT;
 	F32 rayT;
-	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT);
+	Bool unused;
+	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
 
 
 	const Vec3 hitPos = worldPos + reflDir * rayT;
 	const Vec3 hitPos = worldPos + reflDir * rayT;
 
 

+ 13 - 6
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -37,6 +37,7 @@ Texture2D<Vec4> g_gbufferRt1 : register(t2, SPACE);
 Texture2D<Vec4> g_gbufferRt2 : register(t3, SPACE);
 Texture2D<Vec4> g_gbufferRt2 : register(t3, SPACE);
 #	endif
 #	endif
 Texture2D<Vec4> g_envMap : register(t4, SPACE);
 Texture2D<Vec4> g_envMap : register(t4, SPACE);
+
 #	if defined(CLIPMAP_VOLUME)
 #	if defined(CLIPMAP_VOLUME)
 StructuredBuffer<U32> g_dummyBuff1 : register(t5, SPACE);
 StructuredBuffer<U32> g_dummyBuff1 : register(t5, SPACE);
 StructuredBuffer<U32> g_dummyBuff2 : register(t6, SPACE);
 StructuredBuffer<U32> g_dummyBuff2 : register(t6, SPACE);
@@ -44,13 +45,17 @@ StructuredBuffer<U32> g_dummyBuff2 : register(t6, SPACE);
 StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t5, SPACE);
 StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t5, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t6, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t6, SPACE);
 #	endif
 #	endif
+
 Texture2D<Vec4> g_shadowAtlasTex : register(t7, SPACE);
 Texture2D<Vec4> g_shadowAtlasTex : register(t7, SPACE);
 
 
 // UAVs
 // UAVs
-RWTexture3D<Vec4> g_clipmapVolumes[6u] : register(u0, SPACE);
-
-RWTexture2D<Vec4> g_colorAndPdfTex : register(u7, SPACE);
-RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u8, SPACE);
+#	if defined(CLIPMAP_VOLUME)
+RWTexture2D<Vec4> g_lightResultTex : register(u0, SPACE);
+RWTexture2D<Vec4> g_dummyUav : register(u1, SPACE);
+#	else
+RWTexture2D<Vec4> g_colorAndPdfTex : register(u0, SPACE);
+RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1, SPACE);
+#	endif
 
 
 // Samplers
 // Samplers
 SamplerState g_linearClampAnySampler : register(s0, SPACE);
 SamplerState g_linearClampAnySampler : register(s0, SPACE);
@@ -65,7 +70,7 @@ struct GBufferLight
 };
 };
 
 
 template<typename T>
 template<typename T>
-Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T textureLod, out GBufferLight<T> gbuffer, out F32 rayT,
+Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T textureLod, out GBufferLight<T> gbuffer, out F32 rayT, out Bool backfacing,
 					  U32 traceFlags = RAY_FLAG_FORCE_OPAQUE)
 					  U32 traceFlags = RAY_FLAG_FORCE_OPAQUE)
 {
 {
 	RtMaterialFetchRayPayload payload;
 	RtMaterialFetchRayPayload payload;
@@ -82,7 +87,9 @@ Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T texture
 	TraceRay(g_tlas, traceFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
 	TraceRay(g_tlas, traceFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
 
 
 	rayT = payload.m_rayT;
 	rayT = payload.m_rayT;
-	const Bool hasHitSky = payload.m_rayT < 0.0;
+	const Bool hasHitSky = rayT == kMaxF32;
+	backfacing = rayT < 0.0;
+	rayT = abs(rayT);
 	if(hasHitSky)
 	if(hasHitSky)
 	{
 	{
 		gbuffer = (GBufferLight<T>)0;
 		gbuffer = (GBufferLight<T>)0;

+ 1 - 1
AnKi/Shaders/RtMaterialFetchMiss.ankiprog

@@ -14,5 +14,5 @@
 	payload.m_diffuseColor = 0.0;
 	payload.m_diffuseColor = 0.0;
 	payload.m_worldNormal = 0.0;
 	payload.m_worldNormal = 0.0;
 	payload.m_emission = 0.0;
 	payload.m_emission = 0.0;
-	payload.m_rayT = -1.0;
+	payload.m_rayT = kMaxF32;
 }
 }

+ 2 - 1
Samples/PhysicsPlayground/Main.cpp

@@ -197,7 +197,8 @@ Error MyApp::userMainLoop(Bool& quit, [[maybe_unused]] Second elapsedTime)
 
 
 	if(Input::getSingleton().getKey(KeyCode::kH) == 1)
 	if(Input::getSingleton().getKey(KeyCode::kH) == 1)
 	{
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "RtShadows") ? "" : "RtShadows");
+		renderer.setCurrentDebugRenderTarget(
+			(renderer.getCurrentDebugRenderTarget() == "IndirectDiffuseClipmapsTest") ? "" : "IndirectDiffuseClipmapsTest");
 	}
 	}
 
 
 	if(Input::getSingleton().getKey(KeyCode::kP) == 1)
 	if(Input::getSingleton().getKey(KeyCode::kP) == 1)