Browse Source

Add ray budget in IDC

Panagiotis Christopoulos Charitos 3 months ago
parent
commit
159d28ec55

+ 2 - 1
AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h

@@ -60,7 +60,8 @@ private:
 								   | BufferUsageBit::kIndirectDispatchRays | BufferUsageBit::kShaderBindingTable;
 								   | BufferUsageBit::kIndirectDispatchRays | BufferUsageBit::kShaderBindingTable;
 		if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
 		if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
 		{
 		{
-			buffUsage |= (BufferUsageBit::kAccelerationStructureBuildScratch | BufferUsageBit::kAccelerationStructureBuild);
+			buffUsage |= (BufferUsageBit::kAccelerationStructureBuildScratch | BufferUsageBit::kAccelerationStructureBuild
+						  | BufferUsageBit::kAccelerationStructure);
 		}
 		}
 		m_pool.init(10_MB, 2.0, 0, buffUsage, BufferMapAccessBit::kNone, true, "GpuVisibleTransientMemoryPool");
 		m_pool.init(10_MB, 2.0, 0, buffUsage, BufferMapAccessBit::kNone, true, "GpuVisibleTransientMemoryPool");
 	}
 	}

+ 1 - 1
AnKi/Math/Functions.h

@@ -79,7 +79,7 @@ inline T sqrt(const T x) requires(std::is_integral<T>::value)
 }
 }
 
 
 template<typename T>
 template<typename T>
-inline T square(const T x)
+constexpr inline T square(const T x) requires(std::is_floating_point_v<T> || std::is_integral_v<T>)
 {
 {
 	return x * x;
 	return x * x;
 }
 }

+ 203 - 66
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -18,6 +18,8 @@
 
 
 namespace anki {
 namespace anki {
 
 
+ANKI_SVAR(IdcRays, StatCategory::kRenderer, "IDC ray count", StatFlag::kZeroEveryFrame)
+
 class ProbeRange
 class ProbeRange
 {
 {
 public:
 public:
@@ -27,14 +29,15 @@ public:
 
 
 /// Given the clipmap's position of this and the previous frame it splits the clipmap into regions that contain new probes (thus they need a full
 /// Given the clipmap's position of this and the previous frame it splits the clipmap into regions that contain new probes (thus they need a full
 /// update) or regions of probes that need a less frequent update.
 /// update) or regions of probes that need a less frequent update.
-static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UVec3 probeCountsi, Array<ProbeRange, 3>& fullUpdateProbeRanges,
-									  U32& fullUpdateProbeRangeCount, ProbeRange& partialUpdateProbeRange)
+static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, Vec3 probeSize, UVec3 probeCountsu,
+									  Array<ProbeRange, 3>& fullUpdateProbeRanges, U32& fullUpdateProbeRangeCount,
+									  ProbeRange& partialUpdateProbeRange)
 {
 {
 	fullUpdateProbeRangeCount = 0;
 	fullUpdateProbeRangeCount = 0;
 
 
-	const IVec3 probeCounts(probeCountsi);
+	const IVec3 probeCounts(probeCountsu);
 
 
-	const IVec3 delta = IVec3(newClipmapMin - oldClipmapMin) / probeCounts;
+	const IVec3 delta = IVec3((newClipmapMin - oldClipmapMin) / probeSize);
 	const IVec3 absDelta = delta.abs();
 	const IVec3 absDelta = delta.abs();
 
 
 	if(absDelta.x() >= probeCounts.x() || absDelta.y() >= probeCounts.y() || absDelta.z() >= probeCounts.z())
 	if(absDelta.x() >= probeCounts.x() || absDelta.y() >= probeCounts.y() || absDelta.z() >= probeCounts.z())
@@ -42,6 +45,7 @@ static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UV
 		// No overlap between the old and new clipmap positions, full update
 		// No overlap between the old and new clipmap positions, full update
 
 
 		fullUpdateProbeRanges[fullUpdateProbeRangeCount++] = {IVec3(0), probeCounts};
 		fullUpdateProbeRanges[fullUpdateProbeRangeCount++] = {IVec3(0), probeCounts};
+		partialUpdateProbeRange = {IVec3(0), IVec3(0)};
 	}
 	}
 	else
 	else
 	{
 	{
@@ -89,7 +93,7 @@ static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UV
 			fullUpdateProbeRangeBegin = partialUpdateProbeRangeBegin;
 			fullUpdateProbeRangeBegin = partialUpdateProbeRangeBegin;
 			fullUpdateProbeRangeEnd = IVec3(partialUpdateProbeRangeEnd.x(), -delta.y(), partialUpdateProbeRangeEnd.z());
 			fullUpdateProbeRangeEnd = IVec3(partialUpdateProbeRangeEnd.x(), -delta.y(), partialUpdateProbeRangeEnd.z());
 
 
-			partialUpdateProbeRangeEnd.y() += -delta.y();
+			partialUpdateProbeRangeBegin.y() += -delta.y();
 		}
 		}
 
 
 		if(delta.y() != 0)
 		if(delta.y() != 0)
@@ -113,7 +117,7 @@ static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UV
 			fullUpdateProbeRangeBegin = partialUpdateProbeRangeBegin;
 			fullUpdateProbeRangeBegin = partialUpdateProbeRangeBegin;
 			fullUpdateProbeRangeEnd = IVec3(partialUpdateProbeRangeEnd.x(), partialUpdateProbeRangeEnd.y(), -delta.z());
 			fullUpdateProbeRangeEnd = IVec3(partialUpdateProbeRangeEnd.x(), partialUpdateProbeRangeEnd.y(), -delta.z());
 
 
-			partialUpdateProbeRangeEnd.z() += -delta.z();
+			partialUpdateProbeRangeBegin.z() += -delta.z();
 		}
 		}
 
 
 		if(delta.z() != 0)
 		if(delta.z() != 0)
@@ -124,14 +128,14 @@ static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UV
 		partialUpdateProbeRange = {partialUpdateProbeRangeBegin, partialUpdateProbeRangeEnd};
 		partialUpdateProbeRange = {partialUpdateProbeRangeBegin, partialUpdateProbeRangeEnd};
 
 
 		// Validation
 		// Validation
-		[[maybe_unused]] IVec3 totalProbeCount(0);
+		[[maybe_unused]] I32 totalProbeCount = 0;
 		for(U32 i = 0; i < fullUpdateProbeRangeCount; ++i)
 		for(U32 i = 0; i < fullUpdateProbeRangeCount; ++i)
 		{
 		{
 			const IVec3 end = fullUpdateProbeRanges[i].m_end;
 			const IVec3 end = fullUpdateProbeRanges[i].m_end;
 			const IVec3 begin = fullUpdateProbeRanges[i].m_begin;
 			const IVec3 begin = fullUpdateProbeRanges[i].m_begin;
 			const IVec3 diff = end - begin;
 			const IVec3 diff = end - begin;
 			ANKI_ASSERT(diff.x() * diff.y() * diff.z() > 0);
 			ANKI_ASSERT(diff.x() * diff.y() * diff.z() > 0);
-			totalProbeCount += diff;
+			totalProbeCount += diff.x() * diff.y() * diff.z();
 		}
 		}
 
 
 		{
 		{
@@ -139,9 +143,9 @@ static void findClipmapInUpdateRanges(Vec3 newClipmapMin, Vec3 oldClipmapMin, UV
 			const IVec3 begin = partialUpdateProbeRange.m_begin;
 			const IVec3 begin = partialUpdateProbeRange.m_begin;
 			const IVec3 diff = end - begin;
 			const IVec3 diff = end - begin;
 			ANKI_ASSERT(diff.x() * diff.y() * diff.z() > 0);
 			ANKI_ASSERT(diff.x() * diff.y() * diff.z() > 0);
-			totalProbeCount += diff;
+			totalProbeCount += diff.x() * diff.y() * diff.z();
 		}
 		}
-		ANKI_ASSERT(totalProbeCount == probeCounts);
+		ANKI_ASSERT(totalProbeCount == probeCounts.x() * probeCounts.y() * probeCounts.z());
 	}
 	}
 }
 }
 
 
@@ -205,9 +209,10 @@ Error IndirectDiffuseClipmaps::init()
 	}
 	}
 
 
 	// Create the RT result texture
 	// Create the RT result texture
-	const U32 raysPerProbePerFrame = square<U32>(g_cvarRenderIdcRadianceOctMapSize);
-	m_rtResultRtDesc = getRenderer().create2DRenderTargetDescription(m_consts.m_totalProbeCount, raysPerProbePerFrame * kIndirectDiffuseClipmapCount,
-																	 Format::kR16G16B16A16_Sfloat, "IndirectDiffuseClipmap: RT result");
+	const U32 texelsPerProbe = square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+	m_rtResultRtDesc =
+		getRenderer().create2DRenderTargetDescription(m_consts.m_totalProbeCount, texelsPerProbe * g_cvarRenderIdcRayCountPerTexelOfNewProbe,
+													  Format::kR16G16B16A16_Sfloat, "IndirectDiffuseClipmap: RT result");
 	m_rtResultRtDesc.bake();
 	m_rtResultRtDesc.bake();
 
 
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
@@ -321,6 +326,9 @@ Error IndirectDiffuseClipmaps::init()
 		m_consts.m_textures[i].m_distanceMomentsOctMapSize = (m_distanceMomentsVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
 		m_consts.m_textures[i].m_distanceMomentsOctMapSize = (m_distanceMomentsVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
 		m_consts.m_textures[i].m_irradianceOctMapSize = (m_irradianceVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
 		m_consts.m_textures[i].m_irradianceOctMapSize = (m_irradianceVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
 		m_consts.m_textures[i].m_radianceOctMapSize = (m_radianceVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
 		m_consts.m_textures[i].m_radianceOctMapSize = (m_radianceVolumes[i]->getWidth() / m_consts.m_probeCounts.x()) - 2;
+
+		m_consts.m_previousFrameAabbMins[i] = 100000.0f * m_consts.m_sizes[i] / Vec4(Vec3(m_consts.m_probeCounts), 1.0f);
+		m_consts.m_aabbMins[i] = m_consts.m_previousFrameAabbMins[i];
 	}
 	}
 
 
 	return Error::kNone;
 	return Error::kNone;
@@ -395,29 +403,78 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	buildShaderBindingTablePass("IndirectDiffuseClipmaps: Build SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[1], m_missShaderGroupIdx,
 	buildShaderBindingTablePass("IndirectDiffuseClipmaps: Build SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[1], m_missShaderGroupIdx,
 								m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
 								m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
 
 
-	// Do ray tracing around the probes
+	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
 	{
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: RT");
+		// Compute which probe ranges need a full or a partial update and how many rays this clipmap can spend on them
+		Array<ProbeRange, 3> fullUpdateRanges;
+		U32 fullUpdateRangeCount = 0;
+		ProbeRange partialUpdateRange;
+		const Vec3 probeSize = m_consts.m_sizes[clipmap].xyz() / Vec3(m_consts.m_probeCounts);
+		findClipmapInUpdateRanges(m_consts.m_aabbMins[clipmap].xyz(), m_consts.m_previousFrameAabbMins[clipmap].xyz(), probeSize,
+								  UVec3(m_consts.m_probeCounts), fullUpdateRanges, fullUpdateRangeCount, partialUpdateRange);
+
+		U32 fullUpdateRayCount = 0;
+		for(U32 i = 0; i < fullUpdateRangeCount; ++i)
+		{
+			const UVec3 counts = UVec3(fullUpdateRanges[i].m_end - fullUpdateRanges[i].m_begin);
+			const U32 count = counts.x() * counts.y() * counts.z();
+			fullUpdateRayCount += square<U32>(g_cvarRenderIdcRadianceOctMapSize) * g_cvarRenderIdcRayCountPerTexelOfNewProbe * count;
+		}
 
 
-		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
-		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		setRgenSpace2Dependencies(pass);
+		const U32 remainingRayCount = (g_cvarRenderIdcProbeRayBudget / kIndirectDiffuseClipmapCount > fullUpdateRayCount)
+										  ? g_cvarRenderIdcProbeRayBudget / kIndirectDiffuseClipmapCount - fullUpdateRayCount
+										  : 0;
 
 
-		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+		const UVec3 partialUpdateProbeCounts = UVec3(partialUpdateRange.m_end - partialUpdateRange.m_begin);
+		U32 partialUpdateProbeCount = remainingRayCount / square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+		partialUpdateProbeCount =
+			min(partialUpdateProbeCount, partialUpdateProbeCounts.x() * partialUpdateProbeCounts.y() * partialUpdateProbeCounts.z());
+
+		g_svarIdcRays.increment(fullUpdateRayCount + partialUpdateProbeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize));
+
+		struct ClipmapRegion
 		{
 		{
-			pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kSrvCompute);
-		}
+			UVec3 m_probesBegin;
+			U32 m_partialUpdate;
 
 
-		pass.setWork([this, rtResultHandle, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			UVec3 m_probeCounts;
+			U32 m_probeCount;
+		};
 
 
-			cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
+		struct ProbeUpdateConsts
+		{
+			U32 m_clipmapIdx;
+			U32 m_radianceOctMapSize; // Have it here as well as a mutator. Can't use the mutator because it would create many raygen variants
+			U32 m_rayCountPerTexel; // Ray count per oct map texel
+			U32 m_maxProbesToUpdate;
 
 
-			// More globals
-			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
+			ClipmapRegion m_clipmapRegion;
+		};
+
+		// Do ray tracing around the probes
+		{
+			NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: RT (clipmap %u)", clipmap));
+
+			pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
+			pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+			setRgenSpace2Dependencies(pass);
+
+			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+			{
+				pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kSrvCompute);
+			}
+
+			pass.setWork([this, rtResultHandle, &ctx, sbtBuffer, fullUpdateRangeCount, clipmap, fullUpdateRanges, partialUpdateRange,
+						  partialUpdateProbeCount](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+				cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
+
+				// More globals
+				cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
 
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 	cmdb.bindSrv( \
@@ -427,58 +484,135 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		Format::k##fmt);
 		Format::k##fmt);
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
 
 
-			bindRgenSpace2Resources(ctx, rgraphCtx);
+				bindRgenSpace2Resources(ctx, rgraphCtx);
 
 
-			rgraphCtx.bindUav(0, 2, rtResultHandle);
+				rgraphCtx.bindUav(0, 2, rtResultHandle);
 
 
-			const U32 raysPerProbePerFrame = square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+				ProbeUpdateConsts consts;
+				consts.m_clipmapIdx = clipmap;
+				consts.m_radianceOctMapSize = g_cvarRenderIdcRadianceOctMapSize;
 
 
-			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-			{
-				const UVec4 consts(clipmap, g_cvarRenderIdcRadianceOctMapSize, 0, 0);
-				cmdb.setFastConstants(&consts, sizeof(consts));
+				// Do full updates
+				for(U32 i = 0; i < fullUpdateRangeCount; ++i)
+				{
+					cmdb.pushDebugMarker("Full update", Vec3(0.0f, 1.0f, 1.0f));
 
 
-				cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-								  m_consts.m_totalProbeCount * raysPerProbePerFrame, 1, 1);
-			}
-		});
-	}
+					consts.m_rayCountPerTexel = g_cvarRenderIdcRayCountPerTexelOfNewProbe;
+					consts.m_clipmapRegion.m_probesBegin = UVec3(fullUpdateRanges[i].m_begin);
+					consts.m_clipmapRegion.m_probeCounts = UVec3(fullUpdateRanges[i].m_end - fullUpdateRanges[i].m_begin);
+					consts.m_clipmapRegion.m_probeCount = consts.m_clipmapRegion.m_probeCounts.x() * consts.m_clipmapRegion.m_probeCounts.y()
+														  * consts.m_clipmapRegion.m_probeCounts.z();
+					consts.m_maxProbesToUpdate = consts.m_clipmapRegion.m_probeCount;
+					consts.m_clipmapRegion.m_partialUpdate = 0;
 
 
-	// Populate caches
-	{
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: Populate caches");
+					cmdb.setFastConstants(&consts, sizeof(consts));
 
 
-		pass.newTextureDependency(rtResultHandle, TextureUsageBit::kSrvCompute);
-		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+					const U32 threadCount =
+						consts.m_clipmapRegion.m_probeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize) * consts.m_rayCountPerTexel;
+					cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+									  threadCount, 1, 1);
+
+					cmdb.popDebugMarker();
+				}
+
+				// Do partial updates
+				if(partialUpdateProbeCount)
+				{
+					cmdb.pushDebugMarker("Partial update", Vec3(0.0f, 1.0f, 1.0f));
+
+					consts.m_rayCountPerTexel = 1;
+					consts.m_clipmapRegion.m_probesBegin = UVec3(partialUpdateRange.m_begin);
+					consts.m_clipmapRegion.m_probeCounts = UVec3(partialUpdateRange.m_end - partialUpdateRange.m_begin);
+					consts.m_clipmapRegion.m_probeCount = consts.m_clipmapRegion.m_probeCounts.x() * consts.m_clipmapRegion.m_probeCounts.y()
+														  * consts.m_clipmapRegion.m_probeCounts.z();
+					consts.m_maxProbesToUpdate = partialUpdateProbeCount;
+					consts.m_clipmapRegion.m_partialUpdate = 1;
+
+					cmdb.setFastConstants(&consts, sizeof(consts));
+
+					const U32 threadCount = partialUpdateProbeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+					cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+									  threadCount, 1, 1);
+
+					cmdb.popDebugMarker();
+				}
+			});
+		}
+
+		// Populate caches
 		{
 		{
+			NonGraphicsRenderPass& pass =
+				rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: Populate caches (clipmap %u)", clipmap));
+
+			pass.newTextureDependency(rtResultHandle, TextureUsageBit::kSrvCompute);
 			pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
 			pass.newTextureDependency(radianceVolumes[clipmap], TextureUsageBit::kUavCompute);
 			pass.newTextureDependency(probeValidityVolumes[clipmap], TextureUsageBit::kUavCompute);
 			pass.newTextureDependency(probeValidityVolumes[clipmap], TextureUsageBit::kUavCompute);
 			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kUavCompute);
 			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kUavCompute);
-		}
 
 
-		pass.setWork([this, &ctx, rtResultHandle, radianceVolumes, probeValidityVolumes, distanceMomentsVolumes](RenderPassWorkContext& rgraphCtx) {
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+			pass.setWork([this, &ctx, rtResultHandle, radianceVolumes, probeValidityVolumes, distanceMomentsVolumes, clipmap, fullUpdateRanges,
+						  partialUpdateRange, partialUpdateProbeCount, fullUpdateRangeCount](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
-			cmdb.bindShaderProgram(m_populateCachesGrProg.get());
+				cmdb.bindShaderProgram(m_populateCachesGrProg.get());
 
 
-			rgraphCtx.bindSrv(0, 0, rtResultHandle);
+				rgraphCtx.bindSrv(0, 0, rtResultHandle);
 
 
-			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+				cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
 
-			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
-			{
 				rgraphCtx.bindUav(0, 0, radianceVolumes[clipmap]);
 				rgraphCtx.bindUav(0, 0, radianceVolumes[clipmap]);
 				rgraphCtx.bindUav(1, 0, distanceMomentsVolumes[clipmap]);
 				rgraphCtx.bindUav(1, 0, distanceMomentsVolumes[clipmap]);
 				rgraphCtx.bindUav(2, 0, probeValidityVolumes[clipmap]);
 				rgraphCtx.bindUav(2, 0, probeValidityVolumes[clipmap]);
 
 
-				const UVec4 consts(clipmap);
-				cmdb.setFastConstants(&consts, sizeof(consts));
-
-				const U32 raysPerProbePerFrame = square<U32>(g_cvarRenderIdcRadianceOctMapSize);
-				const U32 threadCount = 64;
-				cmdb.dispatchCompute((raysPerProbePerFrame * m_consts.m_totalProbeCount + threadCount - 1) / threadCount, 1, 1);
-			}
-		});
+				ProbeUpdateConsts consts;
+				consts.m_clipmapIdx = clipmap;
+				consts.m_radianceOctMapSize = g_cvarRenderIdcRadianceOctMapSize;
+
+				// Do full updates
+				const U32 numthreads = 64;
+				for(U32 i = 0; i < fullUpdateRangeCount; ++i)
+				{
+					cmdb.pushDebugMarker("Full update", Vec3(0.0f, 1.0f, 1.0f));
+
+					consts.m_rayCountPerTexel = g_cvarRenderIdcRayCountPerTexelOfNewProbe;
+					consts.m_clipmapRegion.m_probesBegin = UVec3(fullUpdateRanges[i].m_begin);
+					consts.m_clipmapRegion.m_probeCounts = UVec3(fullUpdateRanges[i].m_end - fullUpdateRanges[i].m_begin);
+					consts.m_clipmapRegion.m_probeCount = consts.m_clipmapRegion.m_probeCounts.x() * consts.m_clipmapRegion.m_probeCounts.y()
+														  * consts.m_clipmapRegion.m_probeCounts.z();
+					consts.m_maxProbesToUpdate = consts.m_clipmapRegion.m_probeCount;
+					consts.m_clipmapRegion.m_partialUpdate = 0;
+
+					cmdb.setFastConstants(&consts, sizeof(consts));
+
+					U32 threadCount = consts.m_clipmapRegion.m_probeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+					threadCount = (threadCount + numthreads - 1) / numthreads;
+					cmdb.dispatchCompute(threadCount, 1, 1);
+
+					cmdb.popDebugMarker();
+				}
+
+				// Do partial updates
+				if(partialUpdateProbeCount)
+				{
+					cmdb.pushDebugMarker("Partial update", Vec3(0.0f, 1.0f, 1.0f));
+
+					consts.m_rayCountPerTexel = 1;
+					consts.m_clipmapRegion.m_probesBegin = UVec3(partialUpdateRange.m_begin);
+					consts.m_clipmapRegion.m_probeCounts = UVec3(partialUpdateRange.m_end - partialUpdateRange.m_begin);
+					consts.m_clipmapRegion.m_probeCount = consts.m_clipmapRegion.m_probeCounts.x() * consts.m_clipmapRegion.m_probeCounts.y()
+														  * consts.m_clipmapRegion.m_probeCounts.z();
+					consts.m_maxProbesToUpdate = partialUpdateProbeCount;
+					consts.m_clipmapRegion.m_partialUpdate = 1;
+
+					cmdb.setFastConstants(&consts, sizeof(consts));
+
+					U32 threadCount = consts.m_maxProbesToUpdate * square<U32>(g_cvarRenderIdcRadianceOctMapSize);
+					threadCount = (threadCount + numthreads - 1) / numthreads;
+					cmdb.dispatchCompute(threadCount, 1, 1);
+
+					cmdb.popDebugMarker();
+				}
+			});
+		}
 	}
 	}
 
 
 	// Compute irradiance
 	// Compute irradiance
@@ -557,12 +691,15 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			bindRgenSpace2Resources(ctx, rgraphCtx);
 			bindRgenSpace2Resources(ctx, rgraphCtx);
 			rgraphCtx.bindUav(0, 2, lowRezRt);
 			rgraphCtx.bindUav(0, 2, lowRezRt);
 
 
-			const Vec4 consts(g_cvarRenderIdcFirstBounceRayDistance);
+			const Array<Vec4, 3> consts = {Vec4(g_cvarRenderIdcFirstBounceRayDistance), {}, {}};
 			cmdb.setFastConstants(&consts, sizeof(consts));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 
-			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-							  getRenderer().getInternalResolution().x() / 2,
-							  getRenderer().getInternalResolution().y() / (!g_cvarRenderIdcApplyHighQuality + 1), 1);
+			const U32 width = getRenderer().getInternalResolution().x() / 2;
+			const U32 height = getRenderer().getInternalResolution().y() / (!g_cvarRenderIdcApplyHighQuality + 1);
+			cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1, width,
+							  height, 1);
+
+			g_svarIdcRays.increment(width * height);
 		});
 		});
 	}
 	}
 	else
 	else

+ 11 - 2
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -21,6 +21,12 @@ constexpr U32 kDefaultClipmapProbeCountY = 12;
 constexpr F32 kDefaultClipmap0ProbeSize = 1.5f;
 constexpr F32 kDefaultClipmap0ProbeSize = 1.5f;
 constexpr F32 kDefaultClipmap1ProbeSize = 3.0f;
 constexpr F32 kDefaultClipmap1ProbeSize = 3.0f;
 constexpr F32 kDefaultClipmap2ProbeSize = 6.0f;
 constexpr F32 kDefaultClipmap2ProbeSize = 6.0f;
+constexpr U32 kDefaultRadianceOctMapSize = 10;
+constexpr U32 kDefaultRayCountPerTexelOfNewProbe = 4;
+
+/// As if you are updating 25% of the probes each frame.
+constexpr U32 kDefaultProbeRayBudget =
+	(kIndirectDiffuseClipmapCount * square(kDefaultClipmapProbeCountXZ) * kDefaultClipmapProbeCountY * square(kDefaultRadianceOctMapSize)) * 25 / 100;
 
 
 ANKI_CVAR2(NumericCVar<U32>, Render, Idc, ProbesXZ, kDefaultClipmapProbeCountXZ, 10, 100, "The cell count of each dimension of 1st clipmap")
 ANKI_CVAR2(NumericCVar<U32>, Render, Idc, ProbesXZ, kDefaultClipmapProbeCountXZ, 10, 100, "The cell count of each dimension of 1st clipmap")
 ANKI_CVAR2(NumericCVar<U32>, Render, Idc, ProbesY, kDefaultClipmapProbeCountY, 4, 100, "The cell count of each dimension of 1st clipmap")
 ANKI_CVAR2(NumericCVar<U32>, Render, Idc, ProbesY, kDefaultClipmapProbeCountY, 4, 100, "The cell count of each dimension of 1st clipmap")
@@ -41,7 +47,7 @@ ANKI_CVAR2(NumericCVar<F32>, Render, Idc, Clipmap2YSize, F32(kDefaultClipmapProb
 		   "The clipmap size in meters")
 		   "The clipmap size in meters")
 
 
 ANKI_CVAR2(
 ANKI_CVAR2(
-	NumericCVar<U32>, Render, Idc, RadianceOctMapSize, 10,
+	NumericCVar<U32>, Render, Idc, RadianceOctMapSize, kDefaultRadianceOctMapSize,
 	[](U32 val) {
 	[](U32 val) {
 		return val >= 4 && val <= 30 && val % 2 == 0;
 		return val >= 4 && val <= 30 && val % 2 == 0;
 	},
 	},
@@ -51,7 +57,10 @@ ANKI_CVAR2(NumericCVar<U32>, Render, Idc, IrradianceOctMapSize, 5, 4, 20, "Size
 ANKI_CVAR2(NumericCVar<F32>, Render, Idc, FirstBounceRayDistance, 0.0f, 0.0f, 10000.0f,
 ANKI_CVAR2(NumericCVar<F32>, Render, Idc, FirstBounceRayDistance, 0.0f, 0.0f, 10000.0f,
 		   "For the 1st bounce shoot rays instead of sampling the clipmaps")
 		   "For the 1st bounce shoot rays instead of sampling the clipmaps")
 ANKI_CVAR2(BoolCVar, Render, Idc, ApplyHighQuality, false, "If true use 1/2 resolution else use 1/4")
 ANKI_CVAR2(BoolCVar, Render, Idc, ApplyHighQuality, false, "If true use 1/2 resolution else use 1/4")
-ANKI_CVAR2(NumericCVar<U8>, Render, Idc, TexelRayCountNewProbe, 4, 1, 16,
+ANKI_CVAR2(NumericCVar<U8>, Render, Idc, RayCountPerTexelOfNewProbe, kDefaultRayCountPerTexelOfNewProbe, 1, 16,
+		   "The number of rays for a single texel of the oct map that will be cast for probes that are seen for the 1st time")
+
+/// Per-frame ray budget for the probe updates. It is divided evenly between the clipmaps; whatever is left after the new probes
+/// get their full update is spent on refreshing the already existing probes.
+ANKI_CVAR2(NumericCVar<U32>, Render, Idc, ProbeRayBudget, kDefaultProbeRayBudget, 1024, 100 * 1024 * 1024,
 		   "The per-frame ray budget for probe updates, split evenly between the clipmaps. Rays left after fully updating new probes are spent on refreshing existing probes")
 		   "The per-frame ray budget for probe updates, split evenly between the clipmaps. Rays left after fully updating new probes are spent on refreshing existing probes")
 
 
 /// @memberof IndirectDiffuseClipmaps
 /// @memberof IndirectDiffuseClipmaps

+ 3 - 1
AnKi/Renderer/Reflections.cpp

@@ -313,7 +313,9 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 				U32 m_giProbeCount;
 				U32 m_giProbeCount;
 				F32 m_padding1;
 				F32 m_padding1;
 				F32 m_padding2;
 				F32 m_padding2;
-			} consts = {g_cvarRenderReflectionsRtMaxRayDistance, GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount(), 0, 0};
+
+				Vec4 m_padding[2];
+			} consts = {g_cvarRenderReflectionsRtMaxRayDistance, GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount(), 0, 0, {}};
 
 
 			cmdb.setFastConstants(&consts, sizeof(consts));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 

+ 213 - 146
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -40,43 +40,75 @@ constexpr F32 kGaussianSigma = 0.55;
 constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
 constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
 constexpr Bool kLocalLightShadow = false;
 constexpr Bool kLocalLightShadow = false;
 
 
-// ===========================================================================
-// RtMaterialFetch                                                           =
-// ===========================================================================
-#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch)
+struct ClipmapRegion
+{
+	UVec3 m_probesBegin;
+	U32 m_partialUpdate;
 
 
-#	if RT_MATERIAL_FETCH_CLIPMAP
-struct Consts
+	UVec3 m_probeCounts;
+	U32 m_probeCount;
+};
+
+struct ProbeUpdateConsts
 {
 {
 	U32 m_clipmapIdx;
 	U32 m_clipmapIdx;
-	U32 m_radianceOctMapSize; // Don't use the mutator because we don't want to create may raygen variants
-	F32 m_padding1;
-	F32 m_padding2;
+	U32 m_radianceOctMapSize; // Have it here as well as a mutator. Can't use the mutator because it would create many raygen variants
+	U32 m_rayCountPerTexel; // Ray count per oct map texel
+	U32 m_maxProbesToUpdate;
+
+	ClipmapRegion m_clipmapRegion;
 };
 };
-ANKI_FAST_CONSTANTS(Consts, g_consts)
+
+// ===========================================================================
+// RtMaterialFetch                                                           =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && NOT_ZERO(RT_MATERIAL_FETCH_CLIPMAP)
+
+ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 
 
 [Shader("raygeneration")] void main()
 [Shader("raygeneration")] void main()
 {
 {
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
-	const UVec3 probeCounts = idConsts.m_probeCounts;
-	const U32 probeCountTotal = idConsts.m_totalProbeCount;
-	const Vec3 clipmapSize = idConsts.m_sizes[g_consts.m_clipmapIdx];
-	const Vec3 clipmapAabbMin = idConsts.m_aabbMins[g_consts.m_clipmapIdx];
+	const U32 octMapTexelCount = square(g_consts.m_radianceOctMapSize);
 
 
 	// Compute probe info. Make sure you shoot coherent rays as much as possible by using the same direction on a specific wave
 	// Compute probe info. Make sure you shoot coherent rays as much as possible by using the same direction on a specific wave
-	const U32 outPixelIdx = DispatchRaysIndex().x / probeCountTotal;
-	const U32 probeIdx = DispatchRaysIndex().x % probeCountTotal;
+	U32 probeIdx;
+	U32 subRayIdx;
+	U32 octMapTexelIdx;
+	unflatten3dArrayIndex(octMapTexelCount, g_consts.m_maxProbesToUpdate, g_consts.m_rayCountPerTexel, DispatchRaysIndex().x, octMapTexelIdx,
+						  probeIdx, subRayIdx);
+
+	if(g_consts.m_clipmapRegion.m_partialUpdate)
+	{
+		// Spread the budgeted probes over the region: step by (region probe count / budgeted probe count) and shift the start every frame
+		const U32 div = g_consts.m_clipmapRegion.m_probeCount / g_consts.m_maxProbesToUpdate;
+
+		probeIdx = g_globalRendererConstants.m_frame + div * probeIdx;
+		probeIdx = probeIdx % g_consts.m_clipmapRegion.m_probeCount;
+	}
+
+	UVec3 probeId;
+	unflatten3dArrayIndex(g_consts.m_clipmapRegion.m_probeCounts.z, g_consts.m_clipmapRegion.m_probeCounts.y,
+						  g_consts.m_clipmapRegion.m_probeCounts.x, probeIdx, probeId.z, probeId.y, probeId.x);
+	probeId += g_consts.m_clipmapRegion.m_probesBegin;
+	probeIdx = probeId.z * idConsts.m_probeCounts.x * idConsts.m_probeCounts.y + probeId.y * idConsts.m_probeCounts.x + probeId.x;
 
 
-	UVec3 probe3dIdx;
-	unflatten3dArrayIndex(probeCounts.z, probeCounts.y, probeCounts.x, probeIdx, probe3dIdx.z, probe3dIdx.y, probe3dIdx.x);
+	// Check
+	{
+		const UVec3 probeIdBegin = g_consts.m_clipmapRegion.m_probesBegin;
+		[unroll] for(U32 i = 1; i < 2; ++i)
+		{
+			ANKI_ASSERT(probeId[i] >= probeIdBegin[i] && probeId[i] < probeIdBegin[i] + g_consts.m_clipmapRegion.m_probeCounts[i]);
+		}
+	}
 
 
-	const Vec3 probeSize = clipmapSize / probeCounts;
-	const Vec3 probeWorldPos = probe3dIdx * probeSize + probeSize * 0.5 + clipmapAabbMin;
+	const Vec3 probeSize = idConsts.m_sizes[g_consts.m_clipmapIdx] / idConsts.m_probeCounts;
+	const Vec3 probeWorldPos = probeId * probeSize + probeSize * 0.5 + idConsts.m_aabbMins[g_consts.m_clipmapIdx];
 
 
 	// Generate direction
 	// Generate direction
-	const UVec2 radianceOctCoord = UVec2(outPixelIdx % g_consts.m_radianceOctMapSize, outPixelIdx / g_consts.m_radianceOctMapSize);
+	const UVec2 radianceOctCoord = UVec2(octMapTexelIdx % g_consts.m_radianceOctMapSize, octMapTexelIdx / g_consts.m_radianceOctMapSize);
 	ANKI_ASSERT(all(radianceOctCoord < g_consts.m_radianceOctMapSize));
 	ANKI_ASSERT(all(radianceOctCoord < g_consts.m_radianceOctMapSize));
-	const U32 sampleIdx = (g_globalRendererConstants.m_frame + probeIdx) % 16;
+	const U32 sampleIdx = (g_globalRendererConstants.m_frame * g_consts.m_rayCountPerTexel + subRayIdx) % 16;
 	const Vec2 sampleCoord = radianceOctCoord + 0.5 + generateMsaa16x(sampleIdx) / (16.0 * 2.0);
 	const Vec2 sampleCoord = radianceOctCoord + 0.5 + generateMsaa16x(sampleIdx) / (16.0 * 2.0);
 	const HVec3 dir = octahedronDecode(sampleCoord / g_consts.m_radianceOctMapSize);
 	const HVec3 dir = octahedronDecode(sampleCoord / g_consts.m_radianceOctMapSize);
 
 
@@ -108,99 +140,9 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	}
 	}
 
 
 	// Store result
 	// Store result
-	const U32 raysPerProbePerFrame = square(g_consts.m_radianceOctMapSize);
 	const F32 kMaxDist = 1000.0; // Chose something small and make sure its square doesn't overflow F16
 	const F32 kMaxDist = 1000.0; // Chose something small and make sure its square doesn't overflow F16
-	TEX(g_lightResultTex, UVec2(probeIdx, outPixelIdx + raysPerProbePerFrame * g_consts.m_clipmapIdx)) = HVec4(radiance, min(rayT, kMaxDist));
-}
-
-// ===========================================================================
-// RtMaterialFetch (Apply)                                                   =
-// ===========================================================================
-#	else // RT_MATERIAL_FETCH_CLIPMAP
-struct Consts
-{
-	F32 m_rayMax;
-	F32 m_padding1;
-	F32 m_padding2;
-	F32 m_padding3;
-};
-ANKI_FAST_CONSTANTS(Consts, g_consts)
-
-[Shader("raygeneration")] void main()
-{
-#		if SPATIAL_RECONSTRUCT_TYPE == 0
-	const UVec2 fullCoord = UVec2(DispatchRaysIndex().x * 2u + (DispatchRaysIndex().y & 1u), DispatchRaysIndex().y);
-	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * UVec2(2, 1));
-#		else
-	const UVec2 fullCoord = DispatchRaysIndex().xy * 2u;
-	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * 2);
-#		endif
-
-	const F32 depth = TEX(g_depthTex, fullCoord).x;
-	const Vec4 rt2 = TEX(g_gbufferRt2, fullCoord);
-	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
-
-	const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(uvToNdc(uv), depth, 1.0));
-	const Vec3 worldPos = v4.xyz / v4.w;
-
-	const Vec3 biasDir = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
-	const Vec3 biasedWorldPos = worldPos + biasDir * 0.1;
-
-	// Rand
-	const UVec3 seed = rand3DPCG16(UVec3(fullCoord, g_globalRendererConstants.m_frame % 8u));
-	const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
-
-	const Mat3 tbn = rotationFromDirection(worldNormal);
-	const Vec3 rayDir = normalize(mul(tbn, hemisphereSampleCos(randFactors)));
-
-	// Trace
-	const F32 tMax = g_consts.m_rayMax;
-	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
-	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
-	F32 rayT = 0.0;
-	Bool backfacing = false;
-	const Bool hit = materialRayTrace<F16>(biasedWorldPos, rayDir, 0.01, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
-
-	HVec3 radiance = 0.0;
-	Vec3 hitPos = 0.0;
-	if(hit)
-	{
-		hitPos = biasedWorldPos + rayDir * (rayT - 0.01);
-		radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, 1000.0, kLocalLightShadow, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
-	}
-
-	Vec3 rayOrigin;
-	Vec3 rayDir2;
-	if(hit)
-	{
-		// 2nd bounce
-		rayOrigin = hitPos;
-		rayDir2 = gbuffer.m_worldNormal;
-	}
-	else
-	{
-		rayOrigin = biasedWorldPos;
-		rayDir2 = worldNormal;
-	}
-	const SampleClipmapFlag flags = kSampleClipmapFlagFullQuality | kSampleClipmapFlagBiasSamplePointSurfaceNormal;
-	const Vec3 irradiance =
-		sampleClipmapIrradiance(rayOrigin, rayDir2, g_globalRendererConstants.m_cameraPosition, g_globalRendererConstants.m_indirectDiffuseClipmaps,
-								g_linearAnyRepeatSampler, flags, randFactors.x);
-
-	Vec3 final;
-	if(hit)
-	{
-		final = radiance + irradiance * gbuffer.m_diffuse;
-	}
-	else
-	{
-		final = irradiance;
-	}
-
-	TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = Vec4(final, 0.0);
-	// TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = lerp(TEX(g_colorAndPdfTex, DispatchRaysIndex().xy), Vec4(final, 0.0), 0.05);
+	TEX(g_lightResultTex, UVec2(probeIdx, octMapTexelIdx * g_consts.m_rayCountPerTexel + subRayIdx)) = HVec4(radiance, min(rayT, kMaxDist));
 }
 }
-#	endif // RT_MATERIAL_FETCH_CLIPMAP
 #endif
 #endif
 
 
 // ===========================================================================
 // ===========================================================================
@@ -215,56 +157,87 @@ RWTexture3D<Vec4> g_probeValidiryVolume : register(u2);
 
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
 
 
-struct Consts
-{
-	U32 m_clipmapIdx;
-	U32 m_padding1;
-	U32 m_padding2;
-	U32 m_padding3;
-};
-ANKI_FAST_CONSTANTS(Consts, g_consts)
+ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 
 
 [NumThreads(64, 1, 1)] void main(COMPUTE_ARGS)
 [NumThreads(64, 1, 1)] void main(COMPUTE_ARGS)
 {
 {
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
 	const U32 clipmapIdx = g_consts.m_clipmapIdx;
 	const U32 clipmapIdx = g_consts.m_clipmapIdx;
 	const Vec3 clipmapSize = idConsts.m_sizes[clipmapIdx].xyz;
 	const Vec3 clipmapSize = idConsts.m_sizes[clipmapIdx].xyz;
-	const UVec3 probeCounts = idConsts.m_probeCounts;
-	const Vec3 clipmapAabbMin = idConsts.m_aabbMins[clipmapIdx].xyz;
 	const Vec3 prevClipmapAabbMin = idConsts.m_previousFrameAabbMins[clipmapIdx].xyz;
 	const Vec3 prevClipmapAabbMin = idConsts.m_previousFrameAabbMins[clipmapIdx].xyz;
+	const U32 octMapTexelCount = square(RADIANCE_OCTAHEDRON_MAP_SIZE);
 
 
-	const U32 raysPerProbePerFrame = square(RADIANCE_OCTAHEDRON_MAP_SIZE);
+	U32 probeIdx = svDispatchThreadId.x / octMapTexelCount;
+	const U32 octMapTexelIdx = svDispatchThreadId.x % octMapTexelCount;
 
 
-	const U32 rtPixelIdx = svDispatchThreadId.x % raysPerProbePerFrame;
-	const U32 probeIdx = svDispatchThreadId.x / raysPerProbePerFrame;
-	if(probeIdx >= idConsts.m_totalProbeCount)
+	if(octMapTexelIdx >= octMapTexelCount || probeIdx >= g_consts.m_maxProbesToUpdate)
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	const Vec3 probeSize = clipmapSize / probeCounts;
-	UVec3 probe3dIdx;
-	unflatten3dArrayIndex(probeCounts.z, probeCounts.y, probeCounts.x, probeIdx, probe3dIdx.z, probe3dIdx.y, probe3dIdx.x);
-	const Vec3 probeWorldPos = probe3dIdx * probeSize + probeSize * 0.5 + clipmapAabbMin;
-	const Bool blendWithHistory = all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMin + clipmapSize);
+	if(g_consts.m_clipmapRegion.m_partialUpdate)
+	{
+		// Choose every other probe depending on the budget
+		const U32 div = g_consts.m_clipmapRegion.m_probeCount / g_consts.m_maxProbesToUpdate;
+
+		probeIdx = g_globalRendererConstants.m_frame + div * probeIdx;
+		probeIdx = probeIdx % g_consts.m_clipmapRegion.m_probeCount;
+	}
 
 
-	UVec3 noOctTexCoord = frac(probeWorldPos / clipmapSize) * probeCounts;
-	noOctTexCoord = min(noOctTexCoord, probeCounts - 1u);
-	noOctTexCoord = noOctTexCoord.xzy;
+	UVec3 probeId;
+	unflatten3dArrayIndex(g_consts.m_clipmapRegion.m_probeCounts.z, g_consts.m_clipmapRegion.m_probeCounts.y,
+						  g_consts.m_clipmapRegion.m_probeCounts.x, probeIdx, probeId.z, probeId.y, probeId.x);
+	probeId += g_consts.m_clipmapRegion.m_probesBegin;
+	probeIdx = probeId.z * idConsts.m_probeCounts.x * idConsts.m_probeCounts.y + probeId.y * idConsts.m_probeCounts.x + probeId.x;
 
 
-	// Read the result from RT
-	const HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, rtPixelIdx + g_consts.m_clipmapIdx * raysPerProbePerFrame));
-	HVec3 radiance = comp.xyz;
-	const Vec2 moments = Vec2(comp.w, square(comp.w));
-	if(all(radiance == HVec3(1.0, 0.0, 1.0)))
+	// Check
 	{
 	{
-		radiance = 0.0;
+		const UVec3 probeIdBegin = g_consts.m_clipmapRegion.m_probesBegin;
+		[unroll] for(U32 i = 1; i < 2; ++i)
+		{
+			ANKI_ASSERT(probeId[i] >= probeIdBegin[i] && probeId[i] < probeIdBegin[i] + g_consts.m_clipmapRegion.m_probeCounts[i]);
+		}
 	}
 	}
 
 
+	// Read the result of RT
+	HVec3 radiance = 0.0;
+	Vec2 moments = 0.0;
+	F32 weightSum = 0.0;
+	for(U32 subray = 0; subray < g_consts.m_rayCountPerTexel; ++subray)
+	{
+		HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, octMapTexelIdx * g_consts.m_rayCountPerTexel + subray));
+		const F32 weight = 1.0 / g_consts.m_rayCountPerTexel;
+
+		if(any(comp.xyz != HVec3(1.0, 0.0, 1.0)))
+		{
+			radiance += comp.xyz * weight;
+			moments += Vec2(comp.w, square(comp.w)) * weight;
+
+			weightSum += weight;
+		}
+	}
+
+	if(weightSum > 0.0)
+	{
+		radiance /= weightSum;
+		moments /= weightSum;
+	}
+
+	// Compute probe info
+	const Vec3 probeSize = clipmapSize / idConsts.m_probeCounts;
+	const Vec3 probeWorldPos = probeId * probeSize + probeSize * 0.5 + idConsts.m_aabbMins[clipmapIdx].xyz;
+
+	const Bool blendWithHistory =
+		g_consts.m_clipmapRegion.m_partialUpdate && all(probeWorldPos > prevClipmapAabbMin) && all(probeWorldPos < prevClipmapAabbMin + clipmapSize);
+
+	UVec3 noOctTexCoord = frac(probeWorldPos / clipmapSize) * idConsts.m_probeCounts;
+	noOctTexCoord = min(noOctTexCoord, idConsts.m_probeCounts - 1u);
+	noOctTexCoord = noOctTexCoord.xzy;
+
 	// Update the radiance and distance moments volumes
 	// Update the radiance and distance moments volumes
 	{
 	{
 		// Compute oct coord
 		// Compute oct coord
-		const UVec2 radianceOctCoord = UVec2(rtPixelIdx % RADIANCE_OCTAHEDRON_MAP_SIZE, rtPixelIdx / RADIANCE_OCTAHEDRON_MAP_SIZE);
+		const UVec2 radianceOctCoord = UVec2(octMapTexelIdx % RADIANCE_OCTAHEDRON_MAP_SIZE, octMapTexelIdx / RADIANCE_OCTAHEDRON_MAP_SIZE);
 		ANKI_ASSERT(all(radianceOctCoord < RADIANCE_OCTAHEDRON_MAP_SIZE));
 		ANKI_ASSERT(all(radianceOctCoord < RADIANCE_OCTAHEDRON_MAP_SIZE));
 
 
 		UVec3 actualVolumeTexCoord;
 		UVec3 actualVolumeTexCoord;
@@ -308,19 +281,21 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	}
 	}
 
 
 	// Update probe validity
 	// Update probe validity
-	if(rtPixelIdx == 0)
+	if(octMapTexelIdx == 0)
 	{
 	{
+		// Loop the directions again
 		F32 invalidRayCount = 0.0;
 		F32 invalidRayCount = 0.0;
-		for(U32 i = 0; i < raysPerProbePerFrame; ++i)
+		for(U32 i = 0; i < octMapTexelCount; ++i)
 		{
 		{
-			const HVec3 radiance = TEX(g_rtResultTex, UVec2(probeIdx, i + g_consts.m_clipmapIdx * raysPerProbePerFrame));
+			const U32 subray = 0;
+			const HVec3 radiance = TEX(g_rtResultTex, UVec2(probeIdx, i * g_consts.m_rayCountPerTexel + subray));
 			if(all(radiance == HVec3(1.0, 0.0, 1.0)))
 			if(all(radiance == HVec3(1.0, 0.0, 1.0)))
 			{
 			{
 				invalidRayCount += 1.0;
 				invalidRayCount += 1.0;
 			}
 			}
 		}
 		}
 
 
-		F32 valid = 1.0 - min(1.0, invalidRayCount / F32(raysPerProbePerFrame / 4));
+		F32 valid = 1.0 - min(1.0, invalidRayCount / F32(octMapTexelCount / 4));
 		if(blendWithHistory)
 		if(blendWithHistory)
 		{
 		{
 			const F32 prev = TEX(g_probeValidiryVolume, noOctTexCoord).x;
 			const F32 prev = TEX(g_probeValidiryVolume, noOctTexCoord).x;
@@ -528,6 +503,98 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 }
 }
 #endif
 #endif
 
 
+// ===========================================================================
+// RtMaterialFetch (Apply)                                                   =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && !RT_MATERIAL_FETCH_CLIPMAP
+
+struct Consts
+{
+	F32 m_rayMax;
+	F32 m_padding1;
+	F32 m_padding2;
+	F32 m_padding3;
+
+	Vec4 m_padding[2];
+};
+ANKI_FAST_CONSTANTS(Consts, g_consts)
+
+[Shader("raygeneration")] void main()
+{
+#	if SPATIAL_RECONSTRUCT_TYPE == 0
+	const UVec2 fullCoord = UVec2(DispatchRaysIndex().x * 2u + (DispatchRaysIndex().y & 1u), DispatchRaysIndex().y);
+	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * UVec2(2, 1));
+#	else
+	const UVec2 fullCoord = DispatchRaysIndex().xy * 2u;
+	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * 2);
+#	endif
+
+	const F32 depth = TEX(g_depthTex, fullCoord).x;
+	const Vec4 rt2 = TEX(g_gbufferRt2, fullCoord);
+	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
+
+	const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(uvToNdc(uv), depth, 1.0));
+	const Vec3 worldPos = v4.xyz / v4.w;
+
+	const Vec3 biasDir = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
+	const Vec3 biasedWorldPos = worldPos + biasDir * 0.1;
+
+	// Rand
+	const UVec3 seed = rand3DPCG16(UVec3(fullCoord, g_globalRendererConstants.m_frame % 8u));
+	const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
+
+	const Mat3 tbn = rotationFromDirection(worldNormal);
+	const Vec3 rayDir = normalize(mul(tbn, hemisphereSampleCos(randFactors)));
+
+	// Trace
+	const F32 tMax = g_consts.m_rayMax;
+	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
+	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
+	F32 rayT = 0.0;
+	Bool backfacing = false;
+	const Bool hit = materialRayTrace<F16>(biasedWorldPos, rayDir, 0.01, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
+
+	HVec3 radiance = 0.0;
+	Vec3 hitPos = 0.0;
+	if(hit)
+	{
+		hitPos = biasedWorldPos + rayDir * (rayT - 0.01);
+		radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, 1000.0, kLocalLightShadow, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
+	}
+
+	Vec3 rayOrigin;
+	Vec3 rayDir2;
+	if(hit)
+	{
+		// 2nd bounce
+		rayOrigin = hitPos;
+		rayDir2 = gbuffer.m_worldNormal;
+	}
+	else
+	{
+		rayOrigin = biasedWorldPos;
+		rayDir2 = worldNormal;
+	}
+	const SampleClipmapFlag flags = kSampleClipmapFlagFullQuality | kSampleClipmapFlagBiasSamplePointSurfaceNormal;
+	const Vec3 irradiance =
+		sampleClipmapIrradiance(rayOrigin, rayDir2, g_globalRendererConstants.m_cameraPosition, g_globalRendererConstants.m_indirectDiffuseClipmaps,
+								g_linearAnyRepeatSampler, flags, randFactors.x);
+
+	Vec3 final;
+	if(hit)
+	{
+		final = radiance + irradiance * gbuffer.m_diffuse;
+	}
+	else
+	{
+		final = irradiance;
+	}
+
+	TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = Vec4(final, 0.0);
+	// TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = lerp(TEX(g_colorAndPdfTex, DispatchRaysIndex().xy), Vec4(final, 0.0), 0.05);
+}
+#endif
+
 // ===========================================================================
 // ===========================================================================
 // SpatialReconstruct                                                        =
 // SpatialReconstruct                                                        =
 // ===========================================================================
 // ===========================================================================

+ 1 - 1
AnKi/Shaders/IndirectDiffuseClipmaps.hlsl

@@ -149,7 +149,7 @@ Vec3 sampleClipmapCommon(SampleClipmapsArgs args, SamplerState linearAnyRepeatSa
 							   : findClipmapOnPositionCheap(consts, args.m_samplePoint, flags);
 							   : findClipmapOnPositionCheap(consts, args.m_samplePoint, flags);
 #else
 #else
 	U16 clipmapIdx = 0;
 	U16 clipmapIdx = 0;
-	if(!insideClipmap(consts, clipmapIdx, args.m_samplePoint))
+	if(!insideClipmap(consts, clipmapIdx, args.m_samplePoint, flags))
 	{
 	{
 		clipmapIdx = 10;
 		clipmapIdx = 10;
 	}
 	}

+ 2 - 0
AnKi/Shaders/Reflections.ankiprog

@@ -670,6 +670,8 @@ struct Consts
 	U32 m_giProbeCount;
 	U32 m_giProbeCount;
 	F32 m_padding1;
 	F32 m_padding1;
 	F32 m_padding2;
 	F32 m_padding2;
+
+	Vec4 m_padding[2];
 };
 };
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 

+ 2 - 4
AnKi/Util/Functions.h

@@ -302,8 +302,7 @@ TForwardIterator binarySearch(TForwardIterator first, TForwardIterator last, con
 	return (first != last && !comp(value, *first)) ? first : last;
 	return (first != last && !comp(value, *first)) ? first : last;
 }
 }
 
 
-/// Individual classes should specialize that function if they are packed. If a class is packed it can be used as
-/// whole in hashing.
+/// Individual classes should specialize that function if they are packed. If a class is packed it can be used as whole in hashing.
 template<typename T>
 template<typename T>
 constexpr Bool isPacked()
 constexpr Bool isPacked()
 {
 {
@@ -311,8 +310,7 @@ constexpr Bool isPacked()
 }
 }
 
 
 /// Unflatten 3D array index.
 /// Unflatten 3D array index.
-/// Imagine an array [sizeA][sizeB][sizeC] and a flat index in that array. Then this function will compute the unflatten
-/// indices.
+/// Imagine an array [sizeA][sizeB][sizeC] and a flat index in that array. Then this function will compute the unflatten indices.
 template<typename T, typename TI, typename TOut>
 template<typename T, typename TI, typename TOut>
 inline void unflatten3dArrayIndex(const T sizeA, const T sizeB, const T sizeC, const TI flatIdx, TOut& a, TOut& b, TOut& c)
 inline void unflatten3dArrayIndex(const T sizeA, const T sizeB, const T sizeC, const TI flatIdx, TOut& a, TOut& b, TOut& c)
 {
 {