Browse Source

More work on the GI clipmaps

Panagiotis Christopoulos Charitos 9 months ago
parent
commit
b6512043f1

+ 2 - 2
AnKi/Gr/Vulkan/VkBuffer.cpp

@@ -471,7 +471,7 @@ VkBufferView BufferImpl::getOrCreateBufferView(Format fmt, PtrSize offset, PtrSi
 
 	// Check if exists
 	{
-		RLockGuard<RWMutex> lock(m_viewsMtx);
+		RLockGuard lock(m_viewsMtx);
 
 		auto it = m_views.find(hash);
 		if(it != m_views.getEnd())
@@ -480,7 +480,7 @@ VkBufferView BufferImpl::getOrCreateBufferView(Format fmt, PtrSize offset, PtrSi
 		}
 	}
 
-	WLockGuard<RWMutex> lock(m_viewsMtx);
+	WLockGuard lock(m_viewsMtx);
 
 	// Check again
 	auto it = m_views.find(hash);

+ 2 - 2
AnKi/Gr/Vulkan/VkCommandBufferFactory.cpp

@@ -220,14 +220,14 @@ Error CommandBufferFactory::newCommandBuffer(ThreadId tid, CommandBufferFlag cmd
 
 		// Find using binary search
 		{
-			RLockGuard<RWMutex> lock(m_threadAllocMtx);
+			RLockGuard lock(m_threadAllocMtx);
 			auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
 			alloc = (it != m_threadAllocs.getEnd()) ? (*it) : nullptr;
 		}
 
 		if(alloc == nullptr) [[unlikely]]
 		{
-			WLockGuard<RWMutex> lock(m_threadAllocMtx);
+			WLockGuard lock(m_threadAllocMtx);
 
 			// Check again
 			auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());

+ 2 - 2
AnKi/Gr/Vulkan/VkGraphicsState.cpp

@@ -155,7 +155,7 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, VkCommandB
 	// Find the PSO
 	VkPipeline pso = VK_NULL_HANDLE;
 	{
-		RLockGuard<RWMutex> lock(m_mtx);
+		RLockGuard lock(m_mtx);
 
 		auto it = m_map.find(state.m_globalHash);
 		if(it != m_map.getEnd())
@@ -377,7 +377,7 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, VkCommandB
 
 	// Now try to add the PSO to the hashmap
 	{
-		WLockGuard<RWMutex> lock(m_mtx);
+		WLockGuard lock(m_mtx);
 
 		auto it = m_map.find(state.m_globalHash);
 		if(it == m_map.getEnd())

+ 19 - 19
AnKi/Renderer/IndirectDiffuse.cpp

@@ -19,28 +19,28 @@ namespace anki {
 Error IndirectDiffuse::init()
 {
 	const Bool bRt = GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled && g_rtIndirectDiffuseCVar;
+	ANKI_ASSERT(bRt);
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/IndirectDiffuse.ankiprogbin", m_mainProg));
 
-	if(bRt)
-	{
-		ShaderProgramResourceVariantInitInfo variantInitInfo(m_mainProg);
-		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
-		const ShaderProgramResourceVariant* variant;
-		m_mainProg->getOrCreateVariant(variantInitInfo, variant);
-		m_libraryGrProg.reset(&variant->getProgram());
-		m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
-
-		ShaderProgramResourceVariantInitInfo variantInitInfo2(m_mainProg);
-		variantInitInfo2.requestTechniqueAndTypes(ShaderTypeBit::kMiss, "RtMaterialFetch");
-		m_mainProg->getOrCreateVariant(variantInitInfo2, variant);
-		m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
-
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
-
-		m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
-											GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
-	}
+	ShaderProgramResourceVariantInitInfo variantInitInfo(m_mainProg);
+	variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
+	const ShaderProgramResourceVariant* variant;
+	m_mainProg->getOrCreateVariant(variantInitInfo, variant);
+	m_libraryGrProg.reset(&variant->getProgram());
+	m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
+
+	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtMaterialFetchMiss.ankiprogbin", m_missProg));
+
+	ShaderProgramResourceVariantInitInfo variantInitInfo2(m_missProg);
+	variantInitInfo2.requestTechniqueAndTypes(ShaderTypeBit::kMiss, "RtMaterialFetch");
+	m_missProg->getOrCreateVariant(variantInitInfo2, variant);
+	m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
+
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
+
+	m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
+										GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
 
 	m_transientRtDesc1 = getRenderer().create2DRenderTargetDescription(
 		getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16B16A16_Sfloat, "IndirectDiffuse #1");

+ 1 - 0
AnKi/Renderer/IndirectDiffuse.h

@@ -40,6 +40,7 @@ public:
 public:
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_mainProg;
+	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 

+ 208 - 32
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -6,7 +6,13 @@
 #include <AnKi/Renderer/IndirectDiffuseClipmaps.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/GBuffer.h>
+#include <AnKi/Renderer/AccelerationStructureBuilder.h>
+#include <AnKi/Renderer/Sky.h>
+#include <AnKi/Renderer/ShadowMapping.h>
+#include <AnKi/Scene/Components/SkyboxComponent.h>
+#include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/GpuMemory/UnifiedGeometryBuffer.h>
 
 namespace anki {
 
@@ -16,16 +22,51 @@ Error IndirectDiffuseClipmaps::init()
 																Format::kR8G8B8A8_Unorm, "Test");
 	m_tmpRtDesc.bake();
 
-	const Bool supports3Comp = GrManager::getSingleton().getDeviceCapabilities().m_unalignedBbpTextureFormats;
-	TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
-		g_indirectDiffuseClipmap0ProbesPerDimCVar, g_indirectDiffuseClipmap0ProbesPerDimCVar,
-		(supports3Comp) ? Format::kR16G16B16_Sfloat : Format::kR16G16B16A16_Sfloat, TextureUsageBit::kAllShaderResource, "IndirectDiffuseClipmap #1");
-	volumeInit.m_depth = g_indirectDiffuseClipmap0ProbesPerDimCVar;
-	volumeInit.m_type = TextureType::k3D;
-	m_clipmapLevelTextures[0] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
+	m_clipmapInfo[0].m_probeCounts = Vec3(F32(g_indirectDiffuseClipmap0ProbesPerDimCVar));
+	m_clipmapInfo[0].m_size = Vec3(g_indirectDiffuseClipmap0SizeCVar);
 
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", {}, m_tmpProg, m_tmpGrProg, "Test"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", {}, m_tmpProg, m_tmpGrProg2, "InitTex"));
+	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+	{
+		for(U32 i = 0; i < 3; ++i)
+		{
+			TextureInitInfo volumeInit = getRenderer().create2DRenderTargetInitInfo(
+				g_indirectDiffuseClipmap0ProbesPerDimCVar, g_indirectDiffuseClipmap0ProbesPerDimCVar, Format::kR16G16B16A16_Sfloat,
+				TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap #%u comp #%u", clipmap, i));
+			volumeInit.m_depth = g_indirectDiffuseClipmap0ProbesPerDimCVar;
+			volumeInit.m_type = TextureType::k3D;
+
+			m_clipmapVolumes[clipmap].m_perColorComponent[i] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
+		}
+	}
+
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", {}, m_prog, m_tmpVisGrProg, "Test"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
+
+	{
+		ShaderProgramResourcePtr tmpProg;
+		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", tmpProg));
+		ANKI_ASSERT(tmpProg == m_prog);
+
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
+		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
+		const ShaderProgramResourceVariant* variant;
+		m_prog->getOrCreateVariant(variantInitInfo, variant);
+		m_libraryGrProg.reset(&variant->getProgram());
+		m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
+	}
+
+	{
+		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtMaterialFetchMiss.ankiprogbin", m_missProg));
+
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_missProg);
+		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kMiss, "RtMaterialFetch");
+		const ShaderProgramResourceVariant* variant;
+		m_missProg->getOrCreateVariant(variantInitInfo, variant);
+		m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
+	}
+
+	m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
+										GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
 
 	return Error::kNone;
 }
@@ -36,58 +77,193 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
-	RenderTargetHandle volumeRt;
-	if(!m_clipmapsImportedOnce)
-	{
-		volumeRt = rgraph.importRenderTarget(m_clipmapLevelTextures[0].get(), TextureUsageBit::kSrvCompute);
-	}
-	else
+	Array2d<RenderTargetHandle, kIndirectDiffuseClipmapCount, 3> volumeRts;
+	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
-		m_clipmapsImportedOnce = true;
-		volumeRt = rgraph.importRenderTarget(m_clipmapLevelTextures[0].get());
+		for(U32 c = 0; c < 3; ++c)
+		{
+			if(!m_clipmapsImportedOnce)
+			{
+				volumeRts[clipmap][c] =
+					rgraph.importRenderTarget(m_clipmapVolumes[clipmap].m_perColorComponent[c].get(), TextureUsageBit::kSrvCompute);
+			}
+			else
+			{
+				volumeRts[clipmap][c] = rgraph.importRenderTarget(m_clipmapVolumes[clipmap].m_perColorComponent[c].get());
+			}
+		}
 	}
+	m_clipmapsImportedOnce = true;
 
 	m_runCtx.m_tmpRt = rgraph.newRenderTarget(m_tmpRtDesc);
 
+	// SBT build
+	BufferHandle sbtHandle;
+	BufferView sbtBuffer;
 	{
-		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps clear");
+		BufferHandle visibilityDep;
+		BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
 
-		pass.newTextureDependency(volumeRt, TextureUsageBit::kUavCompute);
+		// Allocate SBT
+		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
+							   ? sizeof(U32)
+							   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
+		sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
+		U8* sbtMem;
+		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocate(
+			(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtAlignment, sbtMem);
+		sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kNone);
 
-		pass.setWork([this, volumeRt, &ctx](RenderPassWorkContext& rgraphCtx) {
+		// Write the first 2 entries of the SBT
+		ConstWeakArray<U8> shaderGroupHandles = m_libraryGrProg->getShaderGroupHandles();
+		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+
+		// Create the pass
+		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections build SBT");
+
+		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
+
+		rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(ReflectionsSbtBuild);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_tmpGrProg2.get());
+			cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
+
+			cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
+			cmdb.bindSrv(1, 0, visibleRenderableIndicesBuff);
+			cmdb.bindSrv(2, 0, BufferView(&m_libraryGrProg->getShaderGroupHandlesGpuBuffer()));
 
-			const Vec4 consts(g_indirectDiffuseClipmap0SizeCVar);
+			cmdb.bindUav(0, 0, sbtBuffer);
+
+			RtShadowsSbtBuildConstants consts = {};
+			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
+			consts.m_sbtRecordDwordSize = m_sbtRecordSize / 4;
+			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+			ANKI_ASSERT(shaderHandleSize % 4 == 0);
+			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
-			rgraphCtx.bindUav(0, 0, volumeRt);
+			cmdb.dispatchComputeIndirect(buildSbtIndirectArgsBuff);
+		});
+	}
 
-			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
+	// Do ray tracing around the probes
+	{
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps");
+
+		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+		{
+			for(U32 c = 0; c < 3; ++c)
+			{
+				pass.newTextureDependency(volumeRts[clipmap][c], TextureUsageBit::kUavCompute);
+			}
+		}
+		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+		if(getRenderer().getGeneratedSky().isEnabled())
+		{
+			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+		}
+		pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
+												AccelerationStructureUsageBit::kTraceRaysSrv);
+
+		pass.setWork([this, volumeRts, &ctx, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_libraryGrProg.get());
+
+			// More globals
+			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-			const U32 probeCountPerDim = m_clipmapLevelTextures[0]->getWidth();
-			dispatchPPCompute(cmdb, 4, 4, 4, probeCountPerDim, probeCountPerDim, probeCountPerDim);
+#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
+	cmdb.bindSrv( \
+		reg, 0, \
+		BufferView(&UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, \
+				   getAlignedRoundDown(getFormatInfo(Format::k##fmt).m_texelSize, UnifiedGeometryBuffer::getSingleton().getBuffer().getSize())), \
+		Format::k##fmt);
+#include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
+
+			cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
+
+			rgraphCtx.bindSrv(0, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+			cmdb.bindSrv(1, 2, TextureView(&getRenderer().getDummyTexture2d(), TextureSubresourceDesc::all()));
+			cmdb.bindSrv(2, 2, TextureView(&getRenderer().getDummyTexture2d(), TextureSubresourceDesc::all()));
+			cmdb.bindSrv(3, 2, TextureView(&getRenderer().getDummyTexture2d(), TextureSubresourceDesc::all()));
+
+			const LightComponent* dirLight = SceneGraph::getSingleton().getDirectionalLight();
+			const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+			const Bool bSkySolidColor =
+				(!sky || sky->getSkyboxType() == SkyboxType::kSolidColor || (!dirLight && sky->getSkyboxType() == SkyboxType::kGenerated));
+			if(bSkySolidColor)
+			{
+				cmdb.bindSrv(4, 2, TextureView(&getRenderer().getDummyTexture2d(), TextureSubresourceDesc::all()));
+			}
+			else if(sky->getSkyboxType() == SkyboxType::kImage2D)
+			{
+				cmdb.bindSrv(4, 2, TextureView(&sky->getImageResource().getTexture(), TextureSubresourceDesc::all()));
+			}
+			else
+			{
+				rgraphCtx.bindSrv(4, 2, getRenderer().getGeneratedSky().getEnvironmentMapRt());
+			}
+
+			cmdb.bindSrv(5, 2, BufferView(&getRenderer().getDummyBuffer(), 0, sizeof(U32)));
+			cmdb.bindSrv(6, 2, BufferView(&getRenderer().getDummyBuffer(), 0, sizeof(U32)));
+			rgraphCtx.bindSrv(7, 2, getRenderer().getShadowMapping().getShadowmapRt());
+
+			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
+			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
+
+			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+			{
+				for(U32 c = 0; c < 3; ++c)
+				{
+					rgraphCtx.bindUav(c, 2, volumeRts[clipmap][c]);
+				}
+
+				const Vec4 consts(F32(U32(g_indirectDiffuseClipmap0SizeCVar) << clipmap));
+				cmdb.setFastConstants(&consts, sizeof(consts));
+
+				const U32 probeCount = m_clipmapVolumes[clipmap].m_perColorComponent[0]->getWidth();
+				cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+							   probeCount, probeCount, probeCount);
+			}
 		});
 	}
 
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps test");
 
-		pass.newTextureDependency(volumeRt, TextureUsageBit::kSrvCompute);
+		const U32 clipmap = 2;
+
+		for(U32 c = 0; c < 3; ++c)
+		{
+			pass.newTextureDependency(volumeRts[clipmap][c], TextureUsageBit::kSrvCompute);
+		}
 		pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(m_runCtx.m_tmpRt, TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, volumeRt, &ctx](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, volumeRts, &ctx](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_tmpGrProg.get());
+			cmdb.bindShaderProgram(m_tmpVisGrProg.get());
 
-			const Vec4 consts(g_indirectDiffuseClipmap0SizeCVar);
+			const Vec4 consts(F32(U32(g_indirectDiffuseClipmap0SizeCVar) << clipmap));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
-			rgraphCtx.bindSrv(0, 0, volumeRt);
-			rgraphCtx.bindSrv(1, 0, getRenderer().getGBuffer().getDepthRt());
+			rgraphCtx.bindSrv(0, 0, volumeRts[clipmap][0]);
+			rgraphCtx.bindSrv(1, 0, volumeRts[clipmap][1]);
+			rgraphCtx.bindSrv(2, 0, volumeRts[clipmap][2]);
+			rgraphCtx.bindSrv(3, 0, getRenderer().getGBuffer().getDepthRt());
+			rgraphCtx.bindSrv(4, 0, getRenderer().getGBuffer().getColorRt(2));
 			rgraphCtx.bindUav(0, 0, m_runCtx.m_tmpRt);
 
 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);

+ 33 - 7
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -15,9 +15,17 @@ namespace anki {
 /// @{
 
 inline BoolCVar g_rtIndirectDiffuseClipmapsCVar("R", "RtIndirectDiffuseClipmaps", false);
-inline NumericCVar<U32> g_indirectDiffuseClipmap0ProbesPerDimCVar("R", "IndirectDiffuseClipmap0ProbesPerDim", 40, 10, 50,
+
+inline NumericCVar<U32> g_indirectDiffuseClipmap0ProbesPerDimCVar("R", "IndirectDiffuseClipmap0ProbesPerDim", 20, 10, 50,
 																  "The cell count of each dimension of 1st clipmap");
-inline NumericCVar<F32> g_indirectDiffuseClipmap0SizeCVar("R", "IndirectDiffuseClipmap0Size", 20.0, 10.0, 100.0, "The clipmap size in meters");
+inline NumericCVar<U32> g_indirectDiffuseClipmap1ProbesPerDimCVar("R", "IndirectDiffuseClipmap1ProbesPerDim", 20, 10, 50,
+																  "The cell count of each dimension of 2nd clipmap");
+inline NumericCVar<U32> g_indirectDiffuseClipmap2ProbesPerDimCVar("R", "IndirectDiffuseClipmap2ProbesPerDim", 20, 10, 50,
+																  "The cell count of each dimension of 3rd clipmap");
+
+inline NumericCVar<F32> g_indirectDiffuseClipmap0SizeCVar("R", "IndirectDiffuseClipmap0Size", 20.0, 10.0, 1000.0, "The clipmap size in meters");
+inline NumericCVar<F32> g_indirectDiffuseClipmap1SizeCVar("R", "IndirectDiffuseClipmap1Size", 40.0, 10.0, 1000.0, "The clipmap size in meters");
+inline NumericCVar<F32> g_indirectDiffuseClipmap2SizeCVar("R", "IndirectDiffuseClipmap2Size", 80.0, 10.0, 1000.0, "The clipmap size in meters");
 
 /// Ambient global illumination passes.
 class IndirectDiffuseClipmaps : public RendererObject
@@ -38,17 +46,35 @@ public:
 		handles[0] = m_runCtx.m_tmpRt;
 	}
 
+	const Array<IndirectDiffuseClipmap, kIndirectDiffuseClipmapCount>& getClipmapsInfo() const
+	{
+		return m_clipmapInfo;
+	}
+
 private:
-	static constexpr U32 kClipmapLevelCount = 3;
+	class ClipmapVolumes
+	{
+	public:
+		Array<TexturePtr, 3> m_perColorComponent;
+	};
 
-	Array<TexturePtr, kClipmapLevelCount> m_clipmapLevelTextures;
+	Array<ClipmapVolumes, kIndirectDiffuseClipmapCount> m_clipmapVolumes;
 
-	ShaderProgramResourcePtr m_tmpProg;
-	ShaderProgramPtr m_tmpGrProg;
-	ShaderProgramPtr m_tmpGrProg2;
+	Array<IndirectDiffuseClipmap, kIndirectDiffuseClipmapCount> m_clipmapInfo;
+
+	ShaderProgramResourcePtr m_prog;
+	ShaderProgramResourcePtr m_missProg;
+	ShaderProgramResourcePtr m_sbtProg;
+	ShaderProgramPtr m_libraryGrProg;
+	ShaderProgramPtr m_tmpVisGrProg;
+	ShaderProgramPtr m_sbtBuildGrProg;
 
 	RenderTargetDesc m_tmpRtDesc;
 
+	U32 m_sbtRecordSize = 0;
+	U32 m_rayGenShaderGroupIdx = kMaxU32;
+	U32 m_missShaderGroupIdx = kMaxU32;
+
 	Bool m_clipmapsImportedOnce = false;
 
 	class

+ 5 - 3
AnKi/Renderer/Reflections.cpp

@@ -48,10 +48,11 @@ Error Reflections::init()
 		m_libraryGrProg.reset(&variant->getProgram());
 		m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
 
-		ShaderProgramResourceVariantInitInfo variantInitInfo2(m_mainProg);
+		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtMaterialFetchMiss.ankiprogbin", m_missProg));
+
+		ShaderProgramResourceVariantInitInfo variantInitInfo2(m_missProg);
 		variantInitInfo2.requestTechniqueAndTypes(ShaderTypeBit::kMiss, "RtMaterialFetch");
-		variantInitInfo2.addMutation("SSR_SAMPLE_GBUFFER", bSsrSamplesGBuffer);
-		m_mainProg->getOrCreateVariant(variantInitInfo2, variant);
+		m_missProg->getOrCreateVariant(variantInitInfo2, variant);
 		m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
 
 		m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
@@ -382,6 +383,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
 			rgraphCtx.bindUav(0, 2, transientRt1);
 			rgraphCtx.bindUav(1, 2, hitPosAndDepthRt);
+			cmdb.bindUav(2, 2, TextureView(&getRenderer().getDummyTexture2d(), TextureSubresourceDesc::firstSurface()));
 
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());

+ 1 - 0
AnKi/Renderer/Reflections.h

@@ -49,6 +49,7 @@ public:
 public:
 	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_mainProg;
+	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramPtr m_ssrGrProg;
 	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;

+ 14 - 1
AnKi/Renderer/Renderer.cpp

@@ -336,10 +336,13 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	return Error::kNone;
 }
 
-void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendererConstants& consts)
+void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendererConstants& outConsts)
 {
 	ANKI_TRACE_SCOPED_EVENT(RWriteGlobalRendererConstants);
 
+	GlobalRendererConstants consts;
+	memset(&consts, 0, sizeof(consts));
+
 	consts.m_renderingSize = Vec2(F32(m_internalResolution.x()), F32(m_internalResolution.y()));
 
 	consts.m_time = F32(HighRezTimer::getCurrentTime());
@@ -410,6 +413,16 @@ void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendere
 	{
 		consts.m_sky.m_type = 2;
 	}
+
+	if(m_indirectDiffuseClipmaps)
+	{
+		for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
+		{
+			consts.m_indirectDiffuseClipmaps[i] = m_indirectDiffuseClipmaps->getClipmapsInfo()[i];
+		}
+	}
+
+	outConsts = consts;
 }
 
 TextureInitInfo Renderer::create2DRenderTargetInitInfo(U32 w, U32 h, Format format, TextureUsageBit usage, CString name)

+ 2 - 1
AnKi/Renderer/RendererObject.def.h

@@ -41,7 +41,8 @@ ANKI_RENDERER_OBJECT_DEF(MotionBlur, motionBlur, 1)
 ANKI_RENDERER_OBJECT_DEF(RtMaterialFetchDbg, rtMaterialFetchDbg,
 						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled&& g_rtMaterialFetchDbgCVar)
 ANKI_RENDERER_OBJECT_DEF(Reflections, reflections, 1)
-ANKI_RENDERER_OBJECT_DEF(IndirectDiffuse, indirectDiffuse, 1)
+ANKI_RENDERER_OBJECT_DEF(IndirectDiffuse, indirectDiffuse,
+						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled&& g_rtIndirectDiffuseCVar)
 
 // Util objects
 ANKI_RENDERER_OBJECT_DEF(RenderableDrawer, drawer, 1)

+ 2 - 2
AnKi/Resource/MaterialResource.cpp

@@ -547,7 +547,7 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 
 	// Check if it's initialized
 	{
-		RLockGuard<RWMutex> lock(m_variantMatrixMtx);
+		RLockGuard lock(m_variantMatrixMtx);
 		if(variant.m_prog.isCreated()) [[likely]]
 		{
 			return variant;
@@ -555,7 +555,7 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 	}
 
 	// Not initialized, init it
-	WLockGuard<RWMutex> lock(m_variantMatrixMtx);
+	WLockGuard lock(m_variantMatrixMtx);
 
 	// Check again
 	if(variant.m_prog.isCreated())

+ 2 - 2
AnKi/Resource/ShaderProgramResource.cpp

@@ -96,7 +96,7 @@ void ShaderProgramResource::getOrCreateVariant(const ShaderProgramResourceVarian
 
 	// Check if the variant is in the cache
 	{
-		RLockGuard<RWMutex> lock(m_mtx);
+		RLockGuard lock(m_mtx);
 
 		auto it = m_variants.find(hash);
 		if(it != m_variants.getEnd())
@@ -112,7 +112,7 @@ void ShaderProgramResource::getOrCreateVariant(const ShaderProgramResourceVarian
 	}
 
 	// Create the variant
-	WLockGuard<RWMutex> lock(m_mtx);
+	WLockGuard lock(m_mtx);
 
 	// Check again
 	auto it = m_variants.find(hash);

+ 9 - 0
AnKi/Shaders/ImportanceSampling.hlsl

@@ -25,6 +25,9 @@ Vec2 hammersley2d(U32 i, U32 N)
 
 /// Stolen from Unreal
 /// Returns three elements with 16 random bits each (0-0xffff)
+///
+/// Use it like this:
+/// UVec3 seed = rand3DPCG16(UVec3(coord, frame % 8u));
 UVec3 rand3DPCG16(UVec3 v)
 {
 	v = v * 1664525u + 1013904223u;
@@ -41,6 +44,9 @@ UVec3 rand3DPCG16(UVec3 v)
 
 /// Stolen from Unreal
 /// It will return a uniform 2D point inside [0.0, 1.0]. For random use rand3DPCG16()
+///
+/// Use it like this:
+/// Vec2 randFactors = hammersleyRandom16(sample, sampleCount, rand3DPCG16(...));
 Vec2 hammersleyRandom16(U32 sampleIdx, U32 sampleCount, UVec2 random)
 {
 	const F32 e1 = frac(F32(sampleIdx) / F32(sampleCount) + F32(random.x) * (1.0 / 65536.0));
@@ -50,6 +56,9 @@ Vec2 hammersleyRandom16(U32 sampleIdx, U32 sampleCount, UVec2 random)
 
 /// http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
 /// From a uniform 2D point inside a circle get a 3D point on the surface of a hemisphere. It's oriented towards +Z. uv is in [0, 1]
+///
+/// Use it like this:
+/// Vec3 dir = hemisphereSampleUniform(hammersleyRandom16(...));
 Vec3 hemisphereSampleUniform(Vec2 uv)
 {
 	const F32 phi = uv.y * 2.0 * kPi;

+ 1 - 0
AnKi/Shaders/Include/Common.h

@@ -419,6 +419,7 @@ ANKI_BEGIN_NAMESPACE
 
 constexpr U32 kMaxLodCount = 3u;
 constexpr U32 kMaxShadowCascades = 4u;
+constexpr U32 kIndirectDiffuseClipmapCount = 3u;
 
 constexpr F32 kShadowsPolygonOffsetFactor = 1.25f;
 constexpr F32 kShadowsPolygonOffsetUnits = 2.75f;

+ 11 - 0
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -70,6 +70,15 @@ struct Sky
 	U32 m_type;
 };
 
+struct IndirectDiffuseClipmap
+{
+	Vec3 m_probeCounts;
+	F32 m_padding1;
+
+	Vec3 m_size;
+	F32 m_padding2;
+};
+
 /// Common constants for all passes.
 struct GlobalRendererConstants
 {
@@ -100,6 +109,8 @@ struct GlobalRendererConstants
 	CommonMatrices m_previousMatrices;
 
 	Sky m_sky;
+
+	IndirectDiffuseClipmap m_indirectDiffuseClipmaps[kIndirectDiffuseClipmapCount];
 };
 
 // RT shadows

+ 1 - 20
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -5,7 +5,7 @@
 
 #pragma anki 16bit
 
-#pragma anki technique RtMaterialFetch rgen miss
+#pragma anki technique RtMaterialFetch rgen
 
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
@@ -48,11 +48,6 @@ Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProb
 	}
 }
 
-// ===========================================================================
-// RayGen                                                                    =
-// ===========================================================================
-#if ANKI_RAY_GEN_SHADER
-
 Vec3 lightShading(Vec3 rayOrigin, Vec3 rayDir, Vec3 hitPos, Vec3 hitNormal, Vec3 emission, Vec3 diffuse, Bool isSky)
 {
 	Vec3 color = 0;
@@ -186,17 +181,3 @@ Vec3 lightShading(Vec3 rayOrigin, Vec3 rayDir, Vec3 hitPos, Vec3 hitNormal, Vec3
 	g_colorAndPdfTex[coord] = Vec4(outColor, 0.0);
 	// g_colorAndPdfTex[coord] = Vec4(lerp(outColor, g_colorAndPdfTex[coord].xyz, 0.98), 0.0);
 }
-#endif // ANKI_RAY_GEN_SHADER
-
-// ===========================================================================
-// Miss                                                                      =
-// ===========================================================================
-#if ANKI_MISS_SHADER
-	[Shader("miss")] void main(inout RtMaterialFetchRayPayload payload)
-{
-	payload.m_diffuseColor = 0.0;
-	payload.m_worldNormal = 0.0;
-	payload.m_emission = 0.0;
-	payload.m_rayT = -1.0;
-}
-#endif // ANKI_MISS_SHADER

+ 112 - 30
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -5,13 +5,19 @@
 
 #pragma anki 16bit
 
-#pragma anki technique InitTex comp
+#pragma anki technique RtMaterialFetch rgen
 #pragma anki technique Test comp
 
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/SH.hlsl>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
+#include <AnKi/Shaders/ImportanceSampling.hlsl>
+#include <AnKi/Shaders/PackFunctions.hlsl>
+#include <AnKi/Shaders/SH.hlsl>
+
+#define CLIPMAP_VOLUME 1
+#include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 struct Clipmap
 {
@@ -23,69 +29,133 @@ struct Clipmap
 
 ANKI_FAST_CONSTANTS(Clipmap, g_clipmap)
 
-Vec3 worldPosToVolumeUvw(Vec3 worldPos, F32 clipmapSize)
+// Maps a position to volume UVW coordinates by taking the fractional part of worldPos / clipmapSize, per component. Because of the frac() the
+// result repeats every clipmapSize units along each axis.
+Vec3 worldPosToVolumeUvw(Vec3 worldPos, Vec3 clipmapSize)
 {
 	const Vec3 uvw = frac(worldPos / clipmapSize);
 	return uvw;
 }
 
-UVec3 worldPosToVolumeTexel(Vec3 worldPos, F32 clipmapSize, F32 probeCountPerDim)
+// Same mapping as worldPosToVolumeUvw() but scaled by probeCountPerDim. The Vec3 product is implicitly converted to UVec3 by the return.
+UVec3 worldPosToVolumeTexel(Vec3 worldPos, Vec3 clipmapSize, F32 probeCountPerDim)
 {
 	const Vec3 uvw = worldPosToVolumeUvw(worldPos, clipmapSize);
 	return uvw * probeCountPerDim;
 }
 
-void computeClipmapBounds(Vec3 cameraPos, F32 clipmapSize, out Vec3 aabbMin, out Vec3 aabbMax)
+// Outputs the AABB of a box of extent clipmapSize centered on round(cameraPos). The center is the camera position rounded to whole units, so
+// the box only moves in whole-unit steps.
+void computeClipmapBounds(Vec3 cameraPos, Vec3 clipmapSize, out Vec3 aabbMin, out Vec3 aabbMax)
 {
 	aabbMin = round(cameraPos) - clipmapSize * 0.5;
 	aabbMax = round(cameraPos) + clipmapSize * 0.5;
 }
 
-#if NOT_ZERO(ANKI_TECHNIQUE_InitTex)
-RWTexture3D<Vec4> g_volume : register(u0);
+// Returns the SH stored for worldPos. Clipmaps 0, 1 and 2 are tested in that order and the first one whose camera-centered AABB strictly contains
+// worldPos is sampled. volumes[3 * n + 0/1/2] are passed to loadSH() as the red/green/blue volumes of clipmap n. If no clipmap contains worldPos
+// the result is an all-zero SH.
+// NOTE(review): the three levels and the volumes[] indices are hard-coded although the array is sized 3 * kIndirectDiffuseClipmapCount. Confirm
+// that kIndirectDiffuseClipmapCount is 3 or turn this into a loop.
+SHL1<F16> readClipmap(GlobalRendererConstants consts, Texture3D<Vec4> volumes[3 * kIndirectDiffuseClipmapCount], SamplerState linearAnyRepeatSampler,
+					  Vec3 worldPos)
+{
+	Vec3 clipmapAabbMin, clipmapAabbMax;
+	computeClipmapBounds(consts.m_cameraPosition, consts.m_indirectDiffuseClipmaps[0].m_size, clipmapAabbMin, clipmapAabbMax);
+	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
+	{
+		const Vec3 uvw = worldPosToVolumeUvw(worldPos, consts.m_indirectDiffuseClipmaps[0].m_size);
+		return loadSH<F16>(volumes[0], volumes[1], volumes[2], linearAnyRepeatSampler, uvw);
+	}
 
-ConstantBuffer<GlobalRendererConstants> g_globalConstants : register(b0);
+	computeClipmapBounds(consts.m_cameraPosition, consts.m_indirectDiffuseClipmaps[1].m_size, clipmapAabbMin, clipmapAabbMax);
+	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
+	{
+		const Vec3 uvw = worldPosToVolumeUvw(worldPos, consts.m_indirectDiffuseClipmaps[1].m_size);
+		return loadSH<F16>(volumes[3], volumes[4], volumes[5], linearAnyRepeatSampler, uvw);
+	}
 
-[NumThreads(4, 4, 4)] void main(UVec3 svDispatchThreadId : SV_DispatchThreadID)
+	computeClipmapBounds(consts.m_cameraPosition, consts.m_indirectDiffuseClipmaps[2].m_size, clipmapAabbMin, clipmapAabbMax);
+	if(all(worldPos > clipmapAabbMin) && all(worldPos < clipmapAabbMax))
+	{
+		const Vec3 uvw = worldPosToVolumeUvw(worldPos, consts.m_indirectDiffuseClipmaps[2].m_size);
+		return loadSH<F16>(volumes[6], volumes[7], volumes[8], linearAnyRepeatSampler, uvw);
+	}
+
+	SHL1<F16> sh = (SHL1<F16>)0;
+	return sh;
+}
+
+// ===========================================================================
+// RayGen                                                                    =
+// ===========================================================================
+#if ANKI_RAY_GEN_SHADER
+
+// Ray generation entry point. Each invocation handles the probe addressed by DispatchRaysIndex().xyz: it traces slices * slices / 2 rays from the
+// probe center, accumulates the shaded result of every ray into an L1 SH, blends that SH with the stored one when the probe center was also inside
+// the previous frame's clipmap bounds and writes the SH to the red/green/blue clipmap volumes.
+[Shader("raygeneration")] void main()
 {
 	F32 probeCountPerDim, unused0, unused1;
-	g_volume.GetDimensions(probeCountPerDim, unused0, unused1);
+	g_clipmapRedVolume.GetDimensions(probeCountPerDim, unused0, unused1);
 
+	// Compute clipmap bounds
 	Vec3 clipmapAabbMin, clipmapAabbMax;
-	computeClipmapBounds(g_globalConstants.m_cameraPosition, g_clipmap.m_size, clipmapAabbMin, clipmapAabbMax);
+	computeClipmapBounds(g_globalRendererConstants.m_cameraPosition, g_clipmap.m_size, clipmapAabbMin, clipmapAabbMax);
 
-	const Vec3 prevCameraPos = g_globalConstants.m_previousMatrices.m_cameraTransform.getTranslationPart();
+	const Vec3 prevCameraPos = g_globalRendererConstants.m_previousMatrices.m_cameraTransform.getTranslationPart();
 	Vec3 prevClipmapAabbMin, prevClipmapAabbMax;
 	computeClipmapBounds(prevCameraPos, g_clipmap.m_size, prevClipmapAabbMin, prevClipmapAabbMax);
 
+	// Compute probe info
 	const F32 probeSize = g_clipmap.m_size / probeCountPerDim;
-	const Vec3 cellWorldPos = svDispatchThreadId * probeSize + probeSize * 0.5 + clipmapAabbMin;
+	const Vec3 cellWorldPos = DispatchRaysIndex().xyz * probeSize + probeSize * 0.5 + clipmapAabbMin;
 
 	const UVec3 probeTexelCoord = worldPosToVolumeTexel(cellWorldPos, g_clipmap.m_size, probeCountPerDim);
+	ANKI_ASSERT(all(probeTexelCoord < probeCountPerDim));
 
-	if(any(probeTexelCoord >= probeCountPerDim))
-	{
-		return;
-	}
+	// Integrate to build the SH
+	SHL1<F16> sh = (SHL1<F16>)0;
+	const U32 slices = 8u;
+	const U32 rayCount = slices * slices / 2u;
+
+	// NOTE(review): the seed only uses DispatchRaysIndex().xy, so probes that differ only in z get the same seed in a given frame. Confirm this is
+	// intended.
+	const UVec3 seed = rand3DPCG16(UVec3(DispatchRaysIndex().xy, g_globalRendererConstants.m_frame % 32u));
+	const Vec2 randFactors = hammersleyRandom16(0, 1, seed);
+
+	const F32 angle = 2.0 * kPi / slices;
 
-	Vec3 color;
-	Vec3 prev = g_volume[probeTexelCoord].xyz;
-	if(any(cellWorldPos < prevClipmapAabbMin) || any(cellWorldPos > prevClipmapAabbMax))
+	for(U32 i = 0; i < slices; ++i)
 	{
-		color = abs(frac(g_globalConstants.m_frame * cellWorldPos)) + 0.1;
+		for(U32 j = 0; j < slices / 2u; ++j)
+		{
+			// One ray per (i, j) cell. randFactors is computed once above, so every cell gets the same jitter
+			// NOTE(review): phi and theta advance in equal angular steps while appendSH() weights every sample with the uniform sphere PDF
+			// (1 / (4 * pi)). Equal steps in theta put more rays near the poles than a uniform distribution would; confirm whether a sin(theta)
+			// weight is missing.
+			const F32 phi = angle * i + angle * randFactors.x;
+			const F32 theta = angle * j + angle * randFactors.y;
+
+			HVec3 dir;
+			dir.x = sin(theta) * cos(phi);
+			dir.y = sin(theta) * sin(phi);
+			dir.z = cos(theta);
+
+			const F32 tMax = 1000.0; // TODO
+
+			GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
+			F32 rayT = 0.0;
+			const Bool hit = materialRayTrace<F16>(cellWorldPos, dir, 0.01, tMax, 1000.0, gbuffer, rayT);
+
+			const Vec3 hitPos = cellWorldPos + dir * rayT;
+			const HVec3 radiance = directLighting(gbuffer, hitPos, !hit, true, tMax);
+
+			sh = appendSH(sh, dir, radiance, rayCount);
+		}
 	}
-	else
+
+	// Store the SH
+	const Bool blendWithHistory = all(cellWorldPos > prevClipmapAabbMin) && all(cellWorldPos < prevClipmapAabbMax);
+	if(blendWithHistory)
 	{
-		color = prev;
+		// With a factor of 0.01 the result is 99% history and 1% of this frame's SH
+		const SHL1<F16> historySH = loadSH<F16>(g_clipmapRedVolume, g_clipmapGreenVolume, g_clipmapBlueVolume, probeTexelCoord);
+		sh = lerpSH<F16>(historySH, sh, 0.01);
 	}
 
-	g_volume[probeTexelCoord] = Vec4(color, 1.0);
+	storeSH(sh, g_clipmapRedVolume, g_clipmapGreenVolume, g_clipmapBlueVolume, probeTexelCoord);
 }
-#endif
+#endif // ANKI_RAY_GEN_SHADER
 
+// ===========================================================================
+// Test                                                                      =
+// ===========================================================================
 #if NOT_ZERO(ANKI_TECHNIQUE_Test)
-Texture3D<Vec4> g_volume : register(t0);
-Texture2D<Vec4> g_depthTex : register(t1);
+Texture2D<Vec4> g_depthTex : register(t0);
+Texture2D<Vec4> g_gbufferRt2 : register(t1);
+
+Texture3D<Vec4> g_clipmapVolumes[3 * kIndirectDiffuseClipmapCount] : register(t2);
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 
@@ -103,20 +173,32 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 		return;
 	}
 
+	const Vec3 normal = unpackNormalFromGBuffer(g_gbufferRt2[svDispatchThreadId]);
+
 	const F32 depth = g_depthTex[svDispatchThreadId].r;
 	const Vec2 ndc = uvToNdc(Vec2(svDispatchThreadId) / Vec2(viewportSize));
 	const Vec4 worldPos4 = mul(g_globalConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
-	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+	Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+
+	worldPos += normal * 0.5;
+
+	const Vec3 uvw = worldPosToVolumeUvw(worldPos, g_clipmap.m_size);
+
+	const SHL1<F32> sh = loadSH<F32>(g_clipmapVolumes[0], g_clipmapVolumes[1], g_clipmapVolumes[2], g_linearAnyRepeatSampler, uvw);
+	// const SHL1<F32> sh = loadSH<F32>(g_clipmapRedVolume, g_clipmapGreenVolume, g_clipmapBlueVolume, uvw * 20);
+
+	const Vec3 color = evaluateSH(sh, normal);
 
-	Vec3 uvw = worldPosToVolumeUvw(worldPos, g_clipmap.m_size);
 	// const Vec3 color = g_volume.SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
-	const Vec3 color = g_volume[uvw * 40.0];
+	// const Vec3 color = g_volume[uvw * 40.0];
 
 	Vec3 clipMin, clipMax;
 	computeClipmapBounds(g_globalConstants.m_cameraPosition, g_clipmap.m_size, clipMin, clipMax);
+	clipMin += 2;
+	clipMax -= 2;
 
 	if(any(worldPos > clipMax) || any(worldPos < clipMin))
-		g_outTex[svDispatchThreadId] = 0.0;
+		g_outTex[svDispatchThreadId] = 0.1;
 	else
 		g_outTex[svDispatchThreadId] = Vec4(color, 1.0);
 }

+ 1 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -46,6 +46,7 @@ using I32 = int;
 using U8 = unsigned int;
 using U16 = unsigned int;
 using U32 = unsigned int;
+using F16 = float;
 using F32 = float;
 using Bool = bool;
 

+ 16 - 95
AnKi/Shaders/Reflections.ankiprog

@@ -10,7 +10,7 @@
 #pragma anki technique Classification comp mutators
 #pragma anki technique Ssr comp
 #pragma anki technique ReflectionProbeFallback comp mutators
-#pragma anki technique RtMaterialFetch rgen miss mutators
+#pragma anki technique RtMaterialFetch rgen mutators
 #pragma anki technique SpatialDenoise comp mutators
 #pragma anki technique TemporalDenoise comp mutators
 #pragma anki technique BilateralDenoiseVertical comp mutators
@@ -54,8 +54,9 @@ enum
 };
 
 // Functions
-Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProbes, Vec3 worldPos, Vec3 worldNormal,
-						SamplerState linearAnyClampSampler)
+template<typename T>
+vector<T, 3> getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProbes, Vec3 worldPos, Vec3 worldNormal,
+								SamplerState linearAnyClampSampler)
 {
 	const U32 probeCount = getStructuredBufferElementCount(giProbes);
 	U32 i;
@@ -75,11 +76,11 @@ Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProb
 	if(probeFound)
 	{
 		const GpuSceneGlobalIlluminationProbe probe = giProbes[i];
-		return sampleGlobalIllumination<F32>(worldPos, worldNormal, probe, getBindlessTexture3DVec4(probe.m_volumeTexture), linearAnyClampSampler);
+		return sampleGlobalIllumination<T>(worldPos, worldNormal, probe, getBindlessTexture3DVec4(probe.m_volumeTexture), linearAnyClampSampler);
 	}
 	else
 	{
-		return 0.0;
+		return T(0);
 	}
 }
 
@@ -675,92 +676,25 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	const F32 pixelDistFromCamera = length(worldPos - g_globalRendererConstants.m_cameraPosition);
 	const F32 distFactor = pow(pixelDistFromCamera / distanceToMaxMip, 4.0);
 	const F32 maxMips = 8.0;
-	const F32 textureLod = max(roughness, distFactor) * maxMips;
+	const F16 textureLod = max(roughness, distFactor) * maxMips;
 
 	// Trace
-	RtMaterialFetchRayPayload payload;
-	payload = (RtMaterialFetchRayPayload)0;
-	payload.m_textureLod = textureLod;
-
-	constexpr U32 flags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
-	const U32 sbtRecordOffset = 0u;
-	const U32 sbtRecordStride = 0u;
-	const U32 missIndex = 0u;
-	const U32 cullMask = 0xFFu;
-	RayDesc ray;
-	ray.Origin = worldPos;
-	ray.TMin = max(tmin + kTMinBias, 0.05);
-	ray.Direction = reflDir;
-	ray.TMax = g_consts.m_maxRayT;
-	TraceRay(g_tlas, flags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
-
-	const Bool hasHitSky = payload.m_rayT < 0.0;
-	if(hasHitSky)
-	{
-		payload.m_rayT = g_consts.m_maxRayT;
-	}
-
-	const Vec3 hitPos = worldPos + reflDir * payload.m_rayT;
-
-	// Trace shadow
-	Vec4 vv4 = mul(g_globalRendererConstants.m_matrices.m_viewProjection, Vec4(hitPos, 1.0));
-	vv4.xy /= vv4.w;
-	const Bool bInsideFrustum = all(vv4.xy > -1.0) && all(vv4.xy < 1.0) && vv4.w > 0.0;
-
-	F32 shadow = 0.0;
-	if(hasHitSky)
-	{
-		// Skybox
-		shadow = 1.0;
-
-		if(g_globalRendererConstants.m_sky.m_type == 0)
-		{
-			payload.m_emission = g_globalRendererConstants.m_sky.m_solidColor;
-		}
-		else
-		{
-			const Vec2 uv = (g_globalRendererConstants.m_sky.m_type == 1) ? equirectangularMapping(reflDir) : octahedronEncode(reflDir);
-			payload.m_emission = g_envMap.SampleLevel(g_linearClampAnySampler, uv, 0.0).xyz;
-		}
-	}
-	else if(bInsideFrustum && kTryShadowmapFirst)
-	{
-		const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
-		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
-
-		const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
+	GBufferLight<F16> gbuffer;
+	F32 rayT;
+	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT);
 
-		shadow = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
-	}
-	else
-	{
-		constexpr U32 qFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH;
-		RayQuery<qFlags> q;
-		RayDesc ray;
-		ray.Origin = worldPos + reflDir * (payload.m_rayT - 0.01);
-		ray.TMin = 0.1;
-		ray.Direction = -dirLight.m_direction;
-		ray.TMax = g_consts.m_maxRayT;
-		q.TraceRayInline(g_tlas, qFlags, cullMask, ray);
-		q.Proceed();
-		shadow = (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) ? 0.0 : 1.0;
-	}
+	const Vec3 hitPos = worldPos + reflDir * rayT;
 
 	// Do simple light shading
-	Vec3 outColor = payload.m_emission;
-	Vec3 indirectDiffuse = 0.0;
+	HVec3 radiance = directLighting(gbuffer, hitPos, hasHitSky, kTryShadowmapFirst, g_consts.m_maxRayT);
+
 	if(!hasHitSky && g_consts.m_giProbeCount > 0)
 	{
-		indirectDiffuse = getDiffuseIndirect(g_giProbes, hitPos, payload.m_worldNormal, g_linearClampAnySampler);
+		const HVec3 indirectDiffuse = getDiffuseIndirect<F16>(g_giProbes, hitPos, gbuffer.m_worldNormal, g_linearClampAnySampler);
+		radiance += gbuffer.m_diffuse * indirectDiffuse;
 	}
-	outColor += payload.m_diffuseColor * indirectDiffuse;
-
-	const Vec3 l = -dirLight.m_direction;
-	const F32 lambert = max(0.0, dot(l, payload.m_worldNormal));
-	const Vec3 diffC = diffuseLobe(payload.m_diffuseColor);
-	outColor += diffC * dirLight.m_diffuseColor * lambert * shadow;
 
-	g_colorAndPdfTex[realCoord] = Vec4(outColor, max(0.0, pdf));
+	g_colorAndPdfTex[realCoord] = Vec4(radiance, max(0.0, pdf));
 
 	// Move it with camera to avoid precision issues since it's stored in fp16
 	// Store depth in reverse for better precision
@@ -768,19 +702,6 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 }
 #endif // ANKI_RAY_GEN_SHADER
 
-// ===========================================================================
-// Miss                                                                      =
-// ===========================================================================
-#if ANKI_MISS_SHADER
-[shader("miss")] void main(inout RtMaterialFetchRayPayload payload)
-{
-	payload.m_diffuseColor = 0.0;
-	payload.m_worldNormal = 0.0;
-	payload.m_emission = 0.0;
-	payload.m_rayT = -1.0;
-}
-#endif // ANKI_MISS_SHADER
-
 // ===========================================================================
 // SpatialDenoise                                                            =
 // ===========================================================================

+ 123 - 3
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -8,8 +8,9 @@
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/LightFunctions.hlsl>
 
-struct [raypayload] RtMaterialFetchRayPayload
+struct [raypayload] RtMaterialFetchRayPayload // TODO make it FP16 when you change the GBufferGeneric.ankiprog
 {
 	Vec3 m_diffuseColor : write(closesthit, miss): read(caller);
 	Vec3 m_worldNormal : write(closesthit, miss): read(caller);
@@ -18,24 +19,143 @@ struct [raypayload] RtMaterialFetchRayPayload
 	F32 m_textureLod : write(caller): read(closesthit);
 };
 
-// Have a common resouce interface for all shaders
+// Have a common resource interface for all shaders. It should be compatible between all ray shaders in DX and VK
 #if ANKI_RAY_GEN_SHADER
 #	define SPACE space2
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0, SPACE);
 
 RaytracingAccelerationStructure g_tlas : register(t0, SPACE);
+// When CLIPMAP_VOLUME is defined, registers t1-t3, t5 and t6 are declared as g_dummy* placeholders instead of the depth/GBuffer textures and the
+// GI probe/failed SSR buffers, and u0-u2 hold the three clipmap volumes instead of the two 2D output textures at u0-u1. The t register numbers are
+// the same in both configurations.
+#	if defined(CLIPMAP_VOLUME)
+Texture2D<Vec4> g_dummyTex1 : register(t1, SPACE);
+Texture2D<Vec4> g_dummyTex2 : register(t2, SPACE);
+Texture2D<Vec4> g_dummyTex3 : register(t3, SPACE);
+#	else
 Texture2D<Vec4> g_depthTex : register(t1, SPACE);
 Texture2D<Vec4> g_gbufferRt1 : register(t2, SPACE);
 Texture2D<Vec4> g_gbufferRt2 : register(t3, SPACE);
+#	endif
 Texture2D<Vec4> g_envMap : register(t4, SPACE);
+#	if defined(CLIPMAP_VOLUME)
+StructuredBuffer<U32> g_dummyBuff1 : register(t5, SPACE);
+StructuredBuffer<U32> g_dummyBuff2 : register(t6, SPACE);
+#	else
 StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t5, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t6, SPACE);
+#	endif
 Texture2D<Vec4> g_shadowAtlasTex : register(t7, SPACE);
 
+#	if defined(CLIPMAP_VOLUME)
+RWTexture3D<Vec4> g_clipmapRedVolume : register(u0, SPACE);
+RWTexture3D<Vec4> g_clipmapGreenVolume : register(u1, SPACE);
+RWTexture3D<Vec4> g_clipmapBlueVolume : register(u2, SPACE);
+#	else
 RWTexture2D<Vec4> g_colorAndPdfTex : register(u0, SPACE);
 RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1, SPACE);
+#	endif
 
 SamplerState g_linearClampAnySampler : register(s0, SPACE);
 SamplerComparisonState g_shadowSampler : register(s1, SPACE);
-#endif
+
+/// The material values that materialRayTrace() outputs and directLighting() consumes.
+/// @tparam T The scalar type of the members.
+template<typename T>
+struct GBufferLight
+{
+	vector<T, 3> m_diffuse; // The payload's m_diffuseColor on a hit, zero on a miss
+	vector<T, 3> m_worldNormal; // The payload's m_worldNormal on a hit, zero on a miss
+	vector<T, 3> m_emission; // The payload's m_emission on a hit, the sky color on a miss
+};
+
+/// Traces one ray through g_tlas with the RtMaterialFetchRayPayload and outputs the material at the closest hit.
+/// @param rayOrigin The origin of the ray.
+/// @param rayDir The direction of the ray. It is also used to look up the sky when nothing is hit.
+/// @param tMin The TMin of the ray.
+/// @param tMax The TMax of the ray.
+/// @param textureLod Written to the payload's m_textureLod before tracing.
+/// @param[out] gbuffer On a hit it holds the payload's diffuse color, world normal and emission. On a miss everything is zero except m_emission
+///                     which holds the sky color (the solid color or a sample of g_envMap).
+/// @param[out] rayT The payload's m_rayT on a hit, tMax on a miss.
+/// @return True if the ray hit geometry, false if it hit the sky.
+template<typename T>
+Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T textureLod, out GBufferLight<T> gbuffer, out F32 rayT)
+{
+	// Start from a zeroed payload so that no member is ever read uninitialized
+	RtMaterialFetchRayPayload payload = (RtMaterialFetchRayPayload)0;
+	payload.m_textureLod = textureLod;
+	const U32 flags = RAY_FLAG_FORCE_OPAQUE;
+	const U32 sbtRecordOffset = 0u;
+	const U32 sbtRecordStride = 0u;
+	const U32 missIndex = 0u;
+	const U32 cullMask = 0xFFu;
+	RayDesc ray;
+	ray.Origin = rayOrigin;
+	ray.TMin = tMin;
+	ray.Direction = rayDir;
+	ray.TMax = tMax;
+	TraceRay(g_tlas, flags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
+
+	// A negative m_rayT marks a miss
+	const Bool hasHitSky = payload.m_rayT < 0.0;
+	if(hasHitSky)
+	{
+		// Report the far end of the ray instead of the negative marker. Otherwise rayOrigin + rayDir * rayT ends up behind the origin
+		rayT = tMax;
+
+		gbuffer = (GBufferLight<T>)0;
+
+		if(g_globalRendererConstants.m_sky.m_type == 0)
+		{
+			gbuffer.m_emission = g_globalRendererConstants.m_sky.m_solidColor;
+		}
+		else
+		{
+			const Vec2 uv = (g_globalRendererConstants.m_sky.m_type == 1) ? equirectangularMapping(rayDir) : octahedronEncode(rayDir);
+			gbuffer.m_emission = g_envMap.SampleLevel(g_linearClampAnySampler, uv, 0.0).xyz;
+		}
+	}
+	else
+	{
+		rayT = payload.m_rayT;
+
+		gbuffer.m_diffuse = payload.m_diffuseColor;
+		gbuffer.m_worldNormal = payload.m_worldNormal;
+		gbuffer.m_emission = payload.m_emission;
+	}
+
+	return !hasHitSky;
+}
+
+/// Shades a traced hit: the emission plus the shadowed diffuse contribution of the global directional light.
+/// @param gbuffer The material at the hit.
+/// @param hitPos The position of the hit. It is the receiver of the shadowmap lookup and the origin of the shadow ray.
+/// @param isSky If true only gbuffer.m_emission is returned.
+/// @param tryShadowmapFirst If true and hitPos projects inside the view frustum the shadow term comes from g_shadowAtlasTex. In every other case a
+///                          shadow ray is traced.
+/// @param shadowTMax The TMax of the shadow ray.
+/// @return The shaded color.
+template<typename T>
+vector<T, 3> directLighting(GBufferLight<T> gbuffer, Vec3 hitPos, Bool isSky, Bool tryShadowmapFirst, F32 shadowTMax)
+{
+	vector<T, 3> color = gbuffer.m_emission;
+
+	if(!isSky)
+	{
+		const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
+
+		// Check if the hit is visible by the camera
+		Vec4 vv4 = mul(g_globalRendererConstants.m_matrices.m_viewProjection, Vec4(hitPos, 1.0));
+		vv4.xy /= vv4.w;
+		const Bool bInsideFrustum = all(vv4.xy > -1.0) && all(vv4.xy < 1.0) && vv4.w > 0.0;
+
+		F32 shadow;
+		if(bInsideFrustum && tryShadowmapFirst)
+		{
+			// Sample the shadow cascades
+			const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
+			const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
+
+			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
+
+			shadow = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
+		}
+		else
+		{
+			// Trace a shadow ray towards the light. Any committed triangle hit means fully in shadow
+			constexpr U32 qFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH;
+			RayQuery<qFlags> q;
+			const U32 cullMask = 0xFFu;
+			RayDesc ray;
+			ray.Origin = hitPos;
+			ray.TMin = 0.01;
+			ray.Direction = -dirLight.m_direction;
+			ray.TMax = shadowTMax;
+			q.TraceRayInline(g_tlas, qFlags, cullMask, ray);
+			q.Proceed();
+			shadow = (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) ? 0.0 : 1.0;
+		}
+
+		// Do simple light shading
+		const vector<T, 3> l = -dirLight.m_direction;
+		const T lambert = max(T(0), dot(l, gbuffer.m_worldNormal));
+		const vector<T, 3> diffC = diffuseLobe(gbuffer.m_diffuse);
+		color += diffC * dirLight.m_diffuseColor * lambert * shadow;
+	}
+
+	return color;
+}
+#endif // ANKI_RAY_GEN_SHADER

+ 137 - 2
AnKi/Shaders/SH.hlsl

@@ -12,6 +12,9 @@
 constexpr U32 kSHL1CoefficientCount = (1 + 1) * (1 + 1);
 constexpr F32 kSHBasisL0 = 1.0 / (2.0 * sqrt(kPi));
 constexpr F32 kSHBasisL1 = sqrt(3.0) / (2.0 * sqrt(kPi));
+constexpr F32 kSHCosineA0 = kPi;
+constexpr F32 kSHCosineA1 = (2.0 * kPi) / 3.0;
+constexpr F32 kSHCosineA2 = (0.25 * kPi);
 
 /// 2 bands, 4 coefficients per color component.
 template<typename T>
@@ -21,7 +24,7 @@ struct SHL1
 };
 
 template<typename T>
-SHL1<T> appendSH(SHL1<T> inputSH, vector<T, 3> direction, vector<T, 3> value, U32 sampleCount)
+SHL1<T> projectOntoL1(vector<T, 3> direction, vector<T, 3> value)
 {
 	SHL1<T> res;
 
@@ -33,7 +36,16 @@ SHL1<T> appendSH(SHL1<T> inputSH, vector<T, 3> direction, vector<T, 3> value, U3
 	res.m_c[2] = T(kSHBasisL1) * direction.z * value;
 	res.m_c[3] = T(kSHBasisL1) * direction.x * value;
 
-	const T weight = T(1) / T(sampleCount);
+	return res;
+}
+
+template<typename T>
+SHL1<T> appendSH(SHL1<T> inputSH, vector<T, 3> direction, vector<T, 3> radiance, U32 sampleCount)
+{
+	const SHL1<T> res = projectOntoL1<T>(direction, radiance);
+	const T spherePDF = T(1) / T(kPi * 4.0);
+	const T weight = T(1) / (T(sampleCount) * spherePDF);
+
 	[unroll] for(U32 i = 0; i < kSHL1CoefficientCount; ++i)
 	{
 		inputSH.m_c[i] += res.m_c[i] * weight;
@@ -41,3 +53,126 @@ SHL1<T> appendSH(SHL1<T> inputSH, vector<T, 3> direction, vector<T, 3> value, U3
 
 	return inputSH;
 }
+
+/// Evaluates the SH for a direction and returns one value per color channel.
+/// NOTE(review): the if(true) hard-selects the first branch, which adds the L0 coefficient to the L1 coefficients scaled by direction.y/z/x. No
+/// kSHBasis* constant is applied in that branch. The else branch (scale by kSHCosineA0/A1, then dot with projectOntoL1(direction, 1)) is never
+/// executed. Confirm which of the two is meant to be the final one.
+template<typename T>
+vector<T, 3> evaluateSH(SHL1<T> sh, vector<T, 3> direction)
+{
+	vector<T, 3> res = T(0);
+
+	if(true)
+	{
+		// L0
+		res += sh.m_c[0];
+
+		// L1
+		res += sh.m_c[1] * direction.y;
+		res += sh.m_c[2] * direction.z;
+		res += sh.m_c[3] * direction.x;
+	}
+	else
+	{
+		// Scale every band by its kSHCosineA* constant
+		SHL1<T> convolved;
+		convolved.m_c[0] = sh.m_c[0] * kSHCosineA0;
+		convolved.m_c[1] = sh.m_c[1] * kSHCosineA1;
+		convolved.m_c[2] = sh.m_c[2] * kSHCosineA1;
+		convolved.m_c[3] = sh.m_c[3] * kSHCosineA1;
+
+		// Project a unit value in the requested direction and sum the per-coefficient products
+		const SHL1<T> projectedDelta = projectOntoL1<T>(direction, T(1));
+
+		[unroll] for(U32 i = 0; i < kSHL1CoefficientCount; ++i)
+		{
+			res += convolved.m_c[i] * projectedDelta.m_c[i];
+		}
+	}
+
+	return res;
+}
+
+/// Linearly interpolates two SHs coefficient by coefficient. A factor of 0 gives a, a factor of 1 gives b.
+template<typename T>
+SHL1<T> lerpSH(SHL1<T> a, SHL1<T> b, T f)
+{
+	SHL1<T> blended;
+
+	[unroll] for(U32 coeff = 0; coeff < kSHL1CoefficientCount; ++coeff)
+	{
+		blended.m_c[coeff] = lerp(a.m_c[coeff], b.m_c[coeff], f);
+	}
+
+	return blended;
+}
+
+/// Samples mip 0 of the three volumes at uvw and assembles the SH. Every volume holds one color channel and the 4 components of a sample are the 4
+/// coefficients of that channel.
+template<typename T>
+SHL1<T> loadSH(Texture3D<Vec4> red, Texture3D<Vec4> green, Texture3D<Vec4> blue, SamplerState sampler, Vec3 uvw)
+{
+	const vector<T, 4> r = red.SampleLevel(sampler, uvw, 0.0);
+	const vector<T, 4> g = green.SampleLevel(sampler, uvw, 0.0);
+	const vector<T, 4> b = blue.SampleLevel(sampler, uvw, 0.0);
+
+	// Transpose: from one Vec4 per channel to one RGB triplet per coefficient
+	SHL1<T> sh;
+	[unroll] for(U32 coeff = 0; coeff < kSHL1CoefficientCount; ++coeff)
+	{
+		sh.m_c[coeff] = vector<T, 3>(r[coeff], g[coeff], b[coeff]);
+	}
+
+	return sh;
+}
+
+/// Reads one texel of the three read-only volumes and assembles the SH. Every volume holds one color channel and the 4 components of a texel are the
+/// 4 coefficients of that channel.
+template<typename T>
+SHL1<T> loadSH(Texture3D<Vec4> red, Texture3D<Vec4> green, Texture3D<Vec4> blue, UVec3 coords)
+{
+	const vector<T, 4> r = red[coords];
+	const vector<T, 4> g = green[coords];
+	const vector<T, 4> b = blue[coords];
+
+	// Transpose: from one Vec4 per channel to one RGB triplet per coefficient
+	SHL1<T> sh;
+	[unroll] for(U32 coeff = 0; coeff < kSHL1CoefficientCount; ++coeff)
+	{
+		sh.m_c[coeff] = vector<T, 3>(r[coeff], g[coeff], b[coeff]);
+	}
+
+	return sh;
+}
+
+/// Reads one texel of the three read-write volumes and assembles the SH. Every volume holds one color channel and the 4 components of a texel are
+/// the 4 coefficients of that channel.
+template<typename T>
+SHL1<T> loadSH(RWTexture3D<Vec4> red, RWTexture3D<Vec4> green, RWTexture3D<Vec4> blue, UVec3 coords)
+{
+	const vector<T, 4> r = red[coords];
+	const vector<T, 4> g = green[coords];
+	const vector<T, 4> b = blue[coords];
+
+	// Transpose: from one Vec4 per channel to one RGB triplet per coefficient
+	SHL1<T> sh;
+	[unroll] for(U32 coeff = 0; coeff < kSHL1CoefficientCount; ++coeff)
+	{
+		sh.m_c[coeff] = vector<T, 3>(r[coeff], g[coeff], b[coeff]);
+	}
+
+	return sh;
+}
+
+/// Writes the SH to one texel of the three volumes. Every volume receives one color channel and the 4 components of the texel are the 4
+/// coefficients of that channel.
+template<typename T>
+void storeSH(SHL1<T> sh, RWTexture3D<Vec4> red, RWTexture3D<Vec4> green, RWTexture3D<Vec4> blue, UVec3 coords)
+{
+	vector<T, 4> r;
+	vector<T, 4> g;
+	vector<T, 4> b;
+
+	// Transpose: from one RGB triplet per coefficient to one Vec4 per channel
+	[unroll] for(U32 coeff = 0; coeff < kSHL1CoefficientCount; ++coeff)
+	{
+		r[coeff] = sh.m_c[coeff][0];
+		g[coeff] = sh.m_c[coeff][1];
+		b[coeff] = sh.m_c[coeff][2];
+	}
+
+	red[coords] = r;
+	green[coords] = g;
+	blue[coords] = b;
+}

+ 30 - 29
AnKi/Util/Thread.h

@@ -576,52 +576,53 @@ private:
 	TMutex* m_mtx;
 };
 
-/// Read/write lock guard. When constructed it locks a TMutex and unlocks it when it gets destroyed.
-/// @tparam TMutex Can be RWMutex.
-template<typename TMutex, Bool READER>
-class RWLockGuard
+/// Read lock guard. When constructed it locks a TMutex and unlocks it when it gets destroyed.
+/// @tparam TMutex A type that provides lockRead() and unlockRead().
 template<typename TMutex>
+class RLockGuard
 {
 public:
-	RWLockGuard(TMutex& mtx)
+	/// Calls lockRead() on mtx. Only a pointer to mtx is stored.
+	RLockGuard(TMutex& mtx)
 		: m_mtx(&mtx)
 	{
-		if(READER)
-		{
-			m_mtx->lockRead();
-		}
-		else
-		{
-			m_mtx->lockWrite();
-		}
+		m_mtx->lockRead();
 	}
 
-	RWLockGuard(const RWLockGuard& b) = delete;
+	RLockGuard(const RLockGuard& b) = delete;
 
-	~RWLockGuard()
+	/// Calls unlockRead() on the mutex that was passed to the constructor.
+	~RLockGuard()
 	{
-		if(READER)
-		{
-			m_mtx->unlockRead();
-		}
-		else
-		{
-			m_mtx->unlockWrite();
-		}
+		m_mtx->unlockRead();
 	}
 
-	RWLockGuard& operator=(const RWLockGuard& b) = delete;
+	RLockGuard& operator=(const RLockGuard& b) = delete;
 
 private:
 	TMutex* m_mtx;
 };
 
-/// Read lock guard.
+/// Write lock guard. When constructed it locks a TMutex and unlocks it when it gets destroyed.
+/// @tparam TMutex A type that provides lockWrite() and unlockWrite().
 template<typename TMutex>
-using RLockGuard = RWLockGuard<TMutex, true>;
+class WLockGuard
+{
+public:
+	/// Calls lockWrite() on mtx. Only a pointer to mtx is stored.
+	WLockGuard(TMutex& mtx)
+		: m_mtx(&mtx)
+	{
+		m_mtx->lockWrite();
+	}
 
-/// Write lock guard.
-template<typename TMutex>
-using WLockGuard = RWLockGuard<TMutex, false>;
+	WLockGuard(const WLockGuard& b) = delete;
+
+	/// Calls unlockWrite() on the mutex that was passed to the constructor.
+	~WLockGuard()
+	{
+		m_mtx->unlockWrite();
+	}
+
+	WLockGuard& operator=(const WLockGuard& b) = delete;
+
+private:
+	TMutex* m_mtx;
+};
 /// @}
 
 } // end namespace anki