Browse Source

Refactor RT shadows (not working right now)

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
863e2b88ca
54 changed files with 764 additions and 1241 deletions
  1. 1 2
      AnKi/Gr/CommandBuffer.h
  2. 7 5
      AnKi/Gr/ShaderProgram.h
  3. 6 1
      AnKi/Gr/Vulkan/ShaderProgram.cpp
  4. 11 0
      AnKi/Gr/Vulkan/ShaderProgramImpl.cpp
  5. 7 1
      AnKi/Gr/Vulkan/ShaderProgramImpl.h
  6. 0 1
      AnKi/Renderer.h
  7. 18 14
      AnKi/Renderer/AccelerationStructureBuilder.cpp
  8. 9 0
      AnKi/Renderer/AccelerationStructureBuilder.h
  9. 2 2
      AnKi/Renderer/ForwardShading.cpp
  10. 1 1
      AnKi/Renderer/ForwardShading.h
  11. 1 1
      AnKi/Renderer/GBufferPost.cpp
  12. 0 55
      AnKi/Renderer/GenericCompute.cpp
  13. 0 33
      AnKi/Renderer/GenericCompute.h
  14. 51 65
      AnKi/Renderer/LensFlare.cpp
  15. 1 5
      AnKi/Renderer/LensFlare.h
  16. 12 14
      AnKi/Renderer/LightShading.cpp
  17. 1 1
      AnKi/Renderer/LightShading.h
  18. 14 31
      AnKi/Renderer/Renderer.cpp
  19. 1 0
      AnKi/Renderer/Renderer.h
  20. 0 1
      AnKi/Renderer/RendererObject.def.h
  21. 220 386
      AnKi/Renderer/RtShadows.cpp
  22. 7 31
      AnKi/Renderer/RtShadows.h
  23. 22 1
      AnKi/Renderer/ShadowmapsResolve.cpp
  24. 5 35
      AnKi/Renderer/Utils/GpuVisibility.cpp
  25. 0 2
      AnKi/Renderer/Utils/GpuVisibility.h
  26. 8 6
      AnKi/Renderer/VolumetricFog.cpp
  27. 57 53
      AnKi/Renderer/VolumetricLightingAccumulation.cpp
  28. 0 2
      AnKi/Renderer/VolumetricLightingAccumulation.h
  29. 0 1
      AnKi/Scene/Components/CameraComponent.cpp
  30. 0 3
      AnKi/Scene/Components/CameraComponent.h
  31. 0 6
      AnKi/Scene/Components/DecalComponent.cpp
  32. 0 16
      AnKi/Scene/Components/DecalComponent.h
  33. 0 16
      AnKi/Scene/Components/FogDensityComponent.cpp
  34. 0 19
      AnKi/Scene/Components/FogDensityComponent.h
  35. 0 13
      AnKi/Scene/Components/LensFlareComponent.cpp
  36. 7 9
      AnKi/Scene/Components/LensFlareComponent.h
  37. 23 6
      AnKi/Scene/Components/ModelComponent.cpp
  38. 1 0
      AnKi/Scene/Components/ModelComponent.h
  39. 2 0
      AnKi/Scene/GpuSceneArrays.def.h
  40. 0 27
      AnKi/Scene/Visibility.cpp
  41. 1 0
      AnKi/Shaders/Include/GpuSceneTypes.h
  42. 12 11
      AnKi/Shaders/Include/MiscRendererTypes.h
  43. 9 0
      AnKi/Shaders/Intellisense.hlsl
  44. 0 23
      AnKi/Shaders/LightShading.ankiprog
  45. 1 30
      AnKi/Shaders/RtShadows.hlsl
  46. 25 36
      AnKi/Shaders/RtShadowsDenoise.ankiprog
  47. 48 121
      AnKi/Shaders/RtShadowsRayGen.ankiprog
  48. 51 0
      AnKi/Shaders/RtShadowsSbtBuild.ankiprog
  49. 28 0
      AnKi/Shaders/RtShadowsSetupSbtBuild.ankiprog
  50. 28 47
      AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog
  51. 28 39
      AnKi/Shaders/RtShadowsSvgfVariance.ankiprog
  52. 17 26
      AnKi/Shaders/RtShadowsUpscale.ankiprog
  53. 0 32
      AnKi/Shaders/RtShadowsVisualizeRenderTarget.ankiprog
  54. 21 11
      AnKi/Shaders/ShadowmapsResolve.hlsl

+ 1 - 2
AnKi/Gr/CommandBuffer.h

@@ -330,8 +330,7 @@ public:
 	/// @code HG = HG_offset + (HG_stride * (R_offset + R_stride * G_id + I_offset)) @endcode
 	/// The HG_offset is equal to sbtBufferOffset + GpuDeviceCapabilities::m_sbtRecordSize * (missShaderCount + 1).
 	/// The HG_stride is equal to GpuDeviceCapabilities::m_sbtRecordSize * rayTypecount.
-	/// The R_offset and R_stride are provided in traceRayEXT. The R_offset is the "ray type" and R_stride the number of
-	/// ray types.
+	/// The R_offset and R_stride are provided in traceRayEXT. The R_offset is the "ray type" and R_stride the number of ray types.
 	/// The G_id is always 0 ATM.
 	/// The I_offset is the AccelerationStructureInstance::m_hitgroupSbtRecordIndex.
 	///

+ 7 - 5
AnKi/Gr/ShaderProgram.h

@@ -60,17 +60,19 @@ class ShaderProgram : public GrObject
 public:
 	static constexpr GrObjectType kClassType = GrObjectType::kShaderProgram;
 
-	/// Get the shader group handles that will be used in the SBTs. The size of each handle is
-	/// GpuDeviceCapabilities::m_shaderGroupHandleSize. To access a handle use:
+	/// Get the shader group handles that will be used in the SBTs. The size of each handle is GpuDeviceCapabilities::m_shaderGroupHandleSize. To
+	/// access a handle use:
 	/// @code
 	/// const U8* handleBegin = &getShaderGroupHandles()[handleIdx * devCapabilities.m_shaderGroupHandleSize];
 	/// const U8* handleEnd = &getShaderGroupHandles()[(handleIdx + 1) * devCapabilities.m_shaderGroupHandleSize];
 	/// @endcode
-	/// The handleIdx is defined via a convention. The ray gen shaders appear first where handleIdx is in the same order
-	/// as the shader in RayTracingShaders::m_rayGenShaders. Then miss shaders follow with a similar rule. Then hit
-	/// groups follow.
+	/// The handleIdx is defined via a convention. The ray gen shaders appear first where handleIdx is in the same order as the shader in
+	/// RayTracingShaders::m_rayGenShaders. Then miss shaders follow with a similar rule. Then hit groups follow.
 	ConstWeakArray<U8> getShaderGroupHandles() const;
 
+	/// Same as getShaderGroupHandles but the data live in a GPU buffer.
+	Buffer& getShaderGroupHandlesGpuBuffer() const;
+
 protected:
 	/// Construct.
 	ShaderProgram(CString name)

+ 6 - 1
AnKi/Gr/Vulkan/ShaderProgram.cpp

@@ -24,7 +24,12 @@ ShaderProgram* ShaderProgram::newInstance(const ShaderProgramInitInfo& init)
 
 ConstWeakArray<U8> ShaderProgram::getShaderGroupHandles() const
 {
-	return static_cast<const ShaderProgramImpl&>(*this).getShaderGroupHandles();
+	return static_cast<const ShaderProgramImpl&>(*this).getShaderGroupHandlesInternal();
+}
+
+Buffer& ShaderProgram::getShaderGroupHandlesGpuBuffer() const
+{
+	return static_cast<const ShaderProgramImpl&>(*this).getShaderGroupHandlesGpuBufferInternal();
 }
 
 } // end namespace anki

+ 11 - 0
AnKi/Gr/Vulkan/ShaderProgramImpl.cpp

@@ -336,6 +336,17 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 		const U32 handleArraySize = getGrManagerImpl().getPhysicalDeviceRayTracingProperties().shaderGroupHandleSize * groupCount;
 		m_rt.m_allHandles.resize(handleArraySize, 0_U8);
 		ANKI_VK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(getVkDevice(), m_rt.m_ppline, 0, groupCount, handleArraySize, &m_rt.m_allHandles[0]));
+
+		// Upload RT handles
+		BufferInitInfo buffInit("RT handles");
+		buffInit.m_size = m_rt.m_allHandles.getSizeInBytes();
+		buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
+		buffInit.m_usage = BufferUsageBit::kAllCompute & BufferUsageBit::kAllRead;
+		m_rt.m_allHandlesBuff = getGrManagerImpl().newBuffer(buffInit);
+
+		void* mapped = m_rt.m_allHandlesBuff->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
+		memcpy(mapped, m_rt.m_allHandles.getBegin(), m_rt.m_allHandles.getSizeInBytes());
+		m_rt.m_allHandlesBuff->unmap();
 	}
 
 	return Error::kNone;

+ 7 - 1
AnKi/Gr/Vulkan/ShaderProgramImpl.h

@@ -98,12 +98,17 @@ public:
 		return m_rt.m_missShaderCount;
 	}
 
-	ConstWeakArray<U8> getShaderGroupHandles() const
+	ConstWeakArray<U8> getShaderGroupHandlesInternal() const
 	{
 		ANKI_ASSERT(m_rt.m_allHandles.getSize() > 0);
 		return m_rt.m_allHandles;
 	}
 
+	Buffer& getShaderGroupHandlesGpuBufferInternal() const
+	{
+		return *m_rt.m_allHandlesBuff;
+	}
+
 private:
 	GrDynamicArray<ShaderPtr> m_shaders;
 	ShaderTypeBit m_stages = ShaderTypeBit::kNone;
@@ -133,6 +138,7 @@ private:
 		VkPipeline m_ppline = VK_NULL_HANDLE;
 		GrDynamicArray<U8> m_allHandles;
 		U32 m_missShaderCount = 0;
+		BufferPtr m_allHandlesBuff;
 	} m_rt;
 };
 /// @}

+ 0 - 1
AnKi/Renderer.h

@@ -29,7 +29,6 @@
 #include <AnKi/Renderer/Bloom.h>
 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Renderer/IndirectDiffuseProbes.h>
-#include <AnKi/Renderer/GenericCompute.h>
 #include <AnKi/Renderer/ShadowmapsResolve.h>
 #include <AnKi/Renderer/IndirectDiffuse.h>
 

+ 18 - 14
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -27,7 +27,6 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 
 		GpuVisibilityAccelerationStructuresInput in;
 		in.m_passesName = "Main TLAS visiblity";
-		in.m_technique = RenderingTechnique::kGBuffer;
 		in.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 		in.m_lodDistances = lodDistances;
 		in.m_pointOfTest = in.m_lodReferencePoint;
@@ -36,32 +35,37 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 		in.m_rgraph = &ctx.m_renderGraphDescr;
 
 		getRenderer().getGpuVisibilityAccelerationStructures().pupulateRenderGraph(in, visOut);
+
+		m_runCtx.m_visibilityHandle = visOut.m_someBufferHandle;
+		m_runCtx.m_visibleRenderableIndicesBuff = visOut.m_renderableIndicesBuffer;
 	}
 
 	// Create the TLAS
 	AccelerationStructureInitInfo initInf("Main TLAS");
 	initInf.m_type = AccelerationStructureType::kTopLevel;
-	initInf.m_topLevel.m_indirectArgs.m_maxInstanceCount = GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementCount();
+	initInf.m_topLevel.m_indirectArgs.m_maxInstanceCount = GpuSceneArrays::RenderableAabbRt::getSingleton().getElementCount();
 	initInf.m_topLevel.m_indirectArgs.m_instancesBuffer = visOut.m_instancesBuffer.m_buffer;
 	initInf.m_topLevel.m_indirectArgs.m_instancesBufferOffset = visOut.m_instancesBuffer.m_offset;
 	m_runCtx.m_tlas = GrManager::getSingleton().newAccelerationStructure(initInf);
 
-	// Build the job
-	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+	// Build the AS
+	{
+		RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
-	const BufferOffsetRange scratchBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(m_runCtx.m_tlas->getBuildScratchBufferSize());
+		const BufferOffsetRange scratchBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(m_runCtx.m_tlas->getBuildScratchBufferSize());
 
-	m_runCtx.m_tlasHandle = rgraph.importAccelerationStructure(m_runCtx.m_tlas.get(), AccelerationStructureUsageBit::kNone);
+		m_runCtx.m_tlasHandle = rgraph.importAccelerationStructure(m_runCtx.m_tlas.get(), AccelerationStructureUsageBit::kNone);
 
-	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("Build TLAS");
-	rpass.newAccelerationStructureDependency(m_runCtx.m_tlasHandle, AccelerationStructureUsageBit::kBuild);
-	rpass.newBufferDependency(visOut.m_someBufferHandle, BufferUsageBit::kAccelerationStructureBuild);
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("Build TLAS");
+		rpass.newAccelerationStructureDependency(m_runCtx.m_tlasHandle, AccelerationStructureUsageBit::kBuild);
+		rpass.newBufferDependency(visOut.m_someBufferHandle, BufferUsageBit::kAccelerationStructureBuild);
 
-	rpass.setWork([this, scratchBuff, rangeBuff = visOut.m_rangeBuffer](RenderPassWorkContext& rgraphCtx) {
-		ANKI_TRACE_SCOPED_EVENT(RTlas);
-		rgraphCtx.m_commandBuffer->buildAccelerationStructureIndirect(m_runCtx.m_tlas.get(), scratchBuff.m_buffer, scratchBuff.m_offset,
-																	  rangeBuff.m_buffer, rangeBuff.m_offset);
-	});
+		rpass.setWork([this, scratchBuff, rangeBuff = visOut.m_rangeBuffer](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(RTlas);
+			rgraphCtx.m_commandBuffer->buildAccelerationStructureIndirect(m_runCtx.m_tlas.get(), scratchBuff.m_buffer, scratchBuff.m_offset,
+																		  rangeBuff.m_buffer, rangeBuff.m_offset);
+		});
+	}
 }
 
 } // end namespace anki

+ 9 - 0
AnKi/Renderer/AccelerationStructureBuilder.h

@@ -28,12 +28,21 @@ public:
 		return m_runCtx.m_tlasHandle;
 	}
 
+	void getVisibilityInfo(BufferHandle& handle, BufferOffsetRange& buffer) const
+	{
+		handle = m_runCtx.m_visibilityHandle;
+		buffer = m_runCtx.m_visibleRenderableIndicesBuff;
+	}
+
 public:
 	class
 	{
 	public:
 		AccelerationStructurePtr m_tlas;
 		AccelerationStructureHandle m_tlasHandle;
+
+		BufferHandle m_visibilityHandle;
+		BufferOffsetRange m_visibleRenderableIndicesBuff;
 	} m_runCtx;
 };
 /// @}

+ 2 - 2
AnKi/Renderer/ForwardShading.cpp

@@ -85,12 +85,12 @@ void ForwardShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgr
 	getRenderer().getLensFlare().runDrawFlares(ctx, cmdb);
 }
 
-void ForwardShading::setDependencies(const RenderingContext& ctx, GraphicsRenderPassDescription& pass)
+void ForwardShading::setDependencies(GraphicsRenderPassDescription& pass)
 {
 	pass.newTextureDependency(getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledFragment, kHiZHalfSurface);
 	pass.newTextureDependency(getRenderer().getVolumetricLightingAccumulation().getRt(), TextureUsageBit::kSampledFragment);
 
-	if(ctx.m_renderQueue->m_lensFlares.getSize())
+	if(getRenderer().getLensFlare().getIndirectDrawBuffer().isValid())
 	{
 		pass.newBufferDependency(getRenderer().getLensFlare().getIndirectDrawBuffer(), BufferUsageBit::kIndirectDraw);
 	}

+ 1 - 1
AnKi/Renderer/ForwardShading.h

@@ -28,7 +28,7 @@ public:
 
 	void populateRenderGraph(RenderingContext& ctx);
 
-	void setDependencies(const RenderingContext& ctx, GraphicsRenderPassDescription& pass);
+	void setDependencies(GraphicsRenderPassDescription& pass);
 
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 

+ 1 - 1
AnKi/Renderer/GBufferPost.cpp

@@ -48,7 +48,7 @@ Error GBufferPost::initInternal()
 
 void GBufferPost::populateRenderGraph(RenderingContext& ctx)
 {
-	if(ctx.m_renderQueue->m_decals.getSize() == 0)
+	if(GpuSceneArrays::Decal::getSingleton().getElementCount() == 0)
 	{
 		// If there are no decals don't bother
 		return;

+ 0 - 55
AnKi/Renderer/GenericCompute.cpp

@@ -1,55 +0,0 @@
-// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#include <AnKi/Renderer/GenericCompute.h>
-#include <AnKi/Renderer/Renderer.h>
-#include <AnKi/Renderer/DepthDownscale.h>
-#include <AnKi/Renderer/RenderQueue.h>
-
-namespace anki {
-
-void GenericCompute::populateRenderGraph(RenderingContext& ctx)
-{
-	if(ctx.m_renderQueue->m_genericGpuComputeJobs.getSize() == 0)
-	{
-		return;
-	}
-
-	ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("Generic compute");
-
-	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-		run(ctx, rgraphCtx);
-	});
-
-	pass.newTextureDependency(getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledCompute);
-}
-
-void GenericCompute::run([[maybe_unused]] const RenderingContext& ctx, [[maybe_unused]] RenderPassWorkContext& rgraphCtx)
-{
-	ANKI_ASSERT(!"TODO");
-#if 0
-	ANKI_ASSERT(ctx.m_renderQueue->m_genericGpuComputeJobs.getSize() > 0);
-
-	GenericGpuComputeJobQueueElementContext elementCtx;
-	elementCtx.m_commandBuffer = rgraphCtx.m_commandBuffer;
-	elementCtx.m_rebarStagingPool = &RebarTransientMemoryPool::getSingleton();
-	elementCtx.m_viewMatrix = ctx.m_matrices.m_view;
-	elementCtx.m_viewProjectionMatrix = ctx.m_matrices.m_viewProjection;
-	elementCtx.m_projectionMatrix = ctx.m_matrices.m_projection;
-	elementCtx.m_previousViewProjectionMatrix = ctx.m_prevMatrices.m_viewProjection;
-	elementCtx.m_cameraTransform = ctx.m_matrices.m_cameraTransform;
-
-	// Bind some state
-	rgraphCtx.bindColorTexture(0, 0, getRenderer().getDepthDownscale().getHiZRt());
-
-	for(const GenericGpuComputeJobQueueElement& element : ctx.m_renderQueue->m_genericGpuComputeJobs)
-	{
-		ANKI_ASSERT(element.m_callback);
-		element.m_callback(elementCtx, element.m_userData);
-	}
-#endif
-}
-
-} // end namespace anki

+ 0 - 33
AnKi/Renderer/GenericCompute.h

@@ -1,33 +0,0 @@
-// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma once
-
-#include <AnKi/Renderer/RendererObject.h>
-
-namespace anki {
-
-/// @addtogroup renderer
-/// @{
-
-/// Executes various compute jobs required by the render queue. It's guaranteed to run before light shading and nothing
-/// more. It can access the previous frame's depth buffer.
-class GenericCompute : public RendererObject
-{
-public:
-	Error init()
-	{
-		return Error::kNone;
-	}
-
-	/// Populate the rendergraph.
-	void populateRenderGraph(RenderingContext& ctx);
-
-private:
-	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-};
-/// @}
-
-} // end namespace anki

+ 51 - 65
AnKi/Renderer/LensFlare.cpp

@@ -8,7 +8,9 @@
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Core/CVarSet.h>
+#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
 #include <AnKi/Util/Functions.h>
+#include <AnKi/Scene/Components/LensFlareComponent.h>
 
 namespace anki {
 
@@ -40,17 +42,6 @@ Error LensFlare::initInternal()
 Error LensFlare::initSprite()
 {
 	m_maxSpritesPerFlare = g_lensFlareMaxSpritesPerFlareCVar.get();
-	m_maxFlares = g_lensFlareMaxFlaresCVar.get();
-
-	if(m_maxSpritesPerFlare < 1 || m_maxFlares < 1)
-	{
-		ANKI_R_LOGE("Incorrect m_maxSpritesPerFlare or m_maxFlares");
-		return Error::kUserData;
-	}
-
-	m_maxSprites = U16(m_maxSpritesPerFlare * m_maxFlares);
-
-	// Load prog
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/LensFlareSprite.ankiprogbin", m_realProg, m_realGrProg));
 
 	return Error::kNone;
@@ -58,10 +49,6 @@ Error LensFlare::initSprite()
 
 Error LensFlare::initOcclusion()
 {
-	m_indirectBuff = GrManager::getSingleton().newBuffer(BufferInitInfo(m_maxFlares * sizeof(DrawIndirectArgs),
-																		BufferUsageBit::kIndirectDraw | BufferUsageBit::kStorageComputeWrite,
-																		BufferMapAccessBit::kNone, "LensFlares"));
-
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/LensFlareUpdateIndirectInfo.ankiprogbin", m_updateIndirectBuffProg));
 
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_updateIndirectBuffProg);
@@ -74,75 +61,72 @@ Error LensFlare::initOcclusion()
 	return Error::kNone;
 }
 
-void LensFlare::updateIndirectInfo(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-	U32 count = min<U32>(ctx.m_renderQueue->m_lensFlares.getSize(), m_maxFlares);
-	ANKI_ASSERT(count > 0);
-
-	cmdb.bindShaderProgram(m_updateIndirectBuffGrProg.get());
-
-	cmdb.setPushConstants(&ctx.m_matrices.m_viewProjectionJitter, sizeof(ctx.m_matrices.m_viewProjectionJitter));
-
-	// Write flare info
-	Vec4* flarePositions = allocateAndBindStorage<Vec4>(cmdb, 0, 0, count);
-	for(U32 i = 0; i < count; ++i)
-	{
-		*flarePositions = Vec4(ctx.m_renderQueue->m_lensFlares[i].m_worldPosition, 1.0f);
-		++flarePositions;
-	}
-
-	rgraphCtx.bindStorageBuffer(0, 1, m_runCtx.m_indirectBuffHandle);
-	// Bind neareset because you don't need high quality
-	cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	rgraphCtx.bindTexture(0, 3, getRenderer().getDepthDownscale().getHiZRt(), kHiZQuarterSurface);
-	cmdb.dispatchCompute(count, 1, 1);
-}
-
 void LensFlare::populateRenderGraph(RenderingContext& ctx)
 {
-	if(ctx.m_renderQueue->m_lensFlares.getSize() == 0)
+	const U32 flareCount = SceneGraph::getSingleton().getComponentArrays().getLensFlares().getSize();
+	if(flareCount == 0)
 	{
+		m_runCtx = {};
 		return;
 	}
 
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
-	// Import buffer
-	m_runCtx.m_indirectBuffHandle = rgraph.importBuffer(m_indirectBuff.get(), BufferUsageBit::kNone);
+	// Create indirect buffer
+	m_runCtx.m_indirectBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DrawIndirectArgs) * flareCount);
+	m_runCtx.m_indirectBuffHandle = rgraph.importBuffer(BufferUsageBit::kNone, m_runCtx.m_indirectBuff);
 
-	// Update the indirect buffer
-	{
-		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("LF Upd Ind/ct");
+	// Create the pass
+	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("Lens flare indirect");
 
-		rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-			updateIndirectInfo(ctx, rgraphCtx);
-		});
+	rpass.newBufferDependency(m_runCtx.m_indirectBuffHandle, BufferUsageBit::kStorageComputeWrite);
+	rpass.newTextureDependency(getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledCompute, kHiZQuarterSurface);
 
-		rpass.newBufferDependency(m_runCtx.m_indirectBuffHandle, BufferUsageBit::kStorageComputeWrite);
-		rpass.newTextureDependency(getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledCompute, kHiZQuarterSurface);
-	}
+	rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+		const U32 flareCount = SceneGraph::getSingleton().getComponentArrays().getLensFlares().getSize();
+		ANKI_ASSERT(flareCount > 0);
+
+		cmdb.bindShaderProgram(m_updateIndirectBuffGrProg.get());
+
+		cmdb.setPushConstants(&ctx.m_matrices.m_viewProjectionJitter, sizeof(ctx.m_matrices.m_viewProjectionJitter));
+
+		// Write flare info
+		Vec4* flarePositions = allocateAndBindStorage<Vec4>(cmdb, 0, 0, flareCount);
+		for(const LensFlareComponent& comp : SceneGraph::getSingleton().getComponentArrays().getLensFlares())
+		{
+			*flarePositions = Vec4(comp.getWorldPosition(), 1.0f);
+			++flarePositions;
+		}
+
+		rgraphCtx.bindStorageBuffer(0, 1, m_runCtx.m_indirectBuffHandle);
+		// Bind nearest because you don't need high quality
+		cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_nearestNearestClamp.get());
+		rgraphCtx.bindTexture(0, 3, getRenderer().getDepthDownscale().getHiZRt(), kHiZQuarterSurface);
+
+		cmdb.dispatchCompute(flareCount, 1, 1);
+	});
 }
 
 void LensFlare::runDrawFlares(const RenderingContext& ctx, CommandBuffer& cmdb)
 {
-	if(ctx.m_renderQueue->m_lensFlares.getSize() == 0)
+	const U32 flareCount = SceneGraph::getSingleton().getComponentArrays().getLensFlares().getSize();
+
+	if(flareCount == 0)
 	{
 		return;
 	}
 
-	const U32 count = min<U32>(ctx.m_renderQueue->m_lensFlares.getSize(), m_maxFlares);
-
 	cmdb.bindShaderProgram(m_realGrProg.get());
 	cmdb.setBlendFactors(0, BlendFactor::kSrcAlpha, BlendFactor::kOneMinusSrcAlpha);
 	cmdb.setDepthWrite(false);
 
-	for(U32 i = 0; i < count; ++i)
+	U32 count = 0;
+	for(const LensFlareComponent& comp : SceneGraph::getSingleton().getComponentArrays().getLensFlares())
 	{
-		const LensFlareQueueElement& flareEl = ctx.m_renderQueue->m_lensFlares[i];
-
 		// Compute position
-		Vec4 lfPos = Vec4(flareEl.m_worldPosition, 1.0);
+		Vec4 lfPos = Vec4(comp.getWorldPosition(), 1.0f);
 		Vec4 posClip = ctx.m_matrices.m_viewProjectionJitter * lfPos;
 
 		/*if(posClip.x() > posClip.w() || posClip.x() < -posClip.w() || posClip.y() > posClip.w()
@@ -163,19 +147,21 @@ void LensFlare::runDrawFlares(const RenderingContext& ctx, CommandBuffer& cmdb)
 		Vec2 posNdc = posClip.xy() / posClip.w();
 
 		// First flare
-		sprites[c].m_posScale = Vec4(posNdc, flareEl.m_firstFlareSize * Vec2(1.0f, getRenderer().getAspectRatio()));
+		sprites[c].m_posScale = Vec4(posNdc, comp.getFirstFlareSize() * Vec2(1.0f, getRenderer().getAspectRatio()));
 		sprites[c].m_depthPad3 = Vec4(0.0f);
-		const F32 alpha = flareEl.m_colorMultiplier.w() * (1.0f - pow(absolute(posNdc.x()), 6.0f))
+		const F32 alpha = comp.getColorMultiplier().w() * (1.0f - pow(absolute(posNdc.x()), 6.0f))
 						  * (1.0f - pow(absolute(posNdc.y()), 6.0f)); // Fade the flare on the edges
-		sprites[c].m_color = Vec4(flareEl.m_colorMultiplier.xyz(), alpha);
+		sprites[c].m_color = Vec4(comp.getColorMultiplier().xyz(), alpha);
 		++c;
 
 		// Render
-		ANKI_ASSERT(flareEl.m_textureView);
 		cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearRepeat.get());
-		cmdb.bindTexture(0, 2, flareEl.m_textureView);
+		cmdb.bindTexture(0, 2, &comp.getImage().getTextureView());
+
+		cmdb.drawIndirect(PrimitiveTopology::kTriangleStrip, 1, count * sizeof(DrawIndirectArgs) + m_runCtx.m_indirectBuff.m_offset,
+						  m_runCtx.m_indirectBuff.m_buffer);
 
-		cmdb.drawIndirect(PrimitiveTopology::kTriangleStrip, 1, i * sizeof(DrawIndirectArgs), m_indirectBuff.get());
+		++count;
 	}
 
 	// Restore state

+ 1 - 5
AnKi/Renderer/LensFlare.h

@@ -32,7 +32,6 @@ public:
 
 private:
 	// Occlusion test
-	BufferPtr m_indirectBuff;
 	ShaderProgramResourcePtr m_updateIndirectBuffProg;
 	ShaderProgramPtr m_updateIndirectBuffGrProg;
 
@@ -40,12 +39,11 @@ private:
 	ShaderProgramResourcePtr m_realProg;
 	ShaderProgramPtr m_realGrProg;
 	U8 m_maxSpritesPerFlare;
-	U8 m_maxFlares;
-	U16 m_maxSprites;
 
 	class
 	{
 	public:
+		BufferOffsetRange m_indirectBuff;
 		BufferHandle m_indirectBuffHandle;
 	} m_runCtx;
 
@@ -53,8 +51,6 @@ private:
 	Error initOcclusion();
 
 	Error initInternal();
-
-	void updateIndirectInfo(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 };
 /// @}
 

+ 12 - 14
AnKi/Renderer/LightShading.cpp

@@ -20,6 +20,7 @@
 #include <AnKi/Renderer/ClusterBinning2.h>
 #include <AnKi/Core/CVarSet.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/Scene/Components/SkyboxComponent.h>
 
 namespace anki {
 
@@ -63,13 +64,8 @@ Error LightShading::initLightShading()
 	variantInitInfo.addConstant("kTileSize", getRenderer().getTileSize());
 	const ShaderProgramResourceVariant* variant;
 
-	variantInitInfo.addMutation("USE_SHADOW_LAYERS", 0);
 	m_lightShading.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_lightShading.m_grProg[0].reset(&variant->getProgram());
-
-	variantInitInfo.addMutation("USE_SHADOW_LAYERS", 1);
-	m_lightShading.m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_lightShading.m_grProg[1].reset(&variant->getProgram());
+	m_lightShading.m_grProg.reset(&variant->getProgram());
 
 	// Create RT descr
 	const UVec2 internalResolution = getRenderer().getInternalResolution();
@@ -153,7 +149,7 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 
 	// Do light shading first
 	{
-		cmdb.bindShaderProgram(m_lightShading.m_grProg[getRenderer().getRtShadowsEnabled()].get());
+		cmdb.bindShaderProgram(m_lightShading.m_grProg.get());
 		cmdb.setDepthWrite(false);
 
 		// Bind all
@@ -200,7 +196,7 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 
 		cmdb.bindUniformBuffer(0, 10, getRenderer().getClusterBinning2().getClusteredShadingUniforms());
 
-		const Vec4 pc(ctx.m_renderQueue->m_cameraNear, ctx.m_renderQueue->m_cameraFar, 0.0f, 0.0f);
+		const Vec4 pc(ctx.m_cameraNear, ctx.m_cameraFar, 0.0f, 0.0f);
 		cmdb.setPushConstants(&pc, sizeof(pc));
 
 		cmdb.setBlendFactors(0, BlendFactor::kOne, BlendFactor::kOne);
@@ -215,13 +211,15 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 	{
 		cmdb.setDepthCompareOperation(CompareOperation::kEqual);
 
-		const Bool isSolidColor = ctx.m_renderQueue->m_skybox.m_skyboxTexture == nullptr;
+		const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+
+		const Bool isSolidColor = (sky) ? sky->getSkyboxType() == SkyboxType::kSolidColor : false;
 
 		if(isSolidColor)
 		{
 			cmdb.bindShaderProgram(m_skybox.m_grProgs[0].get());
 
-			const Vec4 color(ctx.m_renderQueue->m_skybox.m_solidColor, 0.0);
+			const Vec4 color((sky) ? sky->getSolidColor() : Vec3(0.0f), 0.0);
 			cmdb.setPushConstants(&color, sizeof(color));
 		}
 		else
@@ -242,7 +240,7 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 			cmdb.setPushConstants(&pc, sizeof(pc));
 
 			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeatAnisoResolutionScalingBias.get());
-			cmdb.bindTexture(0, 1, ctx.m_renderQueue->m_skybox.m_skyboxTexture);
+			cmdb.bindTexture(0, 1, &sky->getImageResource().getTextureView());
 		}
 
 		drawQuad(cmdb);
@@ -269,8 +267,8 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 			F32 m_near;
 			F32 m_far;
 		} regs;
-		regs.m_near = ctx.m_renderQueue->m_cameraNear;
-		regs.m_far = ctx.m_renderQueue->m_cameraFar;
+		regs.m_near = ctx.m_cameraNear;
+		regs.m_far = ctx.m_cameraFar;
 
 		cmdb.setPushConstants(&regs, sizeof(regs));
 
@@ -379,7 +377,7 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 	pass.newTextureDependency(getRenderer().getVolumetricFog().getRt(), readUsage);
 
 	// For forward shading
-	getRenderer().getForwardShading().setDependencies(ctx, pass);
+	getRenderer().getForwardShading().setDependencies(pass);
 }
 
 void LightShading::getDebugRenderTarget([[maybe_unused]] CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,

+ 1 - 1
AnKi/Renderer/LightShading.h

@@ -40,7 +40,7 @@ private:
 
 		// Light shaders
 		ShaderProgramResourcePtr m_prog;
-		Array<ShaderProgramPtr, 2> m_grProg;
+		ShaderProgramPtr m_grProg;
 	} m_lightShading;
 
 	class

+ 14 - 31
AnKi/Renderer/Renderer.cpp

@@ -36,7 +36,6 @@
 #include <AnKi/Renderer/IndirectSpecular.h>
 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Renderer/IndirectDiffuseProbes.h>
-#include <AnKi/Renderer/GenericCompute.h>
 #include <AnKi/Renderer/ShadowmapsResolve.h>
 #include <AnKi/Renderer/RtShadows.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
@@ -62,6 +61,8 @@ static BoolCVar g_highQualityHdrCVar(CVarSubsystem::kRenderer, "HighQualityHdr",
 									 "If true use R16G16B16 for HDR images. Alternatively use B10G11R11");
 BoolCVar g_vrsLimitTo2x2CVar(CVarSubsystem::kRenderer, "VrsLimitTo2x2", false, "If true the max rate will be 2x2");
 BoolCVar g_vrsCVar(CVarSubsystem::kRenderer, "Vrs", true, "Enable VRS in multiple passes");
+BoolCVar g_rayTracedShadowsCVar(CVarSubsystem::kRenderer, "RayTracedShadows", true,
+								"Enable or not ray traced shadows. Ignored if RT is not supported");
 
 /// Generate a Halton jitter in [-0.5, 0.5]
 static Vec2 generateJitter(U32 frame)
@@ -170,9 +171,6 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 	}
 
 	// Init the stages. Careful with the order!!!!!!!!!!
-	m_genericCompute.reset(newInstance<GenericCompute>(RendererMemoryPool::getSingleton()));
-	ANKI_CHECK(m_genericCompute->init());
-
 	m_volumetricLightingAccumulation.reset(newInstance<VolumetricLightingAccumulation>(RendererMemoryPool::getSingleton()));
 	ANKI_CHECK(m_volumetricLightingAccumulation->init());
 
@@ -247,11 +245,9 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 		m_rtShadows.reset(newInstance<RtShadows>(RendererMemoryPool::getSingleton()));
 		ANKI_CHECK(m_rtShadows->init());
 	}
-	else
-	{
-		m_shadowmapsResolve.reset(newInstance<ShadowmapsResolve>(RendererMemoryPool::getSingleton()));
-		ANKI_CHECK(m_shadowmapsResolve->init());
-	}
+
+	m_shadowmapsResolve.reset(newInstance<ShadowmapsResolve>(RendererMemoryPool::getSingleton()));
+	ANKI_CHECK(m_shadowmapsResolve->init());
 
 	m_motionVectors.reset(newInstance<MotionVectors>(RendererMemoryPool::getSingleton()));
 	ANKI_CHECK(m_motionVectors->init());
@@ -317,12 +313,14 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 
 Error Renderer::populateRenderGraph(RenderingContext& ctx)
 {
+	const CameraComponent& cam = SceneGraph::getSingleton().getActiveCameraNode().getFirstComponentOfType<CameraComponent>();
+
 	ctx.m_prevMatrices = m_prevMatrices;
 
-	ctx.m_matrices.m_cameraTransform = ctx.m_renderQueue->m_cameraTransform;
-	ctx.m_matrices.m_view = ctx.m_renderQueue->m_viewMatrix;
-	ctx.m_matrices.m_projection = ctx.m_renderQueue->m_projectionMatrix;
-	ctx.m_matrices.m_viewProjection = ctx.m_renderQueue->m_viewProjectionMatrix;
+	ctx.m_matrices.m_cameraTransform = Mat3x4(cam.getFrustum().getWorldTransform());
+	ctx.m_matrices.m_view = cam.getFrustum().getViewMatrix();
+	ctx.m_matrices.m_projection = cam.getFrustum().getProjectionMatrix();
+	ctx.m_matrices.m_viewProjection = cam.getFrustum().getViewProjectionMatrix();
 
 	Vec2 jitter = m_jitterOffsets[m_frameCount & (m_jitterOffsets.getSize() - 1)]; // In [-0.5, 0.5]
 	const Vec2 ndcPixelSize = 2.0f / Vec2(m_internalResolution);
@@ -340,8 +338,8 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 
 	ctx.m_matrices.m_unprojectionParameters = ctx.m_matrices.m_projection.extractPerspectiveUnprojectionParams();
 
-	ctx.m_cameraNear = ctx.m_renderQueue->m_cameraNear;
-	ctx.m_cameraFar = ctx.m_renderQueue->m_cameraFar;
+	ctx.m_cameraNear = cam.getNear();
+	ctx.m_cameraFar = cam.getFar();
 
 	// Import RTs first
 	m_downscaleBlur->importRenderTargets(ctx);
@@ -353,7 +351,6 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	// Populate render graph. WARNING Watch the order
 	gpuSceneCopy(ctx);
 	m_primaryNonRenderableVisibility->populateRenderGraph(ctx);
-	m_genericCompute->populateRenderGraph(ctx);
 	if(m_accelerationStructureBuilder)
 	{
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
@@ -372,10 +369,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	{
 		m_rtShadows->populateRenderGraph(ctx);
 	}
-	else
-	{
-		m_shadowmapsResolve->populateRenderGraph(ctx);
-	}
+	m_shadowmapsResolve->populateRenderGraph(ctx);
 	m_volumetricFog->populateRenderGraph(ctx);
 	m_lensFlare->populateRenderGraph(ctx);
 	m_indirectSpecular->populateRenderGraph(ctx);
@@ -402,17 +396,6 @@ void Renderer::finalize(const RenderingContext& ctx, Fence* fence)
 	++m_frameCount;
 
 	m_prevMatrices = ctx.m_matrices;
-
-	// Inform about the HiZ map. Do it as late as possible
-	if(ctx.m_renderQueue->m_fillCoverageBufferCallback)
-	{
-		F32* depthValues;
-		U32 width;
-		U32 height;
-		m_depthDownscale->getClientDepthMapInfo(depthValues, width, height);
-		ctx.m_renderQueue->m_fillCoverageBufferCallback(ctx.m_renderQueue->m_fillCoverageBufferCallbackUserData, depthValues, width, height);
-	}
-
 	m_readbaks.endFrame(fence);
 }
 

+ 1 - 0
AnKi/Renderer/Renderer.h

@@ -22,6 +22,7 @@ extern BoolCVar g_vrsCVar;
 extern BoolCVar g_vrsLimitTo2x2CVar;
 extern BoolCVar g_preferComputeCVar;
 extern NumericCVar<F32> g_renderScalingCVar;
+extern BoolCVar g_rayTracedShadowsCVar;
 
 /// @addtogroup renderer
 /// @{

+ 0 - 1
AnKi/Renderer/RendererObject.def.h

@@ -22,7 +22,6 @@ ANKI_RENDERER_OBJECT_DEF(UiStage, uiStage)
 ANKI_RENDERER_OBJECT_DEF(IndirectSpecular, indirectSpecular)
 ANKI_RENDERER_OBJECT_DEF(VolumetricLightingAccumulation, volumetricLightingAccumulation)
 ANKI_RENDERER_OBJECT_DEF(IndirectDiffuseProbes, indirectDiffuseProbes)
-ANKI_RENDERER_OBJECT_DEF(GenericCompute, genericCompute)
 ANKI_RENDERER_OBJECT_DEF(ShadowmapsResolve, shadowmapsResolve)
 ANKI_RENDERER_OBJECT_DEF(RtShadows, rtShadows)
 ANKI_RENDERER_OBJECT_DEF(AccelerationStructureBuilder, accelerationStructureBuilder)

+ 220 - 386
AnKi/Renderer/RtShadows.cpp

@@ -18,6 +18,7 @@
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
 #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
+#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
 
 namespace anki {
 
@@ -46,6 +47,12 @@ Error RtShadows::initInternal()
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_blueNoiseImage));
 
+	// Setup build SBT program
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtShadowsSetupSbtBuild.ankiprogbin", m_setupBuildSbtProg, m_setupBuildSbtGrProg));
+
+	// Build SBT program
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtShadowsSbtBuild.ankiprogbin", m_buildSbtProg, m_buildSbtGrProg));
+
 	// Ray gen program
 	{
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsRayGen.ankiprogbin", m_rayGenProg));
@@ -72,8 +79,6 @@ Error RtShadows::initInternal()
 	{
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsDenoise.ankiprogbin", m_denoiseProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoiseProg);
-		variantInitInfo.addConstant("kOutImageSize",
-									UVec2(getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2));
 		variantInitInfo.addConstant("kMinSampleCount", 8u);
 		variantInitInfo.addConstant("kMaxSampleCount", 32u);
 		variantInitInfo.addMutation("BLUR_ORIENTATION", 0);
@@ -90,14 +95,7 @@ Error RtShadows::initInternal()
 	// SVGF variance program
 	if(m_useSvgf)
 	{
-		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsSvgfVariance.ankiprogbin", m_svgfVarianceProg));
-		ShaderProgramResourceVariantInitInfo variantInitInfo(m_svgfVarianceProg);
-		variantInitInfo.addConstant("kFramebufferSize",
-									UVec2(getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2));
-
-		const ShaderProgramResourceVariant* variant;
-		m_svgfVarianceProg->getOrCreateVariant(variantInitInfo, variant);
-		m_svgfVarianceGrProg.reset(&variant->getProgram());
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtShadowsSvgfVariance.ankiprogbin", m_svgfVarianceProg, m_svgfVarianceGrProg));
 	}
 
 	// SVGF atrous program
@@ -105,8 +103,6 @@ Error RtShadows::initInternal()
 	{
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsSvgfAtrous.ankiprogbin", m_svgfAtrousProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_svgfAtrousProg);
-		variantInitInfo.addConstant("kFramebufferSize",
-									UVec2(getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2));
 		variantInitInfo.addMutation("LAST_PASS", 0);
 
 		const ShaderProgramResourceVariant* variant;
@@ -119,24 +115,12 @@ Error RtShadows::initInternal()
 	}
 
 	// Upscale program
-	{
-		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsUpscale.ankiprogbin", m_upscaleProg));
-		ShaderProgramResourceVariantInitInfo variantInitInfo(m_upscaleProg);
-		variantInitInfo.addConstant("kOutImageSize", UVec2(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y()));
-
-		const ShaderProgramResourceVariant* variant;
-		m_upscaleProg->getOrCreateVariant(variantInitInfo, variant);
-		m_upscaleGrProg.reset(&variant->getProgram());
-	}
-
-	// Debug program
-	ANKI_CHECK(
-		ResourceManager::getSingleton().loadResource("ShaderBinaries/RtShadowsVisualizeRenderTarget.ankiprogbin", m_visualizeRenderTargetsProg));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtShadowsUpscale.ankiprogbin", m_upscaleProg, m_upscaleGrProg));
 
 	// Quarter rez shadow RT
 	{
 		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
-			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR32G32_Uint,
+			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR8_Unorm,
 			TextureUsageBit::kAllSampled | TextureUsageBit::kImageTraceRaysWrite | TextureUsageBit::kImageComputeWrite, "RtShadows History");
 		m_historyRt = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSampledFragment);
 	}
@@ -144,7 +128,7 @@ Error RtShadows::initInternal()
 	// Temp shadow RT
 	{
 		m_intermediateShadowsRtDescr = getRenderer().create2DRenderTargetDescription(
-			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR32G32_Uint, "RtShadows Tmp");
+			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR8_Unorm, "RtShadows Tmp");
 		m_intermediateShadowsRtDescr.bake();
 	}
 
@@ -170,7 +154,7 @@ Error RtShadows::initInternal()
 	// Final RT
 	{
 		m_upscaledRtDescr = getRenderer().create2DRenderTargetDescription(
-			getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR32G32_Uint, "RtShadows Upscaled");
+			getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR8_Unorm, "RtShadows Upscaled");
 		m_upscaledRtDescr.bake();
 	}
 
@@ -184,13 +168,15 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 {
 	ANKI_TRACE_SCOPED_EVENT(RRtShadows);
 
-	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+#define ANKI_DEPTH_DEP \
+	getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledTraceRays | TextureUsageBit::kSampledCompute, kHiZHalfSurface
 
-	buildSbt(ctx);
-	const U32 prevRtIdx = getRenderer().getFrameCount() & 1;
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
 	// Import RTs
 	{
+		const U32 prevRtIdx = getRenderer().getFrameCount() & 1;
+
 		if(!m_rtsImportedOnce) [[unlikely]]
 		{
 			m_runCtx.m_historyRt = rgraph.importRenderTarget(m_historyRt.get(), TextureUsageBit::kSampledFragment);
@@ -231,15 +217,79 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		m_runCtx.m_upscaledRt = rgraph.newRenderTarget(m_upscaledRtDescr);
 	}
 
-#define ANKI_DEPTH_DEP \
-	getRenderer().getDepthDownscale().getHiZRt(), TextureUsageBit::kSampledTraceRays | TextureUsageBit::kSampledCompute, kHiZHalfSurface
+	// Setup build SBT dispatch
+	BufferHandle sbtBuildIndirectArgsHandle;
+	BufferOffsetRange sbtBuildIndirectArgsBuffer;
+	{
+		sbtBuildIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DispatchIndirectArgs));
+		sbtBuildIndirectArgsHandle = rgraph.importBuffer(BufferUsageBit::kStorageComputeWrite, sbtBuildIndirectArgsBuffer);
+
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows setup build SBT");
+
+		rpass.setWork([this, sbtBuildIndirectArgsBuffer](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_setupBuildSbtGrProg.get());
+
+			cmdb.bindStorageBuffer(0, 0, GpuSceneArrays::RenderableAabbRt::getSingleton().getBufferOffsetRange());
+			cmdb.bindStorageBuffer(0, 1, sbtBuildIndirectArgsBuffer);
 
-	// RT shadows pass
+			cmdb.dispatchCompute(1, 1, 1);
+		});
+	}
+
+	// Build the SBT
+	BufferHandle sbtHandle;
+	BufferOffsetRange sbtBuffer;
 	{
-		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows");
-		rpass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-			run(rgraphCtx);
+		// Allocate SBT
+		U8* sbtMem;
+		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(
+			(GpuSceneArrays::RenderableAabbRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtMem);
+		sbtHandle = rgraph.importBuffer(BufferUsageBit::kStorageComputeWrite, sbtBuffer);
+
+		// Write the first 2 entries of the SBT
+		ConstWeakArray<U8> shaderGroupHandles = m_rtLibraryGrProg->getShaderGroupHandles();
+		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+
+		// Create the pass
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows build SBT");
+
+		BufferHandle visibilityHandle;
+		BufferOffsetRange visibleRenderableIndicesBuff;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff);
+
+		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kStorageComputeRead);
+		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
+
+		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_buildSbtGrProg.get());
+
+			cmdb.bindStorageBuffer(0, 0, GpuSceneArrays::RenderableAabbRt::getSingleton().getBufferOffsetRange());
+			cmdb.bindStorageBuffer(0, 1, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
+			cmdb.bindStorageBuffer(0, 2, visibleRenderableIndicesBuff);
+			cmdb.bindStorageBuffer(0, 3, &m_rtLibraryGrProg->getShaderGroupHandlesGpuBuffer(), 0, kMaxPtrSize);
+			cmdb.bindStorageBuffer(0, 4, sbtBuffer);
+
+			RtShadowsSbtBuildUniforms unis = {};
+			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
+			unis.m_sbtRecordDwordSize = m_sbtRecordSize / 4;
+			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+			ANKI_ASSERT(shaderHandleSize % 4 == 0);
+			unis.m_shaderHandleDwordSize = shaderHandleSize / 4;
+			cmdb.setPushConstants(&unis, sizeof(unis));
+
+			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuffer.m_buffer, sbtBuildIndirectArgsBuffer.m_offset);
 		});
+	}
+
+	// Ray gen
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows");
 
 		rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kSampledTraceRays);
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kImageTraceRaysWrite);
@@ -254,16 +304,66 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kImageTraceRaysWrite);
 
 		rpass.newBufferDependency(getRenderer().getClusterBinning2().getClustersBufferHandle(), BufferUsageBit::kStorageTraceRaysRead);
-		rpass.newBufferDependency(getRenderer().getClusterBinning2().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
-								  BufferUsageBit::kStorageTraceRaysRead);
+
+		rpass.setWork([this, sbtBuffer](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
+
+			// Allocate, set and bind global uniforms
+			{
+				MaterialGlobalUniforms* globalUniforms;
+				const RebarAllocation globalUniformsToken = RebarTransientMemoryPool::getSingleton().allocateFrame(1, globalUniforms);
+
+				memset(globalUniforms, 0, sizeof(*globalUniforms)); // Don't care for now
+
+				cmdb.bindUniformBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGlobalUniforms), globalUniformsToken);
+			}
+
+			// More globals
+			cmdb.bindAllBindless(U32(MaterialSet::kBindless));
+			cmdb.bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTrilinearRepeatSampler),
+							 getRenderer().getSamplers().m_trilinearRepeat.get());
+			cmdb.bindStorageBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGpuScene), &GpuSceneBuffer::getSingleton().getBuffer(), 0,
+								   kMaxPtrSize);
+
+#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) \
+	cmdb.bindReadOnlyTextureBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kUnifiedGeometry_##fmt), \
+								   &UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, Format::k##fmt);
+#include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
+
+			constexpr U32 kSet = 2;
+
+			cmdb.bindUniformBuffer(kSet, 0, getRenderer().getClusterBinning2().getClusteredShadingUniforms());
+			cmdb.bindStorageBuffer(kSet, 1, getRenderer().getClusterBinning2().getClustersBuffer());
+
+			cmdb.bindSampler(kSet, 2, getRenderer().getSamplers().m_trilinearRepeat.get());
+
+			rgraphCtx.bindImage(kSet, 3, m_runCtx.m_intermediateShadowsRts[0]);
+			rgraphCtx.bindColorTexture(kSet, 4, m_runCtx.m_historyRt);
+			cmdb.bindSampler(kSet, 5, getRenderer().getSamplers().m_trilinearClamp.get());
+			rgraphCtx.bindTexture(kSet, 6, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+			rgraphCtx.bindColorTexture(kSet, 7, getRenderer().getMotionVectors().getMotionVectorsRt());
+			rgraphCtx.bindColorTexture(kSet, 8, getRenderer().getMotionVectors().getHistoryLengthRt());
+			rgraphCtx.bindColorTexture(kSet, 9, getRenderer().getGBuffer().getColorRt(2));
+			rgraphCtx.bindAccelerationStructure(kSet, 10, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+			rgraphCtx.bindColorTexture(kSet, 11, m_runCtx.m_prevMomentsRt);
+			rgraphCtx.bindImage(kSet, 12, m_runCtx.m_currentMomentsRt);
+			cmdb.bindTexture(kSet, 13, &m_blueNoiseImage->getTextureView());
+
+			cmdb.traceRays(sbtBuffer.m_buffer, sbtBuffer.m_offset, m_sbtRecordSize,
+						   GpuSceneArrays::RenderableAabbRt::getSingleton().getElementCount(), 1, getRenderer().getInternalResolution().x() / 2,
+						   getRenderer().getInternalResolution().y() / 2, 1);
+		});
 	}
 
 	// Denoise pass horizontal
 	if(!m_useSvgf)
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Denoise Horizontal");
+
 		rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-			runDenoise(ctx, rgraphCtx);
+			runDenoise(ctx, rgraphCtx, true);
 		});
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kSampledCompute);
@@ -280,7 +380,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Denoise Vertical");
 		rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-			runDenoise(ctx, rgraphCtx);
+			runDenoise(ctx, rgraphCtx, false);
 		});
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[1], TextureUsageBit::kSampledCompute);
@@ -296,9 +396,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 	if(m_useSvgf)
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows SVGF Variance");
-		rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-			runSvgfVariance(ctx, rgraphCtx);
-		});
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kSampledCompute);
@@ -308,22 +405,38 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[1], TextureUsageBit::kImageComputeWrite);
 		rpass.newTextureDependency(m_runCtx.m_varianceRts[1], TextureUsageBit::kImageComputeWrite);
+
+		rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_svgfVarianceGrProg.get());
+
+			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
+
+			rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateShadowsRts[0]);
+			rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_currentMomentsRt);
+			rgraphCtx.bindColorTexture(0, 3, getRenderer().getMotionVectors().getHistoryLengthRt());
+			rgraphCtx.bindTexture(0, 4, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+
+			rgraphCtx.bindImage(0, 5, m_runCtx.m_intermediateShadowsRts[1]);
+			rgraphCtx.bindImage(0, 6, m_runCtx.m_varianceRts[1]);
+
+			const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
+			cmdb.setPushConstants(&invProjMat, sizeof(invProjMat));
+
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
+		});
 	}
 
 	// SVGF Atrous
 	if(m_useSvgf)
 	{
-		m_runCtx.m_atrousPassIdx = 0;
-
 		for(U32 i = 0; i < m_atrousPassCount; ++i)
 		{
 			const Bool lastPass = i == U32(m_atrousPassCount - 1);
 			const U32 readRtIdx = (i + 1) & 1;
 
 			ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows SVGF Atrous");
-			rpass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-				runSvgfAtrous(ctx, rgraphCtx);
-			});
 
 			rpass.newTextureDependency(ANKI_DEPTH_DEP);
 			rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSampledCompute);
@@ -340,380 +453,101 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			{
 				rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kImageComputeWrite);
 			}
+
+			rpass.setWork([this, &ctx, passIdx = i](RenderPassWorkContext& rgraphCtx) {
+				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+				const Bool lastPass = passIdx == m_atrousPassCount - 1;
+				const U32 readRtIdx = (passIdx + 1) & 1;
+
+				if(lastPass)
+				{
+					cmdb.bindShaderProgram(m_svgfAtrousLastPassGrProg.get());
+				}
+				else
+				{
+					cmdb.bindShaderProgram(m_svgfAtrousGrProg.get());
+				}
+
+				cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
+				cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
+
+				rgraphCtx.bindTexture(0, 2, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+				rgraphCtx.bindColorTexture(0, 3, m_runCtx.m_intermediateShadowsRts[readRtIdx]);
+				rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_varianceRts[readRtIdx]);
+
+				if(!lastPass)
+				{
+					rgraphCtx.bindImage(0, 5, m_runCtx.m_intermediateShadowsRts[!readRtIdx]);
+					rgraphCtx.bindImage(0, 6, m_runCtx.m_varianceRts[!readRtIdx]);
+				}
+				else
+				{
+					rgraphCtx.bindImage(0, 5, m_runCtx.m_historyRt);
+				}
+
+				const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
+				cmdb.setPushConstants(&invProjMat, sizeof(invProjMat));
+
+				dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
+			});
 		}
 	}
 
 	// Upscale
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("RtShadows Upscale");
-		rpass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-			runUpscale(rgraphCtx);
-		});
 
 		rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
 
 		rpass.newTextureDependency(m_runCtx.m_upscaledRt, TextureUsageBit::kImageComputeWrite);
-	}
-
-	// Find out the lights that will take part in RT pass
-	{
-		RenderQueue& rqueue = *ctx.m_renderQueue;
-		m_runCtx.m_layersWithRejectedHistory.unsetAll();
 
-		if(rqueue.m_directionalLight.hasShadow())
-		{
-			U32 layerIdx;
-			Bool rejectHistory;
-			[[maybe_unused]] const Bool layerFound = findShadowLayer(0, layerIdx, rejectHistory);
-			ANKI_ASSERT(layerFound && "Directional can't fail");
-
-			rqueue.m_directionalLight.m_shadowLayer = U8(layerIdx);
-			ANKI_ASSERT(rqueue.m_directionalLight.m_shadowLayer < kMaxRtShadowLayers);
-			m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-		}
-
-		for(PointLightQueueElement& light : rqueue.m_pointLights)
-		{
-			if(!light.hasShadow())
-			{
-				continue;
-			}
-
-			U32 layerIdx;
-			Bool rejectHistory;
-			const Bool layerFound = findShadowLayer(light.m_uuid, layerIdx, rejectHistory);
-
-			if(layerFound)
-			{
-				light.m_shadowLayer = U8(layerIdx);
-				ANKI_ASSERT(light.m_shadowLayer < kMaxRtShadowLayers);
-				m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-			}
-			else
-			{
-				// Disable shadows
-				light.m_shadowRenderQueues = {};
-			}
-		}
-
-		for(SpotLightQueueElement& light : rqueue.m_spotLights)
-		{
-			if(!light.hasShadow())
-			{
-				continue;
-			}
-
-			U32 layerIdx;
-			Bool rejectHistory;
-			const Bool layerFound = findShadowLayer(light.m_uuid, layerIdx, rejectHistory);
-
-			if(layerFound)
-			{
-				light.m_shadowLayer = U8(layerIdx);
-				ANKI_ASSERT(light.m_shadowLayer < kMaxRtShadowLayers);
-				m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-			}
-			else
-			{
-				// Disable shadows
-				light.m_shadowRenderQueue = nullptr;
-			}
-		}
-	}
-}
-
-void RtShadows::run(RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
-
-	// Allocate, set and bind global uniforms
-	{
-		MaterialGlobalUniforms* globalUniforms;
-		const RebarAllocation globalUniformsToken = RebarTransientMemoryPool::getSingleton().allocateFrame(1, globalUniforms);
-
-		memset(globalUniforms, 0, sizeof(*globalUniforms)); // Don't care for now
-
-		cmdb.bindUniformBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGlobalUniforms), globalUniformsToken);
-	}
-
-	// More globals
-	cmdb.bindAllBindless(U32(MaterialSet::kBindless));
-	cmdb.bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTrilinearRepeatSampler), getRenderer().getSamplers().m_trilinearRepeat.get());
-	cmdb.bindStorageBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGpuScene), &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
-
-#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) \
-	cmdb.bindReadOnlyTextureBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kUnifiedGeometry_##fmt), \
-								   &UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, Format::k##fmt);
-#include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
-
-	constexpr U32 kSet = 2;
-
-	cmdb.bindUniformBuffer(kSet, 0, getRenderer().getClusterBinning2().getClusteredShadingUniforms());
-	cmdb.bindStorageBuffer(kSet, 1, getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
-	cmdb.bindStorageBuffer(kSet, 2, getRenderer().getClusterBinning2().getClustersBuffer());
+		rpass.setWork([this](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-	cmdb.bindSampler(kSet, 3, getRenderer().getSamplers().m_trilinearRepeat.get());
+			cmdb.bindShaderProgram(m_upscaleGrProg.get());
 
-	rgraphCtx.bindImage(kSet, 4, m_runCtx.m_intermediateShadowsRts[0]);
+			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
 
-	rgraphCtx.bindColorTexture(kSet, 5, m_runCtx.m_historyRt);
-	cmdb.bindSampler(kSet, 6, getRenderer().getSamplers().m_trilinearClamp.get());
-	cmdb.bindSampler(kSet, 7, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	rgraphCtx.bindTexture(kSet, 8, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-	rgraphCtx.bindColorTexture(kSet, 9, getRenderer().getMotionVectors().getMotionVectorsRt());
-	rgraphCtx.bindColorTexture(kSet, 10, getRenderer().getMotionVectors().getHistoryLengthRt());
-	rgraphCtx.bindColorTexture(kSet, 11, getRenderer().getGBuffer().getColorRt(2));
-	rgraphCtx.bindAccelerationStructure(kSet, 12, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
-	rgraphCtx.bindColorTexture(kSet, 13, m_runCtx.m_prevMomentsRt);
-	rgraphCtx.bindImage(kSet, 14, m_runCtx.m_currentMomentsRt);
-	cmdb.bindTexture(kSet, 15, &m_blueNoiseImage->getTextureView());
+			rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_historyRt);
+			rgraphCtx.bindImage(0, 2, m_runCtx.m_upscaledRt);
+			rgraphCtx.bindTexture(0, 3, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+			rgraphCtx.bindTexture(0, 4, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 
-	RtShadowsUniforms unis;
-	for(U32 i = 0; i < kMaxRtShadowLayers; ++i)
-	{
-		unis.historyRejectFactor[i] = F32(m_runCtx.m_layersWithRejectedHistory.get(i));
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
+		});
 	}
-	cmdb.setPushConstants(&unis, sizeof(unis));
-
-	cmdb.traceRays(m_runCtx.m_sbtBuffer.get(), m_runCtx.m_sbtOffset, m_sbtRecordSize, m_runCtx.m_hitGroupCount, 1,
-				   getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, 1);
 }
 
-void RtShadows::runDenoise(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
+void RtShadows::runDenoise(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx, Bool horizontal)
 {
 	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-	cmdb.bindShaderProgram((m_runCtx.m_denoiseOrientation == 0) ? m_grDenoiseHorizontalProg.get() : m_grDenoiseVerticalProg.get());
+	cmdb.bindShaderProgram((horizontal) ? m_grDenoiseHorizontalProg.get() : m_grDenoiseVerticalProg.get());
 
 	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_intermediateShadowsRts[m_runCtx.m_denoiseOrientation]);
-	rgraphCtx.bindTexture(0, 3, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-	rgraphCtx.bindColorTexture(0, 4, getRenderer().getGBuffer().getColorRt(2));
-	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_currentMomentsRt);
-	rgraphCtx.bindColorTexture(0, 6, getRenderer().getMotionVectors().getHistoryLengthRt());
+	rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateShadowsRts[(horizontal) ? 0 : 1]);
+	rgraphCtx.bindTexture(0, 2, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
+	rgraphCtx.bindColorTexture(0, 3, getRenderer().getGBuffer().getColorRt(2));
+	rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_currentMomentsRt);
+	rgraphCtx.bindColorTexture(0, 5, getRenderer().getMotionVectors().getHistoryLengthRt());
 
-	rgraphCtx.bindImage(0, 7, (m_runCtx.m_denoiseOrientation == 0) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
+	rgraphCtx.bindImage(0, 6, (horizontal) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
 
 	RtShadowsDenoiseUniforms unis;
-	unis.invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
-	unis.time = F32(GlobalFrameIndex::getSingleton().m_value);
+	unis.m_invViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
+	unis.m_time = F32(GlobalFrameIndex::getSingleton().m_value % 0xFFFFu);
 	cmdb.setPushConstants(&unis, sizeof(unis));
 
 	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
-
-	m_runCtx.m_denoiseOrientation = !m_runCtx.m_denoiseOrientation;
-}
-
-void RtShadows::runSvgfVariance(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	cmdb.bindShaderProgram(m_svgfVarianceGrProg.get());
-
-	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-
-	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_intermediateShadowsRts[0]);
-	rgraphCtx.bindColorTexture(0, 3, m_runCtx.m_currentMomentsRt);
-	rgraphCtx.bindColorTexture(0, 4, getRenderer().getMotionVectors().getHistoryLengthRt());
-	rgraphCtx.bindTexture(0, 5, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-
-	rgraphCtx.bindImage(0, 6, m_runCtx.m_intermediateShadowsRts[1]);
-	rgraphCtx.bindImage(0, 7, m_runCtx.m_varianceRts[1]);
-
-	const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
-	cmdb.setPushConstants(&invProjMat, sizeof(invProjMat));
-
-	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
-}
-
-void RtShadows::runSvgfAtrous(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	const Bool lastPass = m_runCtx.m_atrousPassIdx == m_atrousPassCount - 1;
-	const U32 readRtIdx = (m_runCtx.m_atrousPassIdx + 1) & 1;
-
-	if(lastPass)
-	{
-		cmdb.bindShaderProgram(m_svgfAtrousLastPassGrProg.get());
-	}
-	else
-	{
-		cmdb.bindShaderProgram(m_svgfAtrousGrProg.get());
-	}
-
-	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-
-	rgraphCtx.bindTexture(0, 2, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-	rgraphCtx.bindColorTexture(0, 3, m_runCtx.m_intermediateShadowsRts[readRtIdx]);
-	rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_varianceRts[readRtIdx]);
-
-	if(!lastPass)
-	{
-		rgraphCtx.bindImage(0, 5, m_runCtx.m_intermediateShadowsRts[!readRtIdx]);
-		rgraphCtx.bindImage(0, 6, m_runCtx.m_varianceRts[!readRtIdx]);
-	}
-	else
-	{
-		rgraphCtx.bindImage(0, 5, m_runCtx.m_historyRt);
-	}
-
-	const Mat4& invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
-	cmdb.setPushConstants(&invProjMat, sizeof(invProjMat));
-
-	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2);
-
-	++m_runCtx.m_atrousPassIdx;
-}
-
-void RtShadows::runUpscale(RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	cmdb.bindShaderProgram(m_upscaleGrProg.get());
-
-	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
-	cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-
-	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_historyRt);
-	rgraphCtx.bindImage(0, 3, m_runCtx.m_upscaledRt);
-	rgraphCtx.bindTexture(0, 4, getRenderer().getDepthDownscale().getHiZRt(), kHiZHalfSurface);
-	rgraphCtx.bindTexture(0, 5, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-
-	dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
-}
-
-void RtShadows::buildSbt(RenderingContext& ctx)
-{
-	// Get some things
-	ANKI_ASSERT(ctx.m_renderQueue->m_rayTracingQueue);
-	ConstWeakArray<RayTracingInstanceQueueElement> instanceElements = ctx.m_renderQueue->m_rayTracingQueue->m_rayTracingInstances;
-	const U32 instanceCount = instanceElements.getSize();
-	ANKI_ASSERT(instanceCount > 0);
-
-	const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-
-	const U32 extraSbtRecords = 1 + 1; // Raygen + miss
-
-	m_runCtx.m_hitGroupCount = instanceCount;
-
-	// Allocate SBT
-	U8* sbt;
-	const RebarAllocation token =
-		RebarTransientMemoryPool::getSingleton().allocateFrame(PtrSize(m_sbtRecordSize) * (instanceCount + extraSbtRecords), sbt);
-	[[maybe_unused]] const U8* sbtStart = sbt;
-	m_runCtx.m_sbtBuffer.reset(const_cast<Buffer*>(&RebarTransientMemoryPool::getSingleton().getBuffer()));
-	m_runCtx.m_sbtOffset = token.getOffset();
-
-	// Set the miss and ray gen handles
-	ConstWeakArray<U8> shaderGroupHandles = m_rtLibraryGrProg->getShaderGroupHandles();
-	memcpy(sbt, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-	sbt += m_sbtRecordSize;
-	memcpy(sbt, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-	sbt += m_sbtRecordSize;
-
-	// Init SBT and instances
-	ANKI_ASSERT(m_sbtRecordSize >= shaderHandleSize + sizeof(ModelGpuDescriptor));
-	for(U32 instanceIdx = 0; instanceIdx < instanceCount; ++instanceIdx)
-	{
-		const RayTracingInstanceQueueElement& element = instanceElements[instanceIdx];
-
-		// Init SBT record
-		memcpy(sbt, &shaderGroupHandles[element.m_shaderGroupHandleIndex * shaderHandleSize], shaderHandleSize);
-
-		ANKI_ASSERT(shaderHandleSize + sizeof(GpuSceneRenderableVertex) <= m_sbtRecordSize);
-		GpuSceneRenderableVertex* shaderRecord = reinterpret_cast<GpuSceneRenderableVertex*>(sbt + shaderHandleSize);
-		shaderRecord->m_worldTransformsOffset = element.m_worldTransformsOffset;
-		shaderRecord->m_uniformsOffset = element.m_uniformsOffset;
-		shaderRecord->m_meshLodOffset = element.m_geometryOffset;
-		shaderRecord->m_boneTransformsOrParticleEmitterOffset = 0;
-
-		sbt += m_sbtRecordSize;
-	}
-
-	ANKI_ASSERT(sbtStart + m_sbtRecordSize * (instanceCount + extraSbtRecords) == sbt);
-}
-
-Bool RtShadows::findShadowLayer(U64 lightUuid, U32& layerIdx, Bool& rejectHistoryBuffer)
-{
-	const U64 crntFrame = getRenderer().getFrameCount();
-	layerIdx = kMaxU32;
-	U32 nextBestLayerIdx = kMaxU32;
-	U64 nextBestLayerFame = crntFrame;
-	rejectHistoryBuffer = false;
-
-	for(U32 i = 0; i < m_shadowLayers.getSize(); ++i)
-	{
-		ShadowLayer& layer = m_shadowLayers[i];
-		if(layer.m_lightUuid == lightUuid && layer.m_frameLastUsed == crntFrame - 1)
-		{
-			// Found it being used last frame
-			layerIdx = i;
-			layer.m_frameLastUsed = crntFrame;
-			layer.m_lightUuid = lightUuid;
-			break;
-		}
-		else if(layer.m_lightUuid == lightUuid || layer.m_frameLastUsed == kMaxU64)
-		{
-			// Found an empty slot or slot used by the same light
-			layerIdx = i;
-			layer.m_frameLastUsed = crntFrame;
-			layer.m_lightUuid = lightUuid;
-			rejectHistoryBuffer = true;
-			break;
-		}
-		else if(layer.m_frameLastUsed < nextBestLayerFame)
-		{
-			nextBestLayerIdx = i;
-			nextBestLayerFame = crntFrame;
-		}
-	}
-
-	// Not found but there is a good candidate. Use that
-	if(layerIdx == kMaxU32 && nextBestLayerIdx != kMaxU32)
-	{
-		layerIdx = nextBestLayerIdx;
-		m_shadowLayers[nextBestLayerIdx].m_frameLastUsed = crntFrame;
-		m_shadowLayers[nextBestLayerIdx].m_lightUuid = lightUuid;
-		rejectHistoryBuffer = true;
-	}
-
-	return layerIdx != kMaxU32;
 }
 
 void RtShadows::getDebugRenderTarget(CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,
 									 ShaderProgramPtr& optionalShaderProgram) const
 {
-	U32 layerGroup = 0;
-	if(rtName == "RtShadows")
-	{
-		layerGroup = 0;
-	}
-	else if(rtName == "RtShadows1")
-	{
-		layerGroup = 1;
-	}
-	else
-	{
-		ANKI_ASSERT(rtName == "RtShadows2");
-		layerGroup = 2;
-	}
-
 	handles[0] = m_runCtx.m_upscaledRt;
-
-	ShaderProgramResourceVariantInitInfo variantInit(m_visualizeRenderTargetsProg);
-	variantInit.addMutation("LAYER_GROUP", layerGroup);
-
-	const ShaderProgramResourceVariant* variant;
-	m_visualizeRenderTargetsProg->getOrCreateVariant(variantInit, variant);
-	optionalShaderProgram.reset(&variant->getProgram());
 }
 
 } // end namespace anki

+ 7 - 31
AnKi/Renderer/RtShadows.h

@@ -22,8 +22,6 @@ public:
 	RtShadows()
 	{
 		registerDebugRenderTarget("RtShadows");
-		registerDebugRenderTarget("RtShadows1");
-		registerDebugRenderTarget("RtShadows2");
 	}
 
 	Error init();
@@ -39,13 +37,6 @@ public:
 	}
 
 public:
-	class ShadowLayer
-	{
-	public:
-		U64 m_lightUuid = kMaxU64;
-		U64 m_frameLastUsed = kMaxU64;
-	};
-
 	/// @name Render targets
 	/// @{
 	TexturePtr m_historyRt;
@@ -59,6 +50,12 @@ public:
 
 	/// @name Programs
 	/// @{
+	ShaderProgramResourcePtr m_setupBuildSbtProg;
+	ShaderProgramPtr m_setupBuildSbtGrProg;
+
+	ShaderProgramResourcePtr m_buildSbtProg;
+	ShaderProgramPtr m_buildSbtGrProg;
+
 	ShaderProgramResourcePtr m_rayGenProg;
 	ShaderProgramPtr m_rtLibraryGrProg;
 	U32 m_rayGenShaderGroupIdx = kMaxU32;
@@ -79,14 +76,10 @@ public:
 
 	ShaderProgramResourcePtr m_upscaleProg;
 	ShaderProgramPtr m_upscaleGrProg;
-
-	ShaderProgramResourcePtr m_visualizeRenderTargetsProg;
 	/// @}
 
 	ImageResourcePtr m_blueNoiseImage;
 
-	Array<ShadowLayer, kMaxRtShadowLayers> m_shadowLayers;
-
 	U32 m_sbtRecordSize = 256;
 
 	Bool m_rtsImportedOnce = false;
@@ -104,28 +97,11 @@ public:
 		RenderTargetHandle m_currentMomentsRt;
 
 		Array<RenderTargetHandle, 2> m_varianceRts;
-
-		BufferPtr m_sbtBuffer;
-		PtrSize m_sbtOffset;
-		U32 m_hitGroupCount = 0;
-
-		BitSet<kMaxRtShadowLayers, U8> m_layersWithRejectedHistory = {false};
-
-		U8 m_atrousPassIdx = 0;
-		U8 m_denoiseOrientation = 0;
 	} m_runCtx;
 
 	Error initInternal();
 
-	void run(RenderPassWorkContext& rgraphCtx);
-	void runDenoise(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-	void runSvgfVariance(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-	void runSvgfAtrous(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-	void runUpscale(RenderPassWorkContext& rgraphCtx);
-
-	void buildSbt(RenderingContext& ctx);
-
-	Bool findShadowLayer(U64 lightUuid, U32& layerIdx, Bool& rejectHistoryBuffer);
+	void runDenoise(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx, Bool horizontal);
 
 	U32 getPassCountWithoutUpscaling() const
 	{

+ 22 - 1
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Renderer/ShadowMapping.h>
 #include <AnKi/Renderer/DepthDownscale.h>
 #include <AnKi/Renderer/ClusterBinning2.h>
+#include <AnKi/Renderer/RtShadows.h>
 #include <AnKi/Core/CVarSet.h>
 
 namespace anki {
@@ -51,6 +52,8 @@ Error ShadowmapsResolve::initInternal()
 	variantInitInfo.addConstant("kZSplitCount", getRenderer().getZSplitCount());
 	variantInitInfo.addConstant("kTileSize", getRenderer().getTileSize());
 	variantInitInfo.addMutation("PCF", g_shadowMappingPcfCVar.get() != 0);
+	variantInitInfo.addMutation("DIRECTIONAL_LIGHT_SHADOW_RESOLVED",
+								GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled && g_rayTracedShadowsCVar.get());
 	const ShaderProgramResourceVariant* variant;
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_grProg.reset(&variant->getProgram());
@@ -62,6 +65,8 @@ Error ShadowmapsResolve::initInternal()
 
 void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 {
+	const Bool rtShadowsEnabled = GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled && g_rayTracedShadowsCVar.get();
+
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	m_runCtx.m_rt = rgraph.newRenderTarget(m_rtDescr);
 
@@ -81,6 +86,11 @@ void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 		rpass.newBufferDependency(getRenderer().getClusterBinning2().getClustersBufferHandle(), BufferUsageBit::kStorageComputeRead);
 		rpass.newBufferDependency(getRenderer().getClusterBinning2().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
 								  BufferUsageBit::kStorageComputeRead);
+
+		if(rtShadowsEnabled)
+		{
+			rpass.newTextureDependency(getRenderer().getRtShadows().getRt(), TextureUsageBit::kSampledCompute);
+		}
 	}
 	else
 	{
@@ -99,6 +109,11 @@ void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 		rpass.newBufferDependency(getRenderer().getClusterBinning2().getClustersBufferHandle(), BufferUsageBit::kStorageFragmentRead);
 		rpass.newBufferDependency(getRenderer().getClusterBinning2().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
 								  BufferUsageBit::kStorageFragmentRead);
+
+		if(rtShadowsEnabled)
+		{
+			rpass.newTextureDependency(getRenderer().getRtShadows().getRt(), TextureUsageBit::kSampledFragment);
+		}
 	}
 }
 
@@ -127,9 +142,15 @@ void ShadowmapsResolve::run(RenderPassWorkContext& rgraphCtx)
 	}
 	cmdb.bindTexture(0, 8, &m_noiseImage->getTextureView());
 
+	const Bool rtShadowsEnabled = GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled && g_rayTracedShadowsCVar.get();
+	if(rtShadowsEnabled)
+	{
+		rgraphCtx.bindColorTexture(0, 9, getRenderer().getRtShadows().getRt());
+	}
+
 	if(g_preferComputeCVar.get())
 	{
-		rgraphCtx.bindImage(0, 9, m_runCtx.m_rt, TextureSubresourceInfo());
+		rgraphCtx.bindImage(0, 10, m_runCtx.m_rt, TextureSubresourceInfo());
 		dispatchPPCompute(cmdb, 8, 8, m_rtDescr.m_width, m_rtDescr.m_height);
 	}
 	else

+ 5 - 35
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -530,21 +530,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 	}
 
 	// Allocate the transient buffers
-	U32 aabbCount = 0;
-	switch(in.m_technique)
-	{
-	case RenderingTechnique::kGBuffer:
-		aabbCount = GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementCount();
-		break;
-	case RenderingTechnique::kDepth:
-		aabbCount = GpuSceneArrays::RenderableAabbDepth::getSingleton().getElementCount();
-		break;
-	case RenderingTechnique::kForward:
-		aabbCount = GpuSceneArrays::RenderableAabbForward::getSingleton().getElementCount();
-		break;
-	default:
-		ANKI_ASSERT(0);
-	}
+	const U32 aabbCount = GpuSceneArrays::RenderableAabbRt::getSingleton().getElementCount();
 
 	out.m_instancesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(AccelerationStructureInstance));
 	out.m_someBufferHandle = rgraph.importBuffer(BufferUsageBit::kStorageComputeWrite, out.m_instancesBuffer);
@@ -564,9 +550,8 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 	}
 
 	pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
-				  testRadius = in.m_testRadius, technique = in.m_technique, instancesBuff = out.m_instancesBuffer,
-				  indicesBuff = out.m_renderableIndicesBuffer, rangeBuff = out.m_rangeBuffer, counterBufferOffset = m_currentCounterBufferOffset,
-				  aabbCount](RenderPassWorkContext& rgraph) {
+				  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
+				  rangeBuff = out.m_rangeBuffer, counterBufferOffset = m_currentCounterBufferOffset](RenderPassWorkContext& rgraph) {
 		CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
 		cmdb.bindShaderProgram(m_grProg.get());
@@ -590,23 +575,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 
 		cmdb.setPushConstants(&unis, sizeof(unis));
 
-		BufferOffsetRange aabbsBuffer;
-		switch(technique)
-		{
-		case RenderingTechnique::kGBuffer:
-			aabbsBuffer = GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getBufferOffsetRange();
-			break;
-		case RenderingTechnique::kDepth:
-			aabbsBuffer = GpuSceneArrays::RenderableAabbDepth::getSingleton().getBufferOffsetRange();
-			break;
-		case RenderingTechnique::kForward:
-			aabbsBuffer = GpuSceneArrays::RenderableAabbForward::getSingleton().getBufferOffsetRange();
-			break;
-		default:
-			ANKI_ASSERT(0);
-		}
-
-		cmdb.bindStorageBuffer(0, 0, aabbsBuffer);
+		cmdb.bindStorageBuffer(0, 0, GpuSceneArrays::RenderableAabbRt::getSingleton().getBufferOffsetRange());
 		cmdb.bindStorageBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 		cmdb.bindStorageBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 		cmdb.bindStorageBuffer(0, 3, instancesBuff);
@@ -614,6 +583,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 		cmdb.bindStorageBuffer(0, 5, rangeBuff);
 		cmdb.bindStorageBuffer(0, 6, m_counterBuffer.get(), counterBufferOffset, sizeof(U32) * 2);
 
+		const U32 aabbCount = GpuSceneArrays::RenderableAabbRt::getSingleton().getElementCount();
 		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
 	});
 

+ 0 - 2
AnKi/Renderer/Utils/GpuVisibility.h

@@ -135,7 +135,6 @@ class GpuVisibilityAccelerationStructuresInput
 {
 public:
 	CString m_passesName;
-	RenderingTechnique m_technique = RenderingTechnique::kCount;
 
 	Vec3 m_lodReferencePoint = Vec3(kMaxF32);
 	Array<F32, kMaxLodCount - 1> m_lodDistances = {};
@@ -150,7 +149,6 @@ public:
 	void validate() const
 	{
 		ANKI_ASSERT(m_passesName.getLength() > 0);
-		ANKI_ASSERT(m_technique != RenderingTechnique::kCount);
 		ANKI_ASSERT(m_lodReferencePoint.x() != kMaxF32);
 		ANKI_ASSERT(m_lodReferencePoint == m_pointOfTest && "For now these should be the same");
 		ANKI_ASSERT(m_testRadius != kMaxF32);

+ 8 - 6
AnKi/Renderer/VolumetricFog.cpp

@@ -11,6 +11,7 @@
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
 #include <AnKi/Core/CVarSet.h>
+#include <AnKi/Scene/Components/SkyboxComponent.h>
 
 namespace anki {
 
@@ -67,13 +68,14 @@ void VolumetricFog::populateRenderGraph(RenderingContext& ctx)
 
 		rgraphCtx.bindImage(0, 2, m_runCtx.m_rt, TextureSubresourceInfo());
 
+		const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+
 		VolumetricFogUniforms regs;
-		const SkyboxQueueElement& el = ctx.m_renderQueue->m_skybox;
-		regs.m_fogDiffuse = el.m_fog.m_diffuseColor;
-		regs.m_fogScatteringCoeff = el.m_fog.m_scatteringCoeff;
-		regs.m_fogAbsorptionCoeff = el.m_fog.m_absorptionCoeff;
-		regs.m_near = ctx.m_renderQueue->m_cameraNear;
-		regs.m_far = ctx.m_renderQueue->m_cameraFar;
+		regs.m_fogDiffuse = (sky) ? sky->getFogDiffuseColor() : Vec3(0.0f);
+		regs.m_fogScatteringCoeff = (sky) ? sky->getFogScatteringCoefficient() : 0.0f;
+		regs.m_fogAbsorptionCoeff = (sky) ? sky->getFogAbsorptionCoefficient() : 0.0f;
+		regs.m_near = ctx.m_cameraNear;
+		regs.m_far = ctx.m_cameraFar;
 		regs.m_zSplitCountf = F32(getRenderer().getZSplitCount());
 		regs.m_volumeSize = UVec3(m_volumeSize);
 		regs.m_maxZSplitsToProcessf = F32(m_finalZSplit + 1);

+ 57 - 53
AnKi/Renderer/VolumetricLightingAccumulation.cpp

@@ -10,6 +10,7 @@
 #include <AnKi/Renderer/ClusterBinning2.h>
 #include <AnKi/Resource/ImageResource.h>
 #include <AnKi/Core/CVarSet.h>
+#include <AnKi/Scene/Components/SkyboxComponent.h>
 
 namespace anki {
 
@@ -76,10 +77,6 @@ void VolumetricLightingAccumulation::populateRenderGraph(RenderingContext& ctx)
 
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Vol light");
 
-	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
-		run(ctx, rgraphCtx);
-	});
-
 	pass.newTextureDependency(m_runCtx.m_rts[0], TextureUsageBit::kSampledCompute);
 	pass.newTextureDependency(m_runCtx.m_rts[1], TextureUsageBit::kImageComputeWrite);
 	pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSampledCompute);
@@ -97,56 +94,63 @@ void VolumetricLightingAccumulation::populateRenderGraph(RenderingContext& ctx)
 	{
 		pass.newTextureDependency(getRenderer().getIndirectDiffuseProbes().getCurrentlyRefreshedVolumeRt(), TextureUsageBit::kSampledCompute);
 	}
-}
-
-void VolumetricLightingAccumulation::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
-{
-	CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-	cmdb.bindShaderProgram(m_grProg.get());
-
-	// Bind all
-	cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
-	cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
-	cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
-
-	rgraphCtx.bindImage(0, 3, m_runCtx.m_rts[1], TextureSubresourceInfo());
-
-	cmdb.bindTexture(0, 4, &m_noiseImage->getTextureView());
-
-	rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_rts[0]);
-
-	cmdb.bindUniformBuffer(0, 6, getRenderer().getClusterBinning2().getClusteredShadingUniforms());
-	cmdb.bindStorageBuffer(0, 7, getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
-	rgraphCtx.bindColorTexture(0, 8, getRenderer().getShadowMapping().getShadowmapRt());
-	cmdb.bindStorageBuffer(0, 9,
-						   getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe));
-	cmdb.bindStorageBuffer(0, 10, getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kFogDensityVolume));
-	cmdb.bindStorageBuffer(0, 11, getRenderer().getClusterBinning2().getClustersBuffer());
-
-	cmdb.bindAllBindless(1);
-
-	VolumetricLightingUniforms unis;
-	const SkyboxQueueElement& queueEl = ctx.m_renderQueue->m_skybox;
-	if(queueEl.m_fog.m_heightOfMaxDensity > queueEl.m_fog.m_heightOfMinDensity)
-	{
-		unis.m_minHeight = queueEl.m_fog.m_heightOfMinDensity;
-		unis.m_oneOverMaxMinusMinHeight = 1.0f / (queueEl.m_fog.m_heightOfMaxDensity - unis.m_minHeight + kEpsilonf);
-		unis.m_densityAtMinHeight = queueEl.m_fog.m_minDensity;
-		unis.m_densityAtMaxHeight = queueEl.m_fog.m_maxDensity;
-	}
-	else
-	{
-		unis.m_minHeight = queueEl.m_fog.m_heightOfMaxDensity;
-		unis.m_oneOverMaxMinusMinHeight = 1.0f / (queueEl.m_fog.m_heightOfMinDensity - unis.m_minHeight + kEpsilonf);
-		unis.m_densityAtMinHeight = queueEl.m_fog.m_maxDensity;
-		unis.m_densityAtMaxHeight = queueEl.m_fog.m_minDensity;
-	}
-	unis.m_volumeSize = UVec3(m_volumeSize);
-	unis.m_maxZSplitsToProcessf = F32(m_finalZSplit + 1);
-	cmdb.setPushConstants(&unis, sizeof(unis));
 
-	dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_workgroupSize[2], m_volumeSize[0], m_volumeSize[1], m_volumeSize[2]);
+	// Record the volumetric lighting accumulation dispatch.
+	pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+		cmdb.bindShaderProgram(m_grProg.get());
+
+		// Bind all
+		cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+		cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
+		cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
+
+		rgraphCtx.bindImage(0, 3, m_runCtx.m_rts[1], TextureSubresourceInfo());
+
+		cmdb.bindTexture(0, 4, &m_noiseImage->getTextureView());
+
+		rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_rts[0]);
+
+		cmdb.bindUniformBuffer(0, 6, getRenderer().getClusterBinning2().getClusteredShadingUniforms());
+		cmdb.bindStorageBuffer(0, 7, getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
+		rgraphCtx.bindColorTexture(0, 8, getRenderer().getShadowMapping().getShadowmapRt());
+		cmdb.bindStorageBuffer(0, 9,
+							   getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe));
+		cmdb.bindStorageBuffer(0, 10, getRenderer().getClusterBinning2().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kFogDensityVolume));
+		cmdb.bindStorageBuffer(0, 11, getRenderer().getClusterBinning2().getClustersBuffer());
+
+		cmdb.bindAllBindless(1);
+
+		const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+
+		VolumetricLightingUniforms unis;
+		if(!sky)
+		{
+			// No skybox in the scene: zero out all the height fog parameters
+			unis.m_minHeight = 0.0f;
+			unis.m_oneOverMaxMinusMinHeight = 0.0f;
+			unis.m_densityAtMinHeight = 0.0f;
+			unis.m_densityAtMaxHeight = 0.0f;
+		}
+		else if(sky->getHeightOfMaxFogDensity() > sky->getHeightOfMinFogDensity())
+		{
+			// The height of max density is the higher of the two. Compare max against min (not max against itself) so this branch can be taken
+			unis.m_minHeight = sky->getHeightOfMinFogDensity();
+			unis.m_oneOverMaxMinusMinHeight = 1.0f / (sky->getHeightOfMaxFogDensity() - unis.m_minHeight + kEpsilonf);
+			unis.m_densityAtMinHeight = sky->getMinFogDensity();
+			unis.m_densityAtMaxHeight = sky->getMaxFogDensity();
+		}
+		else
+		{
+			// The height of max density is the lower (or equal) one. Swap so that m_minHeight stays the smaller height
+			unis.m_minHeight = sky->getHeightOfMaxFogDensity();
+			unis.m_oneOverMaxMinusMinHeight = 1.0f / (sky->getHeightOfMinFogDensity() - unis.m_minHeight + kEpsilonf);
+			unis.m_densityAtMinHeight = sky->getMaxFogDensity();
+			unis.m_densityAtMaxHeight = sky->getMinFogDensity();
+		}
+		unis.m_volumeSize = UVec3(m_volumeSize);
+		unis.m_maxZSplitsToProcessf = F32(m_finalZSplit + 1);
+		cmdb.setPushConstants(&unis, sizeof(unis));
+
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_workgroupSize[2], m_volumeSize[0], m_volumeSize[1], m_volumeSize[2]);
+	});
 }
 
 } // end namespace anki

+ 0 - 2
AnKi/Renderer/VolumetricLightingAccumulation.h

@@ -54,8 +54,6 @@ private:
 	public:
 		Array<RenderTargetHandle, 2> m_rts;
 	} m_runCtx; ///< Runtime context.
-
-	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 };
 /// @}
 

+ 0 - 1
AnKi/Scene/Components/CameraComponent.cpp

@@ -23,7 +23,6 @@ static NumericCVar<F32> g_shadowCascade3DistanceCVar(CVarSubsystem::kScene, "Sha
 													 "The distance of the 4th cascade");
 static NumericCVar<F32> g_earyZDistanceCVar(CVarSubsystem::kScene, "EarlyZDistance", (ANKI_PLATFORM_MOBILE) ? 0.0f : 10.0f, 0.0f, kMaxF32,
 											"Objects with distance lower than that will be used in early Z");
-BoolCVar g_rayTracedShadowsCVar(CVarSubsystem::kScene, "RayTracedShadows", true, "Enable or not ray traced shadows. Ignored if RT is not supported");
 
 CameraComponent::CameraComponent(SceneNode* node)
 	: SceneComponent(node, kClassType)

+ 0 - 3
AnKi/Scene/Components/CameraComponent.h

@@ -11,9 +11,6 @@
 
 namespace anki {
 
-// Forward
-extern BoolCVar g_rayTracedShadowsCVar;
-
 /// @addtogroup scene
 /// @{
 

+ 0 - 6
AnKi/Scene/Components/DecalComponent.cpp

@@ -13,14 +13,12 @@ namespace anki {
 
 DecalComponent::DecalComponent(SceneNode* node)
 	: SceneComponent(node, kClassType)
-	, m_spatial(this)
 {
 	m_gpuSceneDecal.allocate();
 }
 
 DecalComponent::~DecalComponent()
 {
-	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
 }
 
 void DecalComponent::setLayer(CString fname, F32 blendFactor, LayerType type)
@@ -70,7 +68,6 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		const Vec4 extend(halfBoxSize.x(), halfBoxSize.y(), halfBoxSize.z(), 0.0f);
 		const Obb obbL(center, Mat3x4::getIdentity(), extend);
 		m_obb = obbL.getTransformed(info.m_node->getWorldTransform());
-		m_spatial.setBoundingShape(m_obb);
 
 		// Upload to the GPU scene
 		GpuSceneDecal gpuDecal;
@@ -91,9 +88,6 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		m_gpuSceneDecal.uploadToGpuScene(gpuDecal);
 	}
 
-	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
-	updated = updated || spatialUpdated;
-
 	return Error::kNone;
 }
 

+ 0 - 16
AnKi/Scene/Components/DecalComponent.h

@@ -57,20 +57,6 @@ public:
 		return m_boxSize;
 	}
 
-	void setupDecalQueueElement(DecalQueueElement& el) const
-	{
-		ANKI_ASSERT(isEnabled());
-		el.m_diffuseBindlessTextureIndex = m_layers[LayerType::kDiffuse].m_bindlessTextureIndex;
-		el.m_roughnessMetalnessBindlessTextureIndex = m_layers[LayerType::kRoughnessMetalness].m_bindlessTextureIndex;
-		el.m_diffuseBlendFactor = m_layers[LayerType::kDiffuse].m_blendFactor;
-		el.m_roughnessMetalnessBlendFactor = m_layers[LayerType::kRoughnessMetalness].m_blendFactor;
-		el.m_textureMatrix = m_biasProjViewMat;
-		el.m_obbCenter = m_obb.getCenter().xyz();
-		el.m_obbExtend = m_obb.getExtend().xyz();
-		el.m_obbRotation = m_obb.getRotation().getRotationPart();
-		el.m_index = m_gpuSceneDecal.getIndex();
-	}
-
 private:
 	enum class LayerType : U8
 	{
@@ -87,8 +73,6 @@ private:
 		U32 m_bindlessTextureIndex = kMaxU32;
 	};
 
-	Spatial m_spatial;
-
 	Array<Layer, U(LayerType::kCount)> m_layers;
 	Mat4 m_biasProjViewMat = Mat4::getIdentity();
 	Vec3 m_boxSize = Vec3(1.0f);

+ 0 - 16
AnKi/Scene/Components/FogDensityComponent.cpp

@@ -12,14 +12,12 @@ namespace anki {
 
 FogDensityComponent::FogDensityComponent(SceneNode* node)
 	: SceneComponent(node, kClassType)
-	, m_spatial(this)
 {
 	m_gpuSceneVolume.allocate();
 }
 
 FogDensityComponent ::~FogDensityComponent()
 {
-	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
 }
 
 Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -32,17 +30,6 @@ Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		m_worldPos = info.m_node->getWorldTransform().getOrigin().xyz();
 
-		if(m_isBox)
-		{
-			const Aabb aabb(m_aabbMin + m_worldPos, m_aabbMax + m_worldPos);
-			m_spatial.setBoundingShape(aabb);
-		}
-		else
-		{
-			const Sphere sphere(m_worldPos, m_sphereRadius);
-			m_spatial.setBoundingShape(sphere);
-		}
-
 		// Upload to the GPU scene
 		GpuSceneFogDensityVolume gpuVolume;
 		if(m_isBox)
@@ -61,9 +48,6 @@ Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		m_gpuSceneVolume.uploadToGpuScene(gpuVolume);
 	}
 
-	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
-	updated = updated || spatialUpdated;
-
 	return Error::kNone;
 }
 

+ 0 - 19
AnKi/Scene/Components/FogDensityComponent.h

@@ -79,23 +79,6 @@ public:
 		return m_density;
 	}
 
-	void setupFogDensityQueueElement(FogDensityQueueElement& el) const
-	{
-		el.m_density = m_density;
-		el.m_isBox = m_isBox;
-		if(m_isBox)
-		{
-			el.m_aabbMin = (m_aabbMin + m_worldPos).xyz();
-			el.m_aabbMax = (m_aabbMax + m_worldPos).xyz();
-		}
-		else
-		{
-			el.m_sphereCenter = m_worldPos.xyz();
-			el.m_sphereRadius = m_sphereRadius;
-		}
-		el.m_index = m_gpuSceneVolume.getIndex();
-	}
-
 private:
 	Vec3 m_aabbMin = Vec3(0.0f); ///< In local space.
 
@@ -105,8 +88,6 @@ private:
 		F32 m_sphereRadius;
 	};
 
-	Spatial m_spatial;
-
 	Vec3 m_worldPos = Vec3(0.0f);
 	F32 m_density = 1.0f;
 

+ 0 - 13
AnKi/Scene/Components/LensFlareComponent.cpp

@@ -12,13 +12,11 @@ namespace anki {
 
 LensFlareComponent::LensFlareComponent(SceneNode* node)
 	: SceneComponent(node, kClassType)
-	, m_spatial(this)
 {
 }
 
 LensFlareComponent::~LensFlareComponent()
 {
-	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
 }
 
 void LensFlareComponent::loadImageResource(CString filename)
@@ -43,20 +41,9 @@ Error LensFlareComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		m_dirty = false;
 
 		m_worldPosition = info.m_node->getWorldTransform().getOrigin().xyz();
-
-		const Aabb aabb(m_worldPosition - (kAabbSize / 2.0f), m_worldPosition + (kAabbSize / 2.0f));
-		m_spatial.setBoundingShape(aabb);
 	}
 
-	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
-	updated = updated || spatialUpdated;
-
 	return Error::kNone;
 }
 
-void LensFlareComponent::onDestroy([[maybe_unused]] SceneNode& node)
-{
-	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-}
-
 } // end namespace anki

+ 7 - 9
AnKi/Scene/Components/LensFlareComponent.h

@@ -67,12 +67,14 @@ public:
 		return m_colorMul;
 	}
 
-	void setupLensFlareQueueElement(LensFlareQueueElement& el) const
+	const Vec3& getWorldPosition() const
 	{
-		el.m_worldPosition = m_worldPosition;
-		el.m_firstFlareSize = m_firstFlareSize;
-		el.m_colorMultiplier = m_colorMul;
-		el.m_textureView = &m_image->getTextureView();
+		return m_worldPosition;
+	}
+
+	const ImageResource& getImage() const
+	{
+		return *m_image;
 	}
 
 private:
@@ -82,8 +84,6 @@ private:
 
 	ImageResourcePtr m_image; ///< Array of textures.
 
-	Spatial m_spatial;
-
 	Vec2 m_firstFlareSize = Vec2(1.0f);
 	Vec2 m_otherFlareSize = Vec2(1.0f);
 
@@ -92,8 +92,6 @@ private:
 	Bool m_dirty = true;
 
 	Error update(SceneComponentUpdateInfo& info, Bool& updated) override;
-
-	void onDestroy(SceneNode& node) override;
 };
 /// @}
 

+ 23 - 6
AnKi/Scene/Components/ModelComponent.cpp

@@ -36,6 +36,7 @@ void ModelComponent::freeGpuScene()
 		patch.m_gpuSceneRenderableAabbDepth.free();
 		patch.m_gpuSceneRenderableAabbForward.free();
 		patch.m_gpuSceneRenderableAabbGBuffer.free();
+		patch.m_gpuSceneRenderableAabbRt.free();
 
 		for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
 		{
@@ -92,13 +93,8 @@ void ModelComponent::loadModelResource(CString filename)
 		out.m_gpuSceneMeshLods.allocate();
 		out.m_gpuSceneRenderable.allocate();
 
-		for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
+		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(out.m_techniques))
 		{
-			if(!(RenderingTechniqueBit(1 << t) & out.m_techniques) || !!(RenderingTechniqueBit(1 << t) & RenderingTechniqueBit::kAllRt))
-			{
-				continue;
-			}
-
 			switch(t)
 			{
 			case RenderingTechnique::kGBuffer:
@@ -110,6 +106,9 @@ void ModelComponent::loadModelResource(CString filename)
 			case RenderingTechnique::kDepth:
 				out.m_gpuSceneRenderableAabbDepth.allocate();
 				break;
+			case RenderingTechnique::kRtShadow:
+				out.m_gpuSceneRenderableAabbRt.allocate();
+				break;
 			default:
 				ANKI_ASSERT(0);
 			}
@@ -144,6 +143,7 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		{
 			const ModelPatch& patch = m_model->getModelPatches()[i];
 			const MeshResource& mesh = *patch.getMesh();
+			const MaterialResource& mtl = *patch.getMaterial();
 
 			Array<GpuSceneMeshLod, kMaxLodCount> meshLods;
 
@@ -196,6 +196,12 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			gpuRenderable.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
 			gpuRenderable.m_meshLodsOffset = m_patchInfos[i].m_gpuSceneMeshLods.getGpuSceneOffset();
 			gpuRenderable.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
+			if(!!(mtl.getRenderingTechniques() & RenderingTechniqueBit::kRtShadow))
+			{
+				const RenderingKey key(RenderingTechnique::kRtShadow, 0, false, false);
+				const MaterialVariant& variant = mtl.getOrCreateVariant(key);
+				gpuRenderable.m_rtShadowsShaderHandleIndex = variant.getRtShaderGroupHandleIndex();
+			}
 			m_patchInfos[i].m_gpuSceneRenderable.uploadToGpuScene(gpuRenderable);
 		}
 
@@ -279,6 +285,7 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
+			// Do raster techniques
 			for(RenderingTechnique t :
 				EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(m_patchInfos[i].m_techniques & ~RenderingTechniqueBit::kAllRt))
 			{
@@ -301,6 +308,16 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 					ANKI_ASSERT(0);
 				}
 			}
+
+			// Do RT techniques
+			if(!!(m_patchInfos[i].m_techniques & RenderingTechniqueBit::kAllRt))
+			{
+				const U32 bucket = 0;
+				const GpuSceneRenderableAabb gpuVolume = initGpuSceneRenderableAabb(aabbWorld.getMin().xyz(), aabbWorld.getMax().xyz(),
+																					m_patchInfos[i].m_gpuSceneRenderable.getIndex(), bucket);
+
+				m_patchInfos[i].m_gpuSceneRenderableAabbRt.uploadToGpuScene(gpuVolume);
+			}
 		}
 	}
 

+ 1 - 0
AnKi/Scene/Components/ModelComponent.h

@@ -55,6 +55,7 @@ private:
 		GpuSceneArrays::RenderableAabbGBuffer::Allocation m_gpuSceneRenderableAabbGBuffer;
 		GpuSceneArrays::RenderableAabbDepth::Allocation m_gpuSceneRenderableAabbDepth;
 		GpuSceneArrays::RenderableAabbForward::Allocation m_gpuSceneRenderableAabbForward;
+		GpuSceneArrays::RenderableAabbRt::Allocation m_gpuSceneRenderableAabbRt; ///< Slot in the RT-only AABB array (RenderableAabbRt), not the forward one.
 
 		Array<RenderStateBucketIndex, U32(RenderingTechnique::kCount)> m_renderStateBucketIndices;
 		RenderingTechniqueBit m_techniques;

+ 2 - 0
AnKi/Scene/GpuSceneArrays.def.h

@@ -36,6 +36,8 @@ ANKI_CAT_SEPARATOR
 ANKI_CAT_TYPE(RenderableAabbForward, GpuSceneRenderableAabb, 1, g_minGpuSceneRenderablesCVar)
 ANKI_CAT_SEPARATOR
 ANKI_CAT_TYPE(RenderableAabbDepth, GpuSceneRenderableAabb, 2, g_minGpuSceneRenderablesCVar)
+ANKI_CAT_SEPARATOR
+ANKI_CAT_TYPE(RenderableAabbRt, GpuSceneRenderableAabb, 3, g_minGpuSceneRenderablesCVar)
 
 #undef ANKI_TRF_ARR
 #undef ANKI_MESH_ARR

+ 0 - 27
AnKi/Scene/Visibility.cpp

@@ -341,15 +341,6 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		}
 		else if(compType == LensFlareComponent::kClassType)
 		{
-			const LensFlareComponent& flarec = static_cast<LensFlareComponent&>(comp);
-
-			if(!isInside() || !flarec.isEnabled())
-			{
-				continue;
-			}
-
-			LensFlareQueueElement* el = result.m_lensFlares.newElement();
-			flarec.setupLensFlareQueueElement(*el);
 		}
 		else if(compType == ReflectionProbeComponent::kClassType)
 		{
@@ -357,27 +348,9 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		}
 		else if(compType == DecalComponent::kClassType)
 		{
-			const DecalComponent& decalc = static_cast<DecalComponent&>(comp);
-
-			if(!isInside() || !decalc.isEnabled())
-			{
-				continue;
-			}
-
-			DecalQueueElement* el = result.m_decals.newElement();
-			decalc.setupDecalQueueElement(*el);
 		}
 		else if(compType == FogDensityComponent::kClassType)
 		{
-			if(!isInside())
-			{
-				continue;
-			}
-
-			const FogDensityComponent& fogc = static_cast<FogDensityComponent&>(comp);
-
-			FogDensityQueueElement* el = result.m_fogDensityVolumes.newElement();
-			fogc.setupFogDensityQueueElement(*el);
 		}
 		else if(compType == GlobalIlluminationProbeComponent::kClassType)
 		{

+ 1 - 0
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -24,6 +24,7 @@ struct GpuSceneRenderable
 	U32 m_meshLodsOffset; ///< Points to an array of GpuSceneMeshLod sized kMaxLodCount.
 	U32 m_boneTransformsOffset; ///< Array of Mat3x4 or 0 if its not a skin.
 	U32 m_particleEmitterOffset; ///< Offset to GpuSceneParticleEmitter or 0 if it's not an emitter.
+	U32 m_rtShadowsShaderHandleIndex; ///< The index of the shader handle in the array of library's handles.
 };
 
 /// Almost similar to GpuSceneRenderable but with only what the material shaders need. Needs to fit in a UVec4 vertex attribute.

+ 12 - 11
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -10,21 +10,22 @@
 ANKI_BEGIN_NAMESPACE
 
 // RT shadows
-constexpr U32 kMaxRtShadowLayers = 8u;
-
-struct RtShadowsUniforms
+struct RtShadowsDenoiseUniforms
 {
-	F32 historyRejectFactor[kMaxRtShadowLayers]; // 1.0 means reject, 0.0 not reject
+	Mat4 m_invViewProjMat;
+
+	F32 m_time;
+	F32 m_padding0;
+	F32 m_padding1;
+	F32 m_padding2;
 };
 
-struct RtShadowsDenoiseUniforms
+struct RtShadowsSbtBuildUniforms
 {
-	Mat4 invViewProjMat;
-
-	F32 time;
-	F32 padding0;
-	F32 padding1;
-	F32 padding2;
+	U32 m_shaderHandleDwordSize;
+	U32 m_sbtRecordDwordSize;
+	U32 m_padding0;
+	U32 m_padding1;
 };
 
 // Indirect diffuse

+ 9 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -148,6 +148,15 @@ struct ByteAddressBuffer
 	T& Load(U32 offset);
 };
 
+struct RWByteAddressBuffer // Declaration-only stand-in for the HLSL type of the same name (this file is Intellisense.hlsl); no bodies are provided
+{
+	template<typename T>
+	T& Load(U32 offset); // Typed read at the given offset
+
+	template<typename T>
+	void Store(U32 offset, T x); // Typed write of x at the given offset
+};
+
 struct RaytracingAccelerationStructure
 {
 };

+ 0 - 23
AnKi/Shaders/LightShading.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki mutator USE_SHADOW_LAYERS 0 1
-
 #pragma anki start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki end
@@ -32,11 +30,7 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kTileSize, 3u);
 [[vk::binding(7)]] Texture2D<Vec4> g_gbuffer1Tex;
 [[vk::binding(8)]] Texture2D<Vec4> g_gbuffer2Tex;
 [[vk::binding(9)]] Texture2D g_depthTex;
-#if USE_SHADOW_LAYERS
-[[vk::binding(10)]] Texture2D<UVec4> g_shadowLayersTex;
-#else
 [[vk::binding(11)]] Texture2D<RVec4> g_resolvedShadowsTex;
-#endif
 
 // Common code for lighting
 #define LIGHTING_COMMON_BRDF() \
@@ -75,13 +69,8 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
 
 	// SM
-#if USE_SHADOW_LAYERS
-	RF32 resolvedSm[kMaxRtShadowLayers];
-	unpackRtShadows(g_shadowLayersTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), resolvedSm);
-#else
 	RVec4 resolvedSm = g_resolvedShadowsTex.SampleLevel(g_trilinearClampSampler, uv, 0.0);
 	U32 resolvedSmIdx = 0u;
-#endif
 
 	// Ambient and emissive color
 	RVec3 outColor = gbuffer.m_emission;
@@ -94,12 +83,8 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 		RF32 shadowFactor;
 		if(dirLight.m_shadowCascadeCount > 0u)
 		{
-#if USE_SHADOW_LAYERS
-			shadowFactor = resolvedSm[dirLight.m_shadowLayer];
-#else
 			shadowFactor = resolvedSm[0];
 			++resolvedSmIdx;
-#endif
 		}
 		else
 		{
@@ -127,11 +112,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 
 		[branch] if(light.m_shadowAtlasTileScale >= 0.0)
 		{
-#if USE_SHADOW_LAYERS
-			const RF32 shadow = resolvedSm[light.m_shadowLayer];
-#else
 			const RF32 shadow = resolvedSm[resolvedSmIdx++];
-#endif
 			lambert *= shadow;
 		}
 
@@ -151,11 +132,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 
 		[branch] if(light.m_shadowLayer != kMaxU32)
 		{
-#if USE_SHADOW_LAYERS
-			const RF32 shadow = resolvedSm[light.m_shadowLayer];
-#else
 			const RF32 shadow = resolvedSm[resolvedSmIdx++];
-#endif
 			lambert *= shadow;
 		}
 

+ 1 - 30
AnKi/Shaders/RtShadows.hlsl

@@ -8,36 +8,7 @@
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 
-constexpr F32 kRtShadowsMaxHistoryLength = 16.0;
-
-UVec4 packRtShadows(RF32 shadowFactors[kMaxRtShadowLayers])
-{
-	const U32 a = newPackUnorm4x8(Vec4(shadowFactors[0], shadowFactors[1], shadowFactors[2], shadowFactors[3]));
-	const U32 b = newPackUnorm4x8(Vec4(shadowFactors[4], shadowFactors[5], shadowFactors[6], shadowFactors[7]));
-	return UVec4(a, b, 0, 0);
-}
-
-void unpackRtShadows(UVec4 packed, out RF32 shadowFactors[kMaxRtShadowLayers])
-{
-	const Vec4 a = newUnpackUnorm4x8(packed.x);
-	const Vec4 b = newUnpackUnorm4x8(packed.y);
-	shadowFactors[0] = a[0];
-	shadowFactors[1] = a[1];
-	shadowFactors[2] = a[2];
-	shadowFactors[3] = a[3];
-	shadowFactors[4] = b[0];
-	shadowFactors[5] = b[1];
-	shadowFactors[6] = b[2];
-	shadowFactors[7] = b[3];
-}
-
-void zeroRtShadowLayers(out RF32 shadowFactors[kMaxRtShadowLayers])
-{
-	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		shadowFactors[i] = 0.0;
-	}
-}
+constexpr F32 kRtShadowsMaxHistoryLength = 16.0; // The frames of history
 
 struct [raypayload] RayPayload
 {

+ 25 - 36
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -12,24 +12,22 @@
 #include <AnKi/Shaders/RtShadows.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
 
-[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(2)]] Texture2D<UVec4> g_inTex;
-[[vk::binding(3)]] Texture2D g_depthTex;
-[[vk::binding(4)]] Texture2D g_gbuffer2Tex;
-[[vk::binding(5)]] Texture2D g_momentsTex;
-[[vk::binding(6)]] Texture2D g_historyLengthTex;
-[[vk::binding(7)]] RWTexture2D<UVec4> g_outImg;
+[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(1)]] Texture2D<Vec4> g_inTex;
+[[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
+[[vk::binding(3)]] Texture2D<Vec4> g_gbuffer2Tex;
+[[vk::binding(4)]] Texture2D<Vec4> g_momentsTex;
+[[vk::binding(5)]] Texture2D<Vec4> g_historyLengthTex;
+[[vk::binding(6)]] RWTexture2D<Vec4> g_outUav;
 
 [[vk::push_constant]] ConstantBuffer<RtShadowsDenoiseUniforms> g_unis;
 
 Vec3 unproject(Vec2 ndc, F32 depth)
 {
-	const Vec4 worldPos4 = mul(g_unis.invViewProjMat, Vec4(ndc, depth, 1.0));
+	const Vec4 worldPos4 = mul(g_unis.m_invViewProjMat, Vec4(ndc, depth, 1.0));
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 	return worldPos;
 }
@@ -59,27 +57,26 @@ F32 computeVarianceCenter(Vec2 uv)
 
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
-	if(any(svDispatchThreadId.xy >= kOutImageSize))
+	Vec2 outImageSize;
+	g_outUav.GetDimensions(outImageSize.x, outImageSize.y);
+	if(any(svDispatchThreadId.xy >= UVec2(outImageSize)))
 	{
 		return;
 	}
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / outImageSize;
 
 	// Reference
 	const F32 depthCenter = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depthCenter == 1.0)
 	{
-		g_outImg[svDispatchThreadId.xy] = 0;
+		g_outUav[svDispatchThreadId.xy] = 0.0;
 		return;
 	}
 
 	const Vec3 positionCenter = unproject(uvToNdc(uv), depthCenter);
 	const Vec3 normalCenter = unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0));
 
-	RF32 shadowFactors[kMaxRtShadowLayers];
-	unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowFactors);
-
 	// Decide the amount of blurring
 	const F32 varianceCenter = computeVarianceCenter(uv);
 	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
@@ -110,44 +107,36 @@ F32 computeVarianceCenter(Vec2 uv)
 	sampleCount = sampleCount / 2u;
 
 	// Sample
-	F32 weight = 1.0; // TODO fix that
-
+	F32 weight = kEpsilonF32;
+	F32 shadowFactor = 0.0;
 	for(I32 i = -I32(sampleCount); i < I32(sampleCount); ++i)
 	{
-		const Vec2 texelSize = 1.0 / Vec2(kOutImageSize);
+		const Vec2 texelSize = 1.0 / outImageSize;
 #if BLUR_ORIENTATION == 0
 		const Vec2 sampleUv = Vec2(uv.x + F32(i) * texelSize.x, uv.y);
 #else
 		const Vec2 sampleUv = Vec2(uv.x, uv.y + F32(i) * texelSize.y);
 #endif
 
-		RF32 localShadowFactors[kMaxRtShadowLayers];
-		unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
+		const F32 localShadowFactor = g_inTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 
 		const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 		const Vec3 positionTap = unproject(uvToNdc(sampleUv), depthTap);
 		const Vec3 normalTap = unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0));
 
-		// F32 w = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
-		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
+		// F32 localWeight = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
+		F32 localWeight = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
 
-		w *= gaussianWeight(0.4, abs(F32(i)) / F32(sampleCount + 1u));
+		localWeight *= gaussianWeight(0.4, abs(F32(i)) / F32(sampleCount + 1u));
 
-		[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-		{
-			shadowFactors[i] += localShadowFactors[i] * w;
-		}
+		shadowFactor += localShadowFactor * localWeight;
 
-		weight += w;
-	}
-
-	// Write value
-	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		shadowFactors[i] /= weight;
+		weight += localWeight;
 	}
 
-	g_outImg[svDispatchThreadId.xy] = packRtShadows(shadowFactors);
+	// Normalize and write
+	shadowFactor /= weight;
+	g_outUav[svDispatchThreadId.xy] = shadowFactor;
 }
 
 #pragma anki end

+ 48 - 121
AnKi/Shaders/RtShadowsRayGen.ankiprog

@@ -18,24 +18,21 @@
 #define SET 2u
 
 [[vk::binding(0, SET)]] ConstantBuffer<ClusteredShadingUniforms> g_clusteredShading;
-[[vk::binding(1, SET)]] StructuredBuffer<PointLight> g_pointLights;
-[[vk::binding(1, SET)]] StructuredBuffer<SpotLight> g_spotLights;
-[[vk::binding(2, SET)]] StructuredBuffer<Cluster> g_clusters;
-[[vk::binding(3, SET)]] SamplerState g_trilinearRepeatSampler;
-[[vk::binding(4, SET)]] RWTexture2D<UVec4> g_shadowsImage;
-[[vk::binding(5, SET)]] Texture2D<UVec4> g_historyShadowsTex;
-[[vk::binding(6, SET)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(7, SET)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(8, SET)]] Texture2D<Vec4> g_depthRt;
-[[vk::binding(9, SET)]] Texture2D<Vec4> g_motionVectorsRt;
-[[vk::binding(10, SET)]] Texture2D<Vec4> g_historyLengthTex;
-[[vk::binding(11, SET)]] Texture2D<Vec4> g_normalRt;
-[[vk::binding(12, SET)]] RaytracingAccelerationStructure g_tlas;
-[[vk::binding(13, SET)]] Texture2D<Vec4> g_prevMomentsTex;
-[[vk::binding(14, SET)]] RWTexture2D<Vec4> g_momentsImage;
-[[vk::binding(15, SET)]] Texture2D<Vec4> g_blueNoiseTex;
-
-[[vk::push_constant]] ConstantBuffer<RtShadowsUniforms> g_unis;
+[[vk::binding(1, SET)]] StructuredBuffer<Cluster> g_clusters;
+
+[[vk::binding(2, SET)]] SamplerState g_trilinearRepeatSampler;
+
+[[vk::binding(3, SET)]] RWTexture2D<Vec4> g_shadowsImage;
+[[vk::binding(4, SET)]] Texture2D<Vec4> g_historyShadowsTex;
+[[vk::binding(5, SET)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(6, SET)]] Texture2D<Vec4> g_depthRt;
+[[vk::binding(7, SET)]] Texture2D<Vec4> g_motionVectorsRt;
+[[vk::binding(8, SET)]] Texture2D<Vec4> g_historyLengthTex;
+[[vk::binding(9, SET)]] Texture2D<Vec4> g_normalRt;
+[[vk::binding(10, SET)]] RaytracingAccelerationStructure g_tlas;
+[[vk::binding(11, SET)]] Texture2D<Vec4> g_prevMomentsTex;
+[[vk::binding(12, SET)]] RWTexture2D<Vec4> g_momentsImage;
+[[vk::binding(13, SET)]] Texture2D<Vec4> g_blueNoiseTex;
 
 F32 trace(const Vec3 rayOrigin, const Vec3 rayDir, F32 tMax)
 {
@@ -57,6 +54,20 @@ F32 trace(const Vec3 rayOrigin, const Vec3 rayDir, F32 tMax)
 	return payload.m_shadowFactor;
 }
 
+Vec3 genRandomDirection(U32 rayIdx, Vec2 uv) // Returns an animated blue-noise vector for ray rayIdx at screen uv; components are in [-1.0, 1.0] and the result is NOT normalized here
+{
+	const U32 frameIdx = g_clusteredShading.m_frame * RAYS_PER_PIXEL + rayIdx; // Distinct animation index for each (frame, ray) pair as long as rayIdx < RAYS_PER_PIXEL
+
+	Vec2 noiseTexSize;
+	g_blueNoiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
+
+	Vec3 random = g_blueNoiseTex.SampleLevel(g_trilinearRepeatSampler, Vec2(DispatchRaysDimensions().xy) / noiseTexSize * uv, 0.0).rgb; // uv is scaled by dispatch size / noise size; presumably one noise texel per ray, tiled by the repeat sampler
+	random = animateBlueNoise(random, frameIdx);
+	random = random * 2.0 - 1.0; // In [-1.0, 1.0]
+
+	return random;
+}
+
 [shader("raygeneration")] void main()
 {
 	// World position
@@ -68,139 +79,55 @@ F32 trace(const Vec3 rayOrigin, const Vec3 rayDir, F32 tMax)
 
 	if(depth == 1.0)
 	{
-		g_shadowsImage[DispatchRaysIndex().xy] = UVec4(0, 0, 0, 0);
-		g_momentsImage[DispatchRaysIndex().xy] = Vec4(0.0, 0.0, 0.0, 0.0);
+		g_shadowsImage[DispatchRaysIndex().xy] = 0.0f;
+		g_momentsImage[DispatchRaysIndex().xy] = 0.0f;
 		return;
 	}
 
 	// World normal
 	const Vec3 normal = unpackNormalFromGBuffer(g_normalRt.SampleLevel(g_linearAnyClampSampler, uv, 0.0));
 
-	// Cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, g_clusteredShading, Vec3(uv * g_clusteredShading.m_renderingSize, depth));
-
-	RF32 shadowFactors[kMaxRtShadowLayers];
-	zeroRtShadowLayers(shadowFactors);
-
-	// Get a random factor
-	Vec3 random[RAYS_PER_PIXEL];
-	for(I32 i = 0; i < RAYS_PER_PIXEL; ++i)
-	{
-		const U32 frameIdx = g_clusteredShading.m_frame * U32(RAYS_PER_PIXEL + i);
-#if 0
-		const UVec3 irandom = rand3DPCG16(UVec3(DispatchRaysIndex().xy, frameIdx));
-		random[i] = Vec3(irandom) / F32(0xFFFF) * 2.0 - 1.0; // In [-1.0, 1.0]
-#else
-		random[i] = g_blueNoiseTex.SampleLevel(g_trilinearRepeatSampler, Vec2(DispatchRaysDimensions().xy) / Vec2(64.0, 64.0) * uv, 0.0).rgb;
-		random[i] = animateBlueNoise(random[i], frameIdx);
-		random[i] = random[i] * 2.0 - 1.0; // In [-1.0, 1.0]
-#endif
-	}
-
 	// Dir light
+	F32 shadowFactor = 0.0f;
 	const DirectionalLight dirLight = g_clusteredShading.m_directionalLight;
-	[branch] if(dirLight.m_active != 0u && dirLight.m_shadowCascadeCount > 0u)
-	{
-		for(I32 i = 0; i < RAYS_PER_PIXEL; ++i)
-		{
-			const Vec3 dirLightPos = worldPos + -dirLight.m_direction * 10.0 + random[i];
-			const Vec3 rayDir = normalize(dirLightPos - worldPos);
-
-			const F32 lambertTerm = dot(rayDir, normal);
-			[branch] if(lambertTerm > 0.0)
-			{
-				shadowFactors[dirLight.m_shadowLayer] += trace(worldPos, rayDir, 10000.0) / F32(RAYS_PER_PIXEL);
-			}
-		}
-	}
-
-	// Point lights
-	[loop] while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
+	for(U32 i = 0; i < RAYS_PER_PIXEL; ++i)
 	{
-		const I32 idx = firstbitlow2(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
-		const PointLight light = g_pointLights[idx];
+		const Vec3 dirLightPos = worldPos + -dirLight.m_direction * 10.0 + genRandomDirection(i, uv);
+		const Vec3 rayDir = normalize(dirLightPos - worldPos);
 
-		[branch] if(light.m_shadowAtlasTileScale >= 0.0)
+		const F32 lambertTerm = dot(rayDir, normal);
+		[branch] if(lambertTerm > 0.0)
 		{
-			for(I32 i = 0; i < RAYS_PER_PIXEL; ++i)
-			{
-				const Vec3 lightPos = light.m_position + 0.05 * light.m_radius * random[i];
-				const Vec3 toLight = lightPos - worldPos;
-				const F32 distanceToLight = length(toLight);
-				const Vec3 rayDir = toLight / distanceToLight; // normalize
-
-				const Bool inside = distanceToLight < light.m_radius;
-				const F32 lambertTerm = dot(rayDir, normal);
-				[branch] if(inside && lambertTerm > 0.0)
-				{
-					shadowFactors[light.m_shadowLayer] += trace(worldPos, rayDir, distanceToLight) / F32(RAYS_PER_PIXEL);
-				}
-			}
-		}
-	}
-
-	// Spot lights
-	[loop] while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
-	{
-		const I32 idx = firstbitlow2(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
-		const SpotLight light = g_spotLights[idx];
-
-		[branch] if(light.m_shadowLayer != kMaxU32)
-		{
-			for(I32 i = 0; i < RAYS_PER_PIXEL; ++i)
-			{
-				const Vec3 lightPos = light.m_position + 0.05 * light.m_radius * random[i];
-				const Vec3 toLight = lightPos - worldPos;
-				const F32 distanceToLight = length(toLight);
-				const Vec3 rayDir = toLight / distanceToLight; // normalize
-
-				const F32 lambertTerm = dot(rayDir, normal);
-				[branch] if(lambertTerm > 0.0)
-				{
-					shadowFactors[light.m_shadowLayer] += trace(worldPos, rayDir, distanceToLight) / F32(RAYS_PER_PIXEL);
-				}
-			}
+			shadowFactor += trace(worldPos, rayDir, 10000.0) / F32(RAYS_PER_PIXEL);
 		}
 	}
 
 	// Get history length
 	const Vec2 historyUv = uv + g_motionVectorsRt.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
-	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
+	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
 
-	// Compute blend fractor. Use nearest sampler because it's an integer texture
+	// Compute blend factor
 	const F32 lowestBlendFactor = 0.1;
 	const F32 stableFrames = 4.0;
-	const F32 lerpv = min(1.0, (historyLength * kRtShadowsMaxHistoryLength - 1.0) / stableFrames);
-	const F32 blendFactor = lerp(1.0, lowestBlendFactor, lerpv);
+	const F32 historyGoodnessFactor = min(1.0, (historyLength - 1.0) / stableFrames);
+	const F32 historyBlendFactor = lerp(1.0, lowestBlendFactor, historyGoodnessFactor);
 
 	// Blend with history
-	const UVec4 packedhistory = g_historyShadowsTex.SampleLevel(g_nearestAnyClampSampler, historyUv, 0.0);
-	RF32 history[kMaxRtShadowLayers];
-	unpackRtShadows(packedhistory, history);
-	for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		const F32 lerpv = min(1.0, g_unis.historyRejectFactor[i] + blendFactor);
-		shadowFactors[i] = lerp(history[i], shadowFactors[i], lerpv);
-	}
+	const F32 history = g_historyShadowsTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0).r;
+	shadowFactor = lerp(history, shadowFactor, historyBlendFactor);
 
 	// Store the shadows image
-	const UVec4 packed = packRtShadows(shadowFactors);
-	g_shadowsImage[DispatchRaysIndex().xy] = packed;
+	g_shadowsImage[DispatchRaysIndex().xy] = shadowFactor;
 
 	// Compute the moments that will give temporal variance
-	Vec2 moments = 0.0;
-	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		moments.x += shadowFactors[i];
-	}
+	Vec2 moments;
+	moments.x = shadowFactor;
 	moments.y = moments.x * moments.x;
 
 	// Blend the moments
 	const Vec2 prevMoments = g_prevMomentsTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0).xy;
 	const F32 lowestMomentsBlendFactor = 0.2;
-	const F32 momentsBlendFactor = lerp(1.0, lowestMomentsBlendFactor, lerpv);
+	const F32 momentsBlendFactor = lerp(1.0, lowestMomentsBlendFactor, historyGoodnessFactor);
 	moments = lerp(prevMoments, moments, momentsBlendFactor);
 
 	// Store the moments

+ 51 - 0
AnKi/Shaders/RtShadowsSbtBuild.ankiprog

@@ -0,0 +1,51 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+
+#pragma anki start comp
+
+[[vk::binding(0)]] StructuredBuffer<GpuSceneRenderable> g_renderables;
+[[vk::binding(1)]] ByteAddressBuffer g_gpuScene;
+
+[[vk::binding(2)]] StructuredBuffer<U32> g_visibleRenderableIndices; // 1st element is the count
+
+[[vk::binding(3)]] StructuredBuffer<U32> g_shaderHandles;
+
+[[vk::binding(4)]] RWStructuredBuffer<U32> g_sbtBuffer;
+
+[[vk::push_constant]] ConstantBuffer<RtShadowsSbtBuildUniforms> g_unis;
+
+#define NUMTHREADS 64
+
+[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID) // One thread per visible renderable: each thread writes that renderable's record into g_sbtBuffer
+{
+	const U32 renderableCount = g_visibleRenderableIndices[0];
+	if(svDispatchThreadId >= renderableCount)
+	{
+		return; // Thread index is past the visible count, nothing to write
+	}
+
+	const GpuSceneRenderable renderable = g_renderables[g_visibleRenderableIndices[svDispatchThreadId + 1]]; // +1 because element 0 holds the count
+
+	U32 sbtDwordOffset = g_unis.m_sbtRecordDwordSize * 2; // Skip raygen and miss shaders which are first
+	sbtDwordOffset += g_unis.m_sbtRecordDwordSize * svDispatchThreadId; // Then one record per thread
+
+	// Copy the handle
+	for(U32 i = 0; i < g_unis.m_shaderHandleDwordSize; ++i)
+	{
+		g_sbtBuffer[sbtDwordOffset] = g_shaderHandles[renderable.m_rtShadowsShaderHandleIndex * g_unis.m_shaderHandleDwordSize + i]; // Handles are packed back to back, m_shaderHandleDwordSize dwords each
+		++sbtDwordOffset;
+	}
+
+	// Copy the GpuSceneRenderableVertex
+	g_sbtBuffer[sbtDwordOffset++] = renderable.m_worldTransformsOffset;
+	g_sbtBuffer[sbtDwordOffset++] = renderable.m_uniformsOffset;
+	g_sbtBuffer[sbtDwordOffset++] = renderable.m_meshLodsOffset;
+	g_sbtBuffer[sbtDwordOffset] = 0; // Fourth dword is always zero; presumably the bone transforms slot of GpuSceneRenderableVertex (TODO confirm)
+}
+
+#pragma anki end

+ 28 - 0
AnKi/Shaders/RtShadowsSetupSbtBuild.ankiprog

@@ -0,0 +1,28 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Shaders/RtShadows.hlsl>
+
+#pragma anki start comp
+
+[[vk::binding(0)]] StructuredBuffer<U32> g_visibleRenderableIndices; // 1st element is the count
+
+[[vk::binding(1)]] RWStructuredBuffer<DispatchIndirectArgs> g_args;
+
+#define NUMTHREADS 64
+
+[numthreads(1, 1, 1)] void main() // Single thread: derives the indirect dispatch arguments from the visible renderable count and stores them in g_args[0]
+{
+	const U32 renderableCount = g_visibleRenderableIndices[0];
+
+	DispatchIndirectArgs args;
+	args.m_threadGroupCountX = (renderableCount + NUMTHREADS - 1) / NUMTHREADS; // Ceiling division; NUMTHREADS presumably has to match the group width of the shader consuming these args (TODO confirm)
+	args.m_threadGroupCountY = 1;
+	args.m_threadGroupCountZ = 1;
+
+	g_args[0] = args;
+}
+
+#pragma anki end

+ 28 - 47
AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog

@@ -11,23 +11,20 @@
 #include <AnKi/Shaders/BilateralFilter.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
+[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(1)]] Texture2D<Vec4> g_depthTex;
+[[vk::binding(2)]] Texture2D<Vec4> g_shadowsTex;
+[[vk::binding(3)]] Texture2D<Vec4> g_varianceTex;
 
-[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(2)]] Texture2D g_depthTex;
-[[vk::binding(3)]] Texture2D<UVec4> g_shadowsTex;
-[[vk::binding(4)]] Texture2D g_varianceTex;
-
-[[vk::binding(5)]] RWTexture2D<UVec4> g_shadowsImage;
+[[vk::binding(4)]] RWTexture2D<Vec4> g_shadowUav; // Float texel type: main() stores a float shadow factor into it
 #if !LAST_PASS
-[[vk::binding(6)]] RWTexture2D<Vec4> g_varianceImage;
+[[vk::binding(5)]] RWTexture2D<Vec4> g_varianceUav;
 #endif
 
 [[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
 
-constexpr I32 kConfolutionRadius = 2;
-constexpr F32 kKernelWeights[kConfolutionRadius + 1] = {1.0, 2.0 / 3.0, 1.0 / 6.0};
+constexpr I32 kConvolutionRadius = 2;
+constexpr F32 kKernelWeights[kConvolutionRadius + 1] = {1.0, 2.0 / 3.0, 1.0 / 6.0};
 
 Vec3 toViewspace(Vec2 uv, F32 depth)
 {
@@ -36,14 +33,9 @@ Vec3 toViewspace(Vec2 uv, F32 depth)
 	return pos;
 }
 
-F32 computeShadowsLuma(RF32 shadowLayers[kMaxRtShadowLayers])
+F32 computeShadowsLuma(F32 shadowFactor)
 {
-	RF32 l = 0.0;
-	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		l += shadowLayers[i];
-	}
-	return l;
+	return shadowFactor;
 }
 
 F32 computeVarianceCenter(Vec2 uv)
@@ -71,20 +63,23 @@ F32 computeVarianceCenter(Vec2 uv)
 
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
-	if(any(svDispatchThreadId.xy >= kFramebufferSize))
+	Vec2 outUavSize;
+	g_shadowUav.GetDimensions(outUavSize.x, outUavSize.y);
+
+	if(any(Vec2(svDispatchThreadId.xy) >= outUavSize))
 	{
 		return;
 	}
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / outUavSize;
 
 	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depth == 1.0)
 	{
 		// Sky
-		g_shadowsImage[svDispatchThreadId.xy] = 0u;
+		g_shadowUav[svDispatchThreadId.xy] = 0.0f;
 #if !LAST_PASS
-		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
+		g_varianceUav[svDispatchThreadId.xy] = 0.0f;
 #endif
 		return;
 	}
@@ -94,36 +89,32 @@ F32 computeVarianceCenter(Vec2 uv)
 	const Vec3 positionCenter = toViewspace(uv, depthCenter);
 
 	// Read center luma
-	RF32 shadowLayers[kMaxRtShadowLayers];
-	unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowLayers);
-	const F32 refLuma = computeShadowsLuma(shadowLayers);
+	const F32 refLuma = computeShadowsLuma(g_shadowsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x);
 
 	// Center variance
 	const F32 varianceCenter = computeVarianceCenter(uv);
 
 	// Init the sums
-	RF32 sumShadowLayers[kMaxRtShadowLayers];
-	zeroRtShadowLayers(sumShadowLayers);
+	F32 sumShadowFactor = 0.0;
 	F32 sumVariance = 0.0;
-	F32 sumWeight = 0.0;
+	F32 sumWeight = kEpsilonF32;
 
 	// Convolve
 	Vec2 texSize;
 	F32 mipCount;
 	g_shadowsTex.GetDimensions(0u, texSize.x, texSize.y, mipCount);
 	const Vec2 texelSize = 1.0 / texSize;
-	for(I32 offsetx = -kConfolutionRadius; offsetx <= kConfolutionRadius; offsetx++)
+	for(I32 offsetx = -kConvolutionRadius; offsetx <= kConvolutionRadius; offsetx++)
 	{
-		for(I32 offsety = -kConfolutionRadius; offsety <= kConfolutionRadius; offsety++)
+		for(I32 offsety = -kConvolutionRadius; offsety <= kConvolutionRadius; offsety++)
 		{
 			const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 
 			// Read shadows
-			RF32 shadowLayers[kMaxRtShadowLayers];
-			unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
+			const F32 localShadowFactor = g_shadowsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 
 			// Compute luma weight
-			const F32 luma = computeShadowsLuma(shadowLayers);
+			const F32 luma = computeShadowsLuma(localShadowFactor);
 			const F32 variance = g_varianceTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
 			const F32 sigmaL = 4.0;
 			const F32 lumaDiff = abs(luma - refLuma);
@@ -141,30 +132,20 @@ F32 computeVarianceCenter(Vec2 uv)
 			// w *= kKernelWeights[abs(offsetx)] * kKernelWeights[abs(offsety)];
 
 			// Sum
-			[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-			{
-				sumShadowLayers[i] += shadowLayers[i] * w;
-			}
-
+			sumShadowFactor += localShadowFactor * w;
 			sumVariance += w * w * variance;
 			sumWeight += w;
 		}
 	}
 
 	// Normalize
-	sumWeight += kEpsilonF32;
-
-	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		sumShadowLayers[i] /= sumWeight;
-	}
-
+	sumShadowFactor /= sumWeight;
 	sumVariance /= (sumWeight * sumWeight);
 
 	// Store
-	g_shadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
+	g_shadowUav[svDispatchThreadId.xy] = sumShadowFactor;
 #if !LAST_PASS
-	g_varianceImage[svDispatchThreadId.xy] = Vec4(sumVariance, 0.0, 0.0, 0.0);
+	g_varianceUav[svDispatchThreadId.xy] = Vec4(sumVariance, 0.0, 0.0, 0.0);
 #endif
 }
 #pragma anki end

+ 28 - 39
AnKi/Shaders/RtShadowsSvgfVariance.ankiprog

@@ -9,17 +9,14 @@
 #include <AnKi/Shaders/BilateralFilter.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
+[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(1)]] Texture2D<Vec4> g_shadowsTex;
+[[vk::binding(2)]] Texture2D<Vec4> g_momentsTex;
+[[vk::binding(3)]] Texture2D<Vec4> g_historyLengthTex;
+[[vk::binding(4)]] Texture2D<Vec4> g_depthTex;
 
-[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(2)]] Texture2D<UVec4> g_shadowsTex;
-[[vk::binding(3)]] Texture2D g_momentsTex;
-[[vk::binding(4)]] Texture2D g_historyLengthTex;
-[[vk::binding(5)]] Texture2D g_depthTex;
-
-[[vk::binding(6)]] RWTexture2D<UVec4> g_shadowsImage;
-[[vk::binding(7)]] RWTexture2D<Vec4> g_varianceImage;
+[[vk::binding(5)]] RWTexture2D<Vec4> g_shadowUav;
+[[vk::binding(6)]] RWTexture2D<Vec4> g_varianceUav;
 
 [[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
 
@@ -32,34 +29,37 @@ Vec3 toViewspace(Vec2 uv, F32 depth)
 	return pos;
 }
 
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
-	if(any(svDispatchThreadId >= kFramebufferSize))
+	Vec2 shadowUavSize;
+	g_shadowUav.GetDimensions(shadowUavSize.x, shadowUavSize.y);
+
+	if(any(Vec2(svDispatchThreadId) >= shadowUavSize))
 	{
 		return;
 	}
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / shadowUavSize;
 
 	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depth == 1.0)
 	{
 		// Sky
-		g_shadowsImage[svDispatchThreadId.xy] = 0;
-		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
+		g_shadowUav[svDispatchThreadId] = 0.0f;
+		g_varianceUav[svDispatchThreadId] = 0.0f;
 		return;
 	}
 
-	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
+	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r * kRtShadowsMaxHistoryLength;
 
-	UVec4 outPackedShadowLayers;
+	F32 outShadowFactor;
 	F32 outVariance;
 
-	if(historyLength < 4.0 / kRtShadowsMaxHistoryLength)
+	if(historyLength < 4.0)
 	{
 		// It's been stable less than 4 frames, need to do some work
 
-		const Vec2 texelSize = 1.0 / Vec2(kFramebufferSize);
+		const Vec2 texelSize = 1.0 / shadowUavSize;
 
 		// Set the reference sample
 		const F32 depthCenter = depth;
@@ -67,9 +67,8 @@ Vec3 toViewspace(Vec2 uv, F32 depth)
 
 		// Init the sums
 		Vec2 sumMoments = 0.0f;
-		F32 sumWeight = 0.0;
-		RF32 sumShadowLayers[kMaxRtShadowLayers];
-		zeroRtShadowLayers(sumShadowLayers);
+		F32 sumWeight = kEpsilonF32;
+		F32 sumShadowFactor = 0.0f;
 
 		// Convolve
 		for(I32 offsetx = -kConvolutionRadius; offsetx <= kConvolutionRadius; offsetx++)
@@ -89,28 +88,18 @@ Vec3 toViewspace(Vec2 uv, F32 depth)
 				const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).xy;
 				sumMoments += moments * w;
 
-				RF32 shadowLayers[kMaxRtShadowLayers];
-				unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
-				[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-				{
-					sumShadowLayers[i] += shadowLayers[i] * w;
-				}
+				const F32 localShadowFactor = g_shadowsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 
+				sumShadowFactor += localShadowFactor * w;
 				sumWeight += w;
 			}
 		}
 
-		sumWeight += kEpsilonF32;
-
-		[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-		{
-			sumShadowLayers[i] /= sumWeight;
-		}
+		sumShadowFactor /= sumWeight;
 		sumMoments /= sumWeight;
 
-		outPackedShadowLayers = packRtShadows(sumShadowLayers);
-
 		outVariance = max(0.0, sumMoments.y - sumMoments.x * sumMoments.x);
+		outShadowFactor = sumShadowFactor;
 
 		// Give the variance a boost for the first frames. historyLength has already been converted to frames (it was
 		// multiplied by kRtShadowsMaxHistoryLength when sampled), so it must not be scaled a second time here.
 		// NOTE(review): a historyLength of 0 would still divide by zero, same as before; confirm the history texture never stores 0
 		outVariance *= 4.0 / historyLength;
@@ -119,14 +108,14 @@ Vec3 toViewspace(Vec2 uv, F32 depth)
 	{
 		// Stable for more than 4 frames, passthrough
 
-		outPackedShadowLayers = g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
+		outShadowFactor = g_shadowsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 
 		const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
 		outVariance = max(0.0, moments.y - moments.x * moments.x);
 	}
 
 	// Store
-	g_shadowsImage[svDispatchThreadId.xy] = outPackedShadowLayers;
-	g_varianceImage[svDispatchThreadId.xy] = Vec4(outVariance, 0.0, 0.0, 0.0);
+	g_shadowUav[svDispatchThreadId] = outShadowFactor;
+	g_varianceUav[svDispatchThreadId] = Vec4(outVariance, 0.0, 0.0, 0.0);
 }
 #pragma anki end

+ 17 - 26
AnKi/Shaders/RtShadowsUpscale.ankiprog

@@ -9,32 +9,30 @@
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/BilateralFilter.hlsl>
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
+[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(1)]] Texture2D<Vec4> g_quarterShadowsTex;
+[[vk::binding(2)]] RWTexture2D<Vec4> g_fullShadowUav;
+[[vk::binding(3)]] Texture2D<Vec4> g_quarterDepthTex;
+[[vk::binding(4)]] Texture2D<Vec4> g_fullDepthTex;
 
-[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(2)]] Texture2D<UVec4> g_quarterShadowsTex;
-[[vk::binding(3)]] RWTexture2D<UVec4> g_fullShadowsImage;
-[[vk::binding(4)]] Texture2D g_quarterDepthTex;
-[[vk::binding(5)]] Texture2D g_fullDepthTex;
-
-[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
-	if(any(svDispatchThreadId.xy >= kOutImageSize))
+	Vec2 outUavSize;
+	g_fullShadowUav.GetDimensions(outUavSize.x, outUavSize.y);
+
+	if(any(Vec2(svDispatchThreadId) >= outUavSize))
 	{
 		return;
 	}
 
-	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / outUavSize;
 
 	// Reference
 	const F32 depthCenter = g_fullDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
 
-	RF32 sumShadowLayers[kMaxRtShadowLayers];
-	zeroRtShadowLayers(sumShadowLayers);
-
 	// Do a bilateral upscale
-	const Vec2 texelSize = 1.0 / Vec2(kOutImageSize / 2u);
+	F32 sumShadowFactor = 0.0f;
+	const Vec2 texelSize = 1.0 / (outUavSize / 2.0f);
 	const I32 radius = 1;
 	F32 sumWeight = kEpsilonF32;
 	for(I32 x = -radius; x <= radius; ++x)
@@ -46,24 +44,17 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 
 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
 
-			RF32 shadowLayers[kMaxRtShadowLayers];
-			unpackRtShadows(g_quarterShadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
+			const F32 localShadowFactor = g_quarterShadowsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
 
-			for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-			{
-				sumShadowLayers[i] += shadowLayers[i] * w;
-			}
+			sumShadowFactor += localShadowFactor * w;
 
 			sumWeight += w;
 		}
 	}
 
-	for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
-	{
-		sumShadowLayers[i] /= sumWeight;
-	}
+	sumShadowFactor /= sumWeight;
 
-	g_fullShadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
+	g_fullShadowUav[svDispatchThreadId.xy] = sumShadowFactor;
 }
 
 #pragma anki end

+ 0 - 32
AnKi/Shaders/RtShadowsVisualizeRenderTarget.ankiprog

@@ -1,32 +0,0 @@
-// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma anki mutator LAYER_GROUP 0 1 2
-
-#pragma anki start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
-#pragma anki end
-
-#pragma anki start frag
-#include <AnKi/Shaders/RtShadows.hlsl>
-
-[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
-[[vk::binding(1)]] Texture2D<UVec4> g_inTex;
-
-Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
-{
-	const UVec4 packed = g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
-	RF32 shadowFactors[kMaxRtShadowLayers];
-	unpackRtShadows(packed, shadowFactors);
-
-#if LAYER_GROUP == 0
-	return Vec3(shadowFactors[0], shadowFactors[1], shadowFactors[2]);
-#elif LAYER_GROUP == 1
-	return Vec3(shadowFactors[3], shadowFactors[4], shadowFactors[5]);
-#else
-	return Vec3(shadowFactors[6], shadowFactors[7], 0.0);
-#endif
-}
-#pragma anki end

+ 21 - 11
AnKi/Shaders/ShadowmapsResolve.hlsl

@@ -4,6 +4,7 @@
 // http://www.anki3d.org/LICENSE
 
 #pragma anki mutator PCF 0 1
+#pragma anki mutator DIRECTIONAL_LIGHT_SHADOW_RESOLVED 0 1
 
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
@@ -26,8 +27,12 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kTileSize, 5u);
 [[vk::binding(7)]] Texture2D<Vec4> g_depthRt;
 [[vk::binding(8)]] Texture2D<Vec4> g_noiseTex;
 
+#if DIRECTIONAL_LIGHT_SHADOW_RESOLVED
+[[vk::binding(9)]] Texture2D<Vec4> g_dirLightResolvedShadowsTex;
+#endif
+
 #if defined(ANKI_COMPUTE_SHADER)
-[[vk::binding(9)]] RWTexture2D<RVec4> g_outUav;
+[[vk::binding(10)]] RWTexture2D<RVec4> g_outUav;
 #endif
 
 Vec3 computeDebugShadowCascadeColor(U32 cascade)
@@ -91,6 +96,10 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	RVec4 shadowFactors = 0.0f;
 
 	// Dir light
+#if DIRECTIONAL_LIGHT_SHADOW_RESOLVED
+	shadowFactors[0] = g_dirLightResolvedShadowsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).x;
+	++shadowCasterCountPerFragment;
+#else
 	const DirectionalLight dirLight = g_clusteredShading.m_directionalLight;
 	if(dirLight.m_active != 0u && dirLight.m_shadowCascadeCount > 0u)
 	{
@@ -105,25 +114,25 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 			const UVec2 cascadeIndices =
 				computeShadowCascadeIndex2(positiveZViewSpace, dirLight.m_shadowCascadeDistances, dirLight.m_shadowCascadeCount, cascadeBlendFactor);
 
-#if DEBUG_CASCADES
+#	if DEBUG_CASCADES
 			const Vec3 debugColorA = computeDebugShadowCascadeColor(cascadeIndices[0]);
 			const Vec3 debugColorB = computeDebugShadowCascadeColor(cascadeIndices[1]);
 			const Vec3 debugColor = lerp(debugColorA, debugColorB, cascadeBlendFactor);
-#	if defined(ANKI_COMPUTE_SHADER)
+#		if defined(ANKI_COMPUTE_SHADER)
 			g_outUav[svDispatchThreadId.xy] = shadowFactors;
 			return;
-#	else
+#		else
 			return shadowFactors;
+#		endif
 #	endif
-#endif
 
-#if PCF
+#	if PCF
 			const F32 shadowFactorCascadeA =
 				computeShadowFactorDirLightPcf(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
-#else
+#	else
 			const F32 shadowFactorCascadeA =
 				computeShadowFactorDirLight(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
-#endif
+#	endif
 
 			if(cascadeBlendFactor < 0.01 || cascadeIndices.x == cascadeIndices.y)
 			{
@@ -132,15 +141,15 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 			}
 			else
 			{
-#if PCF
+#	if PCF
 				// Blend cascades
 				const F32 shadowFactorCascadeB =
 					computeShadowFactorDirLightPcf(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
-#else
+#	else
 				// Blend cascades
 				const F32 shadowFactorCascadeB =
 					computeShadowFactorDirLight(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
-#endif
+#	endif
 				shadowFactor = lerp(shadowFactorCascadeA, shadowFactorCascadeB, cascadeBlendFactor);
 			}
 
@@ -156,6 +165,7 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 		shadowFactors[0] = shadowFactor;
 		++shadowCasterCountPerFragment;
 	}
+#endif // DIRECTIONAL_LIGHT_SHADOW_RESOLVED
 
 	// Point lights
 	[loop] while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))