Browse Source

More improvements in GI

Panagiotis Christopoulos Charitos 6 months ago
parent
commit
056cf56880

+ 1 - 1
AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h

@@ -57,7 +57,7 @@ private:
 
 		BufferUsageBit buffUsage = BufferUsageBit::kAllConstant | BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kIndirectDraw
 								   | BufferUsageBit::kIndirectCompute | BufferUsageBit::kVertexOrIndex | BufferUsageBit::kAllCopy
-								   | BufferUsageBit::kIndirectTraceRays;
+								   | BufferUsageBit::kIndirectTraceRays | BufferUsageBit::kShaderBindingTable;
 		if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
 		{
 			buffUsage |= (BufferUsageBit::kAccelerationStructureBuildScratch | BufferUsageBit::kAccelerationStructureBuild);

+ 9 - 0
AnKi/Gr/RenderGraph.cpp

@@ -491,6 +491,8 @@ Bool RenderGraph::passHasUnmetDependencies(const BakeContext& ctx, U32 passIdx)
 
 RenderGraph::BakeContext* RenderGraph::newContext(const RenderGraphBuilder& descr, StackMemoryPool& pool)
 {
+	ANKI_TRACE_FUNCTION();
+
 	// Allocate
 	BakeContext* ctx = anki::newInstance<BakeContext>(pool, &pool);
 
@@ -583,6 +585,8 @@ RenderGraph::BakeContext* RenderGraph::newContext(const RenderGraphBuilder& desc
 
 void RenderGraph::initRenderPassesAndSetDeps(const RenderGraphBuilder& descr)
 {
+	ANKI_TRACE_FUNCTION();
+
 	BakeContext& ctx = *m_ctx;
 	const U32 passCount = descr.m_passes.getSize();
 	ANKI_ASSERT(passCount > 0);
@@ -625,6 +629,7 @@ void RenderGraph::initRenderPassesAndSetDeps(const RenderGraphBuilder& descr)
 
 void RenderGraph::initBatches()
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(m_ctx);
 
 	U passesAssignedToBatchCount = 0;
@@ -657,6 +662,8 @@ void RenderGraph::initBatches()
 
 void RenderGraph::initGraphicsPasses(const RenderGraphBuilder& descr)
 {
+	ANKI_TRACE_FUNCTION();
+
 	BakeContext& ctx = *m_ctx;
 	const U32 passCount = descr.m_passes.getSize();
 	ANKI_ASSERT(passCount > 0);
@@ -812,6 +819,8 @@ void RenderGraph::setTextureBarrier(Batch& batch, const RenderPassDependency& de
 
 void RenderGraph::setBatchBarriers(const RenderGraphBuilder& descr)
 {
+	ANKI_TRACE_FUNCTION();
+
 	BakeContext& ctx = *m_ctx;
 
 	// For all batches

+ 64 - 0
AnKi/Gr/Vulkan/VkCommandBuffer.cpp

@@ -44,6 +44,7 @@ void CommandBuffer::endRecording()
 
 void CommandBuffer::bindVertexBuffer(U32 binding, const BufferView& buff, U32 stride, VertexStepRate stepRate)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -56,6 +57,7 @@ void CommandBuffer::bindVertexBuffer(U32 binding, const BufferView& buff, U32 st
 
 void CommandBuffer::setVertexAttribute(VertexAttributeSemantic attribute, U32 buffBinding, Format fmt, U32 relativeOffset)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setVertexAttribute(attribute, buffBinding, fmt, relativeOffset);
@@ -63,6 +65,7 @@ void CommandBuffer::setVertexAttribute(VertexAttributeSemantic attribute, U32 bu
 
 void CommandBuffer::bindIndexBuffer(const BufferView& buff, IndexType type)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -74,6 +77,7 @@ void CommandBuffer::bindIndexBuffer(const BufferView& buff, IndexType type)
 
 void CommandBuffer::setPrimitiveRestart(Bool enable)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setPrimitiveRestart(enable);
@@ -81,6 +85,7 @@ void CommandBuffer::setPrimitiveRestart(Bool enable)
 
 void CommandBuffer::setViewport(U32 minx, U32 miny, U32 width, U32 height)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(width > 0 && height > 0);
 	self.commandCommon();
@@ -89,6 +94,7 @@ void CommandBuffer::setViewport(U32 minx, U32 miny, U32 width, U32 height)
 
 void CommandBuffer::setScissor(U32 minx, U32 miny, U32 width, U32 height)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(width > 0 && height > 0);
 	self.commandCommon();
@@ -97,6 +103,7 @@ void CommandBuffer::setScissor(U32 minx, U32 miny, U32 width, U32 height)
 
 void CommandBuffer::setFillMode(FillMode mode)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setFillMode(mode);
@@ -104,6 +111,7 @@ void CommandBuffer::setFillMode(FillMode mode)
 
 void CommandBuffer::setCullMode(FaceSelectionBit mode)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setCullMode(mode);
@@ -111,6 +119,7 @@ void CommandBuffer::setCullMode(FaceSelectionBit mode)
 
 void CommandBuffer::setPolygonOffset(F32 factor, F32 units)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setPolygonOffset(factor, units);
@@ -120,6 +129,7 @@ void CommandBuffer::setPolygonOffset(F32 factor, F32 units)
 void CommandBuffer::setStencilOperations(FaceSelectionBit face, StencilOperation stencilFail, StencilOperation stencilPassDepthFail,
 										 StencilOperation stencilPassDepthPass)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setStencilOperations(face, stencilFail, stencilPassDepthFail, stencilPassDepthPass);
@@ -127,6 +137,7 @@ void CommandBuffer::setStencilOperations(FaceSelectionBit face, StencilOperation
 
 void CommandBuffer::setStencilCompareOperation(FaceSelectionBit face, CompareOperation comp)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setStencilCompareOperation(face, comp);
@@ -134,6 +145,7 @@ void CommandBuffer::setStencilCompareOperation(FaceSelectionBit face, CompareOpe
 
 void CommandBuffer::setStencilCompareMask(FaceSelectionBit face, U32 mask)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setStencilCompareMask(face, mask);
@@ -141,6 +153,7 @@ void CommandBuffer::setStencilCompareMask(FaceSelectionBit face, U32 mask)
 
 void CommandBuffer::setStencilWriteMask(FaceSelectionBit face, U32 mask)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setStencilWriteMask(face, mask);
@@ -148,6 +161,7 @@ void CommandBuffer::setStencilWriteMask(FaceSelectionBit face, U32 mask)
 
 void CommandBuffer::setStencilReference(FaceSelectionBit face, U32 ref)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setStencilReference(face, ref);
@@ -155,6 +169,7 @@ void CommandBuffer::setStencilReference(FaceSelectionBit face, U32 ref)
 
 void CommandBuffer::setDepthWrite(Bool enable)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setDepthWrite(enable);
@@ -162,6 +177,7 @@ void CommandBuffer::setDepthWrite(Bool enable)
 
 void CommandBuffer::setDepthCompareOperation(CompareOperation op)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setDepthCompareOperation(op);
@@ -169,6 +185,7 @@ void CommandBuffer::setDepthCompareOperation(CompareOperation op)
 
 void CommandBuffer::setAlphaToCoverage(Bool enable)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setAlphaToCoverage(enable);
@@ -176,6 +193,7 @@ void CommandBuffer::setAlphaToCoverage(Bool enable)
 
 void CommandBuffer::setColorChannelWriteMask(U32 attachment, ColorBit mask)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setColorChannelWriteMask(attachment, mask);
@@ -183,6 +201,7 @@ void CommandBuffer::setColorChannelWriteMask(U32 attachment, ColorBit mask)
 
 void CommandBuffer::setBlendFactors(U32 attachment, BlendFactor srcRgb, BlendFactor dstRgb, BlendFactor srcA, BlendFactor dstA)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setBlendFactors(attachment, srcRgb, dstRgb, srcA, dstA);
@@ -190,6 +209,7 @@ void CommandBuffer::setBlendFactors(U32 attachment, BlendFactor srcRgb, BlendFac
 
 void CommandBuffer::setBlendOperation(U32 attachment, BlendOperation funcRgb, BlendOperation funcA)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setBlendOperation(attachment, funcRgb, funcA);
@@ -197,6 +217,7 @@ void CommandBuffer::setBlendOperation(U32 attachment, BlendOperation funcRgb, Bl
 
 void CommandBuffer::bindSrv(U32 reg, U32 space, const TextureView& texView)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -209,6 +230,7 @@ void CommandBuffer::bindSrv(U32 reg, U32 space, const TextureView& texView)
 
 void CommandBuffer::bindUav(U32 reg, U32 space, const TextureView& texView)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -226,6 +248,7 @@ void CommandBuffer::bindUav(U32 reg, U32 space, const TextureView& texView)
 
 void CommandBuffer::bindSampler(U32 reg, U32 space, Sampler* sampler)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -236,6 +259,7 @@ void CommandBuffer::bindSampler(U32 reg, U32 space, Sampler* sampler)
 
 void CommandBuffer::bindConstantBuffer(U32 reg, U32 space, const BufferView& buff)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -247,6 +271,7 @@ void CommandBuffer::bindConstantBuffer(U32 reg, U32 space, const BufferView& buf
 
 void CommandBuffer::bindSrv(U32 reg, U32 space, const BufferView& buff, Format fmt)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -267,6 +292,7 @@ void CommandBuffer::bindSrv(U32 reg, U32 space, const BufferView& buff, Format f
 
 void CommandBuffer::bindUav(U32 reg, U32 space, const BufferView& buff, Format fmt)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -287,6 +313,7 @@ void CommandBuffer::bindUav(U32 reg, U32 space, const BufferView& buff, Format f
 
 void CommandBuffer::bindSrv(U32 reg, U32 space, AccelerationStructure* as)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -297,6 +324,7 @@ void CommandBuffer::bindSrv(U32 reg, U32 space, AccelerationStructure* as)
 
 void CommandBuffer::bindShaderProgram(ShaderProgram* prog)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -344,6 +372,7 @@ void CommandBuffer::bindShaderProgram(ShaderProgram* prog)
 void CommandBuffer::beginRenderPass(ConstWeakArray<RenderTarget> colorRts, RenderTarget* depthStencilRt, const TextureView& vrsRt, U8 vrsRtTexelSizeX,
 									U8 vrsRtTexelSizeY)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 
 	ANKI_ASSERT(!self.m_insideRenderpass);
@@ -484,6 +513,7 @@ void CommandBuffer::beginRenderPass(ConstWeakArray<RenderTarget> colorRts, Rende
 
 void CommandBuffer::endRenderPass()
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 
 	ANKI_ASSERT(self.m_insideRenderpass);
@@ -497,6 +527,7 @@ void CommandBuffer::endRenderPass()
 
 void CommandBuffer::setVrsRate([[maybe_unused]] VrsRate rate)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(getGrManagerImpl().getDeviceCapabilities().m_vrs);
 	ANKI_ASSERT(rate < VrsRate::kCount);
@@ -507,6 +538,7 @@ void CommandBuffer::setVrsRate([[maybe_unused]] VrsRate rate)
 
 void CommandBuffer::drawIndexed(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex, U32 baseVertex, U32 baseInstance)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.m_graphicsState.setPrimitiveTopology(topology);
 	self.drawcallCommon();
@@ -515,6 +547,7 @@ void CommandBuffer::drawIndexed(PrimitiveTopology topology, U32 count, U32 insta
 
 void CommandBuffer::draw(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first, U32 baseInstance)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.m_graphicsState.setPrimitiveTopology(topology);
 	self.drawcallCommon();
@@ -523,6 +556,7 @@ void CommandBuffer::draw(PrimitiveTopology topology, U32 count, U32 instanceCoun
 
 void CommandBuffer::drawIndirect(PrimitiveTopology topology, const BufferView& buff, U32 drawCount)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 	ANKI_ASSERT(drawCount > 0);
 
@@ -541,6 +575,7 @@ void CommandBuffer::drawIndirect(PrimitiveTopology topology, const BufferView& b
 
 void CommandBuffer::drawIndexedIndirect(PrimitiveTopology topology, const BufferView& buff, U32 drawCount)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 	ANKI_ASSERT(drawCount > 0);
 
@@ -559,6 +594,7 @@ void CommandBuffer::drawIndexedIndirect(PrimitiveTopology topology, const Buffer
 void CommandBuffer::drawIndexedIndirectCount(PrimitiveTopology topology, const BufferView& argBuffer, U32 argBufferStride,
 											 const BufferView& countBuffer, U32 maxDrawCount)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(argBuffer.isValid());
 	ANKI_ASSERT(countBuffer.isValid());
 
@@ -588,6 +624,7 @@ void CommandBuffer::drawIndexedIndirectCount(PrimitiveTopology topology, const B
 void CommandBuffer::drawIndirectCount(PrimitiveTopology topology, const BufferView& argBuffer, U32 argBufferStride, const BufferView& countBuffer,
 									  U32 maxDrawCount)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(argBuffer.isValid());
 	ANKI_ASSERT(countBuffer.isValid());
 
@@ -615,6 +652,7 @@ void CommandBuffer::drawIndirectCount(PrimitiveTopology topology, const BufferVi
 
 void CommandBuffer::drawMeshTasks(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::kEXT_mesh_shader));
 	self.drawcallCommon();
@@ -623,6 +661,7 @@ void CommandBuffer::drawMeshTasks(U32 groupCountX, U32 groupCountY, U32 groupCou
 
 void CommandBuffer::drawMeshTasksIndirect(const BufferView& argBuffer, U32 drawCount)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(argBuffer.isValid());
 	ANKI_ASSERT(drawCount > 0);
 	ANKI_ASSERT(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::kEXT_mesh_shader));
@@ -641,6 +680,7 @@ void CommandBuffer::drawMeshTasksIndirect(const BufferView& argBuffer, U32 drawC
 
 void CommandBuffer::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(groupCountX > 0 && groupCountY > 0 && groupCountZ > 0);
 	self.dispatchCommon();
@@ -649,6 +689,7 @@ void CommandBuffer::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupC
 
 void CommandBuffer::dispatchComputeIndirect(const BufferView& argBuffer)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(argBuffer.isValid());
 
 	ANKI_ASSERT(sizeof(DispatchIndirectArgs) == argBuffer.getRange());
@@ -668,6 +709,7 @@ void CommandBuffer::dispatchGraph([[maybe_unused]] const BufferView& scratchBuff
 void CommandBuffer::traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height,
 							  U32 depth)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.traceRaysInternal(sbtBuffer, sbtRecordSize32, hitGroupSbtRecordCount, rayTypeCount, width, height, depth, {});
 }
@@ -675,6 +717,7 @@ void CommandBuffer::traceRays(const BufferView& sbtBuffer, U32 sbtRecordSize32,
 void CommandBuffer::traceRaysIndirect(const BufferView& sbtBuffer, U32 sbtRecordSize32, U32 hitGroupSbtRecordCount, U32 rayTypeCount,
 									  BufferView argsBuffer)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.traceRaysInternal(sbtBuffer, sbtRecordSize32, hitGroupSbtRecordCount, rayTypeCount, 0, 0, 0, argsBuffer);
 }
@@ -708,6 +751,7 @@ void CommandBuffer::clearTexture(const TextureView& texView, const ClearValue& c
 
 void CommandBuffer::copyBufferToTexture(const BufferView& buff, const TextureView& texView)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -753,6 +797,7 @@ void CommandBuffer::copyBufferToTexture(const BufferView& buff, const TextureVie
 
 void CommandBuffer::zeroBuffer(const BufferView& buff)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -769,6 +814,7 @@ void CommandBuffer::zeroBuffer(const BufferView& buff)
 
 void CommandBuffer::writeOcclusionQueriesResultToBuffer(ConstWeakArray<OcclusionQuery*> queries, const BufferView& buff)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(buff.isValid());
 
 	ANKI_VK_SELF(CommandBufferImpl);
@@ -797,6 +843,7 @@ void CommandBuffer::writeOcclusionQueriesResultToBuffer(ConstWeakArray<Occlusion
 
 void CommandBuffer::copyBufferToBuffer(Buffer* src, Buffer* dst, ConstWeakArray<CopyBufferToBufferInfo> copies)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(static_cast<const BufferImpl&>(*src).usageValid(BufferUsageBit::kCopySource));
 	ANKI_ASSERT(static_cast<const BufferImpl&>(*dst).usageValid(BufferUsageBit::kCopyDestination));
@@ -813,6 +860,7 @@ void CommandBuffer::copyBufferToBuffer(Buffer* src, Buffer* dst, ConstWeakArray<
 
 void CommandBuffer::buildAccelerationStructure(AccelerationStructure* as, const BufferView& scratchBuffer)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(scratchBuffer.isValid());
 	ANKI_ASSERT(as);
 	ANKI_ASSERT(as->getBuildScratchBufferSize() <= scratchBuffer.getRange());
@@ -855,6 +903,7 @@ void CommandBuffer::upscale(GrUpscaler* upscaler, const TextureView& inColor, co
 							const TextureView& depth, const TextureView& exposure, Bool resetAccumulation, const Vec2& jitterOffset,
 							const Vec2& motionVectorsScale)
 {
+	ANKI_TRACE_FUNCTION();
 #if ANKI_DLSS
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(getGrManagerImpl().getDeviceCapabilities().m_dlss);
@@ -917,6 +966,7 @@ void CommandBuffer::upscale(GrUpscaler* upscaler, const TextureView& inColor, co
 void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textures, ConstWeakArray<BufferBarrierInfo> buffers,
 									   ConstWeakArray<AccelerationStructureBarrierInfo> accelerationStructures)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -967,6 +1017,7 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 
 void CommandBuffer::beginOcclusionQuery(OcclusionQuery* query)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -981,6 +1032,7 @@ void CommandBuffer::beginOcclusionQuery(OcclusionQuery* query)
 
 void CommandBuffer::endOcclusionQuery(OcclusionQuery* query)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -995,6 +1047,7 @@ void CommandBuffer::endOcclusionQuery(OcclusionQuery* query)
 
 void CommandBuffer::beginPipelineQuery(PipelineQuery* query)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	const VkQueryPool handle = static_cast<const PipelineQueryImpl&>(*query).m_handle.getQueryPool();
@@ -1006,6 +1059,7 @@ void CommandBuffer::beginPipelineQuery(PipelineQuery* query)
 
 void CommandBuffer::endPipelineQuery(PipelineQuery* query)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	const VkQueryPool handle = static_cast<const PipelineQueryImpl&>(*query).m_handle.getQueryPool();
@@ -1017,6 +1071,7 @@ void CommandBuffer::endPipelineQuery(PipelineQuery* query)
 
 void CommandBuffer::writeTimestamp(TimestampQuery* query)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 
@@ -1036,6 +1091,7 @@ Bool CommandBuffer::isEmpty() const
 
 void CommandBuffer::setFastConstants(const void* data, U32 dataSize)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	ANKI_ASSERT(data && dataSize && dataSize % 16 == 0);
 	// ANKI_ASSERT(static_cast<const ShaderProgramImpl&>(self.getBoundProgram()).getReflection().m_descriptor.m_fastConstantsSize == dataSize
@@ -1047,6 +1103,7 @@ void CommandBuffer::setFastConstants(const void* data, U32 dataSize)
 
 void CommandBuffer::setLineWidth(F32 width)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	self.commandCommon();
 	self.m_graphicsState.setLineWidth(width);
@@ -1054,6 +1111,7 @@ void CommandBuffer::setLineWidth(F32 width)
 
 void CommandBuffer::pushDebugMarker(CString name, Vec3 color)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	if(self.m_debugMarkers) [[unlikely]]
 	{
@@ -1076,6 +1134,7 @@ void CommandBuffer::pushDebugMarker(CString name, Vec3 color)
 
 void CommandBuffer::popDebugMarker()
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_VK_SELF(CommandBufferImpl);
 	if(self.m_debugMarkers) [[unlikely]]
 	{
@@ -1132,6 +1191,7 @@ void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage, VkAccessF
 										VkPipelineStageFlags dstStage, VkAccessFlags dstAccess, VkImageLayout newLayout, VkImage img,
 										const VkImageSubresourceRange& range)
 {
+	ANKI_TRACE_FUNCTION();
 	ANKI_ASSERT(img);
 	commandCommon();
 
@@ -1152,6 +1212,7 @@ void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage, VkAccessF
 
 void CommandBufferImpl::beginRecording()
 {
+	ANKI_TRACE_FUNCTION();
 	// Do the begin
 	VkCommandBufferInheritanceInfo inheritance = {};
 	inheritance.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
@@ -1172,6 +1233,7 @@ void CommandBufferImpl::beginRecording()
 
 void CommandBufferImpl::endRecording()
 {
+	ANKI_TRACE_FUNCTION();
 	commandCommon();
 
 	ANKI_ASSERT(!m_finalized);
@@ -1217,6 +1279,8 @@ void CommandBufferImpl::endRecording()
 
 void CommandBufferImpl::drawcallCommon()
 {
+	ANKI_TRACE_FUNCTION();
+
 	// Preconditions
 	commandCommon();
 	ANKI_ASSERT(m_graphicsProg);

+ 122 - 56
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -31,6 +31,8 @@ static void computeClipmapBounds(Vec3 cameraPos, Vec3 lookDir, Clipmap& clipmap)
 
 Error IndirectDiffuseClipmaps::init()
 {
+	ANKI_CHECK(RtMaterialFetchRendererObject::init());
+
 	m_appliedGiRtDesc =
 		getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
 													  getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Final");
@@ -112,17 +114,18 @@ Error IndirectDiffuseClipmaps::init()
 		m_distanceMomentsVolumes[clipmap] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
 	}
 
-	const Array<SubMutation, 3> mutation = {{{"GPU_WAVE_SIZE", MutatorValue(GrManager::getSingleton().getDeviceCapabilities().m_maxWaveSize)},
+	const Array<SubMutation, 4> mutation = {{{"GPU_WAVE_SIZE", MutatorValue(GrManager::getSingleton().getDeviceCapabilities().m_maxWaveSize)},
 											 {"RADIANCE_OCTAHEDRON_MAP_SIZE", MutatorValue(g_indirectDiffuseClipmapRadianceOctMapSize)},
-											 {"IRRADIANCE_OCTAHEDRON_MAP_SIZE", MutatorValue(g_indirectDiffuseClipmapIrradianceOctMapSize)}}};
+											 {"IRRADIANCE_OCTAHEDRON_MAP_SIZE", MutatorValue(g_indirectDiffuseClipmapIrradianceOctMapSize)},
+											 {"RT_MATERIAL_FETCH_CLIPMAP", 0}}};
 
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_applyGiGrProg, "Apply"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_visProbesGrProg, "VisualizeProbes"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_populateCachesGrProg, "PopulateCaches"));
 	ANKI_CHECK(
 		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_computeIrradianceGrProg, "ComputeIrradiance"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
 
+	for(MutatorValue rtMaterialFetchClipmap = 0; rtMaterialFetchClipmap < 2; ++rtMaterialFetchClipmap)
 	{
 		ShaderProgramResourcePtr tmpProg;
 		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", tmpProg));
@@ -134,10 +137,13 @@ Error IndirectDiffuseClipmaps::init()
 		{
 			variantInitInfo.addMutation(s.m_mutatorName, s.m_value);
 		}
+
+		variantInitInfo.addMutation("RT_MATERIAL_FETCH_CLIPMAP", rtMaterialFetchClipmap);
+
 		const ShaderProgramResourceVariant* variant;
 		m_prog->getOrCreateVariant(variantInitInfo, variant);
-		m_libraryGrProg.reset(&variant->getProgram());
-		m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
+		m_rtLibraryGrProg.reset(&variant->getProgram());
+		m_rayGenShaderGroupIndices[rtMaterialFetchClipmap] = variant->getShaderGroupHandleIndex();
 	}
 
 	{
@@ -204,56 +210,8 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
-	{
-		BufferHandle visibilityDep;
-		BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
-
-		// Allocate SBT
-		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
-							   ? sizeof(U32)
-							   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
-		sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
-		U8* sbtMem;
-		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocate(
-			(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtAlignment, sbtMem);
-		sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kNone);
-
-		// Write the first 2 entries of the SBT
-		ConstWeakArray<U8> shaderGroupHandles = m_libraryGrProg->getShaderGroupHandles();
-		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-
-		// Create the pass
-		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections build SBT");
-
-		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
-		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
-
-		rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(ReflectionsSbtBuild);
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
-
-			cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
-			cmdb.bindSrv(1, 0, visibleRenderableIndicesBuff);
-			cmdb.bindSrv(2, 0, BufferView(&m_libraryGrProg->getShaderGroupHandlesGpuBuffer()));
-
-			cmdb.bindUav(0, 0, sbtBuffer);
-
-			RtShadowsSbtBuildConstants consts = {};
-			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
-			consts.m_sbtRecordDwordSize = m_sbtRecordSize / 4;
-			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-			ANKI_ASSERT(shaderHandleSize % 4 == 0);
-			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
-			cmdb.setFastConstants(&consts, sizeof(consts));
-
-			cmdb.dispatchComputeIndirect(buildSbtIndirectArgsBuff);
-		});
-	}
+	buildShaderBindingTablePass("IndirectDiffuseClipmaps: Build SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[1], m_missShaderGroupIdx,
+								m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
 
 	// Do ray tracing around the probes
 	{
@@ -278,7 +236,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 					  distanceMomentsVolumes](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_libraryGrProg.get());
+			cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
 
 			// More globals
 			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
@@ -429,6 +387,114 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	}
 
 	// Apply GI
+	if(0)
+	{
+		patchShaderBindingTablePass("IndirectDiffuseClipmaps: Patch SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[0],
+									m_missShaderGroupIdx, m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
+
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: RTApply");
+
+		pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+		if(getRenderer().getGeneratedSky().isEnabled())
+		{
+			pass.newTextureDependency(getRenderer().getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvTraceRays);
+		}
+		pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvTraceRays);
+		pass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
+												AccelerationStructureUsageBit::kTraceRaysSrv);
+		pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvTraceRays);
+		pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvTraceRays);
+
+		for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+		{
+			pass.newTextureDependency(irradianceVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
+			pass.newTextureDependency(probeValidityVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
+			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
+		}
+
+		pass.newTextureDependency(m_runCtx.m_appliedGiRt, TextureUsageBit::kUavTraceRays);
+
+		pass.setWork([this, rtResultHandle, &ctx, sbtBuffer, irradianceVolumes, probeValidityVolumes,
+					  distanceMomentsVolumes](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
+
+			// More globals
+			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
+
+#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
+	cmdb.bindSrv( \
+		reg, 0, \
+		BufferView(&UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, \
+				   getAlignedRoundDown(getFormatInfo(Format::k##fmt).m_texelSize, UnifiedGeometryBuffer::getSingleton().getBuffer().getSize())), \
+		Format::k##fmt);
+#include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
+
+			cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
+
+			U32 srv = 0;
+			rgraphCtx.bindSrv(srv++, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+
+			const LightComponent* dirLight = SceneGraph::getSingleton().getDirectionalLight();
+			const SkyboxComponent* sky = SceneGraph::getSingleton().getSkybox();
+			const Bool bSkySolidColor =
+				(!sky || sky->getSkyboxType() == SkyboxType::kSolidColor || (!dirLight && sky->getSkyboxType() == SkyboxType::kGenerated));
+			if(bSkySolidColor)
+			{
+				cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+			}
+			else if(sky->getSkyboxType() == SkyboxType::kImage2D)
+			{
+				cmdb.bindSrv(srv++, 2, TextureView(&sky->getImageResource().getTexture(), TextureSubresourceDesc::all()));
+			}
+			else
+			{
+				rgraphCtx.bindSrv(srv++, 2, getRenderer().getGeneratedSky().getEnvironmentMapRt());
+			}
+
+			rgraphCtx.bindSrv(srv++, 2, getRenderer().getShadowMapping().getShadowmapRt());
+
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(U32)));
+
+			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+			{
+				rgraphCtx.bindSrv(srv++, 2, irradianceVolumes[clipmap]);
+			}
+			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+			{
+				rgraphCtx.bindSrv(srv++, 2, probeValidityVolumes[clipmap]);
+			}
+			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
+			{
+				rgraphCtx.bindSrv(srv++, 2, distanceMomentsVolumes[clipmap]);
+			}
+
+			rgraphCtx.bindSrv(srv++, 2, getRenderer().getGBuffer().getDepthRt());
+			cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+			rgraphCtx.bindSrv(srv++, 2, getRenderer().getGBuffer().getColorRt(2));
+
+			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
+			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
+			cmdb.bindSampler(2, 2, getRenderer().getSamplers().m_trilinearRepeat.get());
+
+			rgraphCtx.bindUav(0, 2, m_runCtx.m_appliedGiRt);
+			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
+
+			const Vec3 probeSizes = m_clipmapInfo[0].m_size / Vec3(m_clipmapInfo[0].m_probeCounts);
+			const F32 rayTMax = max(probeSizes.x(), max(probeSizes.y(), probeSizes.z())) * 10.0f;
+			const Vec4 consts(rayTMax);
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+						   getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
+		});
+	}
+	else
 	{
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps composite");
 

+ 3 - 5
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -58,7 +58,7 @@ inline NumericCVar<U32> g_indirectDiffuseClipmapIrradianceOctMapSize("R", "Indir
 																	 "Size of the octahedral for the irradiance");
 
 /// Indirect diffuse based on clipmaps of probes.
-class IndirectDiffuseClipmaps : public RendererObject
+class IndirectDiffuseClipmaps : public RtMaterialFetchRendererObject
 {
 public:
 	IndirectDiffuseClipmaps()
@@ -101,18 +101,16 @@ private:
 
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_missProg;
-	ShaderProgramResourcePtr m_sbtProg;
-	ShaderProgramPtr m_libraryGrProg;
+	ShaderProgramPtr m_rtLibraryGrProg;
 	ShaderProgramPtr m_populateCachesGrProg;
 	ShaderProgramPtr m_computeIrradianceGrProg;
 	ShaderProgramPtr m_applyGiGrProg;
-	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_visProbesGrProg;
 
 	ImageResourcePtr m_blueNoiseImg;
 
 	U32 m_sbtRecordSize = 0;
-	U32 m_rayGenShaderGroupIdx = kMaxU32;
+	Array<U32, 2> m_rayGenShaderGroupIndices = {kMaxU32, kMaxU32};
 	U32 m_missShaderGroupIdx = kMaxU32;
 
 	Bool m_texturesImportedOnce = false;

+ 4 - 53
AnKi/Renderer/Reflections.cpp

@@ -25,16 +25,13 @@ namespace anki {
 
 Error Reflections::init()
 {
+	ANKI_CHECK(RtMaterialFetchRendererObject::init());
+
 	const Bool bRtReflections = GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled && g_rtReflectionsCVar;
 	const Bool bSsrSamplesGBuffer = bRtReflections;
 
 	std::initializer_list<SubMutation> mutation = {{"SSR_SAMPLE_GBUFFER", bSsrSamplesGBuffer}};
 
-	if(bRtReflections)
-	{
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
-	}
-
 	// Ray gen and miss
 	if(bRtReflections)
 	{
@@ -259,54 +256,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 	BufferView sbtBuffer;
 	if(bRtReflections)
 	{
-		BufferHandle visibilityDep;
-		BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
-
-		// Allocate SBT
-		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
-							   ? sizeof(U32)
-							   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
-		sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
-		U8* sbtMem;
-		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocate(
-			(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtAlignment, sbtMem);
-		sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kNone);
-
-		// Write the first 2 entries of the SBT
-		ConstWeakArray<U8> shaderGroupHandles = m_libraryGrProg->getShaderGroupHandles();
-		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-
-		// Create the pass
-		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections build SBT");
-
-		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
-		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
-
-		rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(ReflectionsSbtBuild);
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
-
-			cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
-			cmdb.bindSrv(1, 0, visibleRenderableIndicesBuff);
-			cmdb.bindSrv(2, 0, BufferView(&m_libraryGrProg->getShaderGroupHandlesGpuBuffer()));
-
-			cmdb.bindUav(0, 0, sbtBuffer);
-
-			RtShadowsSbtBuildConstants consts = {};
-			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
-			consts.m_sbtRecordDwordSize = m_sbtRecordSize / 4;
-			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-			ANKI_ASSERT(shaderHandleSize % 4 == 0);
-			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
-			cmdb.setFastConstants(&consts, sizeof(consts));
-
-			cmdb.dispatchComputeIndirect(buildSbtIndirectArgsBuff);
-		});
+		buildShaderBindingTablePass("RtReflections: Build SBT", m_libraryGrProg.get(), m_rayGenShaderGroupIdx, m_missShaderGroupIdx, m_sbtRecordSize,
+									rgraph, sbtHandle, sbtBuffer);
 	}
 
 	// Ray gen

+ 1 - 3
AnKi/Renderer/Reflections.h

@@ -23,7 +23,7 @@ inline NumericCVar<F32> g_roughnessCutoffToGiEdge0("R", "RoughnessCutoffToGiEdge
 inline NumericCVar<F32> g_roughnessCutoffToGiEdge1("R", "RoughnessCutoffToGiEdge1", 0.9f, 0.0f, 1.0f,
 												   "After this roughness the reflections will sample the GI probes");
 
-class Reflections : public RendererObject
+class Reflections : public RtMaterialFetchRendererObject
 {
 public:
 	Reflections()
@@ -47,11 +47,9 @@ public:
 	}
 
 public:
-	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_mainProg;
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramPtr m_ssrGrProg;
-	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_spatialDenoisingGrProg;
 	ShaderProgramPtr m_temporalDenoisingGrProg;

+ 95 - 0
AnKi/Renderer/RendererObject.cpp

@@ -5,7 +5,10 @@
 
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/Renderer.h>
+#include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Util/Enum.h>
+#include <AnKi/Util/Tracer.h>
+#include <AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h>
 
 namespace anki {
 
@@ -156,4 +159,96 @@ void RendererObject::fillBuffers(CommandBuffer& cmdb, ConstWeakArray<BufferView>
 	cmdb.popDebugMarker();
 }
 
+Error RtMaterialFetchRendererObject::init()
+{ // Loads two compute techniques of RtSbtBuild.ankiprogbin ("Build" and "PatchRaygenAndMiss"), both requested with TECHNIQUE set to 1
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtBuildProg, m_sbtBuildGrProg, "Build"));
+	ANKI_CHECK( // NOTE(review): RtSbtBuild.ankiprog declares PatchRaygenAndMiss without mutators - confirm passing TECHNIQUE here is fine
+		loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtBuildProg, m_sbtPatchGrProg, "PatchRaygenAndMiss"));
+	return Error::kNone;
+}
+
+void RtMaterialFetchRendererObject::buildShaderBindingTablePass(CString passName, ShaderProgram* library, U32 raygenHandleIdx, U32 missHandleIdx,
+																U32 sbtRecordSize, RenderGraphBuilder& rgraph, BufferHandle& sbtHandle,
+																BufferView& sbtBuffer)
+{ // Allocates the SBT, imports it into the render graph and adds a compute pass that fills it. Outputs: sbtHandle and sbtBuffer
+	BufferHandle visibilityDep;
+	BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
+	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
+
+	// Allocate SBT. The alignment is the compound of the structured buffer alignment and the SBT record alignment
+	U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
+						   ? sizeof(U32)
+						   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
+	sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
+	// One record per RenderableBoundingVolumeRt element plus 2 records, which the build shader uses for the raygen and miss handles
+	sbtBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(
+		(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * sbtRecordSize, sbtAlignment);
+	sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kNone);
+
+	// Create the pass. It depends on the visibility buffer (SRV and indirect arguments) and on the SBT as a compute UAV
+	NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass(passName);
+
+	rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
+	rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
+
+	rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff, lib = ShaderProgramPtr(library), sbtRecordSize,
+				   raygenHandleIdx, missHandleIdx](RenderPassWorkContext& rgraphCtx) {
+		ANKI_TRACE_SCOPED_EVENT(btBuild); // NOTE(review): "btBuild" looks like a truncated "SbtBuild" and the patch pass uses the same name - confirm
+		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+		cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
+
+		cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
+		cmdb.bindSrv(1, 0, visibleRenderableIndicesBuff);
+		cmdb.bindSrv(2, 0, BufferView(&lib->getShaderGroupHandlesGpuBuffer()));
+
+		cmdb.bindUav(0, 0, sbtBuffer);
+
+		RtShadowsSbtBuildConstants consts = {}; // Sizes are passed to the shader in dwords, hence the divisions by 4
+		ANKI_ASSERT(sbtRecordSize % 4 == 0);
+		consts.m_sbtRecordDwordSize = sbtRecordSize / 4;
+		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+		ANKI_ASSERT(shaderHandleSize % 4 == 0);
+		consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
+		consts.m_raygenHandleIndex = raygenHandleIdx; // Indices into the library's shader group handles buffer bound at t2
+		consts.m_missHandleIndex = missHandleIdx;
+		cmdb.setFastConstants(&consts, sizeof(consts));
+
+		cmdb.dispatchComputeIndirect(buildSbtIndirectArgsBuff); // Group count comes from the visibility info's indirect arguments
+	});
+}
+
+void RtMaterialFetchRendererObject::patchShaderBindingTablePass(CString passName, ShaderProgram* library, U32 raygenHandleIdx, U32 missHandleIdx,
+																U32 sbtRecordSize, RenderGraphBuilder& rgraph, BufferHandle sbtHandle,
+																BufferView sbtBuffer)
+{ // Adds a one-group compute pass that runs the patch program on an existing SBT (sbtHandle/sbtBuffer); nothing is allocated here
+	NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass(passName);
+
+	rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
+
+	rpass.setWork(
+		[this, sbtBuffer, lib = ShaderProgramPtr(library), sbtRecordSize, raygenHandleIdx, missHandleIdx](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(btBuild); // NOTE(review): same event name as the build pass; "btBuild" looks like a truncated "SbtBuild" - confirm
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_sbtPatchGrProg.get());
+
+			cmdb.bindSrv(0, 0, BufferView(&lib->getShaderGroupHandlesGpuBuffer()));
+
+			cmdb.bindUav(0, 0, sbtBuffer);
+
+			RtShadowsSbtBuildConstants consts = {}; // Sizes are passed to the shader in dwords, hence the divisions by 4
+			ANKI_ASSERT(sbtRecordSize % 4 == 0);
+			consts.m_sbtRecordDwordSize = sbtRecordSize / 4;
+			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+			ANKI_ASSERT(shaderHandleSize % 4 == 0);
+			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
+			consts.m_raygenHandleIndex = raygenHandleIdx; // Indices into the library's shader group handles buffer bound at t0
+			consts.m_missHandleIndex = missHandleIdx;
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			cmdb.dispatchCompute(1, 1, 1); // A single workgroup
+		});
+}
+
 } // end namespace anki

+ 18 - 0
AnKi/Renderer/RendererObject.h

@@ -133,6 +133,24 @@ protected:
 		return Renderer::getSingleton().m_dummyResources;
 	}
 };
+
+class RtMaterialFetchRendererObject : protected RendererObject
+{ // Base class holding the RtSbtBuild programs; Reflections, RtMaterialFetchDbg and IndirectDiffuseClipmaps derive from it
+protected:
+	Error init(); // Loads the SBT build and patch programs
+
+	/// Build a pass that populates the shader binding table. The SBT is allocated here and returned through sbtHandle and sbtBuffer.
+	void buildShaderBindingTablePass(CString passName, ShaderProgram* library, U32 raygenGroupIdx, U32 missGroupIdx, U32 sbtRecordSize,
+									 RenderGraphBuilder& rgraph, BufferHandle& sbtHandle, BufferView& sbtBuffer);
+	/// Build a pass that runs the patch program on an existing SBT. sbtHandle and sbtBuffer are inputs here, nothing is allocated.
+	void patchShaderBindingTablePass(CString passName, ShaderProgram* library, U32 raygenGroupIdx, U32 missGroupIdx, U32 sbtRecordSize,
+									 RenderGraphBuilder& rgraph, BufferHandle sbtHandle, BufferView sbtBuffer);
+
+private:
+	ShaderProgramResourcePtr m_sbtBuildProg; // Resource of RtSbtBuild.ankiprogbin
+	ShaderProgramPtr m_sbtBuildGrProg; // "Build" technique
+	ShaderProgramPtr m_sbtPatchGrProg; // "PatchRaygenAndMiss" technique
+};
 /// @}
 
 } // end namespace anki

+ 21 - 55
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -16,7 +16,7 @@ namespace anki {
 
 Error RtMaterialFetchDbg::init()
 {
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtSbtBuild.ankiprogbin", {{"TECHNIQUE", 1}}, m_sbtProg, m_sbtBuildGrProg, "SbtBuild"));
+	ANKI_CHECK(RtMaterialFetchRendererObject::init());
 
 	// Ray gen and miss
 	{
@@ -61,51 +61,8 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
-	{
-		// Allocate SBT
-		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
-							   ? sizeof(U32)
-							   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
-		sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
-		U8* sbtMem;
-		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocate(
-			(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtAlignment, sbtMem);
-		sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kUavCompute);
-
-		// Write the first 2 entries of the SBT
-		ConstWeakArray<U8> shaderGroupHandles = m_libraryGrProg->getShaderGroupHandles();
-		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
-
-		// Create the pass
-		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch build SBT");
-
-		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kSrvCompute | BufferUsageBit::kIndirectCompute);
-		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
-
-		rpass.setWork([this, sbtBuildIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
-			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetchSbtBuild);
-			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
-
-			cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
-
-			cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
-			cmdb.bindSrv(1, 0, visibleRenderableIndicesBuff);
-			cmdb.bindSrv(2, 0, BufferView(&m_libraryGrProg->getShaderGroupHandlesGpuBuffer()));
-			cmdb.bindUav(0, 0, sbtBuffer);
-
-			RtShadowsSbtBuildConstants consts = {};
-			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
-			consts.m_sbtRecordDwordSize = m_sbtRecordSize / 4;
-			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
-			ANKI_ASSERT(shaderHandleSize % 4 == 0);
-			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
-			cmdb.setFastConstants(&consts, sizeof(consts));
-
-			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuff);
-		});
-	}
+	buildShaderBindingTablePass("RtMaterialFetchDbg: Build SBT", m_libraryGrProg.get(), m_rayGenShaderGroupIdx, m_missShaderGroupIdx, m_sbtRecordSize,
+								rgraph, sbtHandle, sbtBuffer);
 
 	// Ray gen
 	{
@@ -140,22 +97,31 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 
 			cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
 
-			rgraphCtx.bindSrv(0, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+			U32 srv = 0;
+			rgraphCtx.bindSrv(srv++, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+
+			cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+			cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(GpuSceneGlobalIlluminationProbe)));
+			cmdb.bindSrv(srv++, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
+
+			for(U32 i = 0; i < kIndirectDiffuseClipmapCount * 3; ++i)
+			{
+				cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture3DSrv.get(), TextureSubresourceDesc::all()));
+			}
 
-			// Fill the rest of the interface resources
-			cmdb.bindSrv(1, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(2, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(3, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(4, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
-			cmdb.bindSrv(5, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(GpuSceneGlobalIlluminationProbe)));
-			cmdb.bindSrv(6, 2, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
-			cmdb.bindSrv(7, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+			for(U32 i = 0; i < 3; ++i)
+			{
+				cmdb.bindSrv(srv++, 2, TextureView(getDummyGpuResources().m_texture2DSrv.get(), TextureSubresourceDesc::all()));
+			}
 
 			rgraphCtx.bindUav(0, 2, m_runCtx.m_rt);
 			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
 
 			cmdb.bindSampler(0, 2, getRenderer().getSamplers().m_trilinearClamp.get());
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
+			cmdb.bindSampler(2, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 
 			Vec4 dummy;
 			cmdb.setFastConstants(&dummy, sizeof(dummy));

+ 1 - 3
AnKi/Renderer/RtMaterialFetchDbg.h

@@ -15,7 +15,7 @@ namespace anki {
 inline BoolCVar g_rtMaterialFetchDbgCVar("R", "RtMaterialFetchDbg", false, "Enable material debugging pass");
 
 /// Similar to ShadowmapsResolve but it's using ray tracing.
-class RtMaterialFetchDbg : public RendererObject
+class RtMaterialFetchDbg : public RtMaterialFetchRendererObject
 {
 public:
 	RtMaterialFetchDbg()
@@ -34,10 +34,8 @@ public:
 	}
 
 public:
-	ShaderProgramResourcePtr m_sbtProg;
 	ShaderProgramResourcePtr m_rtProg;
 	ShaderProgramResourcePtr m_missProg;
-	ShaderProgramPtr m_sbtBuildGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 
 	RenderTargetDesc m_rtDesc;

+ 1 - 1
AnKi/Resource/ShaderProgramResourceSystem.cpp

@@ -343,7 +343,7 @@ Error ShaderProgramResourceSystem::createRayTracingPrograms(ResourceDynamicArray
 			if((inLib.m_presentStages & requiredShaders) != requiredShaders
 			   || !(inLib.m_presentStages & (ShaderTypeBit::kClosestHit | ShaderTypeBit::kAnyHit)))
 			{
-				ANKI_RESOURCE_LOGE("The libray is missing shader shader types: %s", inLib.m_name.cstr());
+				ANKI_RESOURCE_LOGE("The libray is missing shader types: %s", inLib.m_name.cstr());
 				return Error::kUserData;
 			}
 

+ 2 - 2
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -131,8 +131,8 @@ struct RtShadowsSbtBuildConstants
 {
 	U32 m_shaderHandleDwordSize;
 	U32 m_sbtRecordDwordSize;
-	U32 m_padding0;
-	U32 m_padding1;
+	U32 m_raygenHandleIndex; ///< Index to the handles buffer
+	U32 m_missHandleIndex;
 };
 
 // Lens flare

+ 94 - 17
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -8,8 +8,9 @@
 #pragma anki mutator GPU_WAVE_SIZE 16 32 64
 #pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 8 10 12 14
 #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
+#pragma anki mutator RT_MATERIAL_FETCH_CLIPMAP 0 1
 
-#pragma anki technique RtMaterialFetch rgen mutators
+#pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP
 #pragma anki technique PopulateCaches comp mutators RADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique ComputeIrradiance comp mutators GPU_WAVE_SIZE RADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique Apply comp mutators
@@ -24,7 +25,9 @@
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/FastMathFunctions.hlsl>
 
-#define CLIPMAP_VOLUME 1
+#if defined(RT_MATERIAL_FETCH_CLIPMAP) && RT_MATERIAL_FETCH_CLIPMAP
+#	define CLIPMAP_VOLUME
+#endif
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos)
@@ -97,7 +100,7 @@ Vec3 sampleClipmapIrradianceCommon(Vec3 samplePoint, Vec3 normal, Vec3 cameraPos
 								   Texture3D<Vec4> volumes[kIndirectDiffuseClipmapCount],
 								   Texture3D<Vec4> distanceMomentsVolumes[kIndirectDiffuseClipmapCount],
 								   Texture3D<Vec4> probeValidityVolumes[kIndirectDiffuseClipmapCount], SamplerState linearAnyRepeatSampler,
-								   F32 slowVersionRandFactor, Bool fastVersion)
+								   F32 slowVersionRandFactor, Bool fastVersion, Bool biasSamplePoint)
 {
 #if 1
 	const U16 clipmapIdx = (fastVersion) ? findClipmapOnPositionCheap(clipmaps, samplePoint)
@@ -113,11 +116,11 @@ Vec3 sampleClipmapIrradianceCommon(Vec3 samplePoint, Vec3 normal, Vec3 cameraPos
 #if 0
 	if(clipmapIdx == 0)
 	{
-		return = Vec3(1, 0, 0);
+		return Vec3(1, 0, 0);
 	}
 	else if(clipmapIdx == 1)
 	{
-		return = Vec3(0, 1, 0);
+		return Vec3(0, 1, 0);
 	}
 	else if(clipmapIdx == 2)
 	{
@@ -125,7 +128,7 @@ Vec3 sampleClipmapIrradianceCommon(Vec3 samplePoint, Vec3 normal, Vec3 cameraPos
 	}
 	else
 	{
-		return = Vec3(1, 0, 1);
+		return Vec3(1, 0, 1);
 	}
 #endif
 
@@ -139,7 +142,7 @@ Vec3 sampleClipmapIrradianceCommon(Vec3 samplePoint, Vec3 normal, Vec3 cameraPos
 	const Vec3 fakeVolumeSize = clipmap.m_probeCounts; // Volume size without the octahedron
 
 	const Vec3 biasDir = normalize(cameraPos - samplePoint);
-	const Vec3 biasedWorldPos = samplePoint + biasDir * probeSize * 0.2;
+	const Vec3 biasedWorldPos = (biasSamplePoint) ? samplePoint + biasDir * probeSize * 0.2 : samplePoint;
 
 	F32 octahedronSize = 0.0;
 	Vec3 realVolumeSize;
@@ -255,24 +258,26 @@ Vec3 sampleClipmapIrradianceAccurate(Vec3 samplePoint, Vec3 normal, Vec3 cameraP
 									 Texture3D<Vec4> volumes[kIndirectDiffuseClipmapCount],
 									 Texture3D<Vec4> distanceMomentsVolumes[kIndirectDiffuseClipmapCount],
 									 Texture3D<Vec4> probeValidityVolumes[kIndirectDiffuseClipmapCount], SamplerState linearAnyRepeatSampler,
-									 F32 randFactor)
+									 F32 randFactor, Bool biasSamplePoint = true)
 {
 	return sampleClipmapIrradianceCommon(samplePoint, normal, cameraPos, lookDir, clipmaps, volumes, distanceMomentsVolumes, probeValidityVolumes,
-										 linearAnyRepeatSampler, randFactor, false);
+										 linearAnyRepeatSampler, randFactor, false, biasSamplePoint);
 }
 
 Vec3 sampleClipmapIrradianceCheap(Vec3 samplePoint, Vec3 normal, Clipmap clipmaps[kIndirectDiffuseClipmapCount],
-								  Texture3D<Vec4> volumes[kIndirectDiffuseClipmapCount], SamplerState linearAnyRepeatSampler)
+								  Texture3D<Vec4> volumes[kIndirectDiffuseClipmapCount], SamplerState linearAnyRepeatSampler,
+								  Bool biasSamplePoint = true)
 {
 	return sampleClipmapIrradianceCommon(samplePoint, normal, samplePoint, samplePoint, clipmaps, volumes, volumes, volumes, linearAnyRepeatSampler,
-										 0.0, true);
+										 0.0, true, biasSamplePoint);
 }
 
 // ===========================================================================
-// RayGen                                                                    =
+// RtMaterialFetch                                                           =
 // ===========================================================================
-#if ANKI_RAY_GEN_SHADER
+#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch)
 
+#	if RT_MATERIAL_FETCH_CLIPMAP
 struct Consts
 {
 	U32 m_clipmapIdx;
@@ -328,13 +333,13 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 		radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, tMax, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
 
 		// Apply indirect
-#	if 0
+#		if 0
 		const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
 		const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
 		const Vec3 irradiance = sampleClipmapIrradianceAccurate(hitPos, gbuffer.m_worldNormal, g_globalRendererConstants.m_cameraPosition, lookDir,
 																g_globalRendererConstants.m_indirectDiffuseClipmaps, g_irradianceVolumes,
 																g_distanceMomentsVolumes, g_probeValidityVolumes, g_linearAnyRepeatSampler, noise);
-#	else
+#		else
 		Clipmap clipmaps[kIndirectDiffuseClipmapCount];
 		for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 		{
@@ -342,7 +347,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 			clipmaps[i].m_aabbMin = clipmaps[i].m_prevFrameAabbMin;
 		}
 		const Vec3 irradiance = sampleClipmapIrradianceCheap(hitPos, gbuffer.m_worldNormal, clipmaps, g_irradianceVolumes, g_linearRepeatAnySampler);
-#	endif
+#		endif
 
 		radiance += irradiance * gbuffer.m_diffuse / kPi;
 	}
@@ -352,7 +357,79 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	const F32 kMaxDist = 1000.0; // Chose something small and make sure its square doesn't overflow F16
 	TEX(g_lightResultTex, UVec2(probeIdx, outPixelIdx + raysPerProbePerFrame * g_consts.m_clipmapIdx)) = HVec4(radiance, min(rayT, kMaxDist));
 }
-#endif // ANKI_RAY_GEN_SHADER
+#	else
+
+struct Consts
+{
+	F32 m_rayMax;
+	F32 m_padding1;
+	F32 m_padding2;
+	F32 m_padding3;
+};
+ANKI_FAST_CONSTANTS(Consts, g_consts)
+
+[Shader("raygeneration")] void main()
+{
+	const UVec2 coord = DispatchRaysIndex().xy;
+	const Vec2 uv = Vec2(coord) / DispatchRaysDimensions().xy; // NOTE(review): no +0.5 texel-center offset is applied - confirm this is intended
+
+	const F32 depth = g_depthTex[coord].x;
+	const Vec4 rt2 = g_gbufferRt2[coord];
+	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
+
+	const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(uvToNdc(uv), depth, 1.0));
+	const Vec3 worldPos = v4.xyz / v4.w; // Perspective divide of the unprojected pixel
+
+	const Vec3 biasDir = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
+	const Vec3 biasedWorldPos = worldPos + biasDir * 0.1; // The ray origin is pushed 0.1 units towards the camera
+
+	// Rand. The seed changes per pixel and with frame % 8, the Hammersley index with frame % 64
+	const UVec3 seed = rand3DPCG16(UVec3(coord, g_globalRendererConstants.m_frame % 8u));
+	const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
+
+	const Mat3 tbn = rotationFromDirection(worldNormal);
+	const Vec3 rayDir = normalize(mul(tbn, hemisphereSampleCos(randFactors))); // Hemisphere sample rotated by the basis built from the GBuffer normal
+
+	// Trace one ray from the biased position. Its max distance comes from the fast constants
+	const F32 tMax = g_consts.m_rayMax;
+	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
+	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
+	F32 rayT = 0.0;
+	Bool backfacing = false;
+	const Bool hit = materialRayTrace<F16>(biasedWorldPos, rayDir, 0.01, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
+
+	HVec3 radiance = 0.0;
+	Vec3 hitPos = 0.0;
+	if(hit)
+	{
+		hitPos = biasedWorldPos + rayDir * (rayT - 0.01); // Stops 0.01 short of the reported hit distance
+		radiance = directLighting<F16>(gbuffer, hitPos, !hit, true, 1000.0, traceFlags | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH);
+	}
+
+	const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
+	const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
+
+	const Vec3 rayOrigin = (hit) ? hitPos : biasedWorldPos; // On a miss the irradiance is sampled at the pixel's own (biased) position
+	const Vec3 rayDir2 = (hit) ? gbuffer.m_worldNormal : worldNormal; // Normal used for the irradiance lookup, not a ray direction
+	const Bool biasSamplePoint = !hit; // Bias the sample point if it's not for the 2nd bounce
+	const Vec3 irradiance = sampleClipmapIrradianceAccurate(
+		rayOrigin, rayDir2, g_globalRendererConstants.m_cameraPosition, lookDir, g_globalRendererConstants.m_indirectDiffuseClipmaps,
+		g_irradianceVolumes, g_distanceMomentsVolumes, g_probeValidityVolumes, g_linearRepeatAnySampler, randFactors.x, biasSamplePoint);
+
+	Vec3 final;
+	if(hit)
+	{
+		final = radiance + irradiance * gbuffer.m_diffuse; // NOTE(review): the clipmap variant of this technique also divides by kPi - confirm
+	}
+	else
+	{
+		final = irradiance;
+	}
+
+	g_colorAndPdfTex[coord].xyz = lerp(g_colorAndPdfTex[coord].xyz, final, 0.2); // Blends 20% of the new result into the stored value
+}
+#	endif // RT_MATERIAL_FETCH_CLIPMAP
+#endif
 
 // ===========================================================================
 // PopulateCaches                                                            =

+ 1 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -314,6 +314,7 @@ void DispatchMesh(U32 groupSizeX, U32 groupSizeY, U32 groupSizeZ, T payload);
 void SetMeshOutputCounts(U32 vertexCount, U32 primitiveCount);
 
 UVec2 DispatchRaysIndex();
+UVec2 DispatchRaysDimensions();
 
 // Workgraphs
 

+ 0 - 2
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -39,11 +39,9 @@ StructuredBuffer<GpuSceneGlobalIlluminationProbe> g_giProbes : register(t3, SPAC
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t4, SPACE);
 #	endif
 
-#	if defined(CLIPMAP_VOLUME)
 Texture3D<Vec4> g_irradianceVolumes[kIndirectDiffuseClipmapCount] : register(t5, SPACE);
 Texture3D<Vec4> g_probeValidityVolumes[kIndirectDiffuseClipmapCount] : register(t8, SPACE); // WARNING: Adjust if kIndirectDiffuseClipmapCount changed
 Texture3D<Vec4> g_distanceMomentsVolumes[kIndirectDiffuseClipmapCount] : register(t11, SPACE);
-#	endif
 
 #	if defined(CLIPMAP_VOLUME)
 Texture2D<Vec4> g_dummyTex[3] : register(t14, SPACE);

+ 50 - 8
AnKi/Shaders/RtSbtBuild.ankiprog

@@ -5,16 +5,17 @@
 
 #pragma anki mutator TECHNIQUE 0 1 // Shdows or MaterialFetch
 
-#pragma anki technique SbtBuild comp
+#pragma anki technique Build comp mutators TECHNIQUE
+#pragma anki technique PatchRaygenAndMiss comp mutators
 
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Common.hlsl>
 
 // ===========================================================================
-// SbtBuild                                                                  =
+// Build                                                                     =
 // ===========================================================================
-#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_SbtBuild)
+#if NOT_ZERO(ANKI_TECHNIQUE_Build)
 
 StructuredBuffer<GpuSceneRenderable> g_renderables : register(t0);
 
@@ -28,21 +29,36 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 
 #	define NUMTHREADS 64
 
-[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
+[numthreads(NUMTHREADS, 1, 1)] void main(COMPUTE_ARGS)
 {
+	if(all(svGroupId == 0))
+	{
+		// First group copies the raygen and miss handles
+
+		const U32 dword = svGroupIndex;
+		if(dword < g_consts.m_shaderHandleDwordSize)
+		{
+			const U32 raygenHandleDwordOffset = g_consts.m_raygenHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
+			SBUFF(g_sbtBuffer, dword) = SBUFF(g_shaderHandles, raygenHandleDwordOffset);
+
+			const U32 missHandleDwordOffset = g_consts.m_missHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
+			SBUFF(g_sbtBuffer, dword + g_consts.m_sbtRecordDwordSize) = SBUFF(g_shaderHandles, missHandleDwordOffset);
+		}
+	}
+
 	const U32 renderableCount = SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit;
-	if(svDispatchThreadId >= renderableCount)
+	if(svDispatchThreadId.x >= renderableCount)
 	{
 		return;
 	}
 
-	const U32 lodAndRenderableIdx = SBUFF(g_visibleRenderables, svDispatchThreadId + 1).m_lod_2bit_renderableIndex_30bit;
+	const U32 lodAndRenderableIdx = SBUFF(g_visibleRenderables, svDispatchThreadId.x + 1).m_lod_2bit_renderableIndex_30bit;
 	const U32 renderableIdx = lodAndRenderableIdx & ((1u << 30) - 1u);
 	const U32 lod = lodAndRenderableIdx >> 30u;
 	const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
 
 	U32 sbtDwordOffset = g_consts.m_sbtRecordDwordSize * 2; // Skip raygen and miss shaders which are first
-	sbtDwordOffset += g_consts.m_sbtRecordDwordSize * svDispatchThreadId;
+	sbtDwordOffset += g_consts.m_sbtRecordDwordSize * svDispatchThreadId.x;
 
 	// Copy the handle
 	for(U32 i = 0; i < g_consts.m_shaderHandleDwordSize; ++i)
@@ -59,4 +75,30 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 	SBUFF(g_sbtBuffer, sbtDwordOffset++) = renderable.m_meshLodsIndex + lod;
 	SBUFF(g_sbtBuffer, sbtDwordOffset) = 0;
 }
-#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuild
+#endif
+
+// ===========================================================================
+// PatchRaygenAndMiss                                                        =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_PatchRaygenAndMiss)
+
+StructuredBuffer<U32> g_shaderHandles : register(t0);
+
+RWStructuredBuffer<U32> g_sbtBuffer : register(u0);
+
+ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
+
+// Patches only raygen and miss handles to the SBT. Each thread copies one dword of the raygen handle and one dword of the miss handle
+[NumThreads(32, 1, 1)] void main(COMPUTE_ARGS) // NOTE(review): dispatched as a single group, so this assumes m_shaderHandleDwordSize <= 32 - confirm
+{
+	const U32 dword = svGroupIndex;
+	if(dword < g_consts.m_shaderHandleDwordSize)
+	{
+		const U32 raygenHandleDwordOffset = g_consts.m_raygenHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
+		SBUFF(g_sbtBuffer, dword) = SBUFF(g_shaderHandles, raygenHandleDwordOffset); // Raygen handle goes to the first SBT record
+
+		const U32 missHandleDwordOffset = g_consts.m_missHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
+		SBUFF(g_sbtBuffer, dword + g_consts.m_sbtRecordDwordSize) = SBUFF(g_shaderHandles, missHandleDwordOffset); // Miss handle goes to the second record
+	}
+}
+#endif

+ 2 - 0
AnKi/Util/Tracer.h

@@ -184,10 +184,12 @@ private:
 #	define ANKI_TRACE_SCOPED_EVENT(name_) TracerScopedEvent _tse##name_(ANKI_STRINGIZE(ANKI_CONCATENATE(t, name_)))
 #	define ANKI_TRACE_CUSTOM_EVENT(name_, start_, duration_) \
 		Tracer::getSingleton().addCustomEvent(ANKI_STRINGIZE(ANKI_CONCATENATE(t, name_)), start_, duration_)
+#	define ANKI_TRACE_FUNCTION() TracerScopedEvent ANKI_CONCATENATE(_tse, __LINE__)(ANKI_FUNC)
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) Tracer::getSingleton().incrementCounter(ANKI_STRINGIZE(ANKI_CONCATENATE(c, name_)), val_)
 #else
 #	define ANKI_TRACE_SCOPED_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_CUSTOM_EVENT(name_, start_, duration_) ((void)0)
+#	define ANKI_TRACE_FUNCTION() ((void)0)
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) ((void)0)
 #endif
 /// @}