Przeglądaj źródła

Add a ray tracing debug pass

Panagiotis Christopoulos Charitos 1 rok temu
rodzic
commit
b0468d4fa2
33 zmienionych plików z 568 dodań i 100 usunięć
  1. 2 3
      AnKi/Gr/Vulkan/VkAccelerationStructure.cpp
  2. 2 1
      AnKi/Gr/Vulkan/VkGrManager.cpp
  3. 1 1
      AnKi/Gr/Vulkan/VkShaderProgram.cpp
  4. 1 1
      AnKi/Renderer/AccelerationStructureBuilder.cpp
  5. 3 3
      AnKi/Renderer/AccelerationStructureBuilder.h
  6. 6 1
      AnKi/Renderer/Renderer.cpp
  7. 0 1
      AnKi/Renderer/Renderer.h
  8. 4 1
      AnKi/Renderer/RendererObject.def.h
  9. 168 0
      AnKi/Renderer/RtMaterialFetchDbg.cpp
  10. 56 0
      AnKi/Renderer/RtMaterialFetchDbg.h
  11. 1 0
      AnKi/Renderer/RtShadows.h
  12. 5 5
      AnKi/Renderer/Utils/GpuVisibility.cpp
  13. 1 1
      AnKi/Renderer/Utils/GpuVisibility.h
  14. 10 2
      AnKi/Resource/MaterialResource.cpp
  15. 7 2
      AnKi/Resource/RenderingKey.h
  16. 4 1
      AnKi/Resource/ShaderProgramResourceSystem.cpp
  17. 7 0
      AnKi/Scene/Components/ModelComponent.cpp
  18. 0 1
      AnKi/ShaderCompiler/ShaderProgramBinary.xml
  19. 0 1
      AnKi/ShaderCompiler/ShaderProgramBinaryExtra.h
  20. 5 0
      AnKi/Shaders/Common.hlsl
  21. 65 11
      AnKi/Shaders/GBufferGeneric.ankiprog
  22. 45 42
      AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog
  23. 3 4
      AnKi/Shaders/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprog
  24. 1 0
      AnKi/Shaders/Include/GpuSceneTypes.h
  25. 1 0
      AnKi/Shaders/Include/MaterialTypes.h
  26. 5 0
      AnKi/Shaders/Include/MiscRendererTypes.h
  27. 9 9
      AnKi/Shaders/Include/UnifiedGeometryTypes.def.h
  28. 2 0
      AnKi/Shaders/Intellisense.hlsl
  29. 14 0
      AnKi/Shaders/RtMaterialFetch.hlsl
  30. 136 0
      AnKi/Shaders/RtMaterialFetchDbg.ankiprog
  31. 2 2
      AnKi/Shaders/RtShadows.ankiprog
  32. 1 6
      AnKi/Shaders/RtShadows.hlsl
  33. 1 1
      Samples/Common/SampleApp.cpp

+ 2 - 3
AnKi/Gr/Vulkan/VkAccelerationStructure.cpp

@@ -148,8 +148,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		else
 		{
 			// Instances buffer already created
-			ANKI_ASSERT(inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getOffset()
-							+ sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_indirectArgs.m_maxInstanceCount
+			ANKI_ASSERT(sizeof(VkAccelerationStructureInstanceKHR) * inf.m_topLevel.m_indirectArgs.m_maxInstanceCount
 						<= inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getRange());
 			m_topLevelInfo.m_instancesBuffer.reset(&inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getBuffer());
 
@@ -164,7 +163,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 		geom.geometry.instances.data.deviceAddress = m_topLevelInfo.m_instancesBuffer->getGpuAddress();
 		if(isIndirect)
 		{
-			geom.geometry.instances.data.deviceAddress += inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getRange();
+			geom.geometry.instances.data.deviceAddress += inf.m_topLevel.m_indirectArgs.m_instancesBuffer.getOffset();
 		}
 		geom.geometry.instances.arrayOfPointers = false;
 		geom.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; // TODO

+ 2 - 1
AnKi/Gr/Vulkan/VkGrManager.cpp

@@ -1239,8 +1239,9 @@ Error GrManagerImpl::initDevice()
 		accelerationStructureFeatures.accelerationStructureHostCommands = false;
 		accelerationStructureFeatures.descriptorBindingAccelerationStructureUpdateAfterBind = false;
 
-		ANKI_ASSERT(accelerationStructureFeatures.pNext == nullptr);
 		appendPNextList(ci, &accelerationStructureFeatures);
+		appendPNextList(ci, &rayQueryFeatures);
+		appendPNextList(ci, &rtPipelineFeatures);
 
 		// Get some more stuff
 		VkPhysicalDeviceAccelerationStructurePropertiesKHR props = {};

+ 1 - 1
AnKi/Gr/Vulkan/VkShaderProgram.cpp

@@ -480,7 +480,7 @@ void ShaderProgramImpl::rewriteSpirv(ShaderReflectionDescriptorRelated& refl, Gr
 		visitSpirv(WeakArray<U32>(outSpv), [&](U32 cmd, WeakArray<U32> instructions) {
 			if(cmd == spv::OpDecorate && instructions[1] == spv::DecorationBinding
 			   && instructions[2] >= kDxcVkBindingShifts[0][HlslResourceType::kFirst]
-			   && instructions[2] < kDxcVkBindingShifts[kMaxRegisterSpaces - 1][HlslResourceType::kCount - 1])
+			   && instructions[2] < kDxcVkBindingShifts[kMaxRegisterSpaces - 1][HlslResourceType::kCount - 1] + 1000)
 			{
 				const U32 binding = instructions[2];
 

+ 1 - 1
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -33,7 +33,7 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 		getRenderer().getGpuVisibilityAccelerationStructures().pupulateRenderGraph(in, visOut);
 
 		m_runCtx.m_visibilityHandle = visOut.m_someBufferHandle;
-		m_runCtx.m_visibleRenderableIndicesBuff = visOut.m_renderableIndicesBuffer;
+		m_runCtx.m_visibleRenderablesBuff = visOut.m_renderablesBuffer;
 	}
 
 	// Create the TLAS

+ 3 - 3
AnKi/Renderer/AccelerationStructureBuilder.h

@@ -32,10 +32,10 @@ public:
 		return m_runCtx.m_tlasHandle;
 	}
 
-	void getVisibilityInfo(BufferHandle& handle, BufferView& buffer) const
+	void getVisibilityInfo(BufferHandle& handle, BufferView& visibleRenderables) const
 	{
 		handle = m_runCtx.m_visibilityHandle;
-		buffer = m_runCtx.m_visibleRenderableIndicesBuff;
+		visibleRenderables = m_runCtx.m_visibleRenderablesBuff;
 	}
 
 public:
@@ -46,7 +46,7 @@ public:
 		AccelerationStructureHandle m_tlasHandle;
 
 		BufferHandle m_visibilityHandle;
-		BufferView m_visibleRenderableIndicesBuff;
+		BufferView m_visibleRenderablesBuff;
 	} m_runCtx;
 };
 /// @}

+ 6 - 1
AnKi/Renderer/Renderer.cpp

@@ -46,6 +46,7 @@
 #include <AnKi/Renderer/Ssr.h>
 #include <AnKi/Renderer/Sky.h>
 #include <AnKi/Renderer/MotionBlur.h>
+#include <AnKi/Renderer/RtMaterialFetchDbg.h>
 #include <AnKi/Renderer/Utils/Drawer.h>
 #include <AnKi/Renderer/Utils/GpuVisibility.h>
 #include <AnKi/Renderer/Utils/MipmapGenerator.h>
@@ -279,10 +280,14 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_gbufferPost->populateRenderGraph(ctx);
 	m_depthDownscale->populateRenderGraph(ctx);
 	m_ssr->populateRenderGraph(ctx);
-	if(m_rtShadows)
+	if(g_rayTracingCVar && m_rtShadows)
 	{
 		m_rtShadows->populateRenderGraph(ctx);
 	}
+	if(g_rayTracingCVar && m_rtMaterialFetchDbg)
+	{
+		m_rtMaterialFetchDbg->populateRenderGraph(ctx);
+	}
 	m_shadowmapsResolve->populateRenderGraph(ctx);
 	m_volumetricFog->populateRenderGraph(ctx);
 	m_lensFlare->populateRenderGraph(ctx);

+ 0 - 1
AnKi/Renderer/Renderer.h

@@ -27,7 +27,6 @@ inline BoolCVar g_preferComputeCVar("R", "PreferCompute", !ANKI_PLATFORM_MOBILE,
 inline BoolCVar g_highQualityHdrCVar("R", "HighQualityHdr", !ANKI_PLATFORM_MOBILE,
 									 "If true use R16G16B16 for HDR images. Alternatively use B10G11R11");
 inline BoolCVar g_vrsLimitTo2x2CVar("R", "VrsLimitTo2x2", false, "If true the max rate will be 2x2");
-inline BoolCVar g_rayTracedShadowsCVar("R", "RayTracedShadows", true, "Enable or not ray traced shadows. Ignored if RT is not supported");
 inline NumericCVar<U8> g_shadowCascadeCountCVar("R", "ShadowCascadeCount", (ANKI_PLATFORM_MOBILE) ? 3 : kMaxShadowCascades, 1, kMaxShadowCascades,
 												"Max number of shadow cascades for directional lights");
 inline NumericCVar<F32> g_shadowCascade0DistanceCVar("R", "ShadowCascade0Distance", 18.0, 1.0, kMaxF32, "The distance of the 1st cascade");

+ 4 - 1
AnKi/Renderer/RendererObject.def.h

@@ -25,7 +25,8 @@ ANKI_RENDERER_OBJECT_DEF(IndirectDiffuseProbes, indirectDiffuseProbes, 1)
 ANKI_RENDERER_OBJECT_DEF(ShadowmapsResolve, shadowmapsResolve, 1)
 ANKI_RENDERER_OBJECT_DEF(RtShadows, rtShadows, GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled&& g_rayTracedShadowsCVar)
 ANKI_RENDERER_OBJECT_DEF(AccelerationStructureBuilder, accelerationStructureBuilder,
-						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled&& g_rayTracedShadowsCVar)
+						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled
+							 && (g_rayTracedShadowsCVar || g_rtMaterialFetchDbgCVar))
 ANKI_RENDERER_OBJECT_DEF(MotionVectors, motionVectors, 1)
 ANKI_RENDERER_OBJECT_DEF(TemporalUpscaler, temporalUpscaler, 1)
 ANKI_RENDERER_OBJECT_DEF(VrsSriGeneration, vrsSriGeneration, 1)
@@ -35,6 +36,8 @@ ANKI_RENDERER_OBJECT_DEF(Ssao, ssao, 1)
 ANKI_RENDERER_OBJECT_DEF(Ssr, ssr, 1)
 ANKI_RENDERER_OBJECT_DEF(Sky, sky, 1)
 ANKI_RENDERER_OBJECT_DEF(MotionBlur, motionBlur, 1)
+ANKI_RENDERER_OBJECT_DEF(RtMaterialFetchDbg, rtMaterialFetchDbg,
+						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled&& g_rtMaterialFetchDbgCVar)
 
 // Util objects
 ANKI_RENDERER_OBJECT_DEF(RenderableDrawer, drawer, 1)

+ 168 - 0
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -0,0 +1,168 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Renderer/RtMaterialFetchDbg.h>
+#include <AnKi/Renderer/Renderer.h>
+#include <AnKi/Renderer/AccelerationStructureBuilder.h>
+#include <AnKi/Renderer/GBuffer.h>
+#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
+#include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
+#include <AnKi/Util/Tracer.h>
+#include <AnKi/Shaders/Include/MaterialTypes.h>
+
+namespace anki {
+
+Error RtMaterialFetchDbg::init() // Loads the SBT build programs and the RT library variants, computes the SBT record size and bakes the RT description
+{
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtMaterialFetchDbg.ankiprogbin", {}, m_prog, m_sbtBuildSetupGrProg, "SbtBuildSetup"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/RtMaterialFetchDbg.ankiprogbin", {}, m_prog, m_sbtBuildGrProg, "SbtBuild"));
+
+	// Ray gen and miss: get the "RtMaterialFetch" technique variants and remember their shader group handle indices
+	{
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
+		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
+		const ShaderProgramResourceVariant* variant;
+		m_prog->getOrCreateVariant(variantInitInfo, variant);
+		m_libraryGrProg.reset(&variant->getProgram()); // The program of the ray gen variant is the one bound for tracing and queried for handles
+		m_rayGenShaderGroupIdx = variant->getShaderGroupHandleIndex();
+
+		ShaderProgramResourceVariantInitInfo variantInitInfo2(m_prog);
+		variantInitInfo2.requestTechniqueAndTypes(ShaderTypeBit::kMiss, "RtMaterialFetch");
+		m_prog->getOrCreateVariant(variantInitInfo2, variant);
+		m_missShaderGroupIdx = variant->getShaderGroupHandleIndex();
+	}
+
+	m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
+										GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4))); // Handle + sizeof(UVec4) extra bytes, rounded up to the SBT record alignment
+
+	m_rtDesc = getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
+															 Format::kR8G8B8A8_Unorm, "RtMaterialFetch"); // Output is at internal resolution
+	m_rtDesc.bake();
+
+	return Error::kNone;
+}
+
+void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx) // Registers 3 passes: SBT build setup, SBT build and the ray gen that writes m_runCtx.m_rt
+{
+	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
+
+	// SBT build setup: a 1x1x1 compute dispatch that writes the indirect args consumed by the SBT build pass below
+	BufferHandle sbtBuildIndirectArgsHandle;
+	BufferView sbtBuildIndirectArgsBuffer;
+	{
+		sbtBuildIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(1);
+		sbtBuildIndirectArgsHandle = rgraph.importBuffer(sbtBuildIndirectArgsBuffer, BufferUsageBit::kUavCompute);
+
+		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch setup build SBT");
+
+		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kAccelerationStructureBuild); // NOTE(review): the pass binds this buffer as a UAV below, confirm the usage bit
+
+		rpass.setWork([this, sbtBuildIndirectArgsBuffer](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetch);
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_sbtBuildSetupGrProg.get());
+
+			cmdb.bindSrv(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferView());
+			cmdb.bindUav(0, 0, sbtBuildIndirectArgsBuffer);
+
+			cmdb.dispatchCompute(1, 1, 1);
+		});
+	}
+
+	// SBT build: indirect compute dispatch that writes the SBT using the visible renderables and the library's shader group handles
+	BufferHandle sbtHandle;
+	BufferView sbtBuffer;
+	{
+		// Allocate SBT: one record per RT bounding volume plus 2 for the ray gen and miss records
+		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
+							   ? sizeof(U32)
+							   : GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
+		sbtAlignment = computeCompoundAlignment(sbtAlignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
+		U8* sbtMem;
+		sbtBuffer = RebarTransientMemoryPool::getSingleton().allocate(
+			(GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount() + 2) * m_sbtRecordSize, sbtAlignment, sbtMem);
+		sbtHandle = rgraph.importBuffer(sbtBuffer, BufferUsageBit::kUavCompute);
+
+		// Write the first 2 entries of the SBT from the CPU: the ray gen handle first, then the miss handle
+		ConstWeakArray<U8> shaderGroupHandles = m_libraryGrProg->getShaderGroupHandles();
+		const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+		memcpy(sbtMem, &shaderGroupHandles[m_rayGenShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+		memcpy(sbtMem + m_sbtRecordSize, &shaderGroupHandles[m_missShaderGroupIdx * shaderHandleSize], shaderHandleSize);
+
+		// Create the pass
+		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch build SBT");
+
+		BufferHandle visibilityHandle;
+		BufferView visibleRenderablesBuff;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderablesBuff);
+
+		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
+
+		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer, visibleRenderablesBuff](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetch);
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_sbtBuildGrProg.get());
+
+			cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
+			cmdb.bindSrv(1, 0, visibleRenderablesBuff);
+			cmdb.bindSrv(2, 0, BufferView(&m_libraryGrProg->getShaderGroupHandlesGpuBuffer()));
+			cmdb.bindUav(0, 0, sbtBuffer);
+
+			RtShadowsSbtBuildConstants consts = {};
+			ANKI_ASSERT(m_sbtRecordSize % 4 == 0);
+			consts.m_sbtRecordDwordSize = m_sbtRecordSize / 4; // The shader gets the sizes in dwords, hence the asserts
+			const U32 shaderHandleSize = GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize;
+			ANKI_ASSERT(shaderHandleSize % 4 == 0);
+			consts.m_shaderHandleDwordSize = shaderHandleSize / 4;
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			cmdb.dispatchComputeIndirect(sbtBuildIndirectArgsBuffer);
+		});
+	}
+
+	// Ray gen: trace rays over the internal resolution and write the result to m_runCtx.m_rt
+	{
+		m_runCtx.m_rt = rgraph.newRenderTarget(m_rtDesc);
+
+		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtMaterialFetch");
+
+		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+		rpass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kUavTraceRays);
+		rpass.newAccelerationStructureDependency(getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle(),
+												 AccelerationStructureUsageBit::kTraceRaysSrv);
+
+		rpass.setWork([this, sbtBuffer, &ctx](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(RtMaterialFetch);
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_libraryGrProg.get());
+
+			// More globals: the material registers (sampler, GPU scene, mesh LODs, unified geometry views) that the hit shaders read
+			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
+
+#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
+	cmdb.bindSrv( \
+		reg, 0, \
+		BufferView(&UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, \
+				   getAlignedRoundDown(getFormatInfo(Format::k##fmt).m_texelSize, UnifiedGeometryBuffer::getSingleton().getBuffer().getSize())), \
+		Format::k##fmt);
+#include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
+
+			cmdb.bindConstantBuffer(0, 2, ctx.m_globalRenderingConstantsBuffer);
+			rgraphCtx.bindSrv(0, 2, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
+			rgraphCtx.bindUav(0, 2, m_runCtx.m_rt);
+
+			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+						   getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), 1);
+		});
+	}
+}
+
+} // end namespace anki

+ 56 - 0
AnKi/Renderer/RtMaterialFetchDbg.h

@@ -0,0 +1,56 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Renderer/RendererObject.h>
+
+namespace anki {
+
+/// @addtogroup renderer
+/// @{
+
+inline BoolCVar g_rtMaterialFetchDbgCVar("R", "RtMaterialFetchDbg", false, "Enable material debugging pass");
+
+/// Debug pass that uses ray tracing to fetch material properties and exposes the result as the "RtMaterialFetchDbg" debug render target.
+class RtMaterialFetchDbg : public RendererObject
+{
+public:
+	RtMaterialFetchDbg()
+	{
+		registerDebugRenderTarget("RtMaterialFetchDbg");
+	}
+
+	Error init();
+
+	void populateRenderGraph(RenderingContext& ctx);
+
+	void getDebugRenderTarget([[maybe_unused]] CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,
+							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override
+	{
+		handles[0] = m_runCtx.m_rt; // Only one debug RT is registered so rtName is ignored
+	}
+
+public:
+	ShaderProgramResourcePtr m_prog;
+	ShaderProgramPtr m_sbtBuildSetupGrProg; // "SbtBuildSetup" technique
+	ShaderProgramPtr m_sbtBuildGrProg; // "SbtBuild" technique
+	ShaderProgramPtr m_libraryGrProg; // Program of the ray gen variant, bound when tracing rays
+
+	RenderTargetDesc m_rtDesc;
+
+	U32 m_sbtRecordSize = 0; // Size in bytes of a single SBT record, computed in init()
+	U32 m_rayGenShaderGroupIdx = 0; // Index into the shader group handles of m_libraryGrProg
+	U32 m_missShaderGroupIdx = 0; // Index into the shader group handles of m_libraryGrProg
+
+	class
+	{
+	public:
+		RenderTargetHandle m_rt; // Output render target, set by populateRenderGraph()
+	} m_runCtx;
+};
+/// @}
+
+} // namespace anki

+ 1 - 0
AnKi/Renderer/RtShadows.h

@@ -18,6 +18,7 @@ namespace anki {
 inline BoolCVar g_rtShadowsSvgfCVar("R", "RtShadowsSvgf", false, "Enable or not RT shadows SVGF");
 inline NumericCVar<U8> g_rtShadowsSvgfAtrousPassCountCVar("R", "RtShadowsSvgfAtrousPassCount", 3, 1, 20, "Number of atrous passes of SVGF");
 inline NumericCVar<U32> g_rtShadowsRaysPerPixelCVar("R", "RtShadowsRaysPerPixel", 1, 1, 8, "Number of shadow rays per pixel");
+inline BoolCVar g_rayTracedShadowsCVar("R", "RayTracedShadows", false, "Enable or not ray traced shadows. Ignored if RT is not supported");
 
 /// Similar to ShadowmapsResolve but it's using ray tracing.
 class RtShadows : public RendererObject

+ 5 - 5
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -1111,7 +1111,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 	out.m_instancesBuffer = allocateStructuredBuffer<AccelerationStructureInstance>(aabbCount);
 	out.m_someBufferHandle = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kUavCompute);
 
-	out.m_renderableIndicesBuffer = allocateStructuredBuffer<U32>(aabbCount + 1);
+	out.m_renderablesBuffer = allocateStructuredBuffer<LodAndRenderableIndex>(aabbCount + 1);
 
 	const BufferView zeroInstancesDispatchArgsBuff = allocateStructuredBuffer<DispatchIndirectArgs>(1);
 
@@ -1123,7 +1123,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavCompute);
 
 		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
-					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
+					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, visRenderablesBuff = out.m_renderablesBuffer,
 					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
@@ -1153,7 +1153,7 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 			cmdb.bindSrv(2, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 			cmdb.bindSrv(3, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 			cmdb.bindUav(0, 0, instancesBuff);
-			cmdb.bindUav(1, 0, indicesBuff);
+			cmdb.bindUav(1, 0, visRenderablesBuff);
 			cmdb.bindUav(2, 0, BufferView(m_counterBuffer.get(), 0, sizeof(U32) * 2));
 			cmdb.bindUav(3, 0, zeroInstancesDispatchArgsBuff);
 
@@ -1170,12 +1170,12 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavCompute);
 
 		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
-					  indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rgraph) {
+					  visRenderablesBuff = out.m_renderablesBuffer](RenderPassWorkContext& rgraph) {
 			CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());
 
-			cmdb.bindSrv(0, 0, indicesBuff);
+			cmdb.bindSrv(0, 0, visRenderablesBuff);
 			cmdb.bindUav(0, 0, instancesBuff);
 
 			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff);

+ 1 - 1
AnKi/Renderer/Utils/GpuVisibility.h

@@ -291,7 +291,7 @@ public:
 	BufferHandle m_someBufferHandle; ///< Some handle to track dependencies. No need to track every buffer.
 
 	BufferView m_instancesBuffer; ///< Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
-	BufferView m_renderableIndicesBuffer; ///< AccelerationStructureBuildRangeInfo::m_primitiveCount number of indices to renderables.
+	BufferView m_renderablesBuffer; ///< AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of LodAndRenderableIndex. The 1st element is the count.
 };
 
 /// Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.

+ 10 - 2
AnKi/Resource/MaterialResource.cpp

@@ -13,8 +13,6 @@ namespace anki {
 
 inline constexpr Array<CString, U32(BuiltinMutatorId::kCount)> kBuiltinMutatorNames = {{"NONE", "ANKI_BONES", "ANKI_VELOCITY"}};
 
-inline constexpr Array<CString, U(RenderingTechnique::kCount)> kTechniqueNames = {{"GBuffer", "Depth", "Forward", "RtShadow"}};
-
 // This is some trickery to select calling between XmlElement::getAttributeNumber and XmlElement::getAttributeNumbers
 namespace {
 
@@ -210,6 +208,13 @@ Error MaterialResource::parseShaderProgram(XmlElement shaderProgramEl, Bool asyn
 			m_techniquesMask |= RenderingTechniqueBit::kForward;
 			m_shaderTechniques |= ShaderTechniqueBit::kLegacy;
 		}
+		else if(t.m_name.getBegin() == CString("RtMaterialFetch"))
+		{
+			if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
+			{
+				m_techniquesMask |= RenderingTechniqueBit::kRtMaterialFetch;
+			}
+		}
 		else
 		{
 			ANKI_RESOURCE_LOGE("Found unneeded technique in the shader: %s", t.m_name.getBegin());
@@ -605,6 +610,9 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 	case RenderingTechnique::kRtShadow:
 		initInfo.requestTechniqueAndTypes(ShaderTypeBit::kAllHit, "RtShadows");
 		break;
+	case RenderingTechnique::kRtMaterialFetch:
+		initInfo.requestTechniqueAndTypes(ShaderTypeBit::kAllHit, "RtMaterialFetch");
+		break;
 	default:
 		ANKI_ASSERT(0);
 	}

+ 7 - 2
AnKi/Resource/RenderingKey.h

@@ -17,9 +17,13 @@ enum class RenderingTechnique : U8
 	kDepth = 1,
 	kForward = 2,
 	kRtShadow = 3,
+	kRtMaterialFetch = 4,
 
 	kCount,
-	kFirst = 0
+	kFirst = 0,
+	kFirstRt = kRtShadow,
+	kLastRt = kRtMaterialFetch,
+	kRtCount = kLastRt - kFirstRt + 1
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(RenderingTechnique)
 
@@ -30,8 +34,9 @@ enum class RenderingTechniqueBit : U8
 	kDepth = 1 << 1,
 	kForward = 1 << 2,
 	kRtShadow = 1 << 3,
+	kRtMaterialFetch = 1 << 4,
 
-	kAllRt = kRtShadow,
+	kAllRt = kRtShadow | kRtMaterialFetch,
 	kAllRaster = kGBuffer | kDepth | kForward
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(RenderingTechniqueBit)

+ 4 - 1
AnKi/Resource/ShaderProgramResourceSystem.cpp

@@ -63,6 +63,7 @@ public:
 			ShaderInitInfo inf(progName);
 			inf.m_shaderType = shaderType;
 			inf.m_binary = codeBlock.m_binary;
+			inf.m_reflection = codeBlock.m_reflection;
 			shader->m_shader = GrManager::getSingleton().newShader(inf);
 			shader->m_hash = codeBlock.m_hash;
 
@@ -338,7 +339,9 @@ Error ShaderProgramResourceSystem::createRayTracingPrograms(ResourceDynamicArray
 			ShaderProgramRaytracingLibrary& outLib = outLibs[libIdx];
 			const Lib& inLib = libs[libIdx];
 
-			if(inLib.m_presentStages != (ShaderTypeBit::kRayGen | ShaderTypeBit::kMiss | ShaderTypeBit::kClosestHit | ShaderTypeBit::kAnyHit))
+			const ShaderTypeBit requiredShaders = ShaderTypeBit::kRayGen | ShaderTypeBit::kMiss;
+			if((inLib.m_presentStages & requiredShaders) != requiredShaders
+			   || !(inLib.m_presentStages & (ShaderTypeBit::kClosestHit | ShaderTypeBit::kAnyHit)))
 			{
 				ANKI_RESOURCE_LOGE("The libray is missing shader shader types: %s", inLib.m_name.cstr());
 				return Error::kUserData;

+ 7 - 0
AnKi/Scene/Components/ModelComponent.cpp

@@ -107,6 +107,7 @@ void ModelComponent::loadModelResource(CString filename)
 				out.m_gpuSceneRenderableAabbDepth.allocate();
 				break;
 			case RenderingTechnique::kRtShadow:
+			case RenderingTechnique::kRtMaterialFetch:
 				out.m_gpuSceneRenderableAabbRt.allocate();
 				break;
 			default:
@@ -216,6 +217,12 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 				const MaterialVariant& variant = mtl.getOrCreateVariant(key);
 				gpuRenderable.m_rtShadowsShaderHandleIndex = variant.getRtShaderGroupHandleIndex();
 			}
+			if(!!(mtl.getRenderingTechniques() & RenderingTechniqueBit::kRtMaterialFetch))
+			{
+				const RenderingKey key(RenderingTechnique::kRtMaterialFetch, 0, false, false, false);
+				const MaterialVariant& variant = mtl.getOrCreateVariant(key);
+				gpuRenderable.m_rtMaterialFetchShaderHandleIndex = variant.getRtShaderGroupHandleIndex();
+			}
 			gpuRenderable.m_uuid = SceneGraph::getSingleton().getNewUuid();
 			m_patchInfos[i].m_gpuSceneRenderable.uploadToGpuScene(gpuRenderable);
 		}

+ 0 - 1
AnKi/ShaderCompiler/ShaderProgramBinary.xml

@@ -1 +0,0 @@
-

+ 0 - 1
AnKi/ShaderCompiler/ShaderProgramBinaryExtra.h

@@ -1 +0,0 @@
-

+ 5 - 0
AnKi/Shaders/Common.hlsl

@@ -86,6 +86,11 @@ U32 getMaxNumericLimit()
 constexpr F32 kPi = 3.14159265358979323846f;
 constexpr F32 kNaN = 0.0f / 0.0f;
 
+struct Barycentrics // Intersection attributes passed to the hit shaders
+{
+	Vec2 m_value; // Weights of the 2nd and 3rd triangle vertex. The hit shaders derive the 1st as 1 - x - y
+};
+
 #if ANKI_GR_BACKEND_VULKAN
 #	define ANKI_FAST_CONSTANTS(type, var) [[vk::push_constant]] ConstantBuffer<type> var;
 #else

+ 65 - 11
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -36,12 +36,15 @@
 #pragma anki technique RtShadows ahit mutators ALPHA_TEST DIFFUSE_TEX
 #pragma anki technique RtShadows chit mutators
 
+#pragma anki technique RtMaterialFetch chit mutators DIFFUSE_TEX
+
 #include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneFunctions.h>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/MaterialShadersCommon.hlsl>
 #include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 // Define a few things to avoid compilation errors
 #if ANKI_TECHNIQUE_RtShadows && ANKI_CLOSEST_HIT_SHADER
@@ -58,6 +61,13 @@
 #	define PARALLAX 0
 #endif
 
+#if ANKI_TECHNIQUE_RtMaterialFetch
+#	define ALPHA_TEST 0
+#	define ANKI_VELOCITY 0
+#	define ANKI_BONES 0
+#	define PARALLAX 0
+#endif
+
 #if ANKI_AMPLIFICATION_SHADER
 #	define ALPHA_TEST 0
 #	define DIFFUSE_TEX 0
@@ -617,18 +627,20 @@ GBufferPixelOut main(
 // ===========================================================================
 // RT Shadows                                                                =
 // ===========================================================================
-#if ANKI_ANY_HIT_SHADER
+#if ANKI_TECHNIQUE_RtShadows
+
+#	if ANKI_ANY_HIT_SHADER
 
-#	if REALLY_ALPHA_TEST
+#		if REALLY_ALPHA_TEST
 [[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableInstance> g_gpuSceneRenderable : register(b0); // TODO that won't work on D3D
-#	endif
+#		endif
 
-[shader("anyhit")] void main(inout RayPayload payload, in Barycentrics barycentrics)
+[shader("anyhit")] void main(inout RtShadowsRayPayload payload, in Barycentrics barycentrics)
 {
 	ANKI_MAYBE_UNUSED(payload);
 	ANKI_MAYBE_UNUSED(barycentrics);
 
-#	if REALLY_ALPHA_TEST
+#		if REALLY_ALPHA_TEST
 	payload.m_shadowFactor = 1.0;
 
 	const Vec3 bary = Vec3(1.0f - barycentrics.m_value.x - barycentrics.m_value.y, barycentrics.m_value.x, barycentrics.m_value.y);
@@ -658,17 +670,59 @@ GBufferPixelOut main(
 	{
 		IgnoreHit();
 	}
-#	else
+#		else
 	payload.m_shadowFactor = 0.0;
 	AcceptHitAndEndSearch();
-#	endif
+#		endif
 }
-#endif // ANKI_ANY_HIT_SHADER
+#	endif // ANKI_ANY_HIT_SHADER
 
-#if ANKI_CLOSEST_HIT_SHADER
-[shader("closesthit")] void main(inout RayPayload payload, in Barycentrics barycentrics)
+#	if ANKI_CLOSEST_HIT_SHADER
+[shader("closesthit")] void main(inout RtShadowsRayPayload payload, in Barycentrics barycentrics)
 {
 	ANKI_MAYBE_UNUSED(payload);
 	ANKI_MAYBE_UNUSED(barycentrics);
 }
-#endif // ANKI_CLOSEST_HIT_SHADER
+#	endif // ANKI_CLOSEST_HIT_SHADER
+
+#endif // ANKI_TECHNIQUE_RtShadows
+
+// ===========================================================================
+// RT material fetch                                                         =
+// ===========================================================================
+#if ANKI_TECHNIQUE_RtMaterialFetch
+
+#	if ANKI_CLOSEST_HIT_SHADER
+[[vk::shader_record_ext]] ConstantBuffer<GpuSceneRenderableInstance> g_gpuSceneRenderable : register(b0); // TODO that won't work on D3D
+
+[shader("closesthit")] void main(inout RtMaterialFetchRayPayload payload : SV_RayPayload, in Barycentrics barycentrics : SV_IntersectionAttributes) // Writes the diffuse color of the hit surface to the payload
+{
+	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_gpuSceneRenderable.m_constantsOffset);
+
+#		if DIFFUSE_TEX
+	const Vec3 bary = Vec3(1.0f - barycentrics.m_value.x - barycentrics.m_value.y, barycentrics.m_value.x, barycentrics.m_value.y); // Weights of the 3 triangle vertices
+
+	const GpuSceneMeshLod mesh = g_meshLods[g_gpuSceneRenderable.m_meshLodIndex];
+
+	const U32 idx0 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 0]; // The 3 indices of the hit triangle
+	const U32 idx1 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 1];
+	const U32 idx2 = g_unifiedGeom_R16_Uint[mesh.m_firstIndex + PrimitiveIndex() * 3 + 2];
+
+	const UnpackedMeshVertex vert0 = loadVertex(mesh, idx0, false);
+	const UnpackedMeshVertex vert1 = loadVertex(mesh, idx1, false);
+	const UnpackedMeshVertex vert2 = loadVertex(mesh, idx2, false);
+
+	const Vec2 uv = vert0.m_uv * bary.x + vert1.m_uv * bary.y + vert2.m_uv * bary.z; // Interpolate the UVs at the hit point
+
+	RVec3 diffColor = getBindlessTexture2DRVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, 0.0).xyz; // Always samples mip 0
+#		else
+	RVec3 diffColor = 1.0; // No diffuse texture, only the diffuse scale contributes
+#		endif
+
+	diffColor *= localConstants.m_diffuseScale;
+
+	payload.m_diffuseColor = diffColor;
+}
+#	endif // ANKI_CLOSEST_HIT_SHADER
+
+#endif // ANKI_TECHNIQUE_RtMaterialFetch

+ 45 - 42
AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog

@@ -8,6 +8,7 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 // Buffers that point to the GPU scene
@@ -17,7 +18,7 @@ StructuredBuffer<GpuSceneMeshLod> g_meshLods : register(t2);
 StructuredBuffer<Mat3x4> g_transforms : register(t3);
 
 RWStructuredBuffer<AccelerationStructureInstance> g_visibleInstances : register(u0);
-RWStructuredBuffer<U32> g_visibleRenderableIndices : register(u1); // 1st element is the count
+RWStructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(u1); // 1st element is the count
 
 globallycoherent RWStructuredBuffer<U32> g_counterBuffer : register(u2); // 2 counters per dispatch
 
@@ -29,11 +30,11 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 
 [numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
 {
+	const U32 maxVisibleInstances = getStructuredBufferElementCount(g_visibleInstances);
+
 	// Skip remaining threads
 	const U32 bvolumeIdx = svDispatchThreadId;
-	U32 bvolumeCount;
-	U32 unused;
-	g_renderableBoundingVolumes.GetDimensions(bvolumeCount, unused);
+	const U32 bvolumeCount = getStructuredBufferElementCount(g_renderableBoundingVolumes);
 	Bool visible = (bvolumeIdx < bvolumeCount);
 
 	// Sphere test
@@ -41,7 +42,7 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 	Vec3 sphereCenter;
 	if(visible)
 	{
-		bvolume = g_renderableBoundingVolumes[bvolumeIdx];
+		bvolume = SBUFF(g_renderableBoundingVolumes, bvolumeIdx);
 
 		sphereCenter = (bvolume.m_aabbMin + bvolume.m_aabbMax) * 0.5f;
 		visible = testSphereSphereCollision(sphereCenter, bvolume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
@@ -77,16 +78,16 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 		}
 
 		const U32 renderableIdx = bvolume.m_renderableIndex_20bit_renderStateBucket_12bit >> 12u;
-		const GpuSceneRenderable renderable = g_renderables[renderableIdx];
+		const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
 
 		const U32 meshLodIndex = renderable.m_meshLodsIndex + lod;
-		const GpuSceneMeshLod meshLod = g_meshLods[meshLodIndex];
+		const GpuSceneMeshLod meshLod = SBUFF(g_meshLods, meshLodIndex);
 
 		if(meshLod.m_blasAddress.x != 0 || meshLod.m_blasAddress.y != 0)
 		{
 			// It has a BLAS, write what is to write
 
-			const Mat3x4 transform = g_transforms[renderable.m_worldTransformsIndex];
+			const Mat3x4 transform = SBUFF(g_transforms, renderable.m_worldTransformsIndex);
 			Mat3x4 meshQuantizationTransform;
 			meshQuantizationTransform.m_row0 = Vec4(meshLod.m_positionScale, 0.0f, 0.0f, meshLod.m_positionTranslation.x);
 			meshQuantizationTransform.m_row1 = Vec4(0.0f, meshLod.m_positionScale, 0.0f, meshLod.m_positionTranslation.y);
@@ -94,53 +95,55 @@ ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
 			const Mat3x4 finalTrf = combineTransformations(transform, meshQuantizationTransform);
 
 			U32 instanceIdx;
-			InterlockedAdd(g_counterBuffer[0], 1, instanceIdx);
-
-			AccelerationStructureInstance instance;
-			instance.m_transform = finalTrf;
-			instance.m_mask8_instanceCustomIndex24 = (meshLod.m_tlasInstanceMask << 24u) | (instanceIdx & 0x00FFFFFFu);
-			instance.m_flags8_instanceShaderBindingTableRecordOffset24 =
-				((kAccellerationStructureFlagTriangleFrontCounterlockwise | kAccellerationStructureFlagTriangleFacingCullDisable)
-				 << (AccellerationStructureFlag)24u)
-				| (instanceIdx & 0x00FFFFFFu);
-			instance.m_accelerationStructureAddress = meshLod.m_blasAddress;
-			g_visibleInstances[instanceIdx] = instance;
-
-			g_visibleRenderableIndices[instanceIdx + 1] = renderableIdx;
+			InterlockedAdd(SBUFF(g_counterBuffer, 0), 1, instanceIdx);
+
+			if(instanceIdx < maxVisibleInstances)
+			{
+				AccelerationStructureInstance instance;
+				instance.m_transform = finalTrf;
+				instance.m_mask8_instanceCustomIndex24 = (meshLod.m_tlasInstanceMask << 24u) | (instanceIdx & 0x00FFFFFFu);
+				instance.m_flags8_instanceShaderBindingTableRecordOffset24 =
+					((kAccellerationStructureFlagTriangleFrontCounterlockwise | kAccellerationStructureFlagTriangleFacingCullDisable)
+					 << (AccellerationStructureFlag)24u)
+					| (instanceIdx & 0x00FFFFFFu);
+				instance.m_accelerationStructureAddress = meshLod.m_blasAddress;
+
+				SBUFF(g_visibleInstances, instanceIdx) = instance;
+
+				SBUFF(g_visibleRenderables, instanceIdx + 1).m_lod_2bit_renderableIndex_30bit = (lod << 30u) | renderableIdx;
+			}
 		}
 	}
 
 	// Store the counters to the actual buffers
 	{
-		Bool lastThreadgroupExecuting = false;
-		if(svGroupIndex == 0)
-		{
-			U32 threadgroupIdx;
-			InterlockedAdd(g_counterBuffer[1], 1, threadgroupIdx);
-			const U32 threadgroupCount = (bvolumeCount + NUMTHREADS - 1) / NUMTHREADS;
-			lastThreadgroupExecuting = (threadgroupIdx + 1 == threadgroupCount);
-		}
-
 		// Sync to make sure all the atomic ops have finished before the following code reads them
 		AllMemoryBarrierWithGroupSync();
 
-		if(lastThreadgroupExecuting)
+		if(svGroupIndex == 0)
 		{
-			const U32 visible = g_counterBuffer[0];
-			g_visibleRenderableIndices[0] = visible;
+			U32 threadgroupIdx;
+			InterlockedAdd(SBUFF(g_counterBuffer, 1), 1, threadgroupIdx);
+			const U32 threadgroupCount = (bvolumeCount + NUMTHREADS - 1) / NUMTHREADS;
+			const Bool lastThreadgroupExecuting = (threadgroupIdx + 1 == threadgroupCount);
 
-			g_counterBuffer[0] = 0;
-			g_counterBuffer[1] = 0;
+			if(lastThreadgroupExecuting)
+			{
+				const U32 visible = min(SBUFF(g_counterBuffer, 0), maxVisibleInstances);
+				SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit = visible;
 
-			// Update indirect args of some next job
-			U32 total, unused;
-			g_visibleInstances.GetDimensions(total, unused);
+				SBUFF(g_counterBuffer, 0) = 0;
+				SBUFF(g_counterBuffer, 1) = 0;
 
-			const U32 remaining = total - visible;
+				// Update indirect args of some next job
+				const U32 remaining = maxVisibleInstances - visible;
 
-			g_nextDispatchIndirectArgs[0].m_threadGroupCountX = (remaining + NUMTHREADS - 1) / NUMTHREADS;
-			g_nextDispatchIndirectArgs[0].m_threadGroupCountY = 1;
-			g_nextDispatchIndirectArgs[0].m_threadGroupCountZ = 1;
+				DispatchIndirectArgs args;
+				args.m_threadGroupCountX = (remaining + NUMTHREADS - 1) / NUMTHREADS;
+				args.m_threadGroupCountY = 1;
+				args.m_threadGroupCountZ = 1;
+				SBUFF(g_nextDispatchIndirectArgs, 0) = args;
+			}
 		}
 	}
 }

+ 3 - 4
AnKi/Shaders/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprog

@@ -17,11 +17,10 @@ RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);
 {
 	const U32 visibleInstances = g_visibleRenderableIndices[0];
 
-	U32 totalInstances, unused;
-	g_instances.GetDimensions(totalInstances, unused);
+	const U32 maxInstances = getStructuredBufferElementCount(g_instances);
 
-	ANKI_ASSERT(totalInstances >= visibleInstances);
-	const U32 remainingInstances = totalInstances - visibleInstances;
+	ANKI_ASSERT(maxInstances >= visibleInstances);
+	const U32 remainingInstances = maxInstances - visibleInstances;
 
 	if(svDispatchThreadId < remainingInstances)
 	{

+ 1 - 0
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -25,6 +25,7 @@ struct GpuSceneRenderable
 	U32 m_boneTransformsOffset; ///< Array of Mat3x4 or 0 if its not a skin.
 	U32 m_particleEmitterIndex; ///< Index to the GpuSceneParticleEmitter array or kMaxU32 if it's not an emitter.
 	U32 m_rtShadowsShaderHandleIndex; ///< The index of the shader handle in the array of library's handles.
+	U32 m_rtMaterialFetchShaderHandleIndex; ///< The index of the shader handle in the array of library's handles.
 	U32 m_uuid;
 };
 

+ 1 - 0
AnKi/Shaders/Include/MaterialTypes.h

@@ -47,6 +47,7 @@ static_assert(sizeof(MaterialGlobalConstants) == 15 * sizeof(Vec4));
 #define ANKI_MATERIAL_REGISTER_CLUSTERS 14
 
 // Always last because it's variable. Texture buffer bindings pointing to unified geom buffer:
+// !!WARNING!! Remember to update UnifiedGeometryTypes.def.h if you change this value
 #define ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START 15
 
 ANKI_END_NAMESPACE

+ 5 - 0
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -201,4 +201,9 @@ struct SsaoSpatialDenoiseConstants
 	Vec2 m_padding;
 };
 
+struct LodAndRenderableIndex // Single-U32 element type of the visible-renderables buffers.
+{
+	U32 m_lod_2bit_renderableIndex_30bit; ///< LOD in the upper 2 bits, renderable index in the lower 30 bits, i.e. (lod << 30u) | renderableIdx.
+};
+
 ANKI_END_NAMESPACE

+ 9 - 9
AnKi/Shaders/Include/UnifiedGeometryTypes.def.h

@@ -9,23 +9,23 @@
 #	define ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
 #endif
 
-ANKI_UNIFIED_GEOM_FORMAT(R32_Sfloat, F32, 14)
+ANKI_UNIFIED_GEOM_FORMAT(R32_Sfloat, F32, 15)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R32G32_Sfloat, Vec2, 15)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32_Sfloat, Vec2, 16)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R32G32B32_Sfloat, Vec3, 16)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32B32_Sfloat, Vec3, 17)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R32G32B32A32_Sfloat, Vec4, 17)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32B32A32_Sfloat, Vec4, 18)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R16G16B16A16_Unorm, Vec4, 18)
+ANKI_UNIFIED_GEOM_FORMAT(R16G16B16A16_Unorm, Vec4, 19)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Snorm, Vec4, 19)
+ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Snorm, Vec4, 20)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Uint, UVec4, 20)
+ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Uint, UVec4, 21)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R16_Uint, U32, 21)
+ANKI_UNIFIED_GEOM_FORMAT(R16_Uint, U32, 22)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R8_Uint, U32, 22)
+ANKI_UNIFIED_GEOM_FORMAT(R8_Uint, U32, 23)
 
 #undef ANKI_UNIFIED_GEOM_FORMAT
 #undef ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR

+ 2 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -35,7 +35,9 @@
 #define ANKI_PIXEL_SHADER 1
 #define ANKI_MESH_SHADER 1
 #define ANKI_COMPUTE_SHADER 1
+#define ANKI_MISS_SHADER 1
 #define ANKI_CLOSEST_HIT_SHADER 1
+#define ANKI_RAY_GEN_SHADER 1
 
 using I8 = int;
 using I16 = int;

+ 14 - 0
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -0,0 +1,14 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
+#include <AnKi/Shaders/Common.hlsl>
+
+struct [raypayload] RtMaterialFetchRayPayload // Ray payload used by the material-fetch shaders.
+{
+	Vec3 m_diffuseColor : write(caller, closesthit, miss): read(caller); ///< Diffuse color result. Writable by the caller, closest-hit and miss stages; read back only by the caller.
+};

+ 136 - 0
AnKi/Shaders/RtMaterialFetchDbg.ankiprog

@@ -0,0 +1,136 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki technique RtMaterialFetch rgen miss
+#pragma anki technique SbtBuild comp
+#pragma anki technique SbtBuildSetup comp
+
+#include <AnKi/Shaders/RtMaterialFetch.hlsl>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+
+// ===========================================================================
+// RayGen                                                                    =
+// ===========================================================================
+#if ANKI_RAY_GEN_SHADER
+
+#	define SPACE space2
+
+ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0, SPACE);
+
+RaytracingAccelerationStructure g_tlas : register(t0, SPACE);
+
+RWTexture2D<Vec4> g_outTex : register(u0, SPACE);
+
+[shader("raygeneration")] void main() // Traces one ray per output texel from the camera position and stores the returned diffuse color.
+{
+	Vec2 outSize;
+	g_outTex.GetDimensions(outSize.x, outSize.y);
+	const Vec2 uv = (Vec2(DispatchRaysIndex().xy) + 0.5) / outSize; // +0.5 so the ray goes through the texel center instead of its corner
+
+	const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(uvToNdc(uv), 1.0, 1.0)); // Unproject the NDC point at depth 1.0
+	const Vec3 worldPos = v4.xyz / v4.w; // Perspective divide
+
+	const Vec3 rayOrigin = g_globalRendererConstants.m_matrices.m_cameraTransform.getTranslationPart().xyz;
+	const Vec3 rayDir = normalize(worldPos - rayOrigin);
+
+	// Trace
+	RtMaterialFetchRayPayload payload;
+	payload.m_diffuseColor = 0.0; // Initial value; the miss shader and (presumably) the hit shaders overwrite it
+	const U32 flags = RAY_FLAG_FORCE_OPAQUE;
+	const U32 sbtRecordOffset = 0u;
+	const U32 sbtRecordStride = 0u;
+	const U32 missIndex = 0u;
+	const U32 cullMask = 0xFFu; // All 8 instance-mask bits set
+	RayDesc ray;
+	ray.Origin = rayOrigin;
+	ray.TMin = 0.1;
+	ray.Direction = rayDir;
+	ray.TMax = 100.0; // TODO
+	TraceRay(g_tlas, flags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
+
+	g_outTex[DispatchRaysIndex().xy] = Vec4(payload.m_diffuseColor, 0.0); // Alpha is written as 0
+}
+#endif // ANKI_RAY_GEN_SHADER
+
+// ===========================================================================
+// Miss                                                                      =
+// ===========================================================================
+#if ANKI_MISS_SHADER
+[shader("miss")] void main(inout RtMaterialFetchRayPayload payload) // Miss shader of the RtMaterialFetch technique.
+{
+	payload.m_diffuseColor = Vec3(0.0, 0.0, 0.5); // Fixed color (0, 0, 0.5) reported for rays that hit nothing
+}
+#endif // ANKI_MISS_SHADER
+
+// ===========================================================================
+// SbtBuildSetup                                                             =
+// ===========================================================================
+#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuildSetup
+StructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(t0); // 1st element is the count
+RWStructuredBuffer<DispatchIndirectArgs> g_args : register(u0);
+
+#	define NUMTHREADS 64
+
+[numthreads(1, 1, 1)] void main() // Single-thread job: turns the visible-renderable count into indirect dispatch arguments.
+{
+	const U32 renderableCount = SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit; // Element 0 stores the count
+
+	DispatchIndirectArgs args;
+	args.m_threadGroupCountX = (renderableCount + NUMTHREADS - 1) / NUMTHREADS; // Round up to whole groups of NUMTHREADS
+	args.m_threadGroupCountY = 1;
+	args.m_threadGroupCountZ = 1;
+
+	SBUFF(g_args, 0) = args; // Use the SBUFF accessor like the other structured-buffer accesses in this shader
+}
+#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuildSetup
+
+// ===========================================================================
+// SbtBuild                                                                  =
+// ===========================================================================
+#if ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuild
+
+StructuredBuffer<GpuSceneRenderable> g_renderables : register(t0);
+
+StructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(t1); // 1st element is the count
+
+StructuredBuffer<U32> g_shaderHandles : register(t2);
+
+RWStructuredBuffer<U32> g_sbtBuffer : register(u0);
+
+ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
+
+#	define NUMTHREADS 64
+
+[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID) // One thread per visible renderable: writes that renderable's record into the SBT buffer.
+{
+	const U32 renderableCount = SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit; // Element 0 stores the count
+	if(svDispatchThreadId >= renderableCount)
+	{
+		return;
+	}
+
+	const U32 lodAndRenderableIdx = SBUFF(g_visibleRenderables, svDispatchThreadId + 1).m_lod_2bit_renderableIndex_30bit; // +1 skips the count element
+	const U32 renderableIdx = lodAndRenderableIdx & ((1u << 30) - 1u); // Lower 30 bits
+	const U32 lod = lodAndRenderableIdx >> 30u; // Upper 2 bits
+	const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
+
+	U32 sbtDwordOffset = g_consts.m_sbtRecordDwordSize * 2; // Skip raygen and miss shaders which are first
+	sbtDwordOffset += g_consts.m_sbtRecordDwordSize * svDispatchThreadId;
+
+	// Copy the handle
+	for(U32 i = 0; i < g_consts.m_shaderHandleDwordSize; ++i)
+	{
+		const U32 offset = renderable.m_rtMaterialFetchShaderHandleIndex * g_consts.m_shaderHandleDwordSize + i;
+		SBUFF(g_sbtBuffer, sbtDwordOffset) = SBUFF(g_shaderHandles, offset);
+		++sbtDwordOffset;
+	}
+
+	// Copy the GpuSceneRenderableInstance (4 dwords right after the handle; explicit offsets keep side effects out of SBUFF's argument)
+	SBUFF(g_sbtBuffer, sbtDwordOffset + 0u) = renderable.m_worldTransformsIndex;
+	SBUFF(g_sbtBuffer, sbtDwordOffset + 1u) = renderable.m_constantsOffset;
+	SBUFF(g_sbtBuffer, sbtDwordOffset + 2u) = renderable.m_meshLodsIndex + lod; // Index of the mesh LOD selected by the visibility pass
+	SBUFF(g_sbtBuffer, sbtDwordOffset + 3u) = 0; // Fourth dword is written as zero
+}
+#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SbtBuild

+ 2 - 2
AnKi/Shaders/RtShadows.ankiprog

@@ -45,7 +45,7 @@ F32 trace(const Vec3 rayOrigin, const Vec3 rayDir, F32 tMax)
 	ray.Direction = rayDir;
 	ray.TMax = tMax;
 
-	RayPayload payload;
+	RtShadowsRayPayload payload;
 	payload.m_shadowFactor = 0.0;
 	TraceRay(g_tlas, flags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, ray, payload);
 
@@ -134,7 +134,7 @@ Vec3 genRandomDirection(U32 rayIdx, Vec2 uv)
 #endif // ANKI_RAY_GEN_SHADER
 
 #if ANKI_MISS_SHADER
-[shader("miss")] void main(inout RayPayload payload)
+[shader("miss")] void main(inout RtShadowsRayPayload payload)
 {
 	payload.m_shadowFactor = 1.0;
 }

+ 1 - 6
AnKi/Shaders/RtShadows.hlsl

@@ -10,12 +10,7 @@
 
 constexpr F32 kRtShadowsMaxHistoryLength = 16.0; // The frames of history
 
-struct [raypayload] RayPayload
+struct [raypayload] RtShadowsRayPayload
 {
 	F32 m_shadowFactor : write(caller, anyhit, miss): read(caller);
 };
-
-struct Barycentrics
-{
-	Vec2 m_value;
-};

+ 1 - 1
Samples/Common/SampleApp.cpp

@@ -69,7 +69,7 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 
 	if(in.getKey(KeyCode::kU) == 1)
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "Ssr") ? "" : "Ssr");
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "RtMaterialFetchDbg") ? "" : "RtMaterialFetchDbg");
 	}
 
 	if(in.getKey(KeyCode::kI) == 1)