Browse Source

Add a cheap inline RT path to both IDC and reflections

Panagiotis Christopoulos Charitos 2 months ago
parent
commit
b909755a2b

+ 7 - 7
AnKi/Math/Functions.h

@@ -290,23 +290,23 @@ TVec<T, 3> sphericalToCartesian(T polar, T azimuth)
 }
 }
 
 
 template<typename T>
 template<typename T>
-inline [[nodiscard]] U32 packUnorm4x8(TVec<T, 4> value)
+inline U32 packUnorm4x8(TVec<T, 4> value)
 {
 {
 	ANKI_ASSERT((value <= TVec<T, 4>(T(1)) && value >= TVec<T, 4>(T(0))));
 	ANKI_ASSERT((value <= TVec<T, 4>(T(1)) && value >= TVec<T, 4>(T(0))));
-	const TVec<U32, 4> packed(value * T(255));
-	return packed.x() | (packed.y() << 8u) | (packed.z() << 16u) | (packed.w() << 24u);
+	const TVec<T, 4> packed(value * T(255));
+	return U32(packed.x()) | (U32(packed.y()) << 8u) | (U32(packed.z()) << 16u) | (U32(packed.w()) << 24u); // x needs the U32 cast too: T may be a float
 }
 }
 
 
 // Reverse of packUnorm4x8
 // Reverse of packUnorm4x8
 template<typename T>
 template<typename T>
-inline [[nodiscard]] TVec<T, 4> unpackUnorm4x8(const U32 value)
+inline TVec<T, 4> unpackUnorm4x8(const U32 value)
 {
 {
-	const TVec<U32, 4> packed(value & 0xFF, (value >> 8u) & 0xFF, (value >> 16u) & 0xff, value >> 24u);
-	return TVec<T, 4>(packed) / T(255);
+	const TVec<T, 4> packed(value & 0xFF, (value >> 8u) & 0xFF, (value >> 16u) & 0xff, value >> 24u);
+	return packed / T(255);
 }
 }
 
 
 template<typename TVec4>
 template<typename TVec4>
-inline [[nodiscard]] U32 packSnorm4x8(const TVec4& v)
+inline U32 packSnorm4x8(const TVec4& v)
 {
 {
 	union
 	union
 	{
 	{

+ 1 - 1
AnKi/Math/Quat.h

@@ -71,7 +71,7 @@ public:
 			y() = (m(0, 2) - m(2, 0)) / S;
 			y() = (m(0, 2) - m(2, 0)) / S;
 			z() = (m(1, 0) - m(0, 1)) / S;
 			z() = (m(1, 0) - m(0, 1)) / S;
 		}
 		}
-		else if((m(0, 0) > m(1, 1)) & (m(0, 0) > m(2, 2)))
+		else if(m(0, 0) > m(1, 1) && m(0, 0) > m(2, 2))
 		{
 		{
 			const T S = sqrt<T>(T(1) + m(0, 0) - m(1, 1) - m(2, 2)) * T(2); // S=4*qx
 			const T S = sqrt<T>(T(1) + m(0, 0) - m(1, 1) - m(2, 2)) * T(2); // S=4*qx
 			w() = (m(2, 1) - m(1, 2)) / S;
 			w() = (m(2, 1) - m(1, 2)) / S;

+ 40 - 21
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -268,21 +268,20 @@ Error IndirectDiffuseClipmaps::init()
 											 {"RT_MATERIAL_FETCH_CLIPMAP", 0},
 											 {"RT_MATERIAL_FETCH_CLIPMAP", 0},
 											 {"SPATIAL_RECONSTRUCT_TYPE", !g_cvarRenderIdcApplyHighQuality}}};
 											 {"SPATIAL_RECONSTRUCT_TYPE", !g_cvarRenderIdcApplyHighQuality}}};
 
 
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_applyGiGrProg, "Apply"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_visProbesGrProg, "VisualizeProbes"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_populateCachesGrProg, "PopulateCaches"));
-	ANKI_CHECK(
-		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_computeIrradianceGrProg, "ComputeIrradiance"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_temporalDenoiseGrProg, "TemporalDenoise"));
-	ANKI_CHECK(
-		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_spatialReconstructGrProg, "SpatialReconstruct"));
-	ANKI_CHECK(
-		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_bilateralDenoiseGrProg, "BilateralDenoise"));
+	constexpr CString kProgFname = "ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin";
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_applyGiGrProg, "Apply"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_visProbesGrProg, "VisualizeProbes"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_populateCachesGrProg, "PopulateCaches"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_computeIrradianceGrProg, "ComputeIrradiance"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_temporalDenoiseGrProg, "TemporalDenoise"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_spatialReconstructGrProg, "SpatialReconstruct"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_bilateralDenoiseGrProg, "BilateralDenoise"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_prog, m_rtMaterialFetchInlineRtGrProg, "RtMaterialFetchInlineRt"));
 
 
 	for(MutatorValue rtMaterialFetchClipmap = 0; rtMaterialFetchClipmap < 2; ++rtMaterialFetchClipmap)
 	for(MutatorValue rtMaterialFetchClipmap = 0; rtMaterialFetchClipmap < 2; ++rtMaterialFetchClipmap)
 	{
 	{
 		ShaderProgramResourcePtr tmpProg;
 		ShaderProgramResourcePtr tmpProg;
-		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", tmpProg));
+		ANKI_CHECK(ResourceManager::getSingleton().loadResource(kProgFname, tmpProg));
 		ANKI_ASSERT(tmpProg == m_prog);
 		ANKI_ASSERT(tmpProg == m_prog);
 
 
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
@@ -400,8 +399,11 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
 	BufferView sbtBuffer;
-	buildShaderBindingTablePass("IndirectDiffuseClipmaps: Build SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[1], m_missShaderGroupIdx,
-								m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
+	if(!g_cvarRenderIdcInlineRt)
+	{
+		buildShaderBindingTablePass("IndirectDiffuseClipmaps: Build SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[1],
+									m_missShaderGroupIdx, m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
+	}
 
 
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 	{
 	{
@@ -455,9 +457,12 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		{
 		{
 			NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: RT (clipmap %u)", clipmap));
 			NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("IndirectDiffuseClipmaps: RT (clipmap %u)", clipmap));
 
 
-			pass.newTextureDependency(rtResultHandle, TextureUsageBit::kUavCompute);
-			pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-			setRgenSpace2Dependencies(pass);
+			pass.newTextureDependency(rtResultHandle, (g_cvarRenderIdcInlineRt) ? TextureUsageBit::kUavCompute : TextureUsageBit::kUavDispatchRays);
+			if(!g_cvarRenderIdcInlineRt)
+			{
+				pass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+			}
+			setRgenSpace2Dependencies(pass, g_cvarRenderIdcInlineRt);
 
 
 			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 			for(U32 clipmap = 0; clipmap < kIndirectDiffuseClipmapCount; ++clipmap)
 			{
 			{
@@ -468,7 +473,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 						  partialUpdateProbeCount](RenderPassWorkContext& rgraphCtx) {
 						  partialUpdateProbeCount](RenderPassWorkContext& rgraphCtx) {
 				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 				CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
-				cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
+				cmdb.bindShaderProgram((g_cvarRenderIdcInlineRt) ? m_rtMaterialFetchInlineRtGrProg.get() : m_rtLibraryGrProg.get());
 
 
 				// More globals
 				// More globals
 				cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 				cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
@@ -509,8 +514,15 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 
 					const U32 threadCount =
 					const U32 threadCount =
 						consts.m_clipmapRegion.m_probeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize) * consts.m_rayCountPerTexel;
 						consts.m_clipmapRegion.m_probeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize) * consts.m_rayCountPerTexel;
-					cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-									  threadCount, 1, 1);
+					if(g_cvarRenderIdcInlineRt)
+					{
+						cmdb.dispatchCompute((threadCount + 64 - 1) / 64, 1, 1);
+					}
+					else
+					{
+						cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+										  threadCount, 1, 1);
+					}
 
 
 					cmdb.popDebugMarker();
 					cmdb.popDebugMarker();
 				}
 				}
@@ -531,8 +543,15 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 					cmdb.setFastConstants(&consts, sizeof(consts));
 					cmdb.setFastConstants(&consts, sizeof(consts));
 
 
 					const U32 threadCount = partialUpdateProbeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize);
 					const U32 threadCount = partialUpdateProbeCount * square<U32>(g_cvarRenderIdcRadianceOctMapSize);
-					cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-									  threadCount, 1, 1);
+					if(g_cvarRenderIdcInlineRt)
+					{
+						cmdb.dispatchCompute((threadCount + 64 - 1) / 64, 1, 1);
+					}
+					else
+					{
+						cmdb.dispatchRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+										  threadCount, 1, 1);
+					}
 
 
 					cmdb.popDebugMarker();
 					cmdb.popDebugMarker();
 				}
 				}

+ 2 - 0
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -15,6 +15,7 @@ namespace anki {
 /// @{
 /// @{
 
 
 ANKI_CVAR(BoolCVar, Render, Idc, false, "Enable ray traced indirect diffuse clipmaps")
 ANKI_CVAR(BoolCVar, Render, Idc, false, "Enable ray traced indirect diffuse clipmaps")
+ANKI_CVAR2(BoolCVar, Render, Idc, InlineRt, false, "Use a cheap and less accurate path with inline RT");
 
 
 constexpr U32 kDefaultClipmapProbeCountXZ = 32;
 constexpr U32 kDefaultClipmapProbeCountXZ = 32;
 constexpr U32 kDefaultClipmapProbeCountY = 12;
 constexpr U32 kDefaultClipmapProbeCountY = 12;
@@ -132,6 +133,7 @@ private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramPtr m_rtLibraryGrProg;
 	ShaderProgramPtr m_rtLibraryGrProg;
+	ShaderProgramPtr m_rtMaterialFetchInlineRtGrProg;
 	ShaderProgramPtr m_populateCachesGrProg;
 	ShaderProgramPtr m_populateCachesGrProg;
 	ShaderProgramPtr m_computeIrradianceGrProg;
 	ShaderProgramPtr m_computeIrradianceGrProg;
 	ShaderProgramPtr m_applyGiGrProg;
 	ShaderProgramPtr m_applyGiGrProg;

+ 2 - 3
AnKi/Renderer/ProbeReflections.cpp

@@ -85,8 +85,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 	}
 	}
 
 
 	// Iterate the visible probes to find a candidate for update
 	// Iterate the visible probes to find a candidate for update
-	WeakArray<ReflectionProbeComponent*> visibleProbes =
-		getRenderer().getPrimaryNonRenderableVisibility().getInterestingVisibleComponents().m_reflectionProbes;
+	WeakArray<ReflectionProbeComponent*> visibleProbes = getPrimaryNonRenderableVisibility().getInterestingVisibleComponents().m_reflectionProbes;
 	ReflectionProbeComponent* probeToRefresh = nullptr;
 	ReflectionProbeComponent* probeToRefresh = nullptr;
 	for(ReflectionProbeComponent* probe : visibleProbes)
 	for(ReflectionProbeComponent* probe : visibleProbes)
 	{
 	{
@@ -98,7 +97,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 	}
 	}
 
 
 	if(probeToRefresh == nullptr || AsyncLoader::getSingleton().getTasksInFlightCount() != 0
 	if(probeToRefresh == nullptr || AsyncLoader::getSingleton().getTasksInFlightCount() != 0
-	   || getRenderer().getIndirectDiffuseProbes().hasCurrentlyRefreshedVolumeRt()) [[likely]]
+	   || (isIndirectDiffuseProbesEnabled() && getIndirectDiffuseProbes().hasCurrentlyRefreshedVolumeRt())) [[likely]]
 	{
 	{
 		// Nothing to update or can't update right now, early exit
 		// Nothing to update or can't update right now, early exit
 		m_runCtx = {};
 		m_runCtx = {};

+ 36 - 21
AnKi/Renderer/Reflections.cpp

@@ -33,11 +33,11 @@ Error Reflections::init()
 
 
 	std::initializer_list<SubMutation> mutation = {{"SSR_SAMPLE_GBUFFER", bSsrSamplesGBuffer},
 	std::initializer_list<SubMutation> mutation = {{"SSR_SAMPLE_GBUFFER", bSsrSamplesGBuffer},
 												   {"INDIRECT_DIFFUSE_CLIPMAPS", isIndirectDiffuseClipmapsEnabled()}};
 												   {"INDIRECT_DIFFUSE_CLIPMAPS", isIndirectDiffuseClipmapsEnabled()}};
-
+	constexpr CString kProgFname = "ShaderBinaries/Reflections.ankiprogbin";
 	// Ray gen and miss
 	// Ray gen and miss
 	if(bRtReflections)
 	if(bRtReflections)
 	{
 	{
-		ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/Reflections.ankiprogbin", m_mainProg));
+		ANKI_CHECK(ResourceManager::getSingleton().loadResource(kProgFname, m_mainProg));
 
 
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_mainProg);
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_mainProg);
 		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
 		variantInitInfo.requestTechniqueAndTypes(ShaderTypeBit::kRayGen, "RtMaterialFetch");
@@ -59,15 +59,14 @@ Error Reflections::init()
 											GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
 											GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
 	}
 	}
 
 
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_spatialDenoisingGrProg, "SpatialDenoise"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_temporalDenoisingGrProg, "TemporalDenoise"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_verticalBilateralDenoisingGrProg,
-								 "BilateralDenoiseVertical"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_horizontalBilateralDenoisingGrProg,
-								 "BilateralDenoiseHorizontal"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_ssrGrProg, "Ssr"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_probeFallbackGrProg, "ReflectionProbeFallback"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_mainProg, m_tileClassificationGrProg, "Classification"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_spatialDenoisingGrProg, "SpatialDenoise"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_temporalDenoisingGrProg, "TemporalDenoise"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_verticalBilateralDenoisingGrProg, "BilateralDenoiseVertical"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_horizontalBilateralDenoisingGrProg, "BilateralDenoiseHorizontal"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_ssrGrProg, "Ssr"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_probeFallbackGrProg, "ReflectionProbeFallback"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_tileClassificationGrProg, "Classification"));
+	ANKI_CHECK(loadShaderProgram(kProgFname, mutation, m_mainProg, m_rtMaterialFetchInlineRtGrProg, "RtMaterialFetchInlineRt"));
 
 
 	m_transientRtDesc1 = getRenderer().create2DRenderTargetDescription(
 	m_transientRtDesc1 = getRenderer().create2DRenderTargetDescription(
 		getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16B16A16_Sfloat, "Reflections #1");
 		getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16B16A16_Sfloat, "Reflections #1");
@@ -260,7 +259,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
 	BufferView sbtBuffer;
-	if(bRtReflections)
+	if(bRtReflections && !g_cvarRenderReflectionsInlineRt)
 	{
 	{
 		buildShaderBindingTablePass("RtReflections: Build SBT", m_libraryGrProg.get(), m_rayGenShaderGroupIdx, m_missShaderGroupIdx, m_sbtRecordSize,
 		buildShaderBindingTablePass("RtReflections: Build SBT", m_libraryGrProg.get(), m_rayGenShaderGroupIdx, m_missShaderGroupIdx, m_sbtRecordSize,
 									rgraph, sbtHandle, sbtBuffer);
 									rgraph, sbtHandle, sbtBuffer);
@@ -271,22 +270,30 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 	{
 	{
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtReflections");
 
 
-		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
-		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavDispatchRays);
-		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavDispatchRays);
-		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectDispatchRays);
-		setRgenSpace2Dependencies(rpass);
+		const TextureUsageBit uavTexUsage = (g_cvarRenderReflectionsInlineRt) ? TextureUsageBit::kUavCompute : TextureUsageBit::kUavDispatchRays;
+		const BufferUsageBit indirectBuffUsage =
+			(g_cvarRenderReflectionsInlineRt) ? BufferUsageBit::kIndirectCompute : BufferUsageBit::kIndirectDispatchRays;
+		const TextureUsageBit srvTexUsage = (g_cvarRenderReflectionsInlineRt) ? TextureUsageBit::kSrvCompute : TextureUsageBit::kSrvDispatchRays;
+
+		if(!g_cvarRenderReflectionsInlineRt)
+		{
+			rpass.newBufferDependency(sbtHandle, BufferUsageBit::kShaderBindingTable);
+		}
+		rpass.newTextureDependency(transientRt1, uavTexUsage);
+		rpass.newTextureDependency(hitPosAndDepthRt, uavTexUsage);
+		rpass.newBufferDependency(indirectArgsHandle, indirectBuffUsage);
+		setRgenSpace2Dependencies(rpass, g_cvarRenderReflectionsInlineRt);
 
 
 		if(isIndirectDiffuseClipmapsEnabled())
 		if(isIndirectDiffuseClipmapsEnabled())
 		{
 		{
-			getIndirectDiffuseClipmaps().setDependencies(rpass, TextureUsageBit::kSrvDispatchRays);
+			getIndirectDiffuseClipmaps().setDependencies(rpass, srvTexUsage);
 		}
 		}
 
 
 		rpass.setWork([this, sbtBuffer, &ctx, transientRt1, hitPosAndDepthRt, pixelsFailedSsrBuff](RenderPassWorkContext& rgraphCtx) {
 		rpass.setWork([this, sbtBuffer, &ctx, transientRt1, hitPosAndDepthRt, pixelsFailedSsrBuff](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(ReflectionsRayGen);
 			ANKI_TRACE_SCOPED_EVENT(ReflectionsRayGen);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
-			cmdb.bindShaderProgram(m_libraryGrProg.get());
+			cmdb.bindShaderProgram((g_cvarRenderReflectionsInlineRt) ? m_rtMaterialFetchInlineRtGrProg.get() : m_libraryGrProg.get());
 
 
 			// More globals
 			// More globals
 			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
@@ -319,8 +326,16 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 
 
 			cmdb.setFastConstants(&consts, sizeof(consts));
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 
-			cmdb.dispatchRaysIndirect(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-									  BufferView(m_indirectArgsBuffer.get()).setRange(sizeof(DispatchIndirectArgs)));
+			if(g_cvarRenderReflectionsInlineRt)
+			{
+				cmdb.dispatchComputeIndirect(BufferView(m_indirectArgsBuffer.get()).incrementOffset(sizeof(DispatchIndirectArgs)));
+			}
+			else
+			{
+
+				cmdb.dispatchRaysIndirect(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
+										  BufferView(m_indirectArgsBuffer.get()).setRange(sizeof(DispatchIndirectArgs)));
+			}
 		});
 		});
 	}
 	}
 	else
 	else

+ 2 - 0
AnKi/Renderer/Reflections.h

@@ -13,6 +13,7 @@ namespace anki {
 /// @{
 /// @{
 
 
 ANKI_CVAR2(BoolCVar, Render, Reflections, Rt, true, "Enable RT reflections")
 ANKI_CVAR2(BoolCVar, Render, Reflections, Rt, true, "Enable RT reflections")
+ANKI_CVAR2(BoolCVar, Render, Reflections, InlineRt, false, "Enable a cheap inline RT alternative path")
 ANKI_CVAR2(NumericCVar<F32>, Render, Reflections, RtMaxRayDistance, 100.0f, 1.0f, 10000.0f, "Max RT reflections ray distance")
 ANKI_CVAR2(NumericCVar<F32>, Render, Reflections, RtMaxRayDistance, 100.0f, 1.0f, 10000.0f, "Max RT reflections ray distance")
 ANKI_CVAR2(NumericCVar<U32>, Render, Reflections, SsrStepIncrement, 32, 1, 256, "The number of steps for each loop")
 ANKI_CVAR2(NumericCVar<U32>, Render, Reflections, SsrStepIncrement, 32, 1, 256, "The number of steps for each loop")
 ANKI_CVAR2(NumericCVar<U32>, Render, Reflections, SsrMaxIterations, ANKI_PLATFORM_MOBILE ? 16 : 64, 1, 256, "Max SSR raymarching loop iterations")
 ANKI_CVAR2(NumericCVar<U32>, Render, Reflections, SsrMaxIterations, ANKI_PLATFORM_MOBILE ? 16 : 64, 1, 256, "Max SSR raymarching loop iterations")
@@ -50,6 +51,7 @@ public:
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramResourcePtr m_missProg;
 	ShaderProgramPtr m_ssrGrProg;
 	ShaderProgramPtr m_ssrGrProg;
 	ShaderProgramPtr m_libraryGrProg;
 	ShaderProgramPtr m_libraryGrProg;
+	ShaderProgramPtr m_rtMaterialFetchInlineRtGrProg;
 	ShaderProgramPtr m_spatialDenoisingGrProg;
 	ShaderProgramPtr m_spatialDenoisingGrProg;
 	ShaderProgramPtr m_temporalDenoisingGrProg;
 	ShaderProgramPtr m_temporalDenoisingGrProg;
 	ShaderProgramPtr m_verticalBilateralDenoisingGrProg;
 	ShaderProgramPtr m_verticalBilateralDenoisingGrProg;

+ 7 - 3
AnKi/Renderer/Renderer.cpp

@@ -426,15 +426,19 @@ void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendere
 	if(isSolidColor)
 	if(isSolidColor)
 	{
 	{
 		consts.m_sky.m_solidColor = (sky) ? sky->getSolidColor() : Vec3(0.0);
 		consts.m_sky.m_solidColor = (sky) ? sky->getSolidColor() : Vec3(0.0);
-		consts.m_sky.m_type = 0;
+		consts.m_sky.m_type = U32(SkyType::kSolidColor);
 	}
 	}
 	else if(sky->getSkyboxType() == SkyboxType::kImage2D)
 	else if(sky->getSkyboxType() == SkyboxType::kImage2D)
 	{
 	{
-		consts.m_sky.m_type = 1;
+		consts.m_sky.m_type = U32(SkyType::kTextureWithEquirectangularMapping);
+		consts.m_sky.m_texture =
+			sky->getImageResource().getTexture().getOrCreateBindlessTextureIndex(TextureSubresourceDesc::all()) & ((1u << 30u) - 1u);
 	}
 	}
 	else
 	else
 	{
 	{
-		consts.m_sky.m_type = 2;
+		consts.m_sky.m_type = U32(SkyType::kTextureWithEctahedronMapping);
+		consts.m_sky.m_texture =
+			m_generatedSky->getEnvironmentMapTexture().getOrCreateBindlessTextureIndex(TextureSubresourceDesc::all()) & ((1u << 30u) - 1u);
 	}
 	}
 
 
 	if(m_indirectDiffuseClipmaps)
 	if(m_indirectDiffuseClipmaps)

+ 13 - 9
AnKi/Renderer/RendererObject.cpp

@@ -257,28 +257,32 @@ void RtMaterialFetchRendererObject::patchShaderBindingTablePass(CString passName
 		});
 		});
 }
 }
 
 
-void RtMaterialFetchRendererObject::setRgenSpace2Dependencies(RenderPassBase& pass)
+void RtMaterialFetchRendererObject::setRgenSpace2Dependencies(RenderPassBase& pass, Bool isComputeDispatch)
 {
 {
-	pass.newAccelerationStructureDependency(getAccelerationStructureBuilder().getAccelerationStructureHandle(),
-											AccelerationStructureUsageBit::kSrvDispatchRays);
+	const TextureUsageBit srvTexUsage = (isComputeDispatch) ? TextureUsageBit::kSrvCompute : TextureUsageBit::kSrvDispatchRays;
+	const BufferUsageBit srvBuffUsage = (isComputeDispatch) ? BufferUsageBit::kSrvCompute : BufferUsageBit::kSrvDispatchRays;
+	const AccelerationStructureUsageBit srvAsUsage =
+		(isComputeDispatch) ? AccelerationStructureUsageBit::kSrvCompute : AccelerationStructureUsageBit::kSrvDispatchRays;
+
+	pass.newAccelerationStructureDependency(getAccelerationStructureBuilder().getAccelerationStructureHandle(), srvAsUsage);
 
 
 	if(getGeneratedSky().isEnabled())
 	if(getGeneratedSky().isEnabled())
 	{
 	{
-		pass.newTextureDependency(getGeneratedSky().getEnvironmentMapRt(), TextureUsageBit::kSrvDispatchRays);
+		pass.newTextureDependency(getGeneratedSky().getEnvironmentMapRt(), srvTexUsage);
 	}
 	}
 
 
-	pass.newTextureDependency(getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvDispatchRays);
+	pass.newTextureDependency(getShadowMapping().getShadowmapRt(), srvTexUsage);
 
 
-	pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
-	pass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvDispatchRays);
-	pass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
+	pass.newTextureDependency(getGBuffer().getDepthRt(), srvTexUsage);
+	pass.newTextureDependency(getGBuffer().getColorRt(1), srvTexUsage);
+	pass.newTextureDependency(getGBuffer().getColorRt(2), srvTexUsage);
 
 
 	{
 	{
 		AccelerationStructureVisibilityInfo asVis;
 		AccelerationStructureVisibilityInfo asVis;
 		GpuVisibilityLocalLightsOutput lightVis;
 		GpuVisibilityLocalLightsOutput lightVis;
 		getAccelerationStructureBuilder().getVisibilityInfo(asVis, lightVis);
 		getAccelerationStructureBuilder().getVisibilityInfo(asVis, lightVis);
 
 
-		pass.newBufferDependency(lightVis.m_dependency, BufferUsageBit::kSrvDispatchRays);
+		pass.newBufferDependency(lightVis.m_dependency, srvBuffUsage);
 	}
 	}
 }
 }
 
 

+ 1 - 1
AnKi/Renderer/RendererObject.h

@@ -159,7 +159,7 @@ protected:
 									 RenderGraphBuilder& rgraph, BufferHandle sbtHandle, BufferView sbtBuffer);
 									 RenderGraphBuilder& rgraph, BufferHandle sbtHandle, BufferView sbtBuffer);
 
 
 	/// Sets the resources of space 2 in RtMaterialFetch.hlsl as dependencies on the given pass.
 	/// Sets the resources of space 2 in RtMaterialFetch.hlsl as dependencies on the given pass.
-	void setRgenSpace2Dependencies(RenderPassBase& pass);
+	void setRgenSpace2Dependencies(RenderPassBase& pass, Bool isComputeDispatch = false);
 
 
 	/// Bind the resources of space 2 in RtMaterialFetch.hlsl.
 	/// Bind the resources of space 2 in RtMaterialFetch.hlsl.
 	void bindRgenSpace2Resources(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 	void bindRgenSpace2Resources(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);

+ 1 - 1
AnKi/Scene/SceneGraph.cpp

@@ -356,7 +356,7 @@ const SceneNode& SceneGraph::getActiveCameraNode() const
 	}
 	}
 	else
 	else
 	{
 	{
-		*m_defaultMainCam;
+		return *m_defaultMainCam;
 	}
 	}
 }
 }
 
 

+ 9 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -66,10 +66,18 @@ struct CommonMatrices
 	F32 m_far;
 	F32 m_far;
 };
 };
 
 
+enum class SkyType
+{
+	kSolidColor,
+	kTextureWithEquirectangularMapping,
+	kTextureWithEctahedronMapping,
+};
+
 struct Sky
 struct Sky
 {
 {
 	Vec3 m_solidColor;
 	Vec3 m_solidColor;
-	U32 m_type;
+	U32 m_type : 2; // One of SkyType
+	U32 m_texture : 30;
 };
 };
 
 
 struct IndirectDiffuseClipmapTextures
 struct IndirectDiffuseClipmapTextures

+ 1 - 9
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -54,15 +54,7 @@ Vec3 lightShading(Vec3 rayOrigin, Vec3 rayDir, Vec3 hitPos, Vec3 hitNormal, Vec3
 
 
 	if(isSky)
 	if(isSky)
 	{
 	{
-		if(g_globalRendererConstants.m_sky.m_type == 0)
-		{
-			color = g_globalRendererConstants.m_sky.m_solidColor;
-		}
-		else
-		{
-			const Vec2 uv = (g_globalRendererConstants.m_sky.m_type == 1) ? equirectangularMapping(rayDir) : octahedronEncode(rayDir);
-			color = g_envMap.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xyz;
-		}
+		color = sampleSkyCheap<F32>(g_globalRendererConstants.m_sky, rayDir, g_linearAnyClampSampler);
 	}
 	}
 	else
 	else
 	{
 	{

+ 31 - 9
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -12,6 +12,7 @@
 #pragma anki mutator SPATIAL_RECONSTRUCT_TYPE 0 1
 #pragma anki mutator SPATIAL_RECONSTRUCT_TYPE 0 1
 
 
 #pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP SPATIAL_RECONSTRUCT_TYPE
+#pragma anki technique RtMaterialFetchInlineRt comp mutators
 #pragma anki technique PopulateCaches comp mutators RADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique PopulateCaches comp mutators RADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique ComputeIrradiance comp mutators GPU_WAVE_SIZE RADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique ComputeIrradiance comp mutators GPU_WAVE_SIZE RADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
@@ -29,13 +30,9 @@
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/BilateralFilter.hlsl>
 #include <AnKi/Shaders/BilateralFilter.hlsl>
 #include <AnKi/Shaders/TemporalAA.hlsl>
 #include <AnKi/Shaders/TemporalAA.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 #include <ThirdParty/SHforHLSL/SH.hlsli>
 #include <ThirdParty/SHforHLSL/SH.hlsli>
 
 
-#if defined(RT_MATERIAL_FETCH_CLIPMAP) && RT_MATERIAL_FETCH_CLIPMAP
-#	define CLIPMAP_VOLUME
-#endif
-#include <AnKi/Shaders/RtMaterialFetch.hlsl>
-
 constexpr F32 kGaussianSigma = 0.55;
 constexpr F32 kGaussianSigma = 0.55;
 constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
 constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
 constexpr Bool kLocalLightShadow = false;
 constexpr Bool kLocalLightShadow = false;
@@ -60,14 +57,28 @@ struct ProbeUpdateConsts
 };
 };
 
 
 // ===========================================================================
 // ===========================================================================
-// RtMaterialFetch                                                           =
+// RtMaterialFetch and RtMaterialFetchInlineRt                               =
 // ===========================================================================
 // ===========================================================================
-#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && NOT_ZERO(RT_MATERIAL_FETCH_CLIPMAP)
+#if(NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && NOT_ZERO(RT_MATERIAL_FETCH_CLIPMAP)) || NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetchInlineRt)
+
+#	define CLIPMAP_VOLUME
+#	define INCLUDE_ALL
+#	include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 
 ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 
 
+#	if ANKI_COMPUTE_SHADER
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
+#	else
 [Shader("raygeneration")] void main()
 [Shader("raygeneration")] void main()
+#	endif
 {
 {
+#	if ANKI_COMPUTE_SHADER
+	const U32 tid = svDispatchThreadId.x;
+#	else
+	const U32 tid = DispatchRaysIndex().x;
+#	endif
+
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
 	const IndirectDiffuseClipmapConstants idConsts = g_globalRendererConstants.m_indirectDiffuseClipmaps;
 	const U32 octMapTexelCount = square(g_consts.m_radianceOctMapSize);
 	const U32 octMapTexelCount = square(g_consts.m_radianceOctMapSize);
 
 
@@ -75,8 +86,13 @@ ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 	U32 probeIdx;
 	U32 probeIdx;
 	U32 subRayIdx;
 	U32 subRayIdx;
 	U32 octMapTexelIdx;
 	U32 octMapTexelIdx;
-	unflatten3dArrayIndex(octMapTexelCount, g_consts.m_maxProbesToUpdate, g_consts.m_rayCountPerTexel, DispatchRaysIndex().x, octMapTexelIdx,
-						  probeIdx, subRayIdx);
+	unflatten3dArrayIndex(octMapTexelCount, g_consts.m_maxProbesToUpdate, g_consts.m_rayCountPerTexel, tid, octMapTexelIdx, probeIdx, subRayIdx);
+#	if ANKI_COMPUTE_SHADER
+	if(octMapTexelIdx >= octMapTexelCount || probeIdx >= g_consts.m_maxProbesToUpdate || subRayIdx >= g_consts.m_rayCountPerTexel)
+	{
+		return;
+	}
+#	endif
 
 
 	if(g_consts.m_clipmapRegion.m_partialUpdate)
 	if(g_consts.m_clipmapRegion.m_partialUpdate)
 	{
 	{
@@ -118,7 +134,11 @@ ANKI_FAST_CONSTANTS(ProbeUpdateConsts, g_consts)
 	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 	F32 rayT = 0.0;
 	F32 rayT = 0.0;
 	Bool backfacing = false;
 	Bool backfacing = false;
+#	if ANKI_COMPUTE_SHADER
+	const Bool hit = materialRayTraceInlineRt<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing);
+#	else
 	const Bool hit = materialRayTrace<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
 	const Bool hit = materialRayTrace<F16>(probeWorldPos, dir, 0.0, tMax, 1000.0, gbuffer, rayT, backfacing, traceFlags);
+#	endif
 
 
 	HVec3 radiance;
 	HVec3 radiance;
 	if(backfacing)
 	if(backfacing)
@@ -508,6 +528,8 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 // ===========================================================================
 // ===========================================================================
 #if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && !RT_MATERIAL_FETCH_CLIPMAP
 #if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) && !RT_MATERIAL_FETCH_CLIPMAP
 
 
+#	include <AnKi/Shaders/RtMaterialFetch.hlsl>
+
 struct Consts
 struct Consts
 {
 {
 	F32 m_rayMax;
 	F32 m_rayMax;

+ 29 - 14
AnKi/Shaders/Reflections.ankiprog

@@ -12,12 +12,12 @@
 #pragma anki technique Ssr comp
 #pragma anki technique Ssr comp
 #pragma anki technique ReflectionProbeFallback comp mutators
 #pragma anki technique ReflectionProbeFallback comp mutators
 #pragma anki technique RtMaterialFetch rgen mutators INDIRECT_DIFFUSE_CLIPMAPS
 #pragma anki technique RtMaterialFetch rgen mutators INDIRECT_DIFFUSE_CLIPMAPS
+#pragma anki technique RtMaterialFetchInlineRt comp mutators INDIRECT_DIFFUSE_CLIPMAPS
 #pragma anki technique SpatialDenoise comp mutators
 #pragma anki technique SpatialDenoise comp mutators
 #pragma anki technique TemporalDenoise comp mutators
 #pragma anki technique TemporalDenoise comp mutators
 #pragma anki technique BilateralDenoiseVertical comp mutators
 #pragma anki technique BilateralDenoiseVertical comp mutators
 #pragma anki technique BilateralDenoiseHorizontal comp mutators
 #pragma anki technique BilateralDenoiseHorizontal comp mutators
 
 
-#include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/LightFunctions.hlsl>
 #include <AnKi/Shaders/LightFunctions.hlsl>
@@ -27,6 +27,7 @@
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/TemporalAA.hlsl>
 #include <AnKi/Shaders/TemporalAA.hlsl>
+#include <AnKi/Shaders/Sky.hlsl>
 
 
 // Config & debug
 // Config & debug
 constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
 constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
@@ -638,15 +639,7 @@ RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
 	if(probeColor.x < 0.0)
 	if(probeColor.x < 0.0)
 	{
 	{
 		// No probe, sample sky
 		// No probe, sample sky
-		if(g_globalRendererConstants.m_sky.m_type == 0)
-		{
-			probeColor = g_globalRendererConstants.m_sky.m_solidColor;
-		}
-		else
-		{
-			const Vec2 uv = (g_globalRendererConstants.m_sky.m_type == 1) ? equirectangularMapping(reflDir) : octahedronEncode(reflDir);
-			probeColor = g_envMap.SampleLevel(g_trilinearClampSampler, uv, 0.0).xyz;
-		}
+		probeColor = sampleSkyCheap<F32>(g_globalRendererConstants.m_sky, reflDir, g_trilinearClampSampler);
 	}
 	}
 
 
 	// Write out
 	// Write out
@@ -660,9 +653,12 @@ RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
 #endif
 #endif
 
 
 // ===========================================================================
 // ===========================================================================
-// RayGen                                                                    =
+// RtMaterialFetch                                                           =
 // ===========================================================================
 // ===========================================================================
-#if ANKI_RAY_GEN_SHADER
+#if NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetch) || NOT_ZERO(ANKI_TECHNIQUE_RtMaterialFetchInlineRt)
+
+#	define INCLUDE_ALL
+#	include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 
 struct Consts
 struct Consts
 {
 {
@@ -704,12 +700,26 @@ vector<T, 3> getDiffuseIndirect(Vec3 worldPos, Vec3 worldNormal)
 	}
 	}
 }
 }
 
 
+#	if ANKI_COMPUTE_SHADER
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
+#	else
 [shader("raygeneration")] void main()
 [shader("raygeneration")] void main()
+#	endif
 {
 {
+#	if ANKI_COMPUTE_SHADER
+	const U32 gtid = svDispatchThreadId.x;
+	if(gtid >= getStructuredBufferElementCount(g_pixelsFailedSsr))
+	{
+		return;
+	}
+#	else
+	const U32 gtid = DispatchRaysIndex().x;
+#	endif
+
 	UVec2 halfViewportSize;
 	UVec2 halfViewportSize;
 	g_hitPosAndDepthTex.GetDimensions(halfViewportSize.x, halfViewportSize.y);
 	g_hitPosAndDepthTex.GetDimensions(halfViewportSize.x, halfViewportSize.y);
 
 
-	const PixelFailedSsr pixelFailedSsr = g_pixelsFailedSsr[DispatchRaysIndex().x];
+	const PixelFailedSsr pixelFailedSsr = SBUFF(g_pixelsFailedSsr, gtid);
 	const UVec2 realCoord = UVec2(pixelFailedSsr.m_pixel >> 16u, pixelFailedSsr.m_pixel & 0xFFFFu);
 	const UVec2 realCoord = UVec2(pixelFailedSsr.m_pixel >> 16u, pixelFailedSsr.m_pixel & 0xFFFFu);
 	const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
 	const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
 	const Vec4 packed = unpackSnorm4x8<F32>(pixelFailedSsr.m_reflectionDirAndRoughness);
 	const Vec4 packed = unpackSnorm4x8<F32>(pixelFailedSsr.m_reflectionDirAndRoughness);
@@ -736,7 +746,12 @@ vector<T, 3> getDiffuseIndirect(Vec3 worldPos, Vec3 worldNormal)
 	GBufferLight<F16> gbuffer;
 	GBufferLight<F16> gbuffer;
 	F32 rayT;
 	F32 rayT;
 	Bool unused;
 	Bool unused;
+#	if ANKI_COMPUTE_SHADER
+	const Bool hasHitSky =
+		!materialRayTraceInlineRt(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
+#	else
 	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
 	const Bool hasHitSky = !materialRayTrace(worldPos, reflDir, max(tmin + kTMinBias, 0.05), g_consts.m_maxRayT, textureLod, gbuffer, rayT, unused);
+#	endif
 
 
 	const Vec3 hitPos = worldPos + reflDir * rayT;
 	const Vec3 hitPos = worldPos + reflDir * rayT;
 
 
@@ -766,7 +781,7 @@ vector<T, 3> getDiffuseIndirect(Vec3 worldPos, Vec3 worldNormal)
 	// Store depth in reverse for better precision
 	// Store depth in reverse for better precision
 	g_hitPosAndDepthTex[realCoord] = Vec4(hitPos - g_globalRendererConstants.m_cameraPosition, 1.0 - depth);
 	g_hitPosAndDepthTex[realCoord] = Vec4(hitPos - g_globalRendererConstants.m_cameraPosition, 1.0 - depth);
 }
 }
-#endif // ANKI_RAY_GEN_SHADER
+#endif
 
 
 // ===========================================================================
 // ===========================================================================
 // SpatialDenoise                                                            =
 // SpatialDenoise                                                            =

+ 52 - 12
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -9,6 +9,7 @@
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/LightFunctions.hlsl>
 #include <AnKi/Shaders/LightFunctions.hlsl>
+#include <AnKi/Shaders/Sky.hlsl>
 
 
 struct [raypayload] RtMaterialFetchRayPayload
 struct [raypayload] RtMaterialFetchRayPayload
 {
 {
@@ -27,7 +28,7 @@ struct [raypayload] RtMaterialFetchRayPayload
 };
 };
 
 
 // Have a common resource interface for all shaders. It should be compatible between all ray shaders in DX and VK
 // Have a common resource interface for all shaders. It should be compatible between all ray shaders in DX and VK
-#if ANKI_RAY_GEN_SHADER
+#if ANKI_RAY_GEN_SHADER || defined(INCLUDE_ALL)
 #	define SPACE space2
 #	define SPACE space2
 
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0, SPACE);
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0, SPACE);
@@ -100,16 +101,7 @@ Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T texture
 	if(hasHitSky)
 	if(hasHitSky)
 	{
 	{
 		gbuffer = (GBufferLight<T>)0;
 		gbuffer = (GBufferLight<T>)0;
-
-		if(g_globalRendererConstants.m_sky.m_type == 0)
-		{
-			gbuffer.m_emission = g_globalRendererConstants.m_sky.m_solidColor;
-		}
-		else
-		{
-			const Vec2 uv = (g_globalRendererConstants.m_sky.m_type == 1) ? equirectangularMapping(rayDir) : octahedronEncode(rayDir);
-			gbuffer.m_emission = g_envMap.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xyz;
-		}
+		gbuffer.m_emission = sampleSkyCheap<T>(g_globalRendererConstants.m_sky, rayDir, g_linearAnyClampSampler);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -121,6 +113,52 @@ Bool materialRayTrace(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T texture
 	return !hasHitSky;
 	return !hasHitSky;
 }
 }
 
 
+template<typename T>
+Bool materialRayTraceInlineRt(Vec3 rayOrigin, Vec3 rayDir, F32 tMin, F32 tMax, T textureLod, out GBufferLight<T> gbuffer, out F32 rayT,
+							  out Bool backfacing)
+{
+	gbuffer = (GBufferLight<T>)0; // Zero everything first; only some members are written below
+	// Inline (RayQuery) trace against g_tlas. Returns true on a committed triangle hit. textureLod is not read in this body
+	RayQuery<RAY_FLAG_FORCE_OPAQUE> q;
+	const U32 cullMask = 0xFFu;
+	RayDesc ray;
+	ray.Origin = rayOrigin;
+	ray.TMin = tMin;
+	ray.Direction = rayDir;
+	ray.TMax = tMax;
+	q.TraceRayInline(g_tlas, RAY_FLAG_FORCE_OPAQUE, cullMask, ray);
+	while(q.Proceed()) // Keep stepping the query until Proceed() returns false; no per-step work is needed
+	{
+	}
+	const Bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
+	// Miss: output the cheap sky color as emission and a negative rayT
+	if(!hit)
+	{
+		backfacing = false;
+		gbuffer.m_emission = sampleSkyCheap<T>(g_globalRendererConstants.m_sky, rayDir, g_linearAnyClampSampler);
+		rayT = -1.0;
+	}
+	else
+	{
+		backfacing = q.CommittedTriangleFrontFace(); // NOTE(review): "backfacing" receives the front-face flag; confirm this matches the winding convention in use
+
+		// Read the diffuse color from the AS instance ID: 8 bits per channel, x in bits 16-23, y in bits 8-15, z in bits 0-7
+		UVec3 diffColoru = q.CommittedInstanceID();
+		diffColoru >>= UVec3(16, 8, 0);
+		diffColoru &= 0xFF;
+		gbuffer.m_diffuse = Vec3(diffColoru) / 255.0;
+
+		// Compute the geometric normal from the hit triangle's vertex positions and transform it as a direction (w = 0) to world space
+		const Vec3 positions[3] = spvRayQueryGetIntersectionTriangleVertexPositionsKHR(q, SpvRayQueryCommittedIntersectionKHR);
+		const Vec3 vertNormal = normalize(cross(positions[1] - positions[0], positions[2] - positions[1]));
+		gbuffer.m_worldNormal = normalize(mul(q.CommittedObjectToWorld3x4(), Vec4(vertNormal, 0.0)));
+
+		rayT = q.CommittedRayT();
+	}
+
+	return hit;
+}
+
 Bool rayVisibility(Vec3 rayOrigin, Vec3 rayDir, F32 tMax, U32 traceFlags)
 Bool rayVisibility(Vec3 rayOrigin, Vec3 rayDir, F32 tMax, U32 traceFlags)
 {
 {
 	RayQuery<RAY_FLAG_NONE> q;
 	RayQuery<RAY_FLAG_NONE> q;
@@ -131,7 +169,9 @@ Bool rayVisibility(Vec3 rayOrigin, Vec3 rayDir, F32 tMax, U32 traceFlags)
 	ray.Direction = rayDir;
 	ray.Direction = rayDir;
 	ray.TMax = tMax;
 	ray.TMax = tMax;
 	q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
 	q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
-	q.Proceed();
+	while(q.Proceed())
+	{
+	}
 	const Bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
 	const Bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
 
 
 	return hit;
 	return hit;

+ 9 - 6
AnKi/Shaders/RtMaterialFetchDbg.ankiprog

@@ -39,7 +39,7 @@
 
 
 	if(!hit)
 	if(!hit)
 	{
 	{
-		col = Vec3(1.0, 0.0, 1.0);
+		col = Vec3(0.0, 0.0, 1.0);
 	}
 	}
 	else if(backfacing)
 	else if(backfacing)
 	{
 	{
@@ -61,19 +61,21 @@
 	ray.TMin = tMin;
 	ray.TMin = tMin;
 	ray.Direction = rayDir;
 	ray.Direction = rayDir;
 	ray.TMax = tMax;
 	ray.TMax = tMax;
-	q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
-	q.Proceed();
+	q.TraceRayInline(g_tlas, RAY_FLAG_FORCE_OPAQUE, cullMask, ray);
+	while(q.Proceed())
+	{
+	}
 	const Bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
 	const Bool hit = q.CommittedStatus() == COMMITTED_TRIANGLE_HIT;
 
 
 	Bool backfacing = false;
 	Bool backfacing = false;
 	if(hit)
 	if(hit)
 	{
 	{
-		backfacing = q.CandidateTriangleFrontFace();
+		backfacing = q.CommittedTriangleFrontFace();
 	}
 	}
 
 
 	if(!hit)
 	if(!hit)
 	{
 	{
-		col = Vec3(1.0, 0.0, 1.0);
+		col = Vec3(0.0, 0.0, 1.0);
 	}
 	}
 	else if(backfacing)
 	else if(backfacing)
 	{
 	{
@@ -89,7 +91,7 @@
 		const Vec3 positions[3] = spvRayQueryGetIntersectionTriangleVertexPositionsKHR(q, SpvRayQueryCommittedIntersectionKHR);
 		const Vec3 positions[3] = spvRayQueryGetIntersectionTriangleVertexPositionsKHR(q, SpvRayQueryCommittedIntersectionKHR);
 		const Vec3 vertNormal = normalize(cross(positions[1] - positions[0], positions[2] - positions[1]));
 		const Vec3 vertNormal = normalize(cross(positions[1] - positions[0], positions[2] - positions[1]));
 
 
-		const Vec3 worldNormal = normalize(mul(q.CandidateObjectToWorld3x4(), Vec4(vertNormal, 0.0)));
+		const Vec3 worldNormal = normalize(mul(q.CommittedObjectToWorld3x4(), Vec4(vertNormal, 0.0)));
 
 
 		GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 		GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 		gbuffer.m_diffuse = color;
 		gbuffer.m_diffuse = color;
@@ -101,6 +103,7 @@
 		col += gbuffer.m_diffuse * 0.3;
 		col += gbuffer.m_diffuse * 0.3;
 
 
 		// col = worldNormal / 2.0 + 0.5;
 		// col = worldNormal / 2.0 + 0.5;
+		// col = frac(positions[0]);
 	}
 	}
 #endif
 #endif
 
 

+ 21 - 0
AnKi/Shaders/Sky.hlsl

@@ -3,7 +3,10 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma once
+
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
 
 
 // These are per megameter
 // These are per megameter
 constexpr F32 kGroundRadiusMM = 6.360f;
 constexpr F32 kGroundRadiusMM = 6.360f;
@@ -80,3 +83,21 @@ Vec3 computeSkyColor(Texture2D<Vec4> skyLut, SamplerState linearAnyClampSampler,
 
 
 	return col;
 	return col;
 }
 }
+// Returns the sky color for a direction: either the sky's solid color or a LOD 0 sample of the sky's bindless texture
+template<typename T, typename Y>
+vector<T, 3> sampleSkyCheap(Sky sky, vector<Y, 3> direction, SamplerState trilinearClampSampler)
+{
+	vector<T, 3> color;
+	if(sky.m_type == (U32)SkyType::kSolidColor)
+	{
+		color = sky.m_solidColor; // No texture fetch for a solid color sky
+	}
+	else
+	{
+		// Pick the UV mapping from the sky type: equirectangular for kTextureWithEquirectangularMapping, octahedron encoding otherwise
+		const Vec2 uv =
+			(sky.m_type == (U32)SkyType::kTextureWithEquirectangularMapping) ? equirectangularMapping(direction) : octahedronEncode(direction);
+		color = getBindlessTexture2DVec4(sky.m_texture).SampleLevel(trilinearClampSampler, uv, 0.0).xyz;
+	}
+
+	return color;
+}

+ 9 - 4
AnKi/Util/FilesystemPosix.cpp

@@ -64,9 +64,10 @@ Bool directoryExists(const CString& filename)
 class WalkDirectoryTreeCallbackContext
 class WalkDirectoryTreeCallbackContext
 {
 {
 public:
 public:
-	const Function<Error(const CString&, Bool)>* m_callback = nullptr;
+	const Function<Error(WalkDirectoryArgs&)>* m_callback = nullptr;
 	U32 m_prefixLen;
 	U32 m_prefixLen;
 	Error m_err = {Error::kNone};
 	Error m_err = {Error::kNone};
+	Bool m_stopSearch = false;
 };
 };
 
 
 static thread_local WalkDirectoryTreeCallbackContext g_walkDirectoryTreeContext;
 static thread_local WalkDirectoryTreeCallbackContext g_walkDirectoryTreeContext;
@@ -92,18 +93,22 @@ static int walkDirectoryTreeCallback(const char* filepath, [[maybe_unused]] cons
 		WalkDirectoryTreeCallbackContext& ctx = g_walkDirectoryTreeContext;
 		WalkDirectoryTreeCallbackContext& ctx = g_walkDirectoryTreeContext;
 		ANKI_ASSERT(ctx.m_callback);
 		ANKI_ASSERT(ctx.m_callback);
 
 
-		if(ctx.m_err || strlen(filepath) <= ctx.m_prefixLen)
+		if(ctx.m_err || ctx.m_stopSearch || strlen(filepath) <= ctx.m_prefixLen)
 		{
 		{
 			return 0;
 			return 0;
 		}
 		}
 
 
-		ctx.m_err = (*ctx.m_callback)(filepath + ctx.m_prefixLen, isDir);
+		WalkDirectoryArgs args;
+		args.m_path = filepath + ctx.m_prefixLen;
+		args.m_isDirectory = isDir;
+		ctx.m_err = (*ctx.m_callback)(args);
+		ctx.m_stopSearch = args.m_stopSearch;
 	}
 	}
 
 
 	return 0;
 	return 0;
 }
 }
 
 
-Error walkDirectoryTreeInternal(const CString& dir, const Function<Error(const CString&, Bool)>& callback)
+Error walkDirectoryTreeInternal(CString dir, const Function<Error(WalkDirectoryArgs& args)>& callback)
 {
 {
 	ANKI_ASSERT(dir.getLength() > 0);
 	ANKI_ASSERT(dir.getLength() > 0);
 	Error err = Error::kNone;
 	Error err = Error::kNone;