Browse Source

Optimize the BW of RT shadows

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
129b151b40

+ 57 - 38
AnKi/Renderer/FinalComposite.cpp

@@ -56,16 +56,18 @@ Error FinalComposite::initInternal(const ConfigSet& config)
 
 	for(U32 dbg = 0; dbg < 2; ++dbg)
 	{
-		for(U32 dbgRt = 0; dbgRt < 2; ++dbgRt)
-		{
-			const ShaderProgramResourceVariant* variant;
-			variantInitInfo.addMutation("DBG_ENABLED", dbg);
-			variantInitInfo.addMutation("DBG_RENDER_TARGET_ENABLED", dbgRt);
-			m_prog->getOrCreateVariant(variantInitInfo, variant);
-			m_grProgs[dbg][dbgRt] = variant->getProgram();
-		}
+		const ShaderProgramResourceVariant* variant;
+		variantInitInfo.addMutation("DBG_ENABLED", dbg);
+		m_prog->getOrCreateVariant(variantInitInfo, variant);
+		m_grProgs[dbg] = variant->getProgram();
 	}
 
+	ANKI_CHECK(getResourceManager().loadResource("Shaders/VisualizeRenderTarget.ankiprog",
+												 m_defaultVisualizeRenderTargetProg));
+	const ShaderProgramResourceVariant* variant;
+	m_defaultVisualizeRenderTargetProg->getOrCreateVariant(variant);
+	m_defaultVisualizeRenderTargetGrProg = variant->getProgram();
+
 	return Error::NONE;
 }
 
@@ -97,47 +99,63 @@ void FinalComposite::run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 	const Bool dbgEnabled = m_r->getDbg().getEnabled();
 	RenderTargetHandle dbgRt;
 	Bool dbgRtValid;
-	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid);
+	ShaderProgramPtr optionalDebugProgram;
+	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid, optionalDebugProgram);
 
-	cmdb->bindShaderProgram(m_grProgs[dbgEnabled][dbgRtValid]);
+	// Bind program
+	if(dbgRtValid && optionalDebugProgram.isCreated())
+	{
+		cmdb->bindShaderProgram(optionalDebugProgram);
+	}
+	else if(dbgRtValid)
+	{
+		cmdb->bindShaderProgram(m_defaultVisualizeRenderTargetGrProg);
+	}
+	else
+	{
+		cmdb->bindShaderProgram(m_grProgs[dbgEnabled]);
+	}
 
 	// Bind stuff
-	rgraphCtx.bindUniformBuffer(0, 0, m_r->getTonemapping().getAverageLuminanceBuffer());
+	if(!dbgRtValid)
+	{
+		rgraphCtx.bindUniformBuffer(0, 0, m_r->getTonemapping().getAverageLuminanceBuffer());
 
-	cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
-	cmdb->bindSampler(0, 2, m_r->getSamplers().m_trilinearClamp);
-	cmdb->bindSampler(0, 3, m_r->getSamplers().m_trilinearRepeat);
+		cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
+		cmdb->bindSampler(0, 2, m_r->getSamplers().m_trilinearClamp);
+		cmdb->bindSampler(0, 3, m_r->getSamplers().m_trilinearRepeat);
 
-	rgraphCtx.bindColorTexture(0, 4, m_r->getTemporalAA().getRt());
+		rgraphCtx.bindColorTexture(0, 4, m_r->getTemporalAA().getRt());
 
-	rgraphCtx.bindColorTexture(0, 5, m_r->getBloom().getRt());
-	cmdb->bindTexture(0, 6, m_lut->getGrTextureView(), TextureUsageBit::SAMPLED_FRAGMENT);
-	cmdb->bindTexture(0, 7, m_blueNoise->getGrTextureView(), TextureUsageBit::SAMPLED_FRAGMENT);
-	rgraphCtx.bindColorTexture(0, 8, m_r->getMotionVectors().getMotionVectorsRt());
-	rgraphCtx.bindTexture(0, 9, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+		rgraphCtx.bindColorTexture(0, 5, m_r->getBloom().getRt());
+		cmdb->bindTexture(0, 6, m_lut->getGrTextureView(), TextureUsageBit::SAMPLED_FRAGMENT);
+		cmdb->bindTexture(0, 7, m_blueNoise->getGrTextureView(), TextureUsageBit::SAMPLED_FRAGMENT);
+		rgraphCtx.bindColorTexture(0, 8, m_r->getMotionVectors().getMotionVectorsRt());
+		rgraphCtx.bindTexture(0, 9, m_r->getGBuffer().getDepthRt(),
+							  TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
 
-	if(dbgEnabled)
-	{
-		rgraphCtx.bindColorTexture(0, 10, m_r->getDbg().getRt());
-	}
+		if(dbgEnabled)
+		{
+			rgraphCtx.bindColorTexture(0, 10, m_r->getDbg().getRt());
+		}
 
-	if(dbgRtValid)
-	{
-		rgraphCtx.bindColorTexture(0, 11, dbgRt);
+		struct PushConsts
+		{
+			Vec4 m_blueNoiseLayerPad3;
+			Mat4 m_prevViewProjMatMulInvViewProjMat;
+		} pconsts;
+		pconsts.m_blueNoiseLayerPad3.x() = F32(m_r->getFrameCount() % m_blueNoise->getLayerCount());
+		pconsts.m_prevViewProjMatMulInvViewProjMat = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection
+													 * ctx.m_matrices.m_viewProjectionJitter.getInverse();
+		cmdb->setPushConstants(&pconsts, sizeof(pconsts));
 	}
-
-	struct PushConsts
+	else
 	{
-		Vec4 m_blueNoiseLayerPad3;
-		Mat4 m_prevViewProjMatMulInvViewProjMat;
-	} pconsts;
-	pconsts.m_blueNoiseLayerPad3.x() = F32(m_r->getFrameCount() % m_blueNoise->getLayerCount());
-	pconsts.m_prevViewProjMatMulInvViewProjMat = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection
-												 * ctx.m_matrices.m_viewProjectionJitter.getInverse();
-	cmdb->setPushConstants(&pconsts, sizeof(pconsts));
+		rgraphCtx.bindColorTexture(0, 0, dbgRt);
+		cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
+	}
 
 	cmdb->setViewport(0, 0, ctx.m_outRenderTargetWidth, ctx.m_outRenderTargetHeight);
-
 	drawQuad(cmdb);
 
 	// Draw UI
@@ -174,7 +192,8 @@ void FinalComposite::populateRenderGraph(RenderingContext& ctx)
 
 	RenderTargetHandle dbgRt;
 	Bool dbgRtValid;
-	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid);
+	ShaderProgramPtr debugProgram;
+	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid, debugProgram);
 	if(dbgRtValid)
 	{
 		pass.newDependency({dbgRt, TextureUsageBit::SAMPLED_FRAGMENT});

+ 4 - 1
AnKi/Renderer/FinalComposite.h

@@ -35,7 +35,10 @@ private:
 	FramebufferDescription m_fbDescr;
 
 	ShaderProgramResourcePtr m_prog;
-	Array2d<ShaderProgramPtr, 2, 2> m_grProgs; ///< [Debug on or off][Dbg render target on or off]
+	Array<ShaderProgramPtr, 2> m_grProgs; ///< [Debug on or off]
+
+	ShaderProgramResourcePtr m_defaultVisualizeRenderTargetProg;
+	ShaderProgramPtr m_defaultVisualizeRenderTargetGrProg;
 
 	TextureResourcePtr m_lut; ///< Color grading lookup texture.
 	TextureResourcePtr m_blueNoise;

+ 2 - 1
AnKi/Renderer/GBuffer.h

@@ -48,7 +48,8 @@ public:
 		return m_runCtx.m_prevFrameDepthRt;
 	}
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		if(rtName == "GBuffer_albedo")
 		{

+ 2 - 1
AnKi/Renderer/MotionVectors.h

@@ -41,7 +41,8 @@ public:
 		return m_runCtx.m_rejectionFactorRtHandle;
 	}
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		if(rtName == "MotionVectors")
 		{

+ 3 - 2
AnKi/Renderer/Renderer.cpp

@@ -633,7 +633,8 @@ void Renderer::registerDebugRenderTarget(RendererObject* obj, CString rtName)
 	m_debugRts.emplaceBack(getAllocator(), std::move(inf));
 }
 
-void Renderer::getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid)
+void Renderer::getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid,
+										   ShaderProgramPtr& optionalShaderProgram)
 {
 	if(ANKI_LIKELY(m_currentDebugRtName.isEmpty()))
 	{
@@ -651,7 +652,7 @@ void Renderer::getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& han
 	}
 	ANKI_ASSERT(obj);
 
-	obj->getDebugRenderTarget(m_currentDebugRtName, handle);
+	obj->getDebugRenderTarget(m_currentDebugRtName, handle, optionalShaderProgram);
 	handleValid = true;
 }
 

+ 2 - 1
AnKi/Renderer/Renderer.h

@@ -382,7 +382,8 @@ public:
 	}
 
 	// Need to call it after the handle is set by the RenderGraph.
-	void getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid);
+	void getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid,
+									 ShaderProgramPtr& optionalShaderProgram);
 	/// @}
 
 private:

+ 2 - 1
AnKi/Renderer/RendererObject.h

@@ -38,7 +38,8 @@ public:
 
 	HeapAllocator<U8> getAllocator() const;
 
-	virtual void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const
+	virtual void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+									  ShaderProgramPtr& optionalShaderProgram) const
 	{
 		ANKI_ASSERT(!"Object doesn't support that");
 	}

+ 45 - 24
AnKi/Renderer/RtShadows.cpp

@@ -61,22 +61,20 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 
 	// RTs
 	TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(
-		m_r->getWidth(), m_r->getHeight(), Format::R8_UNORM,
+		m_r->getWidth(), m_r->getHeight(), Format::R32G32_UINT,
 		TextureUsageBit::ALL_SAMPLED | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE | TextureUsageBit::IMAGE_COMPUTE_WRITE,
 		"RtShadows");
-	texinit.m_type = TextureType::_3D;
-	texinit.m_depth = MAX_RT_SHADOW_LAYERS;
 	texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
 	m_historyAndFinalRt = m_r->createAndClearRenderTarget(texinit);
 
-	m_renderRt = m_r->create2DRenderTargetDescription(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R8_UNORM,
+	m_renderRt = m_r->create2DRenderTargetDescription(m_r->getWidth() / 2, m_r->getHeight() / 2, Format::R32G32_UINT,
 													  "RtShadowsTmp");
-	m_renderRt.m_type = TextureType::_3D;
-	m_renderRt.m_depth = MAX_RT_SHADOW_LAYERS;
 	m_renderRt.bake();
 
 	// Misc
 	m_sbtRecordSize = getAlignedRoundUp(getGrManager().getDeviceCapabilities().m_sbtRecordAlignment, m_sbtRecordSize);
+	ANKI_CHECK(getResourceManager().loadResource("Shaders/RtShadowsVisualizeRenderTarget.ankiprog",
+												 m_visualizeRenderTargetsProg));
 
 	return Error::NONE;
 }
@@ -143,7 +141,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 	{
 		RenderQueue& rqueue = *m_runCtx.m_ctx->m_renderQueue;
 		m_runCtx.m_layersWithRejectedHistory.unsetAll();
-		m_runCtx.m_activeShadowLayerMask = 0;
 
 		if(rqueue.m_directionalLight.hasShadow())
 		{
@@ -156,7 +153,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			rqueue.m_directionalLight.m_shadowLayer = U8(layerIdx);
 			ANKI_ASSERT(rqueue.m_directionalLight.m_shadowLayer < MAX_RT_SHADOW_LAYERS);
 			m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-			m_runCtx.m_activeShadowLayerMask |= 1 << layerIdx;
 		}
 
 		for(PointLightQueueElement& light : rqueue.m_pointLights)
@@ -175,7 +171,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 				light.m_shadowLayer = U8(layerIdx);
 				ANKI_ASSERT(light.m_shadowLayer < MAX_RT_SHADOW_LAYERS);
 				m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-				m_runCtx.m_activeShadowLayerMask |= 1 << layerIdx;
 			}
 			else
 			{
@@ -200,7 +195,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 				light.m_shadowLayer = U8(layerIdx);
 				ANKI_ASSERT(light.m_shadowLayer < MAX_RT_SHADOW_LAYERS);
 				m_runCtx.m_layersWithRejectedHistory.set(layerIdx, rejectHistory);
-				m_runCtx.m_activeShadowLayerMask |= 1 << layerIdx;
 			}
 			else
 			{
@@ -225,25 +219,25 @@ void RtShadows::run(RenderPassWorkContext& rgraphCtx)
 
 	rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_historyAndFinalRt);
 	cmdb->bindSampler(0, 3, m_r->getSamplers().m_trilinearClamp);
-	rgraphCtx.bindTexture(0, 4, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-	rgraphCtx.bindColorTexture(0, 5, m_r->getMotionVectors().getMotionVectorsRt());
-	rgraphCtx.bindColorTexture(0, 6, m_r->getMotionVectors().getRejectionFactorRt());
-	rgraphCtx.bindColorTexture(0, 7, m_r->getGBuffer().getColorRt(2));
-	rgraphCtx.bindAccelerationStructure(0, 8, m_r->getAccelerationStructureBuilder().getAccelerationStructureHandle());
+	cmdb->bindSampler(0, 4, m_r->getSamplers().m_nearestNearestClamp);
+	rgraphCtx.bindTexture(0, 5, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindColorTexture(0, 6, m_r->getMotionVectors().getMotionVectorsRt());
+	rgraphCtx.bindColorTexture(0, 7, m_r->getMotionVectors().getRejectionFactorRt());
+	rgraphCtx.bindColorTexture(0, 8, m_r->getGBuffer().getColorRt(2));
+	rgraphCtx.bindAccelerationStructure(0, 9, m_r->getAccelerationStructureBuilder().getAccelerationStructureHandle());
 
-	bindUniforms(cmdb, 0, 9, ctx.m_lightShadingUniformsToken);
+	bindUniforms(cmdb, 0, 10, ctx.m_lightShadingUniformsToken);
 
-	bindUniforms(cmdb, 0, 10, rsrc.m_pointLightsToken);
-	bindUniforms(cmdb, 0, 11, rsrc.m_spotLightsToken);
-	rgraphCtx.bindColorTexture(0, 12, m_r->getShadowMapping().getShadowmapRt());
+	bindUniforms(cmdb, 0, 11, rsrc.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 12, rsrc.m_spotLightsToken);
+	rgraphCtx.bindColorTexture(0, 13, m_r->getShadowMapping().getShadowmapRt());
 
-	bindStorage(cmdb, 0, 13, rsrc.m_clustersToken);
-	bindStorage(cmdb, 0, 14, rsrc.m_indicesToken);
+	bindStorage(cmdb, 0, 14, rsrc.m_clustersToken);
+	bindStorage(cmdb, 0, 15, rsrc.m_indicesToken);
 
 	cmdb->bindAllBindless(1);
 
 	RtShadowsUniforms unis;
-	unis.activeShadowLayerMask = m_runCtx.m_activeShadowLayerMask;
 	for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 	{
 		unis.historyRejectFactor[i] = F32(m_runCtx.m_layersWithRejectedHistory.get(i));
@@ -260,7 +254,7 @@ void RtShadows::runDenoise(RenderPassWorkContext& rgraphCtx)
 
 	cmdb->bindShaderProgram(m_grDenoiseProg);
 
-	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+	cmdb->bindSampler(0, 0, m_r->getSamplers().m_nearestNearestClamp);
 	rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_renderRt);
 	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
 	rgraphCtx.bindColorTexture(0, 3, m_r->getGBuffer().getColorRt(2));
@@ -270,7 +264,6 @@ void RtShadows::runDenoise(RenderPassWorkContext& rgraphCtx)
 	RtShadowsDenoiseUniforms unis;
 	unis.invViewProjMat = m_runCtx.m_ctx->m_matrices.m_viewProjectionJitter.getInverse();
 	unis.time = F32(m_r->getGlobalTimestamp());
-	unis.activeShadowLayerMask = m_runCtx.m_activeShadowLayerMask;
 	cmdb->setPushConstants(&unis, sizeof(unis));
 
 	dispatchPPCompute(cmdb, 8, 8, m_r->getWidth(), m_r->getHeight());
@@ -371,4 +364,32 @@ Bool RtShadows::findShadowLayer(U64 lightUuid, U32& layerIdx, Bool& rejectHistor
 	return layerIdx != MAX_U32;
 }
 
+/// Resolve one of the debug render target names registered by RtShadows. All names map to the same history/final
+/// render target; the name only selects the LAYER_GROUP mutation of the visualization program.
+/// @param rtName "RtShadows", "RtShadows1" or "RtShadows2".
+/// @param[out] handle Set to the history/final render target handle.
+/// @param[out] optionalShaderProgram Set to the visualization program variant of the selected layer group.
+void RtShadows::getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+									 ShaderProgramPtr& optionalShaderProgram) const
+{
+	handle = m_runCtx.m_historyAndFinalRt;
+
+	// Map the name to a layer group. Anything that is not group 0 or 1 is expected to be "RtShadows2"
+	const Bool isGroup0 = (rtName == "RtShadows");
+	const Bool isGroup1 = !isGroup0 && (rtName == "RtShadows1");
+	ANKI_ASSERT(isGroup0 || isGroup1 || rtName == "RtShadows2");
+	const U32 group = (isGroup0) ? 0u : ((isGroup1) ? 1u : 2u);
+
+	// Fetch the program variant that matches the group
+	ShaderProgramResourceVariantInitInfo initInfo(m_visualizeRenderTargetsProg);
+	initInfo.addMutation("LAYER_GROUP", group);
+
+	const ShaderProgramResourceVariant* selectedVariant;
+	m_visualizeRenderTargetsProg->getOrCreateVariant(initInfo, selectedVariant);
+	optionalShaderProgram = selectedVariant->getProgram();
+}
+
 } // end namespace anki

+ 6 - 6
AnKi/Renderer/RtShadows.h

@@ -25,6 +25,8 @@ public:
 		: RendererObject(r)
 	{
 		registerDebugRenderTarget("RtShadows");
+		registerDebugRenderTarget("RtShadows1");
+		registerDebugRenderTarget("RtShadows2");
 	}
 
 	~RtShadows();
@@ -33,11 +35,8 @@ public:
 
 	void populateRenderGraph(RenderingContext& ctx);
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
-	{
-		ANKI_ASSERT(rtName == "RtShadows");
-		handle = m_runCtx.m_historyAndFinalRt;
-	}
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override;
 
 	RenderTargetHandle getRt() const
 	{
@@ -65,6 +64,8 @@ public:
 
 	Bool m_historyAndFinalRtImportedOnce = false;
 
+	ShaderProgramResourcePtr m_visualizeRenderTargetsProg;
+
 	class
 	{
 	public:
@@ -78,7 +79,6 @@ public:
 		U32 m_hitGroupCount = 0;
 
 		BitSet<MAX_RT_SHADOW_LAYERS, U8> m_layersWithRejectedHistory = {false};
-		U32 m_activeShadowLayerMask = 0;
 	} m_runCtx;
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);

+ 2 - 1
AnKi/Renderer/ShadowmapsResolve.h

@@ -31,7 +31,8 @@ public:
 
 	void populateRenderGraph(RenderingContext& ctx);
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		ANKI_ASSERT(rtName == "SM_resolve");
 		handle = m_runCtx.m_rt;

+ 2 - 1
AnKi/Renderer/Ssgi.h

@@ -37,7 +37,8 @@ public:
 		return m_runCtx.m_finalRt;
 	}
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		ANKI_ASSERT(rtName == "SSGI");
 		handle = m_runCtx.m_finalRt;

+ 2 - 1
AnKi/Renderer/Ssr.h

@@ -58,7 +58,8 @@ private:
 
 	void run(RenderPassWorkContext& rgraphCtx);
 
-	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		ANKI_ASSERT(rtName == "SSR");
 		handle = m_runCtx.m_rt;

+ 1 - 0
AnKi/ShaderCompiler/Glslang.cpp

@@ -12,6 +12,7 @@
 #	pragma GCC diagnostic ignored "-Wundef"
 #	pragma GCC diagnostic ignored "-Wconversion"
 #endif
+#define ENABLE_OPT 0
 #include <Glslang/glslang/Public/ShaderLang.h>
 #include <Glslang/SPIRV/GlslangToSpv.h>
 #include <Glslang/StandAlone/DirStackFileIncluder.h>

+ 1 - 1
AnKi/Shaders/BilateralFilter.glsl

@@ -56,7 +56,7 @@ F32 calculateBilateralWeight(BilateralSample center, BilateralSample tap, Bilate
 		const F32 lowDistanceThreshold2 = 0.001;
 
 		// Change in position in camera space
-		Vec3 dq = center.m_position - tap.m_position;
+		const Vec3 dq = center.m_position - tap.m_position;
 
 		// How far away is this point from the original sample in camera space? (Max value is unbounded)
 		const F32 distance2 = dot(dq, dq);

+ 0 - 8
AnKi/Shaders/FinalComposite.ankiprog

@@ -6,7 +6,6 @@
 #pragma anki mutator BLUE_NOISE 0 1
 #pragma anki mutator BLOOM_ENABLED 0 1
 #pragma anki mutator DBG_ENABLED 0 1
-#pragma anki mutator DBG_RENDER_TARGET_ENABLED 0 1
 
 ANKI_SPECIALIZATION_CONSTANT_U32(LUT_SIZE, 0, 1);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 1, UVec2(1, 1));
@@ -39,9 +38,6 @@ layout(set = 0, binding = 9) uniform texture2D u_depthRt;
 #if DBG_ENABLED
 layout(set = 0, binding = 10) uniform texture2D u_dbgOutlineRt;
 #endif
-#if DBG_RENDER_TARGET_ENABLED
-layout(set = 0, binding = 11) uniform texture2D u_dbgRt;
-#endif
 
 layout(push_constant, row_major, std430) uniform pc_
 {
@@ -94,10 +90,6 @@ void main()
 	out_color += blueNoise / 255.0;
 #endif
 
-#if DBG_RENDER_TARGET_ENABLED
-	out_color = textureLod(u_dbgRt, u_linearAnyClampSampler, uv, 0.0).rgb;
-#endif
-
 #if DBG_ENABLED
 	const Vec4 dbg = textureLod(u_dbgOutlineRt, u_linearAnyClampSampler, uv, 0.0);
 	out_color = mix(out_color, dbg.rgb, dbg.a);

+ 1 - 4
AnKi/Shaders/Include/RtShadows.h

@@ -14,16 +14,13 @@ const U32 MAX_RT_SHADOW_LAYERS = 8u;
 struct RtShadowsUniforms
 {
 	F32 historyRejectFactor[MAX_RT_SHADOW_LAYERS]; // 1.0 means reject, 0.0 not reject
-	U32 activeShadowLayerMask;
-	U32 padding[3];
 };
 
 struct RtShadowsDenoiseUniforms
 {
 	Mat4 invViewProjMat;
 	F32 time;
-	U32 activeShadowLayerMask;
-	U32 padding[2];
+	U32 padding[3];
 };
 
 ANKI_END_NAMESPACE

+ 9 - 9
AnKi/Shaders/LightShading.ankiprog

@@ -35,7 +35,7 @@ void main()
 #pragma anki start frag
 #include <AnKi/Shaders/Pack.glsl>
 #include <AnKi/Shaders/Functions.glsl>
-#include <AnKi/Shaders/Include/RtShadows.h>
+#include <AnKi/Shaders/RtShadows.glsl>
 
 #define LIGHT_SET 0
 #define LIGHT_COMMON_UNIS_BINDING 0
@@ -56,7 +56,7 @@ layout(set = 0, binding = 17) uniform texture2D u_ssrRt;
 layout(set = 0, binding = 18) uniform texture2D u_ssaoRt;
 layout(set = 0, binding = 19) uniform texture2D u_ssgiRt;
 #if USE_SHADOW_LAYERS
-layout(set = 0, binding = 20) uniform texture3D u_shadowLayersTex;
+layout(set = 0, binding = 20) uniform utexture2D u_shadowLayersTex;
 #else
 layout(set = 0, binding = 21) uniform texture2D u_resolvedSm;
 #endif
@@ -108,7 +108,10 @@ void main()
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, SUBSURFACE_MIN);
 
 	// SM
-#if !USE_SHADOW_LAYERS
+#if USE_SHADOW_LAYERS
+	F32 resolvedSm[MAX_RT_SHADOW_LAYERS];
+	unpackRtShadows(textureLod(u_shadowLayersTex, u_nearestAnyClampSampler, in_uv, 0.0).rg, resolvedSm);
+#else
 	Vec4 resolvedSm = textureLod(u_resolvedSm, u_trilinearClampSampler, in_uv, 0.0);
 	U32 resolvedSmIdx = 0;
 #endif
@@ -128,8 +131,7 @@ void main()
 		if(u_dirLight.m_cascadeCount > 0)
 		{
 #if USE_SHADOW_LAYERS
-			const F32 w = (F32(u_dirLight.m_shadowLayer) + 0.5) / F32(MAX_RT_SHADOW_LAYERS);
-			shadowFactor = textureLod(u_shadowLayersTex, u_trilinearClampSampler, Vec3(in_uv, w), 0.0).r;
+			shadowFactor = resolvedSm[u_dirLight.m_shadowLayer];
 #else
 			shadowFactor = resolvedSm[0];
 			++resolvedSmIdx;
@@ -161,8 +163,7 @@ void main()
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		{
 #if USE_SHADOW_LAYERS
-			const F32 w = (F32(light.m_shadowLayer) + 0.5) / F32(MAX_RT_SHADOW_LAYERS);
-			const F32 shadow = textureLod(u_shadowLayersTex, u_trilinearClampSampler, Vec3(in_uv, w), 0.0).r;
+			const F32 shadow = resolvedSm[light.m_shadowLayer];
 #else
 			const F32 shadow = resolvedSm[resolvedSmIdx++];
 #endif
@@ -185,8 +186,7 @@ void main()
 		ANKI_BRANCH if(shadowmapLayerIdx >= 0.0)
 		{
 #if USE_SHADOW_LAYERS
-			const F32 w = (F32(light.m_shadowLayer) + 0.5) / F32(MAX_RT_SHADOW_LAYERS);
-			const F32 shadow = textureLod(u_shadowLayersTex, u_trilinearClampSampler, Vec3(in_uv, w), 0.0).r;
+			const F32 shadow = resolvedSm[light.m_shadowLayer];
 #else
 			const F32 shadow = resolvedSm[resolvedSmIdx++];
 #endif

+ 30 - 0
AnKi/Shaders/RtShadows.glsl

@@ -0,0 +1,30 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Include/RtShadows.h>
+#include <AnKi/Shaders/Common.glsl>
+
+/// Pack the shadow factors of all layers into 2 unsigned integers, 8 bits per layer.
+/// Layers [0, 3] go to the x component and layers [4, 7] to the y component.
+UVec2 packRtShadows(F32 shadowFactors[MAX_RT_SHADOW_LAYERS])
+{
+	const Vec4 lowLayers = Vec4(shadowFactors[0], shadowFactors[1], shadowFactors[2], shadowFactors[3]);
+	const Vec4 highLayers = Vec4(shadowFactors[4], shadowFactors[5], shadowFactors[6], shadowFactors[7]);
+	return UVec2(packUnorm4x8(lowLayers), packUnorm4x8(highLayers));
+}
+
+/// Inverse of packRtShadows: expand 2 unsigned integers into one shadow factor per layer.
+/// The x component holds layers [0, 3] and the y component layers [4, 7].
+void unpackRtShadows(UVec2 packed, out F32 shadowFactors[MAX_RT_SHADOW_LAYERS])
+{
+	const Vec4 lowLayers = unpackUnorm4x8(packed.x);
+	shadowFactors[0] = lowLayers.x;
+	shadowFactors[1] = lowLayers.y;
+	shadowFactors[2] = lowLayers.z;
+	shadowFactors[3] = lowLayers.w;
+
+	const Vec4 highLayers = unpackUnorm4x8(packed.y);
+	shadowFactors[4] = highLayers.x;
+	shadowFactors[5] = highLayers.y;
+	shadowFactors[6] = highLayers.z;
+	shadowFactors[7] = highLayers.w;
+}

+ 16 - 24
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -12,16 +12,16 @@ ANKI_SPECIALIZATION_CONSTANT_U32(SPIRAL_TURN_COUNT, 4, 1);
 
 #include <AnKi/Shaders/BilateralFilter.glsl>
 #include <AnKi/Shaders/Pack.glsl>
-#include <AnKi/Shaders/Include/RtShadows.h>
+#include <AnKi/Shaders/RtShadows.glsl>
 
 const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
-layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture3D u_inTex;
+layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
+layout(set = 0, binding = 1) uniform utexture2D u_inTex;
 layout(set = 0, binding = 2) uniform texture2D u_depthTex;
 layout(set = 0, binding = 3) uniform texture2D u_gbuffer2Tex;
-layout(set = 0, binding = 4) writeonly uniform image3D u_outImg;
+layout(set = 0, binding = 4) writeonly uniform uimage2D u_outImg;
 
 layout(std430, push_constant, row_major) uniform b_pc
 {
@@ -48,21 +48,13 @@ void main()
 
 	// Reference
 	F32 shadowFactors[MAX_RT_SHADOW_LAYERS];
-	ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
-	{
-		if((u_unis.activeShadowLayerMask & (1 << i)) != 0)
-		{
-			const F32 w = (F32(i) + 0.5) / F32(MAX_RT_SHADOW_LAYERS);
-			shadowFactors[i] = textureLod(u_inTex, u_linearAnyClampSampler, Vec3(uv, w), 0.0).r;
-		}
-	}
-
+	unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, uv, 0.0).rg, shadowFactors);
 	F32 weight = 1.0;
 
 	BilateralSample ref;
-	ref.m_depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	ref.m_depth = textureLod(u_depthTex, u_nearestAnyClampSampler, uv, 0.0).r;
 	ref.m_position = unproject(UV_TO_NDC(uv), ref.m_depth);
-	ref.m_normal = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
+	ref.m_normal = readNormalFromGBuffer(u_gbuffer2Tex, u_nearestAnyClampSampler, uv);
 
 	// Sample
 	SpatialBilateralContext ctx =
@@ -79,7 +71,7 @@ void main()
 		crnt.m_normal = unpackNormalFromGBuffer(texelFetch(u_gbuffer2Tex, unormalizedUvs, 0));
 
 		BilateralConfig config;
-		const Vec3 weights = normalize(Vec3(0.0, 1.0, 0.0));
+		const Vec3 weights = normalize(Vec3(0.0, 1.0, 1.0));
 		config.m_depthWeight = weights.x;
 		config.m_normalWeight = weights.y;
 		config.m_planeWeight = weights.z;
@@ -87,23 +79,23 @@ void main()
 		const F32 w = calculateBilateralWeight(crnt, ref, config);
 		weight += w;
 
+		F32 localShadowFactors[MAX_RT_SHADOW_LAYERS];
+		unpackRtShadows(texelFetch(u_inTex, IVec2(unormalizedUvs / 2), 0).rg, localShadowFactors);
+
 		ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 		{
-			if((u_unis.activeShadowLayerMask & (1 << i)) != 0)
-			{
-				shadowFactors[i] += texelFetch(u_inTex, IVec3(unormalizedUvs / 2, i), 0).r * w;
-			}
+			shadowFactors[i] += localShadowFactors[i] * w;
 		}
 	}
 
 	// Write value
 	ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 	{
-		if((u_unis.activeShadowLayerMask & (1 << i)) != 0)
-		{
-			imageStore(u_outImg, IVec3(gl_GlobalInvocationID.xy, i), Vec4(shadowFactors[i] / weight));
-		}
+		shadowFactors[i] /= weight;
 	}
+
+	const UVec2 packed = packRtShadows(shadowFactors);
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), UVec4(packed, 0, 0));
 }
 
 #pragma anki end

+ 26 - 61
AnKi/Shaders/RtShadowsRayGen.ankiprog

@@ -10,22 +10,23 @@
 
 #include <AnKi/Shaders/ImportanceSampling.glsl>
 #include <AnKi/Shaders/Pack.glsl>
-#include <AnKi/Shaders/Include/RtShadows.h>
+#include <AnKi/Shaders/RtShadows.glsl>
 
 layout(set = 0, binding = 0) uniform sampler u_trilinearRepeatSampler; // Used by the hit shaders
-layout(set = 0, binding = 1) uniform image3D u_outImg;
-layout(set = 0, binding = 2) uniform texture3D u_historyRt;
+layout(set = 0, binding = 1) uniform uimage2D u_outImg;
+layout(set = 0, binding = 2) uniform utexture2D u_historyRt;
 layout(set = 0, binding = 3) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 4) uniform texture2D u_depthRt;
-layout(set = 0, binding = 5) uniform texture2D u_motionVectorsRt;
-layout(set = 0, binding = 6) uniform texture2D u_motionVectorsRejectionRt;
-layout(set = 0, binding = 7) uniform texture2D u_normalRt;
-layout(set = 0, binding = 8) uniform accelerationStructureEXT u_tlas;
+layout(set = 0, binding = 4) uniform sampler u_nearestAnyClampSampler;
+layout(set = 0, binding = 5) uniform texture2D u_depthRt;
+layout(set = 0, binding = 6) uniform texture2D u_motionVectorsRt;
+layout(set = 0, binding = 7) uniform texture2D u_motionVectorsRejectionRt;
+layout(set = 0, binding = 8) uniform texture2D u_normalRt;
+layout(set = 0, binding = 9) uniform accelerationStructureEXT u_tlas;
 
 #define LIGHT_SET 0
-#define LIGHT_COMMON_UNIS_BINDING 9
-#define LIGHT_LIGHTS_BINDING 10
-#define LIGHT_CLUSTERS_BINDING 13
+#define LIGHT_COMMON_UNIS_BINDING 10
+#define LIGHT_LIGHTS_BINDING 11
+#define LIGHT_CLUSTERS_BINDING 14
 #include <AnKi/Shaders/ClusteredShadingCommon.glsl>
 
 ANKI_BINDLESS_SET(1); // Used by the hit shaders
@@ -64,13 +65,7 @@ void main()
 
 	if(depth == 1.0)
 	{
-		ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
-		{
-			if((u_unis.activeShadowLayerMask & (1 << i)) != 0)
-			{
-				imageStore(u_outImg, IVec3(gl_LaunchIDEXT.xy, i), Vec4(0.0));
-			}
-		}
+		imageStore(u_outImg, IVec2(gl_LaunchIDEXT.xy), UVec4(0));
 		return;
 	}
 
@@ -85,7 +80,7 @@ void main()
 	F32 shadowFactors[MAX_RT_SHADOW_LAYERS];
 	ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 	{
-		shadowFactors[i] = -1.0; // -1.0 means that this layer's pixel is outside the influence of light
+		shadowFactors[i] = 0.0;
 	}
 
 	// Get a random factor
@@ -99,17 +94,10 @@ void main()
 		const Vec3 rayDir = normalize(dirLightPos - worldPos);
 
 		const F32 lambertTerm = dot(rayDir, normal);
-		F32 shadowFactor;
 		ANKI_BRANCH if(lambertTerm > 0.0)
 		{
-			shadowFactor = trace(worldPos, rayDir, 10000.0);
-		}
-		else
-		{
-			shadowFactor = 0.0;
+			shadowFactors[u_dirLight.m_shadowLayer] = trace(worldPos, rayDir, 10000.0);
 		}
-
-		shadowFactors[u_dirLight.m_shadowLayer] = shadowFactor;
 	}
 
 	// Point lights
@@ -126,17 +114,10 @@ void main()
 			const Vec3 rayDir = toLight / distanceToLight; // normalize
 
 			const F32 lambertTerm = dot(rayDir, normal);
-			F32 shadowFactor;
 			ANKI_BRANCH if(lambertTerm > 0.0)
 			{
-				shadowFactor = trace(worldPos, rayDir, distanceToLight);
-			}
-			else
-			{
-				shadowFactor = 0.0;
+				shadowFactors[light.m_shadowLayer] = trace(worldPos, rayDir, distanceToLight);
 			}
-
-			shadowFactors[light.m_shadowLayer] = shadowFactor;
 		}
 	}
 
@@ -153,17 +134,10 @@ void main()
 			const Vec3 rayDir = toLight / distanceToLight; // normalize
 
 			const F32 lambertTerm = dot(rayDir, normal);
-			F32 shadowFactor;
 			ANKI_BRANCH if(lambertTerm > 0.0)
 			{
-				shadowFactor = trace(worldPos, rayDir, distanceToLight);
-			}
-			else
-			{
-				shadowFactor = 0.0;
+				shadowFactors[light.m_shadowLayer] = trace(worldPos, rayDir, distanceToLight);
 			}
-
-			shadowFactors[light.m_shadowLayer] = shadowFactor;
 		}
 	}
 
@@ -172,33 +146,24 @@ void main()
 		const Vec2 historyUv = uv + textureLod(u_motionVectorsRt, u_linearAnyClampSampler, uv, 0.0).rg;
 		const F32 rejectionFactor = textureLod(u_motionVectorsRejectionRt, u_linearAnyClampSampler, uv, 0.0).r;
 
+		// Use nearest because it's an integer texture
+		const UVec2 history2 = textureLod(u_historyRt, u_nearestAnyClampSampler, historyUv, 0.0).rg;
+		F32 history[MAX_RT_SHADOW_LAYERS];
+		unpackRtShadows(history2, history);
+
 		// Compute blend factors
 		const F32 nominalBlendFactor = 0.1;
 		const F32 blendFactor = mix(nominalBlendFactor, 1.0, rejectionFactor);
 
 		// Blend with history
-		for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
+		ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
 		{
-			ANKI_BRANCH if(shadowFactors[i] != -1.0)
-			{
-				// Inside the area of influence of the light and it's an active layer, blend with history
-
-				const F32 w = (F32(i) + 0.5) / F32(MAX_RT_SHADOW_LAYERS);
-				const F32 history = textureLod(u_historyRt, u_linearAnyClampSampler, Vec3(historyUv, w), 0.0).r;
-				shadowFactors[i] =
-					mix(history, shadowFactors[i], min(1.0, u_unis.historyRejectFactor[i] + blendFactor));
-			}
+			shadowFactors[i] = mix(history[i], shadowFactors[i], min(1.0, u_unis.historyRejectFactor[i] + blendFactor));
 		}
 	}
 
 	// Store
-	ANKI_UNROLL for(U32 i = 0; i < MAX_RT_SHADOW_LAYERS; ++i)
-	{
-		if((u_unis.activeShadowLayerMask & (1 << i)) != 0)
-		{
-			const F32 storeValue = max(shadowFactors[i], 0.0);
-			imageStore(u_outImg, IVec3(gl_LaunchIDEXT.xy, i), Vec4(storeValue));
-		}
-	}
+	const UVec2 packed = packRtShadows(shadowFactors);
+	imageStore(u_outImg, IVec2(gl_LaunchIDEXT.xy), UVec4(packed, 0, 0));
 }
 #pragma anki end

+ 34 - 0
AnKi/Shaders/RtShadowsVisualizeRenderTarget.ankiprog

@@ -0,0 +1,34 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki mutator LAYER_GROUP 0 1 2
+
+#pragma anki start vert
+#include <AnKi/Shaders/QuadVert.glsl>
+#pragma anki end
+
+#pragma anki start frag
+#include <AnKi/Shaders/RtShadows.glsl>
+
+layout(set = 0, binding = 0) uniform utexture2D u_inTex;
+layout(set = 0, binding = 1) uniform sampler u_nearestAnyClampSampler;
+
+layout(location = 0) in Vec2 in_uv;
+layout(location = 0) out Vec3 out_color;
+
+// Show 3 shadow layers (2 for the last group) of the packed RT shadows texture as RGB. LAYER_GROUP selects which ones.
+void main()
+{
+	// Read the packed factors of this pixel and expand them to one value per layer
+	const UVec2 packedFactors = textureLod(u_inTex, u_nearestAnyClampSampler, in_uv, 0.0).rg;
+	F32 layers[MAX_RT_SHADOW_LAYERS];
+	unpackRtShadows(packedFactors, layers);
+
+#if LAYER_GROUP == 0
+	out_color = Vec3(layers[0], layers[1], layers[2]);
+#elif LAYER_GROUP == 1
+	out_color = Vec3(layers[3], layers[4], layers[5]);
+#else
+	// Only 2 layers are left for the last group, blue stays black
+	out_color = Vec3(layers[6], layers[7], 0.0);
+#endif
+}
+#pragma anki end

+ 23 - 0
AnKi/Shaders/VisualizeRenderTarget.ankiprog

@@ -0,0 +1,23 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki start vert
+#include <AnKi/Shaders/QuadVert.glsl>
+#pragma anki end
+
+#pragma anki start frag
+#include <AnKi/Shaders/Common.glsl>
+
+layout(set = 0, binding = 0) uniform texture2D u_inTex;
+layout(set = 0, binding = 1) uniform sampler u_nearestAnyClampSampler;
+
+layout(location = 0) in Vec2 in_uv;
+layout(location = 0) out Vec3 out_color;
+
+// Copy the RGB channels of the input texture to the output without any filtering.
+void main()
+{
+	const Vec4 texel = textureLod(u_inTex, u_nearestAnyClampSampler, in_uv, 0.0);
+	out_color = texel.rgb;
+}
+#pragma anki end

+ 2 - 2
README.md

@@ -18,9 +18,9 @@ See the [LICENSE](LICENSE) file for more info.
 Build Status, Linux and Windows
 [![Build Status](https://travis-ci.org/godlikepanos/anki-3d-engine.svg?branch=master)](https://travis-ci.org/godlikepanos/anki-3d-engine)
 
-To checkout the source including the submodules type:
+To check out the source, type:
 
-	git clone --recurse-submodules https://github.com/godlikepanos/anki-3d-engine.git anki
+	git clone https://github.com/godlikepanos/anki-3d-engine.git anki
 
 AnKi's build system uses `CMake`. A great effort was made to ease the building process, which is why there are almost
 no external dependencies.

+ 21 - 4
Samples/Common/Framework.cpp

@@ -43,8 +43,8 @@ Error SampleApp::init(int argc, char** argv, CString sampleName)
 
 Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 {
-	const F32 ROTATE_ANGLE = toRad(2.5f);
-	const F32 MOUSE_SENSITIVITY = 5.0f;
+	constexpr F32 ROTATE_ANGLE = toRad(2.5f);
+	constexpr F32 MOUSE_SENSITIVITY = 5.0f;
 	quit = false;
 
 	SceneGraph& scene = getSceneGraph();
@@ -92,8 +92,25 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 
 	if(in.getKey(KeyCode::H) == 1)
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "RtShadows") ? ""
-																									 : "RtShadows");
+		static U32 pressCount = 0;
+		CString rtName;
+		switch(pressCount)
+		{
+		case 0:
+			rtName = "RtShadows";
+			break;
+		case 1:
+			rtName = "RtShadows1";
+			break;
+		case 2:
+			rtName = "RtShadows2";
+			break;
+		default:
+			rtName = "";
+		}
+		renderer.setCurrentDebugRenderTarget(rtName);
+
+		pressCount = (pressCount + 1) % 4;
 	}
 
 	if(in.getKey(KeyCode::J) == 1)