Browse Source

Add bent normals

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
dfcbd4d177

+ 1 - 0
AnKi/Math/Transform.h

@@ -243,6 +243,7 @@ private:
 	{
 		ANKI_ASSERT(m_origin.w() == T(0));
 		using TT = TVec<T, 3>;
+		[[maybe_unused]] TT t; // Shut up the compiler regarding TT
 		ANKI_ASSERT(m_scale.w() == T(0) && m_scale.xyz() > TT(T(0)));
 	}
 };

+ 7 - 7
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -213,7 +213,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 				FrustumGpuVisibilityInput visIn;
-				visIn.m_passesName = computeTempPassName("GI: GBuffer", cellIdx, "face", f);
+				visIn.m_passesName = generateTempPassName("GI: GBuffer", cellIdx, "face", f);
 				visIn.m_technique = RenderingTechnique::kGBuffer;
 				visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
 				visIn.m_lodReferencePoint = cellCenter;
@@ -254,7 +254,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				fbDescr.bake();
 
 				// Create the pass
-				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("GI: GBuffer", cellIdx, "face", f));
+				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("GI: GBuffer", cellIdx, "face", f));
 				pass.setFramebufferInfo(fbDescr, gbufferColorRts, gbufferDepthRt);
 
 				for(U i = 0; i < kGBufferColorRenderTargetCount; ++i)
@@ -311,7 +311,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 				FrustumGpuVisibilityInput visIn;
-				visIn.m_passesName = computeTempPassName("GI: Shadows", cellIdx, "face", f);
+				visIn.m_passesName = generateTempPassName("GI: Shadows", cellIdx, "face", f);
 				visIn.m_technique = RenderingTechnique::kDepth;
 				visIn.m_viewProjectionMatrix = cascadeViewProjMat;
 				visIn.m_lodReferencePoint = cellCenter;
@@ -340,7 +340,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 			if(doShadows)
 			{
 				// Create the pass
-				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("GI: Shadows", cellIdx, "face", f));
+				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("GI: Shadows", cellIdx, "face", f));
 				pass.setFramebufferInfo(m_shadowMapping.m_fbDescr, {}, shadowsRt);
 
 				pass.newTextureDependency(shadowsRt, TextureUsageBit::kAllFramebuffer, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
@@ -381,7 +381,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 			GpuVisibilityNonRenderablesOutput lightVis;
 			{
 				GpuVisibilityNonRenderablesInput in;
-				in.m_passesName = computeTempPassName("GI: Light visibility", cellIdx, "face", f);
+				in.m_passesName = generateTempPassName("GI: Light visibility", cellIdx, "face", f);
 				in.m_objectType = GpuSceneNonRenderableObjectType::kLight;
 				in.m_viewProjectionMat = frustum.getViewProjectionMatrix();
 				in.m_rgraph = &rgraph;
@@ -398,7 +398,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 				fbDescr.bake();
 
 				// Create the pass
-				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("GI: Light shading", cellIdx, "face", f));
+				GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("GI: Light shading", cellIdx, "face", f));
 				pass.setFramebufferInfo(fbDescr, {lightShadingRt});
 
 				pass.newBufferDependency(lightVis.m_visiblesBufferHandle, BufferUsageBit::kUavFragmentRead);
@@ -466,7 +466,7 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 
 		// Irradiance pass. First & 2nd bounce
 		{
-			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(computeTempPassName("GI: Irradiance", cellIdx));
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(generateTempPassName("GI: Irradiance", cellIdx));
 
 			pass.newTextureDependency(lightShadingRt, TextureUsageBit::kSampledCompute);
 			pass.newTextureDependency(irradianceVolume, TextureUsageBit::kUavComputeWrite);

+ 7 - 7
AnKi/Renderer/ProbeReflections.cpp

@@ -212,7 +212,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 			FrustumGpuVisibilityInput visIn;
-			visIn.m_passesName = computeTempPassName("Cube refl: GBuffer", f);
+			visIn.m_passesName = generateTempPassName("Cube refl: GBuffer", f);
 			visIn.m_technique = RenderingTechnique::kGBuffer;
 			visIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
 			visIn.m_lodReferencePoint = probeToRefresh->getWorldPosition();
@@ -253,7 +253,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			fbDescr.bake();
 
 			// Create pass
-			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("Cube refl: GBuffer", f));
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("Cube refl: GBuffer", f));
 			pass.setFramebufferInfo(fbDescr, gbufferColorRts, gbufferDepthRt);
 
 			for(U i = 0; i < kGBufferColorRenderTargetCount; ++i)
@@ -307,7 +307,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 
 			FrustumGpuVisibilityInput visIn;
-			visIn.m_passesName = computeTempPassName("Cube refl: Shadows", f);
+			visIn.m_passesName = generateTempPassName("Cube refl: Shadows", f);
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_viewProjectionMatrix = cascadeViewProjMat;
 			visIn.m_lodReferencePoint = probeToRefresh->getWorldPosition();
@@ -336,7 +336,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 		if(doShadows)
 		{
 			// Pass
-			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("Cube refl: Shadows", f));
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("Cube refl: Shadows", f));
 			pass.setFramebufferInfo(m_shadowMapping.m_fbDescr, {}, shadowMapRt);
 
 			pass.newTextureDependency(shadowMapRt, TextureUsageBit::kAllFramebuffer, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
@@ -375,7 +375,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 		GpuVisibilityNonRenderablesOutput lightVis;
 		{
 			GpuVisibilityNonRenderablesInput in;
-			in.m_passesName = computeTempPassName("Cube refl: Light visibility", f);
+			in.m_passesName = generateTempPassName("Cube refl: Light visibility", f);
 			in.m_objectType = GpuSceneNonRenderableObjectType::kLight;
 			in.m_viewProjectionMat = frustum.getViewProjectionMatrix();
 			in.m_rgraph = &rgraph;
@@ -392,7 +392,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			fbDescr.bake();
 
 			// Pass
-			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("Cube refl: light shading", f));
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("Cube refl: light shading", f));
 			pass.setFramebufferInfo(fbDescr, {probeTexture});
 
 			pass.newBufferDependency(lightVis.m_visiblesBufferHandle, BufferUsageBit::kUavFragmentRead);
@@ -514,7 +514,7 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 	{
 		for(U32 faceIdx = 0; faceIdx < 6; ++faceIdx)
 		{
-			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(computeTempPassName("Cube refl: Gen mips", faceIdx));
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(generateTempPassName("Cube refl: Gen mips", faceIdx));
 
 			TextureSubresourceInfo subresource(TextureSurfaceInfo(0, 0, faceIdx, 0));
 			subresource.m_mipmapCount = m_lightShading.m_mipCount;

+ 2 - 2
AnKi/Renderer/RendererObject.cpp

@@ -109,14 +109,14 @@ void RendererObject::zeroBuffer(Buffer* buff)
 	fence->clientWait(16.0_sec);
 }
 
-CString RendererObject::computeTempPassName(CString name, U32 index)
+CString RendererObject::generateTempPassName(CString name, U32 index)
 {
 	Char* str = static_cast<Char*>(getRenderer().getFrameMemoryPool().allocate(128, 1));
 	snprintf(str, 128, "%s #%u", name.cstr(), index);
 	return str;
 }
 
-CString RendererObject::computeTempPassName(CString name, U32 index, CString name2, U32 index2)
+CString RendererObject::generateTempPassName(CString name, U32 index, CString name2, U32 index2)
 {
 	Char* str = static_cast<Char*>(getRenderer().getFrameMemoryPool().allocate(128, 1));
 	snprintf(str, 128, "%s #%u %s #%u", name.cstr(), index, name2.cstr(), index2);

+ 2 - 2
AnKi/Renderer/RendererObject.h

@@ -102,9 +102,9 @@ protected:
 	static void zeroBuffer(Buffer* buff);
 
 	/// Temp pass name.
-	static CString computeTempPassName(CString name, U32 index);
+	static CString generateTempPassName(CString name, U32 index);
 
-	static CString computeTempPassName(CString name, U32 index, CString name2, U32 index2);
+	static CString generateTempPassName(CString name, U32 index, CString name2, U32 index2);
 };
 /// @}
 

+ 11 - 11
AnKi/Renderer/ShadowMapping.cpp

@@ -360,7 +360,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			// Vis testing
 			const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 			DistanceGpuVisibilityInput visIn;
-			visIn.m_passesName = computeTempPassName("Shadows point light", lightIdx);
+			visIn.m_passesName = generateTempPassName("Shadows point light", lightIdx);
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 			visIn.m_lodDistances = lodDistances;
@@ -377,7 +377,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			BufferOffsetRange clearTileIndirectArgs;
 			if(!renderAllways)
 			{
-				clearTileIndirectArgs = createVetVisibilityPass(computeTempPassName("Shadows: Vet point light", lightIdx), *lightc, visOut, rgraph);
+				clearTileIndirectArgs = createVetVisibilityPass(generateTempPassName("Shadows: Vet point light", lightIdx), *lightc, visOut, rgraph);
 			}
 
 			// Add additional visibility and draw passes
@@ -394,7 +394,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 				if(getRenderer().runSoftwareMeshletRendering())
 				{
 					GpuMeshletVisibilityInput meshIn;
-					meshIn.m_passesName = computeTempPassName("Shadows point light", lightIdx, "face", face);
+					meshIn.m_passesName = generateTempPassName("Shadows point light", lightIdx, "face", face);
 					meshIn.m_technique = RenderingTechnique::kDepth;
 					meshIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
 					meshIn.m_cameraTransform = frustum.getViewMatrix().getInverseTransformation();
@@ -406,7 +406,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 				}
 
 				createDrawShadowsPass(atlasViewports[face], frustum.getViewProjectionMatrix(), frustum.getViewMatrix(), visOut, meshletVisOut,
-									  clearTileIndirectArgs, {}, computeTempPassName("Shadows: Point light", lightIdx, "face", face), rgraph);
+									  clearTileIndirectArgs, {}, generateTempPassName("Shadows: Point light", lightIdx, "face", face), rgraph);
 			}
 		}
 		else
@@ -447,7 +447,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			// Vis testing
 			const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 			FrustumGpuVisibilityInput visIn;
-			visIn.m_passesName = computeTempPassName("Shadows spot light", lightIdx);
+			visIn.m_passesName = generateTempPassName("Shadows spot light", lightIdx);
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_lodReferencePoint = cameraOrigin;
 			visIn.m_lodDistances = lodDistances;
@@ -464,7 +464,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			BufferOffsetRange clearTileIndirectArgs;
 			if(!renderAllways)
 			{
-				clearTileIndirectArgs = createVetVisibilityPass(computeTempPassName("Shadows: Vet spot light", lightIdx), *lightc, visOut, rgraph);
+				clearTileIndirectArgs = createVetVisibilityPass(generateTempPassName("Shadows: Vet spot light", lightIdx), *lightc, visOut, rgraph);
 			}
 
 			// Additional visibility
@@ -472,7 +472,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			if(getRenderer().runSoftwareMeshletRendering())
 			{
 				GpuMeshletVisibilityInput meshIn;
-				meshIn.m_passesName = computeTempPassName("Shadows spot light", lightIdx);
+				meshIn.m_passesName = generateTempPassName("Shadows spot light", lightIdx);
 				meshIn.m_technique = RenderingTechnique::kDepth;
 				meshIn.m_viewProjectionMatrix = lightc->getSpotLightViewProjectionMatrix();
 				meshIn.m_cameraTransform = lightc->getSpotLightViewMatrix().getInverseTransformation();
@@ -485,7 +485,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 
 			// Add draw pass
 			createDrawShadowsPass(atlasViewport, lightc->getSpotLightViewProjectionMatrix(), lightc->getSpotLightViewMatrix(), visOut, meshletVisOut,
-								  clearTileIndirectArgs, {}, computeTempPassName("Shadows: Spot light", lightIdx), rgraph);
+								  clearTileIndirectArgs, {}, generateTempPassName("Shadows: Spot light", lightIdx), rgraph);
 		}
 		else
 		{
@@ -541,7 +541,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			// Vis testing
 			const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
 			FrustumGpuVisibilityInput visIn;
-			visIn.m_passesName = computeTempPassName("Shadows: Dir light cascade", cascade);
+			visIn.m_passesName = generateTempPassName("Shadows: Dir light cascade", cascade);
 			visIn.m_technique = RenderingTechnique::kDepth;
 			visIn.m_viewProjectionMatrix = cascadeViewProjMats[cascade];
 			visIn.m_lodReferencePoint = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
@@ -558,7 +558,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			if(getRenderer().runSoftwareMeshletRendering())
 			{
 				GpuMeshletVisibilityInput meshIn;
-				meshIn.m_passesName = computeTempPassName("Shadows: Dir light cascade", lightIdx);
+				meshIn.m_passesName = generateTempPassName("Shadows: Dir light cascade", lightIdx);
 				meshIn.m_technique = RenderingTechnique::kDepth;
 				meshIn.m_viewProjectionMatrix = cascadeViewProjMats[cascade];
 				meshIn.m_cameraTransform = cascadeViewMats[cascade].getInverseTransformation();
@@ -571,7 +571,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 
 			// Draw
 			createDrawShadowsPass(dirLightAtlasViewports[cascade], cascadeViewProjMats[cascade], cascadeViewMats[cascade], visOut, meshletVisOut, {},
-								  hzbGenIn.m_cascades[cascade].m_hzbRt, computeTempPassName("Shadows: Dir light cascade", cascade), rgraph);
+								  hzbGenIn.m_cascades[cascade].m_hzbRt, generateTempPassName("Shadows: Dir light cascade", cascade), rgraph);
 
 			// Update the texture matrix to point to the correct region in the atlas
 			ctx.m_dirLightTextureMatrices[cascade] = createSpotLightTextureMatrix(dirLightAtlasViewports[cascade]) * cascadeViewProjMats[cascade];

+ 30 - 25
AnKi/Renderer/Ssao.cpp

@@ -40,18 +40,17 @@ Error Ssao::initInternal()
 	{
 		TextureUsageBit usage = TextureUsageBit::kAllSampled;
 		usage |= (preferCompute) ? TextureUsageBit::kUavComputeWrite : TextureUsageBit::kFramebufferWrite;
-		TextureInitInfo texInit = getRenderer().create2DRenderTargetInitInfo(rez.x(), rez.y(), Format::kR8_Unorm, usage, "SSAO #1");
+		TextureInitInfo texInit =
+			getRenderer().create2DRenderTargetInitInfo(rez.x(), rez.y(), Format::kR8G8B8A8_Snorm, usage, "Bent normals + SSAO #1");
 		m_tex[0] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
 
-		texInit.setName("SSAO #2");
+		texInit.setName("Bent normals + SSAO #2");
 		m_tex[1] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
 	}
 
-	m_ssaoWithDepthRtDescr = getRenderer().create2DRenderTargetDescription(rez.x(), rez.y(), Format::kR16G16_Unorm, "SSAO+depth");
-	m_ssaoWithDepthRtDescr.bake();
-
-	m_ssaoRtDescr = getRenderer().create2DRenderTargetDescription(rez.x(), rez.y(), Format::kR8_Unorm, "SSAO");
-	m_ssaoRtDescr.bake();
+	m_bentNormalsAndSsaoRtDescr =
+		getRenderer().create2DRenderTargetDescription(rez.x(), rez.y(), Format::kR8G8B8A8_Snorm, "Bent normals + SSAO temp");
+	m_bentNormalsAndSsaoRtDescr.bake();
 
 	m_fbDescr.m_colorAttachmentCount = 1;
 	m_fbDescr.bake();
@@ -79,6 +78,7 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 
 	RenderTargetHandle historyRt;
 	RenderTargetHandle finalRt;
+	RenderTargetHandle bentNormalsFinalRt;
 
 	if(m_texImportedOnce) [[likely]]
 	{
@@ -94,8 +94,7 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 
 	m_runCtx.m_finalRt = finalRt;
 
-	const RenderTargetHandle ssaoWithDepthRt = rgraph.newRenderTarget(m_ssaoWithDepthRtDescr);
-	const RenderTargetHandle ssaoRt = rgraph.newRenderTarget(m_ssaoRtDescr);
+	const RenderTargetHandle bentNormalsAndSsaoTempRt = rgraph.newRenderTarget(m_bentNormalsAndSsaoRtDescr);
 
 	TextureUsageBit readUsage;
 	TextureUsageBit writeUsage;
@@ -121,15 +120,15 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		else
 		{
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO");
-			pass.setFramebufferInfo(m_fbDescr, {ssaoWithDepthRt}, {});
+			pass.setFramebufferInfo(m_fbDescr, {finalRt}, {});
 			ppass = &pass;
 		}
 
-		ppass->newTextureDependency(ssaoWithDepthRt, writeUsage);
 		ppass->newTextureDependency(getRenderer().getGBuffer().getColorRt(2), readUsage);
 		ppass->newTextureDependency(getRenderer().getGBuffer().getDepthRt(), readUsage);
+		ppass->newTextureDependency(finalRt, writeUsage);
 
-		ppass->setWork([this, &ctx, ssaoWithDepthRt](RenderPassWorkContext& rgraphCtx) {
+		ppass->setWork([this, &ctx, finalRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_grProg.get());
@@ -141,7 +140,7 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(0, 3, getRenderer().getSamplers().m_trilinearRepeat.get());
 			cmdb.bindSampler(0, 4, getRenderer().getSamplers().m_trilinearClamp.get());
 
-			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2 : getRenderer().getInternalResolution();
+			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2u : getRenderer().getInternalResolution();
 
 			SsaoConstants consts;
 			consts.m_radius = g_ssaoRadiusCVar.get();
@@ -155,12 +154,11 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			consts.m_frameCount = getRenderer().getFrameCount() % kMaxU32;
 			consts.m_ssaoPower = g_ssaoPower.get();
 			consts.m_viewMat = ctx.m_matrices.m_view;
-			computeLinearizeDepthOptimal(ctx.m_cameraNear, ctx.m_cameraFar, consts.m_linearizeDepthParams.x(), consts.m_linearizeDepthParams.y());
 			cmdb.setPushConstants(&consts, sizeof(consts));
 
 			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindUavTexture(0, 5, ssaoWithDepthRt);
+				rgraphCtx.bindUavTexture(0, 5, finalRt);
 
 				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
@@ -185,26 +183,33 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		else
 		{
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO spatial denoise");
-			pass.setFramebufferInfo(m_fbDescr, {ssaoRt}, {});
+			pass.setFramebufferInfo(m_fbDescr, {bentNormalsAndSsaoTempRt}, {});
 			ppass = &pass;
 		}
 
-		ppass->newTextureDependency(ssaoWithDepthRt, readUsage);
-		ppass->newTextureDependency(ssaoRt, writeUsage);
+		ppass->newTextureDependency(finalRt, readUsage);
+		ppass->newTextureDependency(getRenderer().getGBuffer().getDepthRt(), readUsage);
+		ppass->newTextureDependency(bentNormalsAndSsaoTempRt, writeUsage);
 
-		ppass->setWork([this, ssaoWithDepthRt, ssaoRt](RenderPassWorkContext& rgraphCtx) {
+		ppass->setWork([this, finalRt, bentNormalsAndSsaoTempRt, &ctx](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_spatialDenoiseGrProg.get());
 
 			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
-			rgraphCtx.bindColorTexture(0, 1, ssaoWithDepthRt);
+			rgraphCtx.bindColorTexture(0, 1, finalRt);
+			rgraphCtx.bindColorTexture(0, 2, getRenderer().getGBuffer().getDepthRt());
 
 			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2u : getRenderer().getInternalResolution();
 
+			SsaoSpatialDenoiseConstants consts;
+			computeLinearizeDepthOptimal(ctx.m_cameraNear, ctx.m_cameraFar, consts.m_linearizeDepthParams.x(), consts.m_linearizeDepthParams.y());
+			consts.m_viewToWorldMat = ctx.m_matrices.m_cameraTransform;
+			cmdb.setPushConstants(&consts, sizeof(consts));
+
 			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindUavTexture(0, 2, ssaoRt);
+				rgraphCtx.bindUavTexture(0, 3, bentNormalsAndSsaoTempRt);
 				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
 			else
@@ -231,18 +236,18 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			ppass = &pass;
 		}
 
-		ppass->newTextureDependency(ssaoRt, readUsage);
+		ppass->newTextureDependency(bentNormalsAndSsaoTempRt, readUsage);
 		ppass->newTextureDependency(historyRt, readUsage);
 		ppass->newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), readUsage);
 		ppass->newTextureDependency(finalRt, writeUsage);
 
-		ppass->setWork([this, ssaoRt, historyRt](RenderPassWorkContext& rgraphCtx) {
+		ppass->setWork([this, bentNormalsAndSsaoTempRt, finalRt, historyRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_tempralDenoiseGrProg.get());
 
 			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
-			rgraphCtx.bindColorTexture(0, 1, ssaoRt);
+			rgraphCtx.bindColorTexture(0, 1, bentNormalsAndSsaoTempRt);
 			rgraphCtx.bindColorTexture(0, 2, historyRt);
 			rgraphCtx.bindColorTexture(0, 3, getRenderer().getMotionVectors().getMotionVectorsRt());
 
@@ -250,7 +255,7 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 
 			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindUavTexture(0, 4, m_runCtx.m_finalRt);
+				rgraphCtx.bindUavTexture(0, 4, finalRt);
 				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
 			else

+ 1 - 2
AnKi/Renderer/Ssao.h

@@ -46,8 +46,7 @@ public:
 	ShaderProgramPtr m_tempralDenoiseGrProg;
 
 	FramebufferDescription m_fbDescr;
-	RenderTargetDescription m_ssaoWithDepthRtDescr;
-	RenderTargetDescription m_ssaoRtDescr;
+	RenderTargetDescription m_bentNormalsAndSsaoRtDescr;
 
 	Array<TexturePtr, 2> m_tex;
 	Bool m_texImportedOnce = false;

+ 1 - 1
AnKi/Renderer/Utils/HzbGenerator.cpp

@@ -257,7 +257,7 @@ void HzbGenerator::populateRenderGraphDirectionalLight(const HzbDirectionalLight
 			ANKI_ASSERT(cascadeMinDepth <= cascadeMaxDepth);
 		}
 
-		RenderTargetDescription depthRtDescr("HZB boxes depth");
+		RenderTargetDescription depthRtDescr(generateTempPassName("HZB boxes depth", i));
 		depthRtDescr.m_width = cascade.m_hzbRtSize.x() * 2;
 		depthRtDescr.m_height = cascade.m_hzbRtSize.y() * 2;
 		depthRtDescr.m_format = Format::kD16_Unorm;

+ 9 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -104,11 +104,19 @@ struct SsaoConstants
 	F32 m_projectionMat22;
 	F32 m_projectionMat23;
 
-	Vec2 m_linearizeDepthParams;
+	Vec2 m_padding;
 	RF32 m_ssaoPower;
 	U32 m_frameCount;
 
 	Mat3x4 m_viewMat;
 };
 
+struct SsaoSpatialDenoiseConstants
+{
+	Mat3x4 m_viewToWorldMat;
+
+	Vec2 m_linearizeDepthParams;
+	Vec2 m_padding;
+};
+
 ANKI_END_NAMESPACE

+ 6 - 5
AnKi/Shaders/LightShading.ankiprog

@@ -73,7 +73,9 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
 
 	// Apply SSAO
-	const RF32 ssao = g_ssaoTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0f).x;
+	const RVec4 ssaoAndBentNormals = g_ssaoTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0f);
+	const RF32 ssao = ssaoAndBentNormals.w;
+	const RVec3 bentNormal = ssaoAndBentNormals.xyz;
 	gbuffer.m_diffuse *= ssao;
 
 	// Ambient and emissive color
@@ -92,7 +94,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 
 			// Sample
 			probeColor =
-				sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe, g_bindlessTextures3dF32[probe.m_volumeTexture], g_trilinearClampSampler);
+				sampleGlobalIllumination(worldPos, bentNormal, probe, g_bindlessTextures3dF32[probe.m_volumeTexture], g_trilinearClampSampler);
 		}
 		else
 		{
@@ -112,9 +114,8 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 				totalBlendWeight += blendWeight;
 
 				// Sample
-				const RVec3 c =
-					sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe,
-											 g_bindlessTextures3dF32[NonUniformResourceIndex(probe.m_volumeTexture)], g_trilinearClampSampler);
+				const RVec3 c = sampleGlobalIllumination(
+					worldPos, bentNormal, probe, g_bindlessTextures3dF32[NonUniformResourceIndex(probe.m_volumeTexture)], g_trilinearClampSampler);
 				probeColor += c * blendWeight;
 			}
 

+ 82 - 59
AnKi/Shaders/Ssao.ankiprog

@@ -27,7 +27,7 @@
 [[vk::binding(4)]] SamplerState g_linearAnyClampSampler;
 
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(5)]] RWTexture2D<Vec4> g_outUav;
+[[vk::binding(5)]] RWTexture2D<RVec4> g_bentNormalsAndSsaoUav;
 #	endif
 
 [[vk::push_constant]] ConstantBuffer<SsaoConstants> g_consts;
@@ -52,7 +52,7 @@ RF32 computeFalloff(RF32 len)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-Vec2 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
+RVec4 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -148,12 +148,18 @@ Vec2 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
 	// Apply power
 	Vd = pow(Vd, g_consts.m_ssaoPower);
 
-	const F32 linearDepth = linearizeDepthOptimal(depth, g_consts.m_linearizeDepthParams.x, g_consts.m_linearizeDepthParams.y);
+	// Compute bent normal: see "Algorithm 2 Extension that computes bent normals b."
+	const RF32 t0 =
+		(6.0f * sin(h1 - n) - sin(3.0f * h1 - n) + 6.0f * sin(h2 - n) - sin(3.0f * h2 - n) + 16.0f * sin(n) - 3.0f * (sin(h1 + n) + sin(h2 + n)))
+		/ 12.0f;
+	const RF32 t1 = (-cos(3.0f * h1 - n) - cos(3.0f * h2 - n) + 8.0f * cos(n) - 3.0f * (cos(h1 + n) + cos(h2 + n))) / 12.0f;
+	RVec3 bentNormal = RVec3(-dir2d.x * t0, -dir2d.y * t0, t1);
+	bentNormal = normalize(bentNormal);
 
 #	if ANKI_COMPUTE_SHADER
-	g_outUav[svDispatchThreadId] = Vec4(Vd, linearDepth, 0.0f, 0.0f);
+	g_bentNormalsAndSsaoUav[svDispatchThreadId] = RVec4(bentNormal, Vd);
 #	else
-	return Vec2(Vd, linearDepth);
+	return RVec4(bentNormal, Vd);
 #	endif
 }
 #endif // ANKI_TECHNIQUE_Ssao && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
@@ -163,76 +169,91 @@ Vec2 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
 // ===========================================================================
 #if ANKI_TECHNIQUE_SsaoSpatialDenoise && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
 #	include <AnKi/Shaders/BilateralFilter.hlsl>
+#	include <AnKi/Shaders/Include/MiscRendererTypes.h>
+#	include <AnKi/Shaders/Functions.hlsl>
 
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(1)]] Texture2D<Vec4> g_inTex;
+[[vk::binding(1)]] Texture2D<RVec4> g_bentNormalsAndSsaoTex;
+[[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
 
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(2)]] RWTexture2D<RVec4> g_outImg;
+[[vk::binding(3)]] RWTexture2D<RVec4> g_bentNormalsAndSsaoUav;
 #	endif
 
+[[vk::push_constant]] ConstantBuffer<SsaoSpatialDenoiseConstants> g_consts;
+
 F32 computeWeight(F32 depth, F32 refDepth)
 {
 	const F32 diff = abs(depth - refDepth);
 	return sqrt(1.0 / (0.0003 + diff));
 }
 
-void sampleTex(Vec2 uv, IVec2 offset, F32 refDepth, inout RF32 col, inout F32 weight)
+void sampleTex(Vec2 uv, IVec2 offset, F32 refDepth, inout RF32 ssao, inout RVec3 bentNormal, inout F32 weight)
 {
-	const Vec2 ssaoWithDepth = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset).xy;
-	const F32 w = computeWeight(refDepth, ssaoWithDepth.y);
-	col += ssaoWithDepth.x * w;
+	const F32 linearDepth = linearizeDepthOptimal(g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset).x,
+												  g_consts.m_linearizeDepthParams.x, g_consts.m_linearizeDepthParams.y);
+	const RVec4 bentNormalAndSsao = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset);
+	const F32 w = computeWeight(refDepth, linearDepth);
+	ssao += bentNormalAndSsao.w * w;
+	bentNormal += bentNormalAndSsao.xyz * w;
 	weight += w;
 }
 
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 #	endif
 {
 // Set UVs
 #	if ANKI_COMPUTE_SHADER
 	Vec2 textureSize;
 	U32 mipCount;
-	g_inTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
+	g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
 #	endif
 
 	// Sample ref
-	const Vec2 refSsaoAndDepth = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
-	RF32 color = refSsaoAndDepth.x;
-	const F32 refDepth = refSsaoAndDepth.y;
+	const RVec4 refBentNormalAndSsao = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0);
+	RF32 ssao = refBentNormalAndSsao.w;
+	RVec3 bentNormal = refBentNormalAndSsao.xyz;
+	const F32 refDepth = linearizeDepthOptimal(g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x, g_consts.m_linearizeDepthParams.x,
+											   g_consts.m_linearizeDepthParams.y);
 	F32 weight = computeWeight(0.0f, 0.0f); // Highest weight that this function can give
 
 	// Sample taps
-	sampleTex(uv, IVec2(1, 1), refDepth, color, weight);
-	sampleTex(uv, IVec2(0, 1), refDepth, color, weight);
-	sampleTex(uv, IVec2(-1, 1), refDepth, color, weight);
-	sampleTex(uv, IVec2(-1, 0), refDepth, color, weight);
-	sampleTex(uv, IVec2(-1, -1), refDepth, color, weight);
-	sampleTex(uv, IVec2(0, -1), refDepth, color, weight);
-	sampleTex(uv, IVec2(1, -1), refDepth, color, weight);
-	sampleTex(uv, IVec2(1, 0), refDepth, color, weight);
+	sampleTex(uv, IVec2(1, 1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(0, 1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-1, 1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-1, 0), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-1, -1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(0, -1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(1, -1), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(1, 0), refDepth, ssao, bentNormal, weight);
 
 #	if SPATIAL_DENOISE_QUALITY == 1
-	sampleTex(uv, IVec2(2, 2), refDepth, color, weight);
-	sampleTex(uv, IVec2(0, 2), refDepth, color, weight);
-	sampleTex(uv, IVec2(-2, 2), refDepth, color, weight);
-	sampleTex(uv, IVec2(-2, 0), refDepth, color, weight);
-	sampleTex(uv, IVec2(-2, -2), refDepth, color, weight);
-	sampleTex(uv, IVec2(0, -2), refDepth, color, weight);
-	sampleTex(uv, IVec2(2, -2), refDepth, color, weight);
-	sampleTex(uv, IVec2(2, 0), refDepth, color, weight);
+	sampleTex(uv, IVec2(2, 2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(0, 2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-2, 2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-2, 0), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(-2, -2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(0, -2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(2, -2), refDepth, ssao, bentNormal, weight);
+	sampleTex(uv, IVec2(2, 0), refDepth, ssao, bentNormal, weight);
 #	endif
 
-	color /= weight;
+	ssao /= weight;
+	ssao = saturate(ssao);
+
+	bentNormal /= weight;
+	bentNormal = normalize(bentNormal);
+	bentNormal = mul(g_consts.m_viewToWorldMat, Vec4(bentNormal, 0.0f));
 
 	// Write value
 #	if ANKI_COMPUTE_SHADER
-	g_outImg[svDispatchThreadId] = color;
+	g_bentNormalsAndSsaoUav[svDispatchThreadId] = RVec4(bentNormal, ssao);
 #	else
-	return color;
+	return RVec4(bentNormal, ssao);
 #	endif
 }
 #endif
@@ -242,54 +263,55 @@ RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 // ===========================================================================
 #if ANKI_TECHNIQUE_SsaoTemporalDenoise && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
 #	include <AnKi/Shaders/Functions.hlsl>
+#	include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(1)]] Texture2D<RVec4> g_ssaoTex;
-[[vk::binding(2)]] Texture2D<RVec4> g_historySsaoTex;
+[[vk::binding(1)]] Texture2D<RVec4> g_bentNormalsAndSsaoTex;
+[[vk::binding(2)]] Texture2D<RVec4> g_historyBentNormalsAndSsaoTex;
 [[vk::binding(3)]] Texture2D<Vec4> g_motionVectorsTex;
 
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(4)]] RWTexture2D<RVec4> g_outImg;
+[[vk::binding(4)]] RWTexture2D<RVec4> g_bentNormalsAndSsaoUav;
 #	endif
 
 // Temporal denoise entry point (compute or fragment variant, selected by ANKI_COMPUTE_SHADER).
 // Reprojects the history texture through the motion vectors, clamps the reprojected sample to a variance box built
 // from the current texel and its four axis-aligned neighbours, then blends history with the current value.
 // NOTE(review): the RVec4 payload presumably packs xyz = bent normal, w = SSAO, matching the RVec4(bentNormal, ssao)
 // written by the spatial pass; confirm that this pass consumes that output.
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	// The compute variant has no interpolated UV, so derive it from the thread ID (texel centre = +0.5)
 	Vec2 textureSize;
 	U32 mipCount;
-	g_ssaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
+	g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
 #	endif
 
 	const Vec2 historyUv = uv + g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).xy;
 
 	// Read textures
-	RF32 history = g_historySsaoTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f).x;
-	RF32 current = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).x;
+	RVec4 history = g_historyBentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f);
+	RVec4 current = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f);
 
 	// Remove ghosting by clamping the history color to neighbour's AABB
-	const RF32 near0 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(1, 0)).x;
-	const RF32 near1 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, 1)).x;
-	const RF32 near2 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(-1, 0)).x;
-	const RF32 near3 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, -1)).x;
+	const RVec4 near0 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(1, 0));
+	const RVec4 near1 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, 1));
+	const RVec4 near2 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(-1, 0));
+	const RVec4 near3 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, -1));
 
 #	if 0
-	const RF32 boxMin = min(current, min4(near0, near1, near2, near3));
-	const RF32 boxMax = max(current, max4(near0, near1, near2, near3));
+	const RVec4 boxMin = min(current, min4(near0, near1, near2, near3));
+	const RVec4 boxMax = max(current, max4(near0, near1, near2, near3));
 #	else
 	// Active path: the sum (m1) and sum of squares (m2) over the 5 samples give the mean (mu) and standard deviation (sigma)
-	const RF32 m1 = current + near0 + near1 + near2 + near3;
-	const RF32 m2 = current * current + near0 * near0 + near1 * near1 + near2 * near2 + near3 * near3;
+	const RVec4 m1 = current + near0 + near1 + near2 + near3;
+	const RVec4 m2 = current * current + near0 * near0 + near1 * near1 + near2 * near2 + near3 * near3;
 
-	const RF32 mu = m1 / 5.0;
-	const RF32 sigma = sqrt(m2 / 5.0 - mu * mu);
+	const RVec4 mu = m1 / 5.0;
+	const RVec4 sigma = sqrt(m2 / 5.0 - mu * mu);
 	// NOTE(review): m2 / 5.0 - mu * mu may come out slightly negative from rounding (more likely if RVec4 is reduced
 	// precision), which would make sqrt() produce NaN. Consider clamping the argument to >= 0. TODO confirm.
 
 	const F32 varianceClippingGamma = 1.2f;
-	const RF32 boxMin = mu - varianceClippingGamma * sigma;
-	const RF32 boxMax = mu + varianceClippingGamma * sigma;
+	const RVec4 boxMin = mu - varianceClippingGamma * sigma;
+	const RVec4 boxMax = mu + varianceClippingGamma * sigma;
 #	endif
 
 	history = clamp(history, boxMin, boxMax);
@@ -297,8 +319,8 @@ RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 	// Final
 	const RF32 kBlendFactor = 0.1f;
 
 	// NOTE(review): in the new version computeLuminance() is fed xyz, which appears to hold a direction rather than a
 	// colour, so the result can presumably be negative. Verify this is intended as a scalar difference metric.
-	const F32 lum0 = current;
-	const F32 lum1 = history;
+	const F32 lum0 = computeLuminance(current.xyz) * current.w;
+	const F32 lum1 = computeLuminance(history.xyz) * history.w;
 	const F32 maxLum = 1.0;
 
 	RF32 diff = abs(lum0 - lum1) / max(lum0, max(lum1, maxLum + kEpsilonF32));
@@ -306,13 +328,14 @@ RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 	diff = diff * diff;
 	const RF32 feedback = lerp(0.0, kBlendFactor, diff);
 
 	// Blend history towards the current value by feedback; the new version then re-normalizes xyz.
 	// NOTE(review): normalize() of a zero-length xyz is undefined. Confirm the blended vector cannot cancel out.
-	const RF32 ssao = lerp(history, current, feedback);
+	RVec4 finalVal = lerp(history, current, feedback);
+	finalVal.xyz = normalize(finalVal.xyz);
 
 	// Write value
 #	if ANKI_COMPUTE_SHADER
-	g_outImg[svDispatchThreadId] = ssao;
+	g_bentNormalsAndSsaoUav[svDispatchThreadId] = finalVal;
 #	else
-	return ssao;
+	return finalVal;
 #	endif
 }
 #endif

+ 1 - 1
AnKi/Shaders/VisualizeRenderTarget.ankiprog

@@ -16,6 +16,6 @@
 // Fragment entry point: samples g_inTex at mip 0 with g_nearestAnyClampSampler and outputs the texel as a Vec3.
 // The old version replicated the x channel into all three outputs; the new version outputs the xyz channels.
 Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 {
 	const Vec4 rgba = g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
-	return rgba.xxx;
+	return rgba.xyz;
 }
 #pragma anki technique_end frag