소스 검색

Fix motion vector calculation. Improve SSAO denoising

Panagiotis Christopoulos Charitos 1 년 전
부모
커밋
a99b6b93bc

+ 1 - 1
AnKi/Importer/GltfImporterMesh.cpp

@@ -444,7 +444,7 @@ static void generateMeshlets(SubMesh& submesh)
 		outMeshlet.m_sphere =
 			computeBoundingSphere(&newVertexBuffer[outMeshlet.m_firstVertex].m_position, outMeshlet.m_vertexCount, sizeof(TempVertex));
 
-		if(bounds.radius < outMeshlet.m_sphere.getRadius())
+		if(bounds.radius < outMeshlet.m_sphere.getRadius() && bounds.radius > 0.0f)
 		{
 			// meshopt computed smaller sphere, use that one
 			outMeshlet.m_sphere.setCenter(Vec3(&bounds.center[0]));

+ 13 - 57
AnKi/Renderer/MotionVectors.cpp

@@ -8,6 +8,7 @@
 #include <AnKi/Renderer/GBuffer.h>
 #include <AnKi/Core/CVarSet.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/Window/Input.h>
 
 namespace anki {
 
@@ -33,24 +34,7 @@ Error MotionVectors::initInternal()
 		getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16_Sfloat, "MotionVectors");
 	m_motionVectorsRtDescr.bake();
 
-	TextureUsageBit historyLengthUsage = TextureUsageBit::kAllSampled;
-	if(g_preferComputeCVar.get())
-	{
-		historyLengthUsage |= TextureUsageBit::kUavComputeWrite;
-	}
-	else
-	{
-		historyLengthUsage |= TextureUsageBit::kFramebufferWrite;
-	}
-
-	TextureInitInfo historyLengthTexInit =
-		getRenderer().create2DRenderTargetInitInfo(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
-												   Format::kR8_Unorm, historyLengthUsage, "MotionVectorsHistoryLen#1");
-	m_historyLengthTextures[0] = getRenderer().createAndClearRenderTarget(historyLengthTexInit, TextureUsageBit::kAllSampled);
-	historyLengthTexInit.setName("MotionVectorsHistoryLen#2");
-	m_historyLengthTextures[1] = getRenderer().createAndClearRenderTarget(historyLengthTexInit, TextureUsageBit::kAllSampled);
-
-	m_fbDescr.m_colorAttachmentCount = 2;
+	m_fbDescr.m_colorAttachmentCount = 1;
 	m_fbDescr.bake();
 
 	return Error::kNone;
@@ -63,24 +47,6 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 
 	m_runCtx.m_motionVectorsRtHandle = rgraph.newRenderTarget(m_motionVectorsRtDescr);
 
-	const U32 writeHistoryLenTexIdx = getRenderer().getFrameCount() & 1;
-	const U32 readHistoryLenTexIdx = !writeHistoryLenTexIdx;
-
-	if(m_historyLengthTexturesImportedOnce) [[likely]]
-	{
-		m_runCtx.m_historyLengthWriteRtHandle = rgraph.importRenderTarget(m_historyLengthTextures[writeHistoryLenTexIdx].get());
-		m_runCtx.m_historyLengthReadRtHandle = rgraph.importRenderTarget(m_historyLengthTextures[readHistoryLenTexIdx].get());
-	}
-	else
-	{
-		m_runCtx.m_historyLengthWriteRtHandle =
-			rgraph.importRenderTarget(m_historyLengthTextures[writeHistoryLenTexIdx].get(), TextureUsageBit::kAllSampled);
-		m_runCtx.m_historyLengthReadRtHandle =
-			rgraph.importRenderTarget(m_historyLengthTextures[readHistoryLenTexIdx].get(), TextureUsageBit::kAllSampled);
-
-		m_historyLengthTexturesImportedOnce = true;
-	}
-
 	RenderPassDescriptionBase* ppass;
 	TextureUsageBit readUsage;
 	TextureUsageBit writeUsage;
@@ -95,7 +61,7 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 	else
 	{
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("MotionVectors");
-		pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_motionVectorsRtHandle, m_runCtx.m_historyLengthWriteRtHandle});
+		pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_motionVectorsRtHandle});
 
 		readUsage = TextureUsageBit::kSampledFragment;
 		writeUsage = TextureUsageBit::kFramebufferWrite;
@@ -108,34 +74,26 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 
 		cmdb.bindShaderProgram(m_grProg.get());
 
-		cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
+		cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
 		rgraphCtx.bindTexture(0, 1, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-		rgraphCtx.bindTexture(0, 2, getRenderer().getGBuffer().getPreviousFrameDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
-		rgraphCtx.bindColorTexture(0, 3, getRenderer().getGBuffer().getColorRt(3));
-		rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_historyLengthReadRtHandle);
+		rgraphCtx.bindColorTexture(0, 2, getRenderer().getGBuffer().getColorRt(3));
 
 		class Constants
 		{
 		public:
-			Mat4 m_reprojectionMat;
-			Mat4 m_viewProjectionInvMat;
-			Mat4 m_prevViewProjectionInvMat;
-
-			Vec2 m_viewportSize;
-			F32 m_padding0;
-			F32 m_padding1;
+			Mat4 m_currentViewProjMat;
+			Mat4 m_currentInvViewProjMat;
+			Mat4 m_prevViewProjMat;
 		} * pc;
-		pc = allocateAndBindConstants<Constants>(cmdb, 0, 5);
+		pc = allocateAndBindConstants<Constants>(cmdb, 0, 3);
 
-		pc->m_reprojectionMat = ctx.m_matrices.m_reprojection;
-		pc->m_viewProjectionInvMat = ctx.m_matrices.m_invertedViewProjectionJitter;
-		pc->m_prevViewProjectionInvMat = ctx.m_prevMatrices.m_invertedViewProjectionJitter;
-		pc->m_viewportSize = Vec2(getRenderer().getInternalResolution());
+		pc->m_currentViewProjMat = ctx.m_matrices.m_viewProjection;
+		pc->m_currentInvViewProjMat = ctx.m_matrices.m_invertedViewProjection;
+		pc->m_prevViewProjMat = ctx.m_prevMatrices.m_viewProjection;
 
 		if(g_preferComputeCVar.get())
 		{
-			rgraphCtx.bindUavTexture(0, 6, m_runCtx.m_motionVectorsRtHandle, TextureSubresourceInfo());
-			rgraphCtx.bindUavTexture(0, 7, m_runCtx.m_historyLengthWriteRtHandle, TextureSubresourceInfo());
+			rgraphCtx.bindUavTexture(0, 4, m_runCtx.m_motionVectorsRtHandle, TextureSubresourceInfo());
 		}
 
 		if(g_preferComputeCVar.get())
@@ -151,8 +109,6 @@ void MotionVectors::populateRenderGraph(RenderingContext& ctx)
 	});
 
 	ppass->newTextureDependency(m_runCtx.m_motionVectorsRtHandle, writeUsage);
-	ppass->newTextureDependency(m_runCtx.m_historyLengthWriteRtHandle, writeUsage);
-	ppass->newTextureDependency(m_runCtx.m_historyLengthReadRtHandle, readUsage);
 	ppass->newTextureDependency(getRenderer().getGBuffer().getColorRt(3), readUsage);
 	ppass->newTextureDependency(getRenderer().getGBuffer().getDepthRt(), readUsage);
 	ppass->newTextureDependency(getRenderer().getGBuffer().getPreviousFrameDepthRt(), readUsage);

+ 0 - 16
AnKi/Renderer/MotionVectors.h

@@ -20,7 +20,6 @@ public:
 	MotionVectors()
 	{
 		registerDebugRenderTarget("MotionVectors");
-		registerDebugRenderTarget("MotionVectorsHistoryLength");
 	}
 
 	Error init();
@@ -32,11 +31,6 @@ public:
 		return m_runCtx.m_motionVectorsRtHandle;
 	}
 
-	RenderTargetHandle getHistoryLengthRt() const
-	{
-		return m_runCtx.m_historyLengthWriteRtHandle;
-	}
-
 	void getDebugRenderTarget(CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override
 	{
@@ -44,11 +38,6 @@ public:
 		{
 			handles[0] = m_runCtx.m_motionVectorsRtHandle;
 		}
-		else
-		{
-			ANKI_ASSERT(rtName == "MotionVectorsHistoryLength");
-			handles[0] = m_runCtx.m_historyLengthWriteRtHandle;
-		}
 	}
 
 private:
@@ -57,15 +46,10 @@ private:
 	RenderTargetDescription m_motionVectorsRtDescr;
 	FramebufferDescription m_fbDescr;
 
-	Array<TexturePtr, 2> m_historyLengthTextures;
-	Bool m_historyLengthTexturesImportedOnce = false;
-
 	class
 	{
 	public:
 		RenderTargetHandle m_motionVectorsRtHandle;
-		RenderTargetHandle m_historyLengthReadRtHandle;
-		RenderTargetHandle m_historyLengthWriteRtHandle;
 	} m_runCtx;
 
 	Error initInternal();

+ 3 - 2
AnKi/Renderer/Renderer.cpp

@@ -269,7 +269,8 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	ctx.m_matrices.m_viewProjection = cam.getFrustum().getViewProjectionMatrix();
 
 	Vec2 jitter = m_jitterOffsets[m_frameCount & (m_jitterOffsets.getSize() - 1)]; // In [-0.5, 0.5]
-	const Vec2 ndcPixelSize = 2.0f / Vec2(m_internalResolution);
+	jitter *= 2.0f; // In [-1, 1]
+	const Vec2 ndcPixelSize = 1.0f / Vec2(m_internalResolution);
 	jitter *= ndcPixelSize;
 	ctx.m_matrices.m_jitter = Mat4::getIdentity();
 	ctx.m_matrices.m_jitter.setTranslationPart(Vec4(jitter, 0.0f, 1.0f));
@@ -281,7 +282,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	ctx.m_matrices.m_invertedViewProjection = ctx.m_matrices.m_viewProjection.getInverse();
 	ctx.m_matrices.m_invertedProjectionJitter = ctx.m_matrices.m_projectionJitter.getInverse();
 
-	ctx.m_matrices.m_reprojection = ctx.m_matrices.m_jitter * ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_invertedViewProjectionJitter;
+	ctx.m_matrices.m_reprojection = ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_invertedViewProjection;
 
 	ctx.m_matrices.m_unprojectionParameters = ctx.m_matrices.m_projection.extractPerspectiveUnprojectionParams();
 

+ 15 - 7
AnKi/Renderer/RtShadows.cpp

@@ -135,6 +135,18 @@ Error RtShadows::initInternal()
 		m_upscaledRtDescr.bake();
 	}
 
+	{
+		TextureInitInfo texinit = getRenderer().create2DRenderTargetInitInfo(
+			getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / 2, Format::kR32_Sfloat,
+			TextureUsageBit::kAllSampled | TextureUsageBit::kUavTraceRaysWrite | TextureUsageBit::kUavComputeWrite, "RtShadows history len");
+		ClearValue clear;
+		clear.m_colorf[0] = 1.0f;
+		m_dummyHistoryLenTex = getRenderer().createAndClearRenderTarget(texinit, TextureUsageBit::kSampledFragment, clear);
+
+		TextureViewInitInfo viewInit(m_dummyHistoryLenTex.get());
+		m_dummyHistoryLenTexView = GrManager::getSingleton().newTextureView(viewInit);
+	}
+
 	// Misc
 	m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment, m_sbtRecordSize);
 
@@ -279,7 +291,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 												 AccelerationStructureUsageBit::kTraceRaysRead);
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
 		rpass.newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), TextureUsageBit::kSampledTraceRays);
-		rpass.newTextureDependency(getRenderer().getMotionVectors().getHistoryLengthRt(), TextureUsageBit::kSampledTraceRays);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSampledTraceRays);
 
 		rpass.newTextureDependency(m_runCtx.m_prevMomentsRt, TextureUsageBit::kSampledTraceRays);
@@ -327,7 +338,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(kSet, 5, getRenderer().getSamplers().m_trilinearClamp.get());
 			rgraphCtx.bindTexture(kSet, 6, getRenderer().getDepthDownscale().getRt(), DepthDownscale::kQuarterInternalResolution);
 			rgraphCtx.bindColorTexture(kSet, 7, getRenderer().getMotionVectors().getMotionVectorsRt());
-			rgraphCtx.bindColorTexture(kSet, 8, getRenderer().getMotionVectors().getHistoryLengthRt());
+			cmdb.bindTexture(kSet, 8, m_dummyHistoryLenTexView.get());
 			rgraphCtx.bindColorTexture(kSet, 9, getRenderer().getGBuffer().getColorRt(2));
 			rgraphCtx.bindAccelerationStructure(kSet, 10, getRenderer().getAccelerationStructureBuilder().getAccelerationStructureHandle());
 			rgraphCtx.bindColorTexture(kSet, 11, m_runCtx.m_prevMomentsRt);
@@ -353,7 +364,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kSampledCompute);
-		rpass.newTextureDependency(getRenderer().getMotionVectors().getHistoryLengthRt(), TextureUsageBit::kSampledCompute);
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[1], TextureUsageBit::kUavComputeWrite);
 	}
@@ -370,7 +380,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kSampledCompute);
-		rpass.newTextureDependency(getRenderer().getMotionVectors().getHistoryLengthRt(), TextureUsageBit::kSampledCompute);
 
 		rpass.newTextureDependency(m_runCtx.m_historyRt, TextureUsageBit::kUavComputeWrite);
 	}
@@ -382,7 +391,6 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 
 		rpass.newTextureDependency(m_runCtx.m_intermediateShadowsRts[0], TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kSampledCompute);
-		rpass.newTextureDependency(getRenderer().getMotionVectors().getHistoryLengthRt(), TextureUsageBit::kSampledCompute);
 		rpass.newTextureDependency(ANKI_DEPTH_DEP);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSampledCompute);
 
@@ -399,7 +407,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 
 			rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateShadowsRts[0]);
 			rgraphCtx.bindColorTexture(0, 2, m_runCtx.m_currentMomentsRt);
-			rgraphCtx.bindColorTexture(0, 3, getRenderer().getMotionVectors().getHistoryLengthRt());
+			cmdb.bindTexture(0, 3, m_dummyHistoryLenTexView.get());
 			rgraphCtx.bindTexture(0, 4, getRenderer().getDepthDownscale().getRt(), DepthDownscale::kQuarterInternalResolution);
 
 			rgraphCtx.bindUavTexture(0, 5, m_runCtx.m_intermediateShadowsRts[1]);
@@ -519,7 +527,7 @@ void RtShadows::runDenoise(const RenderingContext& ctx, RenderPassWorkContext& r
 	rgraphCtx.bindTexture(0, 2, getRenderer().getDepthDownscale().getRt(), DepthDownscale::kQuarterInternalResolution);
 	rgraphCtx.bindColorTexture(0, 3, getRenderer().getGBuffer().getColorRt(2));
 	rgraphCtx.bindColorTexture(0, 4, m_runCtx.m_currentMomentsRt);
-	rgraphCtx.bindColorTexture(0, 5, getRenderer().getMotionVectors().getHistoryLengthRt());
+	cmdb.bindTexture(0, 5, m_dummyHistoryLenTexView.get());
 
 	rgraphCtx.bindUavTexture(0, 6, (horizontal) ? m_runCtx.m_intermediateShadowsRts[1] : m_runCtx.m_historyRt);
 

+ 3 - 0
AnKi/Renderer/RtShadows.h

@@ -46,6 +46,9 @@ public:
 	Array<TexturePtr, 2> m_momentsRts;
 
 	RenderTargetDescription m_varianceRtDescr;
+
+	TexturePtr m_dummyHistoryLenTex;
+	TextureViewPtr m_dummyHistoryLenTexView;
 	/// @}
 
 	/// @name Programs

+ 90 - 52
AnKi/Renderer/Ssao.cpp

@@ -15,7 +15,9 @@ namespace anki {
 static NumericCVar<U32> g_ssaoSampleCountCVar(CVarSubsystem::kRenderer, "SsaoSampleCount", 4, 1, 1024, "SSAO sample count");
 static NumericCVar<F32> g_ssaoRadiusCVar(CVarSubsystem::kRenderer, "SsaoRadius", 2.0f, 0.1f, 100.0f, "SSAO radius in meters");
 static BoolCVar g_ssaoQuarterRez(CVarSubsystem::kRenderer, "SsaoQuarterResolution", ANKI_PLATFORM_MOBILE, "Render SSAO in quarter rez");
-static NumericCVar<F32> g_ssaoPower(CVarSubsystem::kRenderer, "SsaoPower", 3.0f, 0.1f, 100.0f, "SSAO power");
+static NumericCVar<F32> g_ssaoPower(CVarSubsystem::kRenderer, "SsaoPower", 1.5f, 0.1f, 100.0f, "SSAO power");
+static NumericCVar<U8> g_ssaoSpatialQuality(CVarSubsystem::kRenderer, "SsaoSpatialQuality", (ANKI_PLATFORM_MOBILE) ? 0 : 1, 0, 1,
+											"SSAO spatial denoise quality");
 
 Error Ssao::init()
 {
@@ -39,17 +41,27 @@ Error Ssao::initInternal()
 		TextureUsageBit usage = TextureUsageBit::kAllSampled;
 		usage |= (preferCompute) ? TextureUsageBit::kUavComputeWrite : TextureUsageBit::kFramebufferWrite;
 		TextureInitInfo texInit = getRenderer().create2DRenderTargetInitInfo(rez.x(), rez.y(), Format::kR8_Unorm, usage, "SSAO #1");
-		m_rts[0] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
+		m_tex[0] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
+
 		texInit.setName("SSAO #2");
-		m_rts[1] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
+		m_tex[1] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kAllSampled);
 	}
 
+	m_ssaoWithDepthRtDescr = getRenderer().create2DRenderTargetDescription(rez.x(), rez.y(), Format::kR16G16_Unorm, "SSAO+depth");
+	m_ssaoWithDepthRtDescr.bake();
+
+	m_ssaoRtDescr = getRenderer().create2DRenderTargetDescription(rez.x(), rez.y(), Format::kR8_Unorm, "SSAO");
+	m_ssaoRtDescr.bake();
+
 	m_fbDescr.m_colorAttachmentCount = 1;
 	m_fbDescr.bake();
 
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SAMPLE_COUNT", 3}}, m_prog, m_grProg, "Ssao"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SAMPLE_COUNT", 5}}, m_prog, m_denoiseGrProgs[0], "SsaoDenoiseHorizontal"));
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SAMPLE_COUNT", 5}}, m_prog, m_denoiseGrProgs[1], "SsaoDenoiseVertical"));
+	ANKI_CHECK(
+		loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SPATIAL_DENOISE_QUALITY", g_ssaoSpatialQuality.get()}}, m_prog, m_grProg, "Ssao"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SPATIAL_DENOISE_QUALITY", g_ssaoSpatialQuality.get()}}, m_prog,
+								 m_spatialDenoiseGrProg, "SsaoSpatialDenoise"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Ssao.ankiprogbin", {{"SPATIAL_DENOISE_QUALITY", g_ssaoSpatialQuality.get()}}, m_prog,
+								 m_tempralDenoiseGrProg, "SsaoTemporalDenoise"));
 
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
 
@@ -64,18 +76,27 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 
 	const U32 readRtIdx = getRenderer().getFrameCount() & 1;
 	const U32 writeRtIdx = !readRtIdx;
-	if(m_rtsImportedOnce) [[likely]]
+
+	RenderTargetHandle historyRt;
+	RenderTargetHandle finalRt;
+
+	if(m_texImportedOnce) [[likely]]
 	{
-		m_runCtx.m_ssaoRts[0] = rgraph.importRenderTarget(m_rts[readRtIdx].get());
-		m_runCtx.m_ssaoRts[1] = rgraph.importRenderTarget(m_rts[writeRtIdx].get());
+		finalRt = rgraph.importRenderTarget(m_tex[writeRtIdx].get());
+		historyRt = rgraph.importRenderTarget(m_tex[readRtIdx].get());
 	}
 	else
 	{
-		m_runCtx.m_ssaoRts[0] = rgraph.importRenderTarget(m_rts[readRtIdx].get(), TextureUsageBit::kAllSampled);
-		m_runCtx.m_ssaoRts[1] = rgraph.importRenderTarget(m_rts[writeRtIdx].get(), TextureUsageBit::kAllSampled);
-		m_rtsImportedOnce = true;
+		finalRt = rgraph.importRenderTarget(m_tex[writeRtIdx].get(), TextureUsageBit::kAllSampled);
+		historyRt = rgraph.importRenderTarget(m_tex[readRtIdx].get(), TextureUsageBit::kAllSampled);
+		m_texImportedOnce = true;
 	}
 
+	m_runCtx.m_finalRt = finalRt;
+
+	const RenderTargetHandle ssaoWithDepthRt = rgraph.newRenderTarget(m_ssaoWithDepthRtDescr);
+	const RenderTargetHandle ssaoRt = rgraph.newRenderTarget(m_ssaoRtDescr);
+
 	TextureUsageBit readUsage;
 	TextureUsageBit writeUsage;
 	if(preferCompute)
@@ -100,18 +121,15 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		else
 		{
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO");
-			pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_ssaoRts[1]}, {});
+			pass.setFramebufferInfo(m_fbDescr, {ssaoWithDepthRt}, {});
 			ppass = &pass;
 		}
 
-		ppass->newTextureDependency(m_runCtx.m_ssaoRts[1], writeUsage);
-		ppass->newTextureDependency(m_runCtx.m_ssaoRts[0], readUsage);
+		ppass->newTextureDependency(ssaoWithDepthRt, writeUsage);
 		ppass->newTextureDependency(getRenderer().getGBuffer().getColorRt(2), readUsage);
 		ppass->newTextureDependency(getRenderer().getGBuffer().getDepthRt(), readUsage);
-		ppass->newTextureDependency(getRenderer().getMotionVectors().getHistoryLengthRt(), readUsage);
-		ppass->newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), readUsage);
 
-		ppass->setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		ppass->setWork([this, &ctx, ssaoWithDepthRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_grProg.get());
@@ -122,9 +140,6 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindTexture(0, 2, &m_noiseImage->getTextureView());
 			cmdb.bindSampler(0, 3, getRenderer().getSamplers().m_trilinearRepeat.get());
 			cmdb.bindSampler(0, 4, getRenderer().getSamplers().m_trilinearClamp.get());
-			rgraphCtx.bindColorTexture(0, 5, m_runCtx.m_ssaoRts[0]);
-			rgraphCtx.bindColorTexture(0, 6, getRenderer().getMotionVectors().getMotionVectorsRt());
-			rgraphCtx.bindColorTexture(0, 7, getRenderer().getMotionVectors().getHistoryLengthRt());
 
 			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2 : getRenderer().getInternalResolution();
 
@@ -140,12 +155,12 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			consts.m_frameCount = getRenderer().getFrameCount() % kMaxU32;
 			consts.m_ssaoPower = g_ssaoPower.get();
 			consts.m_viewMat = ctx.m_matrices.m_view;
-			consts.m_prevJitterUv = ctx.m_matrices.m_jitterOffsetNdc / 2.0f;
+			computeLinearizeDepthOptimal(ctx.m_cameraNear, ctx.m_cameraFar, consts.m_linearizeDepthParams.x(), consts.m_linearizeDepthParams.y());
 			cmdb.setPushConstants(&consts, sizeof(consts));
 
 			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindUavTexture(0, 8, m_runCtx.m_ssaoRts[1]);
+				rgraphCtx.bindUavTexture(0, 5, ssaoWithDepthRt);
 
 				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
@@ -158,66 +173,89 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		});
 	}
 
-	// Vertical and horizontal blur
-	for(U32 dir = 0; dir < 2; ++dir)
+	// Spatial denoise
 	{
 		RenderPassDescriptionBase* ppass;
 
-		const U32 readRt = (dir == 0) ? 1 : 0;
-		const U32 writeRt = !readRt;
-
-		CString passName = (dir == 0) ? "SSAO vert blur" : "SSAO horiz blur";
 		if(preferCompute)
 		{
-			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName);
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SSAO spatial denoise");
 			ppass = &pass;
 		}
 		else
 		{
-			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passName);
-			pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_ssaoRts[writeRt]}, {});
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO spatial denoise");
+			pass.setFramebufferInfo(m_fbDescr, {ssaoRt}, {});
 			ppass = &pass;
 		}
 
-		ppass->newTextureDependency(m_runCtx.m_ssaoRts[readRt], readUsage);
-		ppass->newTextureDependency(m_runCtx.m_ssaoRts[writeRt], writeUsage);
-		if(g_ssaoQuarterRez.get())
-		{
-			ppass->newTextureDependency(getRenderer().getDepthDownscale().getRt(), readUsage);
-		}
-		else
-		{
-			ppass->newTextureDependency(getRenderer().getGBuffer().getDepthRt(), readUsage);
-		}
+		ppass->newTextureDependency(ssaoWithDepthRt, readUsage);
+		ppass->newTextureDependency(ssaoRt, writeUsage);
 
-		ppass->setWork([this, dir, readRt](RenderPassWorkContext& rgraphCtx) {
+		ppass->setWork([this, ssaoWithDepthRt, ssaoRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
-			cmdb.bindShaderProgram(m_denoiseGrProgs[dir].get());
+			cmdb.bindShaderProgram(m_spatialDenoiseGrProg.get());
 
 			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
-			rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_ssaoRts[readRt]);
-			if(g_ssaoQuarterRez.get())
+			rgraphCtx.bindColorTexture(0, 1, ssaoWithDepthRt);
+
+			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2u : getRenderer().getInternalResolution();
+
+			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindColorTexture(0, 2, getRenderer().getDepthDownscale().getRt());
+				rgraphCtx.bindUavTexture(0, 2, ssaoRt);
+				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
 			else
 			{
-				rgraphCtx.bindTexture(0, 2, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
+				cmdb.setViewport(0, 0, rez.x(), rez.y());
+				drawQuad(cmdb);
 			}
+		});
+	}
 
-			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2 : getRenderer().getInternalResolution();
+	// Temporal denoise
+	{
+		RenderPassDescriptionBase* ppass;
+
+		if(preferCompute)
+		{
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SSAO temporal denoise");
+			ppass = &pass;
+		}
+		else
+		{
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO temporal denoise");
+			pass.setFramebufferInfo(m_fbDescr, {finalRt}, {});
+			ppass = &pass;
+		}
+
+		ppass->newTextureDependency(ssaoRt, readUsage);
+		ppass->newTextureDependency(historyRt, readUsage);
+		ppass->newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), readUsage);
+		ppass->newTextureDependency(finalRt, writeUsage);
+
+		ppass->setWork([this, ssaoRt, historyRt](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_tempralDenoiseGrProg.get());
+
+			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
+			rgraphCtx.bindColorTexture(0, 1, ssaoRt);
+			rgraphCtx.bindColorTexture(0, 2, historyRt);
+			rgraphCtx.bindColorTexture(0, 3, getRenderer().getMotionVectors().getMotionVectorsRt());
+
+			const UVec2 rez = (g_ssaoQuarterRez.get()) ? getRenderer().getInternalResolution() / 2u : getRenderer().getInternalResolution();
 
 			if(g_preferComputeCVar.get())
 			{
-				rgraphCtx.bindUavTexture(0, 3, m_runCtx.m_ssaoRts[!readRt]);
-
+				rgraphCtx.bindUavTexture(0, 4, m_runCtx.m_finalRt);
 				dispatchPPCompute(cmdb, 8, 8, rez.x(), rez.y());
 			}
 			else
 			{
 				cmdb.setViewport(0, 0, rez.x(), rez.y());
-
 				drawQuad(cmdb);
 			}
 		});

+ 11 - 7
AnKi/Renderer/Ssao.h

@@ -31,29 +31,33 @@ public:
 							  [[maybe_unused]] ShaderProgramPtr& optionalShaderProgram) const override
 	{
 		ANKI_ASSERT(rtName == "Ssao");
-		handles[0] = m_runCtx.m_ssaoRts[1];
+		handles[0] = m_runCtx.m_finalRt;
 	}
 
 	RenderTargetHandle getRt() const
 	{
-		return m_runCtx.m_ssaoRts[1];
+		return m_runCtx.m_finalRt;
 	}
 
 public:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;
-	Array<ShaderProgramPtr, 2> m_denoiseGrProgs;
+	ShaderProgramPtr m_spatialDenoiseGrProg;
+	ShaderProgramPtr m_tempralDenoiseGrProg;
 
 	FramebufferDescription m_fbDescr;
-	ImageResourcePtr m_noiseImage;
+	RenderTargetDescription m_ssaoWithDepthRtDescr;
+	RenderTargetDescription m_ssaoRtDescr;
+
+	Array<TexturePtr, 2> m_tex;
+	Bool m_texImportedOnce = false;
 
-	Array<TexturePtr, 2> m_rts;
-	Bool m_rtsImportedOnce = false;
+	ImageResourcePtr m_noiseImage;
 
 	class
 	{
 	public:
-		Array<RenderTargetHandle, 2> m_ssaoRts;
+		RenderTargetHandle m_finalRt;
 	} m_runCtx;
 
 	Error initInternal();

+ 2 - 2
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -570,7 +570,7 @@ void main(
 
 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, constantsOffset);
 	const RVec4 diffColorA = g_bindlessTextures2dF32[localConstants.m_diffuseTex].Sample(g_globalSampler, vertInput.m_uv);
-	if(diffColorA.a * localConstants.m_diffuseScale.a == 0.0f)
+	if(diffColorA.a * localConstants.m_diffuseScale.a < 0.5f)
 	{
 		discard;
 	}
@@ -606,7 +606,7 @@ FragOut main(
 	const RVec4 diffColorA = g_bindlessTextures2dF32[localConstants.m_diffuseTex].Sample(g_globalSampler, uv) * localConstants.m_diffuseScale;
 	const RVec3 diffColor = diffColorA.rgb;
 #			if REALLY_ALPHA_TEST
-	if(diffColorA.a == 0.0f)
+	if(diffColorA.a < 0.5f)
 	{
 		discard;
 	}

+ 1 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -104,7 +104,7 @@ struct SsaoConstants
 	F32 m_projectionMat22;
 	F32 m_projectionMat23;
 
-	Vec2 m_prevJitterUv;
+	Vec2 m_linearizeDepthParams;
 	RF32 m_ssaoPower;
 	U32 m_frameCount;
 

+ 1 - 1
AnKi/Shaders/LightShading.ankiprog

@@ -130,7 +130,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 		const RVec4 ssr = g_ssrTex.SampleLevel(g_trilinearClampSampler, uv, 0.0f);
 		RVec3 refl = ssr.xyz;
 
-		[branch] if(ssr.w < 0.001f)
+		[branch] if(ssr.w < 1.0f)
 		{
 			const Vec3 reflDir = reflect(-viewDir, gbuffer.m_normal);
 			const RF32 reflLod = (g_clusteredShading.m_reflectionProbesMipCount - 1.0f) * gbuffer.m_roughness;

+ 23 - 111
AnKi/Shaders/MotionVectors.ankiprog

@@ -9,106 +9,23 @@
 #	include <AnKi/Shaders/Functions.hlsl>
 
 constexpr F32 kMaxRejectionDistance = 0.1; // In meters
-constexpr F32 kMaxHistoryLength = 16.0;
 
-[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(0)]] SamplerState g_nearesetAnyClampSampler;
 [[vk::binding(1)]] Texture2D g_currentDepthTex;
-[[vk::binding(2)]] Texture2D g_historyDepthTex;
-[[vk::binding(3)]] Texture2D g_velocityTex;
-[[vk::binding(4)]] Texture2D g_historyLengthTex;
+[[vk::binding(2)]] Texture2D g_velocityTex;
 
 struct Constants
 {
-	Mat4 m_reprojectionMat;
-	Mat4 m_viewProjectionInvMat;
-	Mat4 m_prevViewProjectionInvMat;
-
-	Vec2 m_viewportSize;
-	F32 m_padding0;
-	F32 m_padding1;
+	Mat4 m_currentViewProjMat;
+	Mat4 m_currentInvViewProjMat;
+	Mat4 m_prevViewProjMat;
 };
 
-[[vk::binding(5)]] ConstantBuffer<Constants> g_consts;
+[[vk::binding(3)]] ConstantBuffer<Constants> g_consts;
 
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(6)]] RWTexture2D<Vec2> g_motionVectorsUav;
-[[vk::binding(7)]] RWTexture2D<F32> g_historyLengthUav;
-#	endif
-
-Vec3 clipToWorld(Vec4 clip, Mat4 clipToWorldMat)
-{
-	const Vec4 v4 = mul(clipToWorldMat, clip);
-	return v4.xyz / v4.w;
-}
-
-/// Average the some depth values and unproject.
-Vec3 getAverageWorldPosition(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat)
-{
-	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
-
-	Vec4 depths = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
-	depths += tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
-
-	const F32 avgDepth = (depths.x + depths.y + depths.z + depths.w) / 8.0;
-
-	return clipToWorld(Vec4(uvToNdc(uv), avgDepth, 1.0), clipToWorldMat);
-}
-
-/// Get the depths of some neighbour texels, unproject and find the AABB in world space that encloses them.
-void getMinMaxWorldPositions(Texture2D tex, Vec2 uv, Mat4 clipToWorldMat, out Vec3 aabbMin, out Vec3 aabbMax)
-{
-	const Vec2 halfTexel = (1.0 / g_consts.m_viewportSize) / 2.0;
-
-	const Vec4 depths1 = tex.GatherRed(g_linearAnyClampSampler, uv + halfTexel);
-	const Vec4 depths2 = tex.GatherRed(g_linearAnyClampSampler, uv - halfTexel);
-
-	const Vec4 minDepths4 = min(depths1, depths2);
-	const Vec4 maxDepths4 = max(depths1, depths2);
-
-	const Vec2 minDepths2 = min(minDepths4.xy, minDepths4.zw);
-	const Vec2 maxDepths2 = max(maxDepths4.xy, maxDepths4.zw);
-
-	const F32 minDepth = min(minDepths2.x, minDepths2.y);
-	const F32 maxDepth = max(maxDepths2.x, maxDepths2.y);
-
-	const Vec3 a = clipToWorld(Vec4(uvToNdc(uv), minDepth, 1.0), clipToWorldMat);
-	const Vec3 b = clipToWorld(Vec4(uvToNdc(uv), maxDepth, 1.0), clipToWorldMat);
-
-	aabbMin = min(a, b);
-	aabbMax = max(a, b);
-}
-
-F32 computeRejectionFactor(Vec2 uv, Vec2 historyUv)
-{
-	Vec3 boxMin;
-	Vec3 boxMax;
-	getMinMaxWorldPositions(g_currentDepthTex, uv, g_consts.m_viewProjectionInvMat, boxMin, boxMax);
-
-#	if 0
-	const F32 historyDepth = g_historyDepthTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0).r;
-	const Vec3 historyWorldPos = clipToWorld(Vec4(uvToNdc(historyUv), historyDepth, 1.0), g_consts.m_prevViewProjectionInvMat);
-#	else
-	// Average gives more rejection so less ghosting
-	const Vec3 historyWorldPos = getAverageWorldPosition(g_historyDepthTex, historyUv, g_consts.m_prevViewProjectionInvMat);
+[[vk::binding(4)]] RWTexture2D<Vec2> g_motionVectorsUav;
 #	endif
-	const Vec3 clampedHistoryWorldPos = clamp(historyWorldPos, boxMin, boxMax);
-
-	// This factor shows when new pixels appeared by checking depth differences
-	const Vec3 delta = clampedHistoryWorldPos - historyWorldPos;
-	const F32 distSquared = dot(delta, delta);
-	const F32 disocclusionFactor = min(1.0, distSquared / (kMaxRejectionDistance * kMaxRejectionDistance));
-	F32 rejection = disocclusionFactor;
-
-	// New pixels might appeared, add them to the disocclusion
-	const F32 minUv = min(historyUv.x, historyUv.y);
-	const F32 maxUv = max(historyUv.x, historyUv.y);
-	if(minUv <= 0.0 || maxUv >= 1.0)
-	{
-		rejection = 1.0;
-	}
-
-	return rejection;
-}
 
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
@@ -116,18 +33,18 @@ F32 computeRejectionFactor(Vec2 uv, Vec2 historyUv)
 struct FragOut
 {
 	Vec2 m_motionVectors : SV_TARGET0;
-	F32 m_historyLength : SV_TARGET1;
 };
 
 FragOut main(Vec2 uv : TEXCOORD)
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
-	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / g_consts.m_viewportSize;
+	Vec2 uavSize;
+	g_motionVectorsUav.GetDimensions(uavSize.x, uavSize.y);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / uavSize;
 #	endif
-	const F32 depth = g_currentDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 
-	const Vec2 velocity = g_velocityTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rg;
+	const Vec2 velocity = g_velocityTex.SampleLevel(g_nearesetAnyClampSampler, uv, 0.0).rg;
 
 	Vec2 historyUv;
 	if(velocity.x != 1.0)
@@ -136,33 +53,28 @@ FragOut main(Vec2 uv : TEXCOORD)
 	}
 	else
 	{
-		const Vec4 v4 = mul(g_consts.m_reprojectionMat, Vec4(uvToNdc(uv), depth, 1.0));
-		historyUv = ndcToUv(v4.xy / v4.w);
-	}
+		// Don't use a reprojection matrix or other kinds of optimizations. Due to limited numerical precision they produce slightly off results, big
+		// enough to create slight visual issues. Do it the hard way.
+		const F32 depth = g_currentDepthTex.SampleLevel(g_nearesetAnyClampSampler, uv, 0.0).r;
+		const Vec4 v4 = mul(g_consts.m_currentInvViewProjMat, Vec4(uvToNdc(uv), depth, 1.0));
+		const Vec3 worldPos = v4.xyz / v4.w;
 
-	const F32 rejection = computeRejectionFactor(uv, historyUv);
+		Vec4 clipPos = mul(g_consts.m_currentViewProjMat, Vec4(worldPos, 1.0));
+		clipPos.xy /= clipPos.w;
 
-	// Compute history length
-	F32 historyLength;
-	if(rejection >= 0.5)
-	{
-		// Rejection factor too high, reset the temporal history
-		historyLength = 1.0 / kMaxHistoryLength;
-	}
-	else
-	{
-		historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0).r;
-		historyLength += 1.0 / kMaxHistoryLength;
+		Vec4 prevClipPos = mul(g_consts.m_prevViewProjMat, Vec4(worldPos, 1.0));
+		prevClipPos.xy /= prevClipPos.w;
+
+		const Vec2 diff = (prevClipPos.xy - clipPos.xy) * 0.5f; // aka ndcToUv(prevClipPos.xy) - ndcToUv(clipPos.xy)
+		historyUv = uv + diff;
 	}
 
 	// Write out
 #	if ANKI_COMPUTE_SHADER
 	g_motionVectorsUav[svDispatchThreadId] = historyUv - uv;
-	g_historyLengthUav[svDispatchThreadId] = historyLength;
 #	else
 	FragOut output;
 	output.m_motionVectors = historyUv - uv;
-	output.m_historyLength = historyLength;
 	return output;
 #	endif
 }

+ 137 - 82
AnKi/Shaders/Ssao.ankiprog

@@ -5,7 +5,7 @@
 
 // Ground truth ambient occlusion
 
-#pragma anki mutator SAMPLE_COUNT 3 5 7 9 11 13 15
+#pragma anki mutator SPATIAL_DENOISE_QUALITY 0 1
 
 #include <AnKi/Shaders/Common.hlsl>
 
@@ -26,12 +26,8 @@
 [[vk::binding(3)]] SamplerState g_trilinearRepeatSampler;
 [[vk::binding(4)]] SamplerState g_linearAnyClampSampler;
 
-[[vk::binding(5)]] Texture2D<RVec4> g_historyTex;
-[[vk::binding(6)]] Texture2D<Vec4> g_motionVectorsTex;
-[[vk::binding(7)]] Texture2D<RVec4> g_historyLengthTex;
-
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(8)]] RWTexture2D<RVec4> g_outUav;
+[[vk::binding(5)]] RWTexture2D<Vec4> g_outUav;
 #	endif
 
 [[vk::push_constant]] ConstantBuffer<SsaoConstants> g_consts;
@@ -56,7 +52,7 @@ RF32 computeFalloff(RF32 len)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
+Vec2 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -67,7 +63,8 @@ RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
 #	endif
 
 	const Vec2 ndc = uvToNdc(uv);
-	const Vec3 Pc = unproject(ndc);
+	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
+	const Vec3 Pc = cheapPerspectiveUnprojection(g_consts.m_unprojectionParameters, ndc, depth);
 	const RVec3 V = normalize(-Pc); // View vector
 
 	// Get noise
@@ -151,116 +148,174 @@ RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
 	// Apply power
 	Vd = pow(Vd, g_consts.m_ssaoPower);
 
-	// Blend color with history
-	{
-		const Vec2 historyUv = uv + g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).xy;
-
-		// History length creates black trails so it doesn't work correctly
-#	if 0
-		const RVec4 historyLengths = g_historyLengthTex.GatherRed(g_linearAnyClampSampler, uv + g_consts.m_prevJitterUv);
-		const RF32 historyLength = max4(historyLengths);
-#	else
-		const RF32 historyLength = (any(historyUv < 0.0f) || any(historyUv > 1.0f)) ? 0.0f : 1.0f;
-#	endif
-
-		const RF32 lowestBlendFactor = 0.1f;
-		const RF32 maxHistoryLength = 16.0f;
-		const RF32 stableFrames = 4.0f;
-		const RF32 lerpVal = min(1.0f, (historyLength * maxHistoryLength - 1.0f) / stableFrames);
-		const RF32 blendFactor = lerp(1.0f, lowestBlendFactor, lerpVal);
-
-		// Blend with history
-		if(blendFactor < 1.0)
-		{
-			const RF32 history = g_historyTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f).r;
-			Vd = lerp(history, Vd, blendFactor);
-		}
-	}
+	const F32 linearDepth = linearizeDepthOptimal(depth, g_consts.m_linearizeDepthParams.x, g_consts.m_linearizeDepthParams.y);
 
 #	if ANKI_COMPUTE_SHADER
-	g_outUav[svDispatchThreadId] = Vd;
+	g_outUav[svDispatchThreadId] = Vec4(Vd, linearDepth, 0.0f, 0.0f);
 #	else
-	return Vd;
+	return Vec2(Vd, linearDepth);
 #	endif
 }
 #endif // ANKI_TECHNIQUE_Ssao && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
 
 // ===========================================================================
-// SSAO denoise                                                              =
+// SSAO spatial denoise                                                      =
 // ===========================================================================
-#if(ANKI_TECHNIQUE_SsaoDenoiseVertical || ANKI_TECHNIQUE_SsaoDenoiseHorizontal) && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
+#if ANKI_TECHNIQUE_SsaoSpatialDenoise && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
 #	include <AnKi/Shaders/BilateralFilter.hlsl>
 
 [[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
-[[vk::binding(1)]] Texture2D<RVec4> g_inTex;
-[[vk::binding(2)]] Texture2D<Vec4> g_depthTex;
+[[vk::binding(1)]] Texture2D<Vec4> g_inTex;
 
 #	if ANKI_COMPUTE_SHADER
-[[vk::binding(3)]] RWTexture2D<RVec4> g_outImg;
+[[vk::binding(2)]] RWTexture2D<RVec4> g_outImg;
 #	endif
 
-F32 readDepth(Vec2 uv)
+F32 computeWeight(F32 depth, F32 refDepth)
 {
-	return g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
+	const F32 diff = abs(depth - refDepth);
+	return sqrt(1.0 / (0.0003 + diff));
 }
 
-void sampleTex(Vec2 uv, F32 refDepth, inout RF32 col, inout RF32 weight)
+void sampleTex(Vec2 uv, IVec2 offset, F32 refDepth, inout RF32 col, inout F32 weight)
 {
-	const RF32 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
-	const RF32 w = calculateBilateralWeightDepth(refDepth, readDepth(uv), 1.0f);
-	col += color * w;
+	const Vec2 ssaoWithDepth = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset).xy;
+	const F32 w = computeWeight(refDepth, ssaoWithDepth.y);
+	col += ssaoWithDepth.x * w;
 	weight += w;
 }
 
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-F32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
+RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 #	endif
 {
-	UVec2 textureSize;
+// Set UVs
+#	if ANKI_COMPUTE_SHADER
+	Vec2 textureSize;
 	U32 mipCount;
 	g_inTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
+#	endif
+
+	// Sample ref
+	const Vec2 refSsaoAndDepth = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
+	RF32 color = refSsaoAndDepth.x;
+	const F32 refDepth = refSsaoAndDepth.y;
+	F32 weight = computeWeight(0.0f, 0.0f); // Highest weight that this function can give
+
+	// Sample taps
+	sampleTex(uv, IVec2(1, 1), refDepth, color, weight);
+	sampleTex(uv, IVec2(0, 1), refDepth, color, weight);
+	sampleTex(uv, IVec2(-1, 1), refDepth, color, weight);
+	sampleTex(uv, IVec2(-1, 0), refDepth, color, weight);
+	sampleTex(uv, IVec2(-1, -1), refDepth, color, weight);
+	sampleTex(uv, IVec2(0, -1), refDepth, color, weight);
+	sampleTex(uv, IVec2(1, -1), refDepth, color, weight);
+	sampleTex(uv, IVec2(1, 0), refDepth, color, weight);
+
+#	if SPATIAL_DENOISE_QUALITY == 1
+	sampleTex(uv, IVec2(2, 2), refDepth, color, weight);
+	sampleTex(uv, IVec2(0, 2), refDepth, color, weight);
+	sampleTex(uv, IVec2(-2, 2), refDepth, color, weight);
+	sampleTex(uv, IVec2(-2, 0), refDepth, color, weight);
+	sampleTex(uv, IVec2(-2, -2), refDepth, color, weight);
+	sampleTex(uv, IVec2(0, -2), refDepth, color, weight);
+	sampleTex(uv, IVec2(2, -2), refDepth, color, weight);
+	sampleTex(uv, IVec2(2, 0), refDepth, color, weight);
+#	endif
 
-	// Set UVs
+	color /= weight;
+
+	// Write value
 #	if ANKI_COMPUTE_SHADER
-	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / Vec2(textureSize);
+	g_outImg[svDispatchThreadId] = color;
+#	else
+	return color;
 #	endif
+}
+#endif
 
-	const Vec2 texelSize = 1.0 / Vec2(textureSize);
+// ===========================================================================
+// SSAO temporal denoise                                                     =
+// ===========================================================================
+#if ANKI_TECHNIQUE_SsaoTemporalDenoise && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
+#	include <AnKi/Shaders/Functions.hlsl>
 
-	// Sample
-	RF32 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
-	const F32 refDepth = readDepth(uv);
-	RF32 weight = 1.0;
+[[vk::binding(0)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(1)]] Texture2D<RVec4> g_ssaoTex;
+[[vk::binding(2)]] Texture2D<RVec4> g_historySsaoTex;
+[[vk::binding(3)]] Texture2D<Vec4> g_motionVectorsTex;
 
-#	if ANKI_TECHNIQUE_SsaoDenoiseHorizontal
-#		define X_OR_Y x
+#	if ANKI_COMPUTE_SHADER
+[[vk::binding(4)]] RWTexture2D<RVec4> g_outImg;
+#	endif
+
+#	if ANKI_COMPUTE_SHADER
+[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-#		define X_OR_Y y
+RF32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
+#	endif
+{
+#	if ANKI_COMPUTE_SHADER
+	Vec2 textureSize;
+	U32 mipCount;
+	g_ssaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
+	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
 #	endif
 
-	Vec2 uvOffset = 0.0f;
-	uvOffset.X_OR_Y = 1.0f * texelSize.X_OR_Y;
+	const Vec2 historyUv = uv + g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).xy;
 
-	[unroll] for(U32 i = 0u; i < (SAMPLE_COUNT - 1u) / 2u; ++i)
-	{
-		sampleTex(uv + uvOffset, refDepth, color, weight);
-		sampleTex(uv - uvOffset, refDepth, color, weight);
+	// Read textures
+	RF32 history = g_historySsaoTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f).x;
+	RF32 current = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).x;
 
-		uvOffset.X_OR_Y += 1.0f * texelSize.X_OR_Y;
-	}
+	// Remove ghosting by clamping the history color to neighbour's AABB
+	const RF32 near0 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(1, 0)).x;
+	const RF32 near1 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, 1)).x;
+	const RF32 near2 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(-1, 0)).x;
+	const RF32 near3 = g_ssaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, -1)).x;
 
-	color /= weight;
+#	if 0
+	const RF32 boxMin = min(current, min4(near0, near1, near2, near3));
+	const RF32 boxMax = max(current, max4(near0, near1, near2, near3));
+#	else
+	const RF32 m1 = current + near0 + near1 + near2 + near3;
+	const RF32 m2 = current * current + near0 * near0 + near1 * near1 + near2 * near2 + near3 * near3;
+
+	const RF32 mu = m1 / 5.0;
+	const RF32 sigma = sqrt(m2 / 5.0 - mu * mu);
+
+	const F32 varianceClippingGamma = 1.2f;
+	const RF32 boxMin = mu - varianceClippingGamma * sigma;
+	const RF32 boxMax = mu + varianceClippingGamma * sigma;
+#	endif
+
+	history = clamp(history, boxMin, boxMax);
+
+	// Final
+	const RF32 kBlendFactor = 0.1f;
+
+	const F32 lum0 = current;
+	const F32 lum1 = history;
+	const F32 maxLum = 1.0;
+
+	RF32 diff = abs(lum0 - lum1) / max(lum0, max(lum1, maxLum + kEpsilonF32));
+	diff = 1.0 - diff;
+	diff = diff * diff;
+	const RF32 feedback = lerp(0.0, kBlendFactor, diff);
+
+	const RF32 ssao = lerp(history, current, feedback);
 
 	// Write value
 #	if ANKI_COMPUTE_SHADER
-	g_outImg[svDispatchThreadId] = color;
+	g_outImg[svDispatchThreadId] = ssao;
 #	else
-	return color;
+	return ssao;
 #	endif
 }
-#endif // (ANKI_TECHNIQUE_SsaoDenoiseVertical || ANKI_TECHNIQUE_SsaoDenoiseHorizontal) && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
+#endif
 
 // ===========================================================================
 // Techniques                                                                =
@@ -275,22 +330,22 @@ F32 main([[vk::location(0)]] Vec2 uv : TEXCOORD) : SV_TARGET0
 #pragma anki technique_start comp Ssao
 #pragma anki technique_end comp Ssao
 
-#pragma anki technique_start vert SsaoDenoiseVertical
+#pragma anki technique_start vert SsaoSpatialDenoise
 #include <AnKi/Shaders/QuadVert.hlsl>
-#pragma anki technique_end vert SsaoDenoiseVertical
+#pragma anki technique_end vert SsaoSpatialDenoise
 
-#pragma anki technique_start frag SsaoDenoiseVertical
-#pragma anki technique_end frag SsaoDenoiseVertical
+#pragma anki technique_start frag SsaoSpatialDenoise
+#pragma anki technique_end frag SsaoSpatialDenoise
 
-#pragma anki technique_start comp SsaoDenoiseVertical
-#pragma anki technique_end comp SsaoDenoiseVertical
+#pragma anki technique_start comp SsaoSpatialDenoise
+#pragma anki technique_end comp SsaoSpatialDenoise
 
-#pragma anki technique_start vert SsaoDenoiseHorizontal
+#pragma anki technique_start vert SsaoTemporalDenoise
 #include <AnKi/Shaders/QuadVert.hlsl>
-#pragma anki technique_end vert SsaoDenoiseHorizontal
+#pragma anki technique_end vert SsaoTemporalDenoise
 
-#pragma anki technique_start frag SsaoDenoiseHorizontal
-#pragma anki technique_end frag SsaoDenoiseHorizontal
+#pragma anki technique_start frag SsaoTemporalDenoise
+#pragma anki technique_end frag SsaoTemporalDenoise
 
-#pragma anki technique_start comp SsaoDenoiseHorizontal
-#pragma anki technique_end comp SsaoDenoiseHorizontal
+#pragma anki technique_start comp SsaoTemporalDenoise
+#pragma anki technique_end comp SsaoTemporalDenoise

+ 1 - 1
AnKi/Shaders/VisualizeRenderTarget.ankiprog

@@ -16,6 +16,6 @@
 Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 {
 	const Vec4 rgba = g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
-	return rgba.xyz;
+	return rgba.xxx;
 }
 #pragma anki technique_end frag

+ 2 - 1
Sandbox/Main.cpp

@@ -357,7 +357,8 @@ Error MyApp::userMainLoop(Bool& quit, Second elapsedTime)
 
 	if(in.getKey(KeyCode::kH) == 1)
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "Bloom") ? "" : "Bloom");
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "MotionVectorsHistoryLength") ? ""
+																													  : "MotionVectorsHistoryLength");
 	}
 
 	/*if(in.getKey(KeyCode::J) == 1)