浏览代码

Add some additional denoising

Panagiotis Christopoulos Charitos 5 月之前
父节点
当前提交
22a42b69b2

+ 6 - 0
AnKi/Renderer/HistoryLength.h

@@ -5,6 +5,7 @@
 
 
 #pragma once
 #pragma once
 
 
+#include "AnKi/Gr/RenderGraph.h"
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/RendererObject.h>
 
 
 namespace anki {
 namespace anki {
@@ -31,6 +32,11 @@ public:
 		handles[0] = m_runCtx.m_rt;
 		handles[0] = m_runCtx.m_rt;
 	}
 	}
 
 
+	RenderTargetHandle getRt() const // Accessor for the render target handle kept in m_runCtx.m_rt
+	{
+		return m_runCtx.m_rt; // Returned by value, the stored handle is left untouched
+	}
+
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;
 	ShaderProgramPtr m_grProg;

+ 101 - 8
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -3,12 +3,15 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#include "AnKi/Gr/RenderGraph.h"
 #include <AnKi/Renderer/IndirectDiffuseClipmaps.h>
 #include <AnKi/Renderer/IndirectDiffuseClipmaps.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/GBuffer.h>
 #include <AnKi/Renderer/GBuffer.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Renderer/Sky.h>
 #include <AnKi/Renderer/Sky.h>
 #include <AnKi/Renderer/ShadowMapping.h>
 #include <AnKi/Renderer/ShadowMapping.h>
+#include <AnKi/Renderer/HistoryLength.h>
+#include <AnKi/Renderer/MotionVectors.h>
 #include <AnKi/Scene/Components/SkyboxComponent.h>
 #include <AnKi/Scene/Components/SkyboxComponent.h>
 #include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Shaders/Include/MaterialTypes.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/Tracer.h>
@@ -33,15 +36,29 @@ Error IndirectDiffuseClipmaps::init()
 {
 {
 	ANKI_CHECK(RtMaterialFetchRendererObject::init());
 	ANKI_CHECK(RtMaterialFetchRendererObject::init());
 
 
+	const Bool firstBounceUsesRt = g_indirectDiffuseClipmapFirstBounceRayDistance > 0.0f;
+
 	m_lowRezRtDesc = getRenderer().create2DRenderTargetDescription(
 	m_lowRezRtDesc = getRenderer().create2DRenderTargetDescription(
 		getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1),
 		getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1),
-		getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Half");
+		getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Apply rez");
 	m_lowRezRtDesc.bake();
 	m_lowRezRtDesc.bake();
 
 
 	m_fullRtDesc = getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
 	m_fullRtDesc = getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
 																 getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Full");
 																 getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Full");
 	m_fullRtDesc.bake();
 	m_fullRtDesc.bake();
 
 
+	if(firstBounceUsesRt)
+	{
+		for(U32 i = 0; i < 2; ++i)
+		{
+			const TextureInitInfo init = getRenderer().create2DRenderTargetInitInfo(
+				getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16B16A16_Sfloat,
+				TextureUsageBit::kAllShaderResource, generateTempPassName("IndirectDiffuseClipmap: Final #%u", i));
+
+			m_irradianceRts[i] = getRenderer().createAndClearRenderTarget(init, TextureUsageBit::kSrvCompute);
+		}
+	}
+
 	m_consts.m_probeCounts = UVec3(g_indirectDiffuseClipmapProbesXZCVar, g_indirectDiffuseClipmapProbesYCVar, g_indirectDiffuseClipmapProbesXZCVar);
 	m_consts.m_probeCounts = UVec3(g_indirectDiffuseClipmapProbesXZCVar, g_indirectDiffuseClipmapProbesYCVar, g_indirectDiffuseClipmapProbesXZCVar);
 	m_consts.m_totalProbeCount = m_consts.m_probeCounts.x() * m_consts.m_probeCounts.y() * m_consts.m_probeCounts.z();
 	m_consts.m_totalProbeCount = m_consts.m_probeCounts.x() * m_consts.m_probeCounts.y() * m_consts.m_probeCounts.z();
 
 
@@ -128,6 +145,8 @@ Error IndirectDiffuseClipmaps::init()
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_temporalDenoiseGrProg, "TemporalDenoise"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_temporalDenoiseGrProg, "TemporalDenoise"));
 	ANKI_CHECK(
 	ANKI_CHECK(
 		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_spatialReconstructGrProg, "SpatialReconstruct"));
 		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_spatialReconstructGrProg, "SpatialReconstruct"));
+	ANKI_CHECK(
+		loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_bilateralDenoiseGrProg, "BilateralDenoise"));
 
 
 	for(MutatorValue rtMaterialFetchClipmap = 0; rtMaterialFetchClipmap < 2; ++rtMaterialFetchClipmap)
 	for(MutatorValue rtMaterialFetchClipmap = 0; rtMaterialFetchClipmap < 2; ++rtMaterialFetchClipmap)
 	{
 	{
@@ -185,7 +204,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 {
 {
 	ANKI_TRACE_SCOPED_EVENT(IndirectDiffuse);
 	ANKI_TRACE_SCOPED_EVENT(IndirectDiffuse);
 
 
-	const Bool firstBounceRt = g_indirectDiffuseClipmapFirstBounceRayDistance > 0.0f;
+	const Bool firstBounceUsesRt = g_indirectDiffuseClipmapFirstBounceRayDistance > 0.0f;
 
 
 	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 	for(U32 i = 0; i < kIndirectDiffuseClipmapCount; ++i)
 	{
 	{
@@ -199,7 +218,23 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 
 	const RenderTargetHandle rtResultHandle = rgraph.newRenderTarget(m_rtResultRtDesc);
 	const RenderTargetHandle rtResultHandle = rgraph.newRenderTarget(m_rtResultRtDesc);
 	const RenderTargetHandle lowRezRt = rgraph.newRenderTarget(m_lowRezRtDesc);
 	const RenderTargetHandle lowRezRt = rgraph.newRenderTarget(m_lowRezRtDesc);
-	const RenderTargetHandle fullHandle = rgraph.newRenderTarget(m_fullRtDesc);
+	const RenderTargetHandle fullRtTmp = rgraph.newRenderTarget(m_fullRtDesc);
+
+	Array<RenderTargetHandle, 2> fullRts;
+	if(firstBounceUsesRt)
+	{
+		for(U32 i = 0; i < 2; ++i)
+		{
+			if(m_texturesImportedOnce) [[likely]]
+			{
+				fullRts[i] = rgraph.importRenderTarget(m_irradianceRts[i].get());
+			}
+			else
+			{
+				fullRts[i] = rgraph.importRenderTarget(m_irradianceRts[i].get(), TextureUsageBit::kSrvCompute);
+			}
+		}
+	}
 
 
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount>& radianceVolumes = m_runCtx.m_handles.m_radianceVolumes;
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount>& radianceVolumes = m_runCtx.m_handles.m_radianceVolumes;
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount>& irradianceVolumes = m_runCtx.m_handles.m_irradianceVolumes;
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount>& irradianceVolumes = m_runCtx.m_handles.m_irradianceVolumes;
@@ -398,7 +433,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	}
 	}
 
 
 	// Apply GI
 	// Apply GI
-	if(firstBounceRt)
+	if(firstBounceUsesRt)
 	{
 	{
 		patchShaderBindingTablePass("IndirectDiffuseClipmaps: Patch SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[0],
 		patchShaderBindingTablePass("IndirectDiffuseClipmaps: Patch SBT", m_rtLibraryGrProg.get(), m_rayGenShaderGroupIndices[0],
 									m_missShaderGroupIdx, m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
 									m_missShaderGroupIdx, m_sbtRecordSize, rgraph, sbtHandle, sbtBuffer);
@@ -531,23 +566,81 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 
 
 		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(lowRezRt, TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(lowRezRt, TextureUsageBit::kSrvCompute);
-		pass.newTextureDependency(fullHandle, TextureUsageBit::kUavCompute);
+		pass.newTextureDependency(fullRtTmp, TextureUsageBit::kUavCompute);
 
 
-		pass.setWork([this, lowRezRt, fullHandle](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, lowRezRt, fullRtTmp](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
 			cmdb.bindShaderProgram(m_spatialReconstructGrProg.get());
 			cmdb.bindShaderProgram(m_spatialReconstructGrProg.get());
 
 
 			rgraphCtx.bindSrv(0, 0, lowRezRt);
 			rgraphCtx.bindSrv(0, 0, lowRezRt);
 			rgraphCtx.bindSrv(1, 0, getGBuffer().getDepthRt());
 			rgraphCtx.bindSrv(1, 0, getGBuffer().getDepthRt());
-			rgraphCtx.bindUav(0, 0, fullHandle);
+			rgraphCtx.bindUav(0, 0, fullRtTmp);
 
 
 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2,
 			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2,
 							  getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1));
 							  getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1));
 		});
 		});
 	}
 	}
 
 
-	m_runCtx.m_handles.m_appliedIrradiance = fullHandle;
+	if(!firstBounceUsesRt)
+	{
+		m_runCtx.m_handles.m_appliedIrradiance = fullRtTmp;
+		return;
+	}
+
+	const RenderTargetHandle historyRt = fullRts[0];
+	const RenderTargetHandle outRt = fullRts[1];
+
+	// Temporal denoise
+	{
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: Temporal denoise");
+
+		pass.newTextureDependency(fullRtTmp, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(historyRt, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(getHistoryLength().getRt(), TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(getMotionVectors().getMotionVectorsRt(), TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(outRt, TextureUsageBit::kUavCompute);
+
+		pass.setWork([this, fullRtTmp, historyRt, outRt](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_temporalDenoiseGrProg.get());
+
+			rgraphCtx.bindSrv(0, 0, getHistoryLength().getRt());
+			rgraphCtx.bindSrv(1, 0, getMotionVectors().getMotionVectorsRt());
+			rgraphCtx.bindSrv(2, 0, historyRt);
+			rgraphCtx.bindSrv(3, 0, fullRtTmp);
+
+			rgraphCtx.bindUav(0, 0, outRt);
+
+			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearClamp.get());
+
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
+		});
+	}
+
+	// Bilateral denoise
+	{
+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: Bilateral denoise");
+
+		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(outRt, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(historyRt, TextureUsageBit::kUavCompute);
+
+		pass.setWork([this, outRt, historyRt](RenderPassWorkContext& rgraphCtx) {
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_bilateralDenoiseGrProg.get());
+
+			rgraphCtx.bindSrv(0, 0, outRt);
+			rgraphCtx.bindSrv(1, 0, getGBuffer().getDepthRt());
+			rgraphCtx.bindUav(0, 0, historyRt);
+
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
+		});
+	}
+
+	m_runCtx.m_handles.m_appliedIrradiance = historyRt;
 }
 }
 
 
 void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const
 void IndirectDiffuseClipmaps::drawDebugProbes(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const

+ 3 - 0
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -116,6 +116,8 @@ private:
 	RenderTargetDesc m_lowRezRtDesc;
 	RenderTargetDesc m_lowRezRtDesc;
 	RenderTargetDesc m_fullRtDesc;
 	RenderTargetDesc m_fullRtDesc;
 
 
+	Array<TexturePtr, 2> m_irradianceRts;
+
 	IndirectDiffuseClipmapConstants m_consts;
 	IndirectDiffuseClipmapConstants m_consts;
 
 
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
@@ -127,6 +129,7 @@ private:
 	ShaderProgramPtr m_visProbesGrProg;
 	ShaderProgramPtr m_visProbesGrProg;
 	ShaderProgramPtr m_temporalDenoiseGrProg;
 	ShaderProgramPtr m_temporalDenoiseGrProg;
 	ShaderProgramPtr m_spatialReconstructGrProg;
 	ShaderProgramPtr m_spatialReconstructGrProg;
+	ShaderProgramPtr m_bilateralDenoiseGrProg;
 
 
 	ImageResourcePtr m_blueNoiseImg;
 	ImageResourcePtr m_blueNoiseImg;
 
 

+ 2 - 2
AnKi/Renderer/Renderer.cpp

@@ -310,6 +310,8 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
 		m_accelerationStructureBuilder->populateRenderGraph(ctx);
 	}
 	}
 	m_gbuffer->populateRenderGraph(ctx);
 	m_gbuffer->populateRenderGraph(ctx);
+	m_motionVectors->populateRenderGraph(ctx);
+	m_historyLength->populateRenderGraph(ctx);
 	m_depthDownscale->populateRenderGraph(ctx);
 	m_depthDownscale->populateRenderGraph(ctx);
 	m_shadowMapping->populateRenderGraph(ctx);
 	m_shadowMapping->populateRenderGraph(ctx);
 	m_clusterBinning2->populateRenderGraph(ctx);
 	m_clusterBinning2->populateRenderGraph(ctx);
@@ -324,8 +326,6 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	}
 	}
 	m_probeReflections->populateRenderGraph(ctx);
 	m_probeReflections->populateRenderGraph(ctx);
 	m_volumetricLightingAccumulation->populateRenderGraph(ctx);
 	m_volumetricLightingAccumulation->populateRenderGraph(ctx);
-	m_motionVectors->populateRenderGraph(ctx);
-	m_historyLength->populateRenderGraph(ctx);
 	m_gbufferPost->populateRenderGraph(ctx);
 	m_gbufferPost->populateRenderGraph(ctx);
 	if(m_rtShadows)
 	if(m_rtShadows)
 	{
 	{

+ 85 - 14
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -17,6 +17,7 @@
 #pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique SpatialReconstruct comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique SpatialReconstruct comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique TemporalDenoise comp mutators
 #pragma anki technique TemporalDenoise comp mutators
+#pragma anki technique BilateralDenoise comp mutators
 #pragma anki technique VisualizeProbes vert pixel mutators
 #pragma anki technique VisualizeProbes vert pixel mutators
 
 
 #define ANKI_ASSERTIONS_ENABLED 1
 #define ANKI_ASSERTIONS_ENABLED 1
@@ -28,12 +29,17 @@
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/FastMathFunctions.hlsl>
 #include <AnKi/Shaders/FastMathFunctions.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
+#include <AnKi/Shaders/TemporalAA.hlsl>
 
 
 #if defined(RT_MATERIAL_FETCH_CLIPMAP) && RT_MATERIAL_FETCH_CLIPMAP
 #if defined(RT_MATERIAL_FETCH_CLIPMAP) && RT_MATERIAL_FETCH_CLIPMAP
 #	define CLIPMAP_VOLUME
 #	define CLIPMAP_VOLUME
 #endif
 #endif
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 
 
+constexpr F32 kGaussianSigma = 0.55;
+constexpr F32 kMaxBilateralSamplesPerDirection = 5.0;
+
 // ===========================================================================
 // ===========================================================================
 // RtMaterialFetch                                                           =
 // RtMaterialFetch                                                           =
 // ===========================================================================
 // ===========================================================================
@@ -192,6 +198,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 	}
 	}
 
 
 	TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = Vec4(final, 0.0);
 	TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = Vec4(final, 0.0);
+	// TEX(g_colorAndPdfTex, DispatchRaysIndex().xy) = lerp(TEX(g_colorAndPdfTex, DispatchRaysIndex().xy), Vec4(final, 0.0), 0.05);
 }
 }
 #	endif // RT_MATERIAL_FETCH_CLIPMAP
 #	endif // RT_MATERIAL_FETCH_CLIPMAP
 #endif
 #endif
@@ -523,14 +530,15 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 
 
 [NumThreads(64, 1, 1)] void main(COMPUTE_ARGS)
 [NumThreads(64, 1, 1)] void main(COMPUTE_ARGS)
 {
 {
-	Vec2 halfViewportSize;
-	g_outTex.GetDimensions(halfViewportSize.x, halfViewportSize.y);
-	const Vec2 fullViewportSize = halfViewportSize * Vec2(2.0, 1.0);
+	Vec2 lowTextureSize;
+	g_outTex.GetDimensions(lowTextureSize.x, lowTextureSize.y);
 
 
 	const UVec2 realSvDispatchThreadId = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
 	const UVec2 realSvDispatchThreadId = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
 #	if SPATIAL_RECONSTRUCT_TYPE == 0
 #	if SPATIAL_RECONSTRUCT_TYPE == 0
+	const Vec2 fullViewportSize = lowTextureSize * Vec2(2.0, 1.0);
 	const Vec2 coord = Vec2(realSvDispatchThreadId.x * 2u + (realSvDispatchThreadId.y & 1u), realSvDispatchThreadId.y);
 	const Vec2 coord = Vec2(realSvDispatchThreadId.x * 2u + (realSvDispatchThreadId.y & 1u), realSvDispatchThreadId.y);
 #	else
 #	else
+	const Vec2 fullViewportSize = lowTextureSize * 2.0;
 	const Vec2 coord = Vec2(realSvDispatchThreadId * 2u);
 	const Vec2 coord = Vec2(realSvDispatchThreadId * 2u);
 #	endif
 #	endif
 
 
@@ -577,8 +585,6 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 // SpatialReconstruct                                                        =
 // SpatialReconstruct                                                        =
 // ===========================================================================
 // ===========================================================================
 #if NOT_ZERO(ANKI_TECHNIQUE_SpatialReconstruct)
 #if NOT_ZERO(ANKI_TECHNIQUE_SpatialReconstruct)
-#	include <AnKi/Shaders/BilateralFilter.hlsl>
-
 Texture2D<Vec3> g_inTex : register(t0);
 Texture2D<Vec3> g_inTex : register(t0);
 Texture2D<F32> g_depthTex : register(t1);
 Texture2D<F32> g_depthTex : register(t1);
 
 
@@ -756,30 +762,34 @@ void checkerboardReconstruct(IVec2 svDispatchThreadId)
 Texture2D<F32> g_historyLengthTex : register(t0);
 Texture2D<F32> g_historyLengthTex : register(t0);
 Texture2D<Vec2> g_motionVectorsTex : register(t1);
 Texture2D<Vec2> g_motionVectorsTex : register(t1);
 Texture2D<Vec3> g_historyTex : register(t2);
 Texture2D<Vec3> g_historyTex : register(t2);
-Texture2D<Vec3> g_currentTex : register(t3);
+Texture2D<Vec4> g_currentTex : register(t3);
 
 
-RWTexture2D<Vec3> g_outTex : register(u0);
+RWTexture2D<Vec4> g_outTex : register(u0);
 
 
 SamplerState g_linearAnyClampSampler : register(s0);
 SamplerState g_linearAnyClampSampler : register(s0);
 
 
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 {
 {
 	const Vec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
 	const Vec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
-	Vec2 viewport;
-	g_historyLengthTex.GetDimensions(viewport.x, viewport.y);
 
 
-	const F32 historyLen = TEX(g_historyLengthTex, coord) / kMaxHistoryLength;
+	const F32 minBlendFactor = 0.025;
+	const F32 maxBlendFactor = 0.9;
 
 
-	F32 blendFactor = historyLen / 1.0;
-	blendFactor = lerp(1.0, 0.05, blendFactor);
+	const F32 historyLen = TEX(g_historyLengthTex, coord) * kMaxHistoryLength;
+
+	F32 blendFactor = min(1.0, historyLen / 1.0);
+	blendFactor = lerp(maxBlendFactor, minBlendFactor, blendFactor);
 
 
 	Vec3 outColor = TEX(g_currentTex, coord);
 	Vec3 outColor = TEX(g_currentTex, coord);
-	if(blendFactor > 0.9)
+	if(blendFactor > maxBlendFactor * 0.9)
 	{
 	{
 		// Don't accumulate
 		// Don't accumulate
 	}
 	}
 	else
 	else
 	{
 	{
+		Vec2 viewport;
+		g_historyLengthTex.GetDimensions(viewport.x, viewport.y);
+
 		const Vec2 uv = (coord + 0.5) / viewport;
 		const Vec2 uv = (coord + 0.5) / viewport;
 		const Vec2 historyUv = uv + TEX(g_motionVectorsTex, coord);
 		const Vec2 historyUv = uv + TEX(g_motionVectorsTex, coord);
 
 
@@ -788,7 +798,68 @@ SamplerState g_linearAnyClampSampler : register(s0);
 		outColor = lerp(history, outColor, blendFactor);
 		outColor = lerp(history, outColor, blendFactor);
 	}
 	}
 
 
-	TEX(g_outTex, coord) = outColor;
+	TEX(g_outTex, coord) = Vec4(outColor, historyLen);
+}
+#endif
+
+// ===========================================================================
+// BilateralDenoise                                                          =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_BilateralDenoise)
+Texture2D<Vec4> g_inTex : register(t0);
+Texture2D<F32> g_depthTex : register(t1);
+
+RWTexture2D<Vec4> g_outTex : register(u0);
+
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
+{
+	// Depth aware blur of g_inTex into g_outTex. The kernel radius shrinks as the history length stored in g_inTex.w grows
+	const IVec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
+	IVec2 viewport;
+	g_outTex.GetDimensions(viewport.x, viewport.y);
+
+	if(any(coord >= viewport))
+	{
+		return;
+	}
+
+	const F32 refDepth = TEX(g_depthTex, coord);
+
+	const Vec4 rgba = TEX(g_inTex, coord);
+	const F32 historyLen = rgba.w;
+
+	// The center texel is skipped by the loops that follow. It only contributes with an epsilon weight, which also keeps the final
+	// division away from zero
+	F32 weightSum = kEpsilonF32;
+	Vec3 colorSum = rgba.xyz * weightSum;
+
+	// blurFactor is 1.0 when there is no history and drops to 0.0 once historyLen reaches 12.0
+	const F32 blurFactor = 1.0 - min(1.0, historyLen / 12.0);
+
+	// Kernel radius in texels, never less than 1. The conversion to integer truncates
+	const I32 sampleCount = I32(max(1.0, kMaxBilateralSamplesPerDirection * blurFactor));
+
+	for(I32 x = -sampleCount; x <= sampleCount; ++x)
+	{
+		for(I32 y = -sampleCount; y <= sampleCount; ++y)
+		{
+			if(x == 0 && y == 0)
+			{
+				continue;
+			}
+
+			// Keep the tap inside the texture on both axes, not just on X
+			const IVec2 newCoord = clamp(coord + IVec2(x, y), IVec2(0, 0), viewport - 1);
+
+			const Vec3 sampleColor = TEX(g_inTex, newCoord).xyz;
+			const F32 sampleDepth = TEX(g_depthTex, newCoord);
+
+			// Only depth similarity drives the weight
+			const F32 depthWeight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
+			const F32 weight = depthWeight;
+
+			colorSum += sampleColor * weight;
+			weightSum += weight;
+		}
+	}
+
+	colorSum /= weightSum;
+	TEX(g_outTex, coord) = Vec4(colorSum, 0.0);
 }
 }
 #endif
 #endif
 
 

+ 5 - 112
AnKi/Shaders/Reflections.ankiprog

@@ -26,6 +26,7 @@
 #include <AnKi/Shaders/SsRaymarching.hlsl>
 #include <AnKi/Shaders/SsRaymarching.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
 #include <AnKi/Shaders/IndirectDiffuseClipmaps.hlsl>
+#include <AnKi/Shaders/TemporalAA.hlsl>
 
 
 // Config & debug
 // Config & debug
 constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
 constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
@@ -977,87 +978,6 @@ Vec2 computeHistoryUv(UVec2 coords, Vec2 uv)
 	return historyUv;
 	return historyUv;
 }
 }
 
 
-void accumulateSourceColor(Vec2 newUv, Vec4 texelWeights, inout Vec3 m1, inout Vec3 m2, inout Vec3 sourceSample, inout Vec3 neighboorMin,
-						   inout Vec3 neighboorMax)
-{
-	const Vec4 red = g_colorAndDepth.GatherRed(g_linearAnyClampSampler, newUv);
-	const Vec4 green = g_colorAndDepth.GatherGreen(g_linearAnyClampSampler, newUv);
-	const Vec4 blue = g_colorAndDepth.GatherBlue(g_linearAnyClampSampler, newUv);
-
-	[unroll] for(U32 c = 0; c < 4; ++c)
-	{
-		if(texelWeights[c] > 0.0)
-		{
-			const Vec3 neighbor = Vec3(red[c], green[c], blue[c]);
-
-			sourceSample += neighbor * texelWeights[c];
-
-			neighboorMin = min(neighboorMin, neighbor);
-			neighboorMax = max(neighboorMax, neighbor);
-
-			m1 += neighbor;
-			m2 += neighbor * neighbor;
-		}
-	}
-}
-
-void accumulateSourceColor(IVec2 coord, IVec2 textureSize, F32 weight, inout Vec3 m1, inout Vec3 m2, inout Vec3 sourceSample, inout Vec3 neighboorMin,
-						   inout Vec3 neighboorMax)
-{
-	coord = clamp(coord, 0, textureSize - 1);
-
-	const Vec3 neighbor = g_colorAndDepth[coord].xyz;
-
-	sourceSample += neighbor * weight;
-
-	neighboorMin = min(neighboorMin, neighbor);
-	neighboorMax = max(neighboorMax, neighbor);
-
-	m1 += neighbor;
-	m2 += neighbor * neighbor;
-}
-
-void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, out Vec3 m2, out Vec3 sourceSample, out Vec3 neighboorMin,
-						out Vec3 neighboorMax)
-{
-	sourceSample = 0.0;
-	neighboorMin = 1000.0;
-	neighboorMax = -1000.0;
-	m1 = 0.0;
-	m2 = 0.0;
-
-	const Vec2 texelSize = 1.0 / textureSize;
-	const Vec2 halfTexelSize = texelSize / 2.0;
-
-	// Positioning mentioned bellow is in screen space (bottom left is in the bottom left of the screen)
-	// Alogithm wants to sample 9 taps of this:
-	// +-+-+-+
-	// |6|7|8|
-	// +-+-+-+
-	// |3|4|5|
-	// +-+-+-+
-	// |0|1|2|
-	// +-+-+-+
-	// "uv" points to the middle of 4
-
-	// Bottom left (0, 1, 4, 3)
-	Vec2 newUv = uv + Vec2(-halfTexelSize.x, +halfTexelSize.y);
-	accumulateSourceColor(newUv, Vec4(0.5, 0.5, 1.0, 0.5), m1, m2, sourceSample, neighboorMin, neighboorMax);
-
-	// Top right (4, 5, 8, 7)
-	newUv = uv + Vec2(+halfTexelSize.x, -halfTexelSize.y);
-	accumulateSourceColor(newUv, Vec4(0.0, 0.5, 0.5, 0.5), m1, m2, sourceSample, neighboorMin, neighboorMax);
-
-	// Top left
-	accumulateSourceColor(coord + IVec2(-1, -1), textureSize, 0.5, m1, m2, sourceSample, neighboorMin, neighboorMax);
-
-	// Bottom right
-	accumulateSourceColor(coord + IVec2(+1, +1), textureSize, 0.5, m1, m2, sourceSample, neighboorMin, neighboorMax);
-
-	// Misc
-	sourceSample /= 1.0 + 0.5 * 8.0;
-}
-
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
 	UVec2 textureSize;
 	UVec2 textureSize;
@@ -1075,49 +995,22 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 		return;
 		return;
 	}
 	}
 
 
-	// Read crnt
-	const F32 depth = g_colorAndDepth[coord].w;
-	Vec3 sourceSample = 0.0;
-	Vec3 neighboorMin = 0.0;
-	Vec3 neighboorMax = 0.0;
-	Vec3 m1 = 0.0;
-	Vec3 m2 = 0.0;
-	computeSourceColor(uv, coord, textureSize, m1, m2, sourceSample, neighboorMin, neighboorMax);
-
 	// Read history
 	// Read history
 	const Vec2 historyUv = computeHistoryUv(coord, uv);
 	const Vec2 historyUv = computeHistoryUv(coord, uv);
 	Vec3 history = g_historyTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f);
 	Vec3 history = g_historyTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f);
 
 
-	// Fix history
-	constexpr F32 sampleCount = 9.0;
-	const Vec3 mu = m1 / sampleCount;
-	const Vec3 sigma = sqrt(abs((m2 / sampleCount) - (mu * mu)));
-	const Vec3 minc = mu - kTemporalGamma * sigma;
-	const Vec3 maxc = mu + kTemporalGamma * sigma;
-
-	history = clamp(history, minc, maxc);
-
-	// Blend history and current
-	const Vec3 compressedSource = sourceSample * rcp(max3(sourceSample) + 1.0);
-	const Vec3 compressedHistory = history * rcp(max3(history) + 1.0);
-	const F32 luminanceSource = computeLuminance(compressedSource);
-	const F32 luminanceHistory = computeLuminance(compressedHistory);
-
-	F32 sourceWeight = kTemporalSourceWeight;
-	F32 historyWeight = 1.0 - sourceWeight;
-	sourceWeight *= 1.0 / (1.0 + luminanceSource);
-	historyWeight *= 1.0 / (1.0 + luminanceHistory);
-
-	const Vec3 finalVal = (sourceSample * sourceWeight + history * historyWeight) / max(sourceWeight + historyWeight, 0.00001);
+	// TAA
+	const Vec3 finalVal = computeTemporalAA<F32>(g_colorAndDepth, g_linearAnyClampSampler, history, coord);
 
 
 	// Temporal variance
 	// Temporal variance
 	const Vec2 momentsHistory = g_momentsHistoryTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f).xy;
 	const Vec2 momentsHistory = g_momentsHistoryTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f).xy;
 	Vec2 crntMoments;
 	Vec2 crntMoments;
-	crntMoments.x = luminanceSource;
+	crntMoments.x = computeLuminance(finalVal);
 	crntMoments.y = crntMoments.x * crntMoments.x;
 	crntMoments.y = crntMoments.x * crntMoments.x;
 	const Vec2 moments = lerp(crntMoments, momentsHistory, 0.25);
 	const Vec2 moments = lerp(crntMoments, momentsHistory, 0.25);
 
 
 	// Write value
 	// Write value
+	const F32 depth = g_colorAndDepth[coord].w;
 	g_outTex[coord] = Vec4(finalVal, depth);
 	g_outTex[coord] = Vec4(finalVal, depth);
 	g_momentsTex[coord] = Vec4(moments, 0.0, 0.0);
 	g_momentsTex[coord] = Vec4(moments, 0.0, 0.0);
 }
 }

+ 150 - 0
AnKi/Shaders/TemporalAA.hlsl

@@ -0,0 +1,150 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// Contains functions useful for temporal AA like accumulations
+
+#pragma once
+
+#include <AnKi/Shaders/Common.hlsl>
+
+/// Accumulate color using texture gather.
+/// The 4 texels gathered at newUv whose entry in texelWeights is positive are passed through func and then added to sourceSample (scaled by
+/// their weight), folded into the neighboorMin/neighboorMax bounds and summed into the m1 (value) and m2 (squared value) moments.
+template<typename T, typename TPostProcessTextureFunc>
+void accumulateSourceColor(Texture2D<Vec4> tex, SamplerState linearAnyClampSampler, Vec2 newUv, Vec4 texelWeights, TPostProcessTextureFunc func,
+						   inout vector<T, 3> m1, inout vector<T, 3> m2, inout vector<T, 3> sourceSample, inout vector<T, 3> neighboorMin,
+						   inout vector<T, 3> neighboorMax)
+{
+	// One gather per channel. Component i of every result belongs to the same texel
+	const vector<T, 4> reds = tex.GatherRed(linearAnyClampSampler, newUv);
+	const vector<T, 4> greens = tex.GatherGreen(linearAnyClampSampler, newUv);
+	const vector<T, 4> blues = tex.GatherBlue(linearAnyClampSampler, newUv);
+
+	[unroll] for(U32 texel = 0; texel < 4; ++texel)
+	{
+		const F32 texelWeight = texelWeights[texel];
+		if(!(texelWeight > 0.0))
+		{
+			// This texel doesn't participate
+			continue;
+		}
+
+		const vector<T, 3> color = func(vector<T, 3>(reds[texel], greens[texel], blues[texel]));
+
+		// Moments
+		m1 += color;
+		m2 += color * color;
+
+		// Weighted sum
+		sourceSample += color * texelWeight;
+
+		// Bounds
+		neighboorMin = min(neighboorMin, color);
+		neighboorMax = max(neighboorMax, color);
+	}
+}
+
+/// Accumulate color using a single texel read.
+/// coord is clamped to [0, textureSize - 1] before the read. The texel goes through func and is then added to sourceSample (scaled by
+/// weight), folded into the neighboorMin/neighboorMax bounds and summed into the m1 (value) and m2 (squared value) moments.
+template<typename T, typename TPostProcessTextureFunc>
+void accumulateSourceColor(Texture2D<Vec4> tex, Vec2 coord, Vec2 textureSize, T weight, TPostProcessTextureFunc func, inout vector<T, 3> m1,
+						   inout vector<T, 3> m2, inout vector<T, 3> sourceSample, inout vector<T, 3> neighboorMin, inout vector<T, 3> neighboorMax)
+{
+	const Vec2 clampedCoord = clamp(coord, 0.0, textureSize - 1.0);
+	const vector<T, 3> color = func(TEX(tex, clampedCoord).xyz);
+
+	// Moments
+	m1 += color;
+	m2 += color * color;
+
+	// Weighted sum
+	sourceSample += color * weight;
+
+	// Bounds
+	neighboorMin = min(neighboorMin, color);
+	neighboorMax = max(neighboorMax, color);
+}
+
+/// Post process functor that hands back the color it was given without touching it.
+template<typename T>
+struct DefaultPostProcessTextureFunc
+{
+	vector<T, 3> operator()(vector<T, 3> color)
+	{
+		return color;
+	}
+};
+
+/// Collect what temporal AA needs out of the 3x3 neighbourhood around coord: the weighted source color (sourceSample), the per channel
+/// bounds (neighboorMin, neighboorMax) and the sums of the taps and of their squares (m1, m2). func is applied to every tap before it is
+/// accumulated.
+template<typename T, typename TPostProcessTextureFunc = DefaultPostProcessTextureFunc<T> >
+void computeSourceColor(Texture2D<Vec4> tex, SamplerState linearAnyClampSampler, Vec2 coord, out vector<T, 3> m1, out vector<T, 3> m2,
+						out vector<T, 3> sourceSample, out vector<T, 3> neighboorMin, out vector<T, 3> neighboorMax,
+						TPostProcessTextureFunc func = (DefaultPostProcessTextureFunc<T>)0)
+{
+	// Reset the outputs, the accumulate functions only add on top of them
+	m1 = 0.0;
+	m2 = 0.0;
+	sourceSample = 0.0;
+	neighboorMin = kMaxF32;
+	neighboorMax = kMinF32;
+
+	Vec2 texSize;
+	tex.GetDimensions(texSize.x, texSize.y);
+
+	const Vec2 texelSize = 1.0 / texSize;
+	const Vec2 halfTexel = texelSize / 2.0;
+	const Vec2 centerUv = (coord + 0.5) / texSize;
+
+	// The positions below are in screen space (0 is at the bottom left of the screen)
+	// The algorithm samples these 9 taps:
+	// +-+-+-+
+	// |6|7|8|
+	// +-+-+-+
+	// |3|4|5|
+	// +-+-+-+
+	// |0|1|2|
+	// +-+-+-+
+	// "centerUv" points to the middle of 4
+
+	// Gather the bottom left quad (0, 1, 4, 3)
+	const Vec2 bottomLeftUv = centerUv + Vec2(-halfTexel.x, +halfTexel.y);
+	accumulateSourceColor(tex, linearAnyClampSampler, bottomLeftUv, vector<T, 4>(0.5, 0.5, 1.0, 0.5), func, m1, m2, sourceSample, neighboorMin,
+						  neighboorMax);
+
+	// Gather the top right quad (4, 5, 8, 7). The zero weight keeps the tap that was already counted from being accumulated twice
+	const Vec2 topRightUv = centerUv + Vec2(+halfTexel.x, -halfTexel.y);
+	accumulateSourceColor(tex, linearAnyClampSampler, topRightUv, vector<T, 4>(0.0, 0.5, 0.5, 0.5), func, m1, m2, sourceSample, neighboorMin,
+						  neighboorMax);
+
+	// The 2 corners that the gathers didn't cover: top left...
+	accumulateSourceColor(tex, coord + IVec2(-1, -1), texSize, T(0.5), func, m1, m2, sourceSample, neighboorMin, neighboorMax);
+
+	// ...and bottom right
+	accumulateSourceColor(tex, coord + IVec2(+1, +1), texSize, T(0.5), func, m1, m2, sourceSample, neighboorMin, neighboorMax);
+
+	// Normalize with the sum of the weights used above: one tap at 1.0 and eight at 0.5
+	sourceSample /= T(1.0 + 0.5 * 8.0);
+}
+
+/// Blend the current frame with its history, temporal AA style.
+/// @param currentTex The texture of the current frame. Its 3x3 neighbourhood around coord is what gets sampled.
+/// @param linearAnyClampSampler The sampler used by the texture gathers.
+/// @param history The history color. It is clamped to mu -/+ temporalGamma * sigma of the neighbourhood before the blend.
+/// @param coord The texel coordinate of the current pixel.
+/// @param temporalSourceWeight The base weight of the current frame. The history starts with 1.0 - temporalSourceWeight.
+/// @param temporalGamma Scales the standard deviation that defines the history clamp range.
+/// @param func Applied to every tap read from currentTex before it is accumulated.
+/// @return The blended color.
+template<typename T, typename TPostProcessTextureFunc = DefaultPostProcessTextureFunc<T> >
+vector<T, 3> computeTemporalAA(Texture2D<Vec4> currentTex, SamplerState linearAnyClampSampler, Vec3 history, Vec2 coord,
+							   F32 temporalSourceWeight = 0.01, F32 temporalGamma = 1.0,
+							   TPostProcessTextureFunc func = (DefaultPostProcessTextureFunc<T>)0)
+{
+	vector<T, 3> m1;
+	vector<T, 3> m2;
+	vector<T, 3> sourceSample;
+	vector<T, 3> neighboorMin;
+	vector<T, 3> neighboorMax;
+
+	// Forward func, otherwise computeSourceColor falls back to its default functor and the caller's one is silently ignored
+	computeSourceColor<T, TPostProcessTextureFunc>(currentTex, linearAnyClampSampler, coord, m1, m2, sourceSample, neighboorMin, neighboorMax,
+												   func);
+
+	// Fix history: clamp it around the mean of the 9 taps
+	const T sampleCount = 9.0;
+	const vector<T, 3> mu = m1 / sampleCount;
+	const vector<T, 3> sigma = sqrt(abs((m2 / sampleCount) - (mu * mu)));
+	const vector<T, 3> minc = mu - temporalGamma * sigma;
+	const vector<T, 3> maxc = mu + temporalGamma * sigma;
+
+	history = clamp(history, minc, maxc);
+
+	// Blend history and current. The luminance of the range compressed colors scales down the weight of the brighter of the two
+	const vector<T, 3> compressedSource = sourceSample * rcp(max3(sourceSample) + T(1));
+	const vector<T, 3> compressedHistory = history * rcp(max3(history) + T(1));
+	const T luminanceSource = computeLuminance(compressedSource);
+	const T luminanceHistory = computeLuminance(compressedHistory);
+
+	T sourceWeight = temporalSourceWeight;
+	T historyWeight = T(1) - sourceWeight;
+	sourceWeight *= T(1) / (T(1) + luminanceSource);
+	historyWeight *= T(1) / (T(1) + luminanceHistory);
+
+	// The max() guards against a division by zero
+	const vector<T, 3> finalVal = (sourceSample * sourceWeight + history * historyWeight) / max(sourceWeight + historyWeight, T(0.00001));
+	return finalVal;
+}

+ 1 - 1
Samples/PhysicsPlayground/Main.cpp

@@ -215,7 +215,7 @@ Error MyApp::userMainLoop(Bool& quit, [[maybe_unused]] Second elapsedTime)
 
 
 	if(in.getKey(KeyCode::kO) == 1)
 	if(in.getKey(KeyCode::kO) == 1)
 	{
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "RtMaterialFetchDbg") ? "" : "RtMaterialFetchDbg");
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "HistoryLen") ? "" : "HistoryLen");
 	}
 	}
 
 
 	if(Input::getSingleton().getKey(KeyCode::kP) == 1)
 	if(Input::getSingleton().getKey(KeyCode::kP) == 1)