소스 검색

Add some classification in reflections

Panagiotis Christopoulos Charitos 1 년 전
부모
커밋
5f95a5a1b7

+ 0 - 2
AnKi/Renderer/DepthDownscale.cpp

@@ -39,8 +39,6 @@ Error DepthDownscale::initInternal()
 
 	m_mipCount = 2;
 
-	const UVec2 lastMipSize = UVec2(width, height) >> (m_mipCount - 1);
-
 	const Bool preferCompute = g_preferComputeCVar;
 
 	// Create RT descr

+ 51 - 6
AnKi/Renderer/Reflections.cpp

@@ -62,9 +62,12 @@ Error Reflections::init()
 								 "BilateralDenoiseHorizontal"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_rtProg, m_ssrGrProg, "Ssr"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_rtProg, m_probeFallbackGrProg, "ReflectionProbeFallback"));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Reflections.ankiprogbin", mutation, m_rtProg, m_tileClassificationGrProg, "Classification"));
 
-	m_sbtRecordSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
-										GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)));
+	m_sbtRecordSize = (bRtReflections)
+						  ? getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment,
+											  GrManager::getSingleton().getDeviceCapabilities().m_shaderGroupHandleSize + U32(sizeof(UVec4)))
+						  : 0;
 
 	m_transientRtDesc1 = getRenderer().create2DRenderTargetDescription(
 		getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(), Format::kR16G16B16A16_Sfloat, "Reflections #1");
@@ -94,6 +97,11 @@ Error Reflections::init()
 	texInit.setName("ReflectionsMoments #2");
 	m_momentsTextures[1] = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kSrvCompute);
 
+	m_classTileMapRtDesc = getRenderer().create2DRenderTargetDescription((getRenderer().getInternalResolution().x() + kTileSize - 1) / kTileSize,
+																		 (getRenderer().getInternalResolution().y() + kTileSize - 1) / kTileSize,
+																		 Format::kR8_Uint, "ReflClassTileMap");
+	m_classTileMapRtDesc.bake();
+
 	{
 		BufferInitInfo buffInit("ReflRayGenIndirectArgs");
 		buffInit.m_size = sizeof(DispatchIndirectArgs) * 2;
@@ -153,12 +161,40 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 	const RenderTargetHandle transientRt2 = rgraph.newRenderTarget(m_transientRtDesc2);
 	const RenderTargetHandle hitPosAndDepthRt = rgraph.newRenderTarget(m_hitPosAndDepthRtDesc);
 	const RenderTargetHandle hitPosRt = rgraph.newRenderTarget(m_hitPosRtDesc);
+	const RenderTargetHandle classTileMapRt = rgraph.newRenderTarget(m_classTileMapRtDesc);
 
 	ReflectionConstants consts;
 	consts.m_ssrStepIncrement = g_ssrStepIncrementCVar;
 	consts.m_ssrMaxIterations = g_ssrMaxIterationsCVar;
 	consts.m_roughnessCutoffToGiEdges = Vec2(g_roughnessCutoffToGiEdge0, g_roughnessCutoffToGiEdge1);
 
+	// Classification
+	{
+		// Create the pass
+		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("ReflTileClassification");
+
+		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(1), TextureUsageBit::kSrvCompute);
+		rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
+
+		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kUavCompute);
+
+		rpass.setWork([this, classTileMapRt, consts](RenderPassWorkContext& rgraphCtx) {
+			ANKI_TRACE_SCOPED_EVENT(Reflections);
+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
+
+			cmdb.bindShaderProgram(m_tileClassificationGrProg.get());
+
+			rgraphCtx.bindSrv(0, 0, getRenderer().getGBuffer().getColorRt(1));
+			rgraphCtx.bindSrv(1, 0, getRenderer().getGBuffer().getDepthRt());
+			rgraphCtx.bindUav(0, 0, classTileMapRt);
+
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			dispatchPPCompute(cmdb, kTileSize / 2, kTileSize, getRenderer().getInternalResolution().x() / 2,
+							  getRenderer().getInternalResolution().y());
+		});
+	}
+
 	// SSR
 	BufferView pixelsFailedSsrBuff;
 	BufferHandle indirectArgsHandle;
@@ -180,12 +216,13 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(getRenderer().getBloom().getPyramidRt(), TextureUsageBit::kSrvCompute);
 		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvCompute);
+		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kSrvCompute);
 
 		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavCompute);
 		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavCompute);
 		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kUavCompute);
 
-		rpass.setWork([this, transientRt1, hitPosAndDepthRt, &ctx, pixelsFailedSsrBuff, consts](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, transientRt1, hitPosAndDepthRt, &ctx, pixelsFailedSsrBuff, consts, classTileMapRt](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(Reflections);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
@@ -203,6 +240,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(6, 0, getRenderer().getClusterBinning().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe));
 			cmdb.bindSrv(7, 0, getRenderer().getClusterBinning().getClustersBuffer());
 			rgraphCtx.bindSrv(8, 0, getRenderer().getShadowMapping().getShadowmapRt());
+			rgraphCtx.bindSrv(9, 0, classTileMapRt);
 
 			rgraphCtx.bindUav(0, 0, transientRt1);
 			rgraphCtx.bindUav(1, 0, hitPosAndDepthRt);
@@ -453,11 +491,13 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(readMomentsRt, TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getMotionVectors().getMotionVectorsRt(), TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(hitPosRt, TextureUsageBit::kSrvCompute);
+		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kSrvCompute);
 
 		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavCompute);
 		rpass.newTextureDependency(writeMomentsRt, TextureUsageBit::kUavCompute);
 
-		rpass.setWork([this, &ctx, transientRt1, transientRt2, mainRt, readMomentsRt, writeMomentsRt, hitPosRt](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, &ctx, transientRt1, transientRt2, mainRt, readMomentsRt, writeMomentsRt, hitPosRt,
+					   classTileMapRt](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(Reflections);
 
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -471,6 +511,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindSrv(2, 0, readMomentsRt);
 			rgraphCtx.bindSrv(3, 0, getRenderer().getMotionVectors().getMotionVectorsRt());
 			rgraphCtx.bindSrv(4, 0, hitPosRt);
+			rgraphCtx.bindSrv(5, 0, classTileMapRt);
 
 			rgraphCtx.bindUav(0, 0, transientRt1);
 			rgraphCtx.bindUav(1, 0, writeMomentsRt);
@@ -488,10 +529,11 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(transientRt1, TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(writeMomentsRt, TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(1), TextureUsageBit::kSrvCompute);
+		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kSrvCompute);
 
 		rpass.newTextureDependency(transientRt2, TextureUsageBit::kUavCompute);
 
-		rpass.setWork([this, &ctx, transientRt1, transientRt2, writeMomentsRt, consts](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, transientRt1, transientRt2, writeMomentsRt, consts, classTileMapRt](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(Reflections);
 
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -501,6 +543,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindSrv(0, 0, transientRt1);
 			rgraphCtx.bindSrv(1, 0, writeMomentsRt);
 			rgraphCtx.bindSrv(2, 0, getRenderer().getGBuffer().getColorRt(1));
+			rgraphCtx.bindSrv(3, 0, classTileMapRt);
 
 			rgraphCtx.bindUav(0, 0, transientRt2);
 
@@ -517,8 +560,9 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(transientRt2, TextureUsageBit::kSrvCompute);
 
 		rpass.newTextureDependency(mainRt, TextureUsageBit::kUavCompute);
+		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kSrvCompute);
 
-		rpass.setWork([this, &ctx, transientRt2, mainRt](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, transientRt2, mainRt, classTileMapRt](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(Reflections);
 
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -526,6 +570,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindShaderProgram(m_verticalBilateralDenoisingGrProg.get());
 
 			rgraphCtx.bindSrv(0, 0, transientRt2);
+			rgraphCtx.bindSrv(1, 0, classTileMapRt);
 
 			rgraphCtx.bindUav(0, 0, mainRt);
 			cmdb.bindUav(1, 0, BufferView(m_indirectArgsBuffer.get()));

+ 4 - 0
AnKi/Renderer/Reflections.h

@@ -57,11 +57,13 @@ public:
 	ShaderProgramPtr m_verticalBilateralDenoisingGrProg;
 	ShaderProgramPtr m_horizontalBilateralDenoisingGrProg;
 	ShaderProgramPtr m_probeFallbackGrProg;
+	ShaderProgramPtr m_tileClassificationGrProg;
 
 	RenderTargetDesc m_transientRtDesc1;
 	RenderTargetDesc m_transientRtDesc2;
 	RenderTargetDesc m_hitPosAndDepthRtDesc;
 	RenderTargetDesc m_hitPosRtDesc;
+	RenderTargetDesc m_classTileMapRtDesc;
 
 	/// 2 x DispatchIndirectArgs. 1st is for RT and 2nd for probe fallback
 	BufferPtr m_indirectArgsBuffer;
@@ -74,6 +76,8 @@ public:
 	U32 m_rayGenShaderGroupIdx = 0;
 	U32 m_missShaderGroupIdx = 0;
 
+	static constexpr U32 kTileSize = 32;
+
 	class
 	{
 	public:

+ 1 - 1
AnKi/Renderer/Renderer.cpp

@@ -780,7 +780,7 @@ Error Renderer::render(Texture* presentTex)
 		pass.newTextureDependency(ctx.m_swapchainRenderTarget, TextureUsageBit::kRtvDsvWrite);
 		pass.newTextureDependency(m_finalComposite->getRenderTarget(), TextureUsageBit::kSrvPixel);
 
-		pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			cmdb.setViewport(0, 0, m_swapchainResolution.x(), m_swapchainResolution.y());
 

+ 0 - 1
AnKi/Renderer/Tonemapping.cpp

@@ -18,7 +18,6 @@ Error Tonemapping::init()
 	{
 		m_expAndAvgLum.m_inputTexMip =
 			(getRenderer().getBloom().getPyramidTextureMipmapCount() > 2) ? getRenderer().getBloom().getPyramidTextureMipmapCount() - 2 : 0;
-		const UVec2 size = getRenderer().getBloom().getPyramidTextureSize() >> m_expAndAvgLum.m_inputTexMip;
 
 		// Create program
 		ANKI_CHECK(loadShaderProgram("ShaderBinaries/TonemappingAverageLuminance.ankiprogbin", m_expAndAvgLum.m_prog, m_expAndAvgLum.m_grProg));

+ 3 - 3
AnKi/Shaders/LightShading.ankiprog

@@ -44,9 +44,9 @@ Texture2D<RVec4> g_integrationLut : register(t12);
 RVec4 main(VertOut input) : SV_TARGET0
 {
 	const Vec2 uv = input.m_uv;
-	const F32 depth = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	const Vec2 ndc = uvToNdc(uv);
 	const UVec2 coord = input.m_svPosition;
+	const F32 depth = g_depthTex[coord].r;
 
 	if(depth == 1.0)
 	{
@@ -70,7 +70,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
 
 	// Apply SSAO
-	const RVec4 ssaoAndBentNormals = g_ssaoTex[coord];
+	const RVec4 ssaoAndBentNormals = g_ssaoTex.SampleLevel(g_trilinearClampSampler, uv, 0.0);
 	const RF32 ssao = ssaoAndBentNormals.w;
 	const RVec3 bentNormal = ssaoAndBentNormals.xyz;
 	gbuffer.m_diffuse *= ssao;
@@ -95,7 +95,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 	}
 
 	// SM
-	RVec4 resolvedSm = g_resolvedShadowsTex[coord];
+	RVec4 resolvedSm = g_resolvedShadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
 	U32 resolvedSmIdx = 0u;
 
 	// Dir light

+ 143 - 6
AnKi/Shaders/Reflections.ankiprog

@@ -5,6 +5,7 @@
 
 #pragma anki mutator SSR_SAMPLE_GBUFFER 0 1
 
+#pragma anki technique Classification comp mutators
 #pragma anki technique Ssr comp
 #pragma anki technique ReflectionProbeFallback comp mutators
 #pragma anki technique RtMaterialFetch rgen miss mutators
@@ -35,6 +36,17 @@ constexpr Bool kExtraSsrRejection = true;
 constexpr Bool kDebugSsr = false;
 constexpr Bool kSsrHallucinate = true;
 constexpr Bool kSsrHallucinateDebug = false;
+constexpr F32 kTemporalSourceWeight = 0.005;
+#define TILE_SIZE 32
+
+// The classes a tile can be given by the Classification technique. The order matters because consumers compare against kClassSky
+enum
+{
+	kClassNormal, // Always 1st: the only class below kClassSky, so the only one that doesn't hit the "tileClass >= kClassSky" early-outs
+	kClassSky, // Always 2nd: the denoising techniques skip their work for every class that is >= kClassSky
+	kClassMirror,
+	kClassVeryRough,
+};
 
 // Functions
 Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProbes, Vec3 worldPos, Vec3 worldNormal,
@@ -88,6 +100,85 @@ void decodeColorDepthAndSampleCount(Vec4 rgba, out Vec3 color, out F32 depth, ou
 	depth = rgba.w;
 }
 
+// ===========================================================================
+// Classification                                                            =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_Classification)
+
+// Assigns one of the kClass* values to every tile and writes it, one texel per thread group, to g_classTileMap.
+// Each thread group gathers the min/max roughness and the number of sky (depth >= 1.0) pixels it visited.
+
+// NOTE(review): this sampler is not referenced by the code of this technique. Confirm if it's still needed
+SamplerState g_trilinearClampSampler : register(s0);
+
+Texture2D<Vec4> g_gbufferRt1 : register(t0);
+Texture2D<Vec4> g_depthTex : register(t1);
+
+// Named like the g_classTileMap SRV that the other techniques read
+RWTexture2D<UVec4> g_classTileMap : register(u0);
+
+ANKI_FAST_CONSTANTS(ReflectionConstants, g_consts)
+
+// Per-group accumulators. The roughness values are stored as the bit pattern of the float (see asuint below)
+groupshared U32 g_minRoughness;
+groupshared U32 g_maxRoughness;
+groupshared U32 g_allSky; // Number of threads of the group that saw a sky pixel
+
+[NumThreads(TILE_SIZE / 2, TILE_SIZE, 1)] void main(U32 svGroupIndex : SV_GroupIndex, UVec2 svDispatchThreadId : SV_DispatchThreadID,
+													UVec2 svGroupId : SV_GroupID)
+{
+	// The first thread resets the accumulators. Min starts at 1.0 and max at 0.0 so that any sampled roughness replaces them
+	if(svGroupIndex == 0)
+	{
+		g_minRoughness = asuint(1.0);
+		g_maxRoughness = asuint(0.0);
+		g_allSky = 0;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	UVec2 fullViewportSize;
+	g_gbufferRt1.GetDimensions(fullViewportSize.x, fullViewportSize.y);
+
+	// The dispatch covers half the width. Clamp the threads that fall outside and then pick one of the 2 horizontally adjacent
+	// pixels depending on the parity of the row. That way a group of TILE_SIZE/2 x TILE_SIZE threads spans TILE_SIZE x TILE_SIZE pixels
+	const UVec2 realCoord = min(svDispatchThreadId, fullViewportSize / UVec2(2, 1) - 1u);
+	const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
+
+	const F32 depth = g_depthTex[logicalCoord].x;
+
+	if(depth < 1.0)
+	{
+		const Vec4 rt1 = g_gbufferRt1[logicalCoord];
+		const F32 roughness = unpackRoughnessFromGBuffer(rt1);
+
+		// Integer atomics on the float bits. NOTE(review): this assumes that roughness is never negative, otherwise the unsigned
+		// ordering of the bit patterns doesn't match the ordering of the floats
+		U32 orig;
+		InterlockedMax(g_maxRoughness, asuint(roughness), orig);
+		InterlockedMin(g_minRoughness, asuint(roughness), orig);
+	}
+	else
+	{
+		InterlockedAdd(g_allSky, 1u);
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// The first thread decides the class of the tile and writes it. The order of the checks is the priority of the classes
+	if(svGroupIndex == 0)
+	{
+		U32 tileClass = 0;
+		if(g_allSky == TILE_SIZE / 2 * TILE_SIZE)
+		{
+			// Every thread of the group saw sky
+			tileClass = kClassSky;
+		}
+		else if(asfloat(g_minRoughness) >= g_consts.m_roughnessCutoffToGiEdges.y && g_allSky == 0)
+		{
+			// No sky at all and even the smoothest pixel is at or above the cutoff
+			tileClass = kClassVeryRough;
+		}
+		else if(asfloat(g_maxRoughness) <= kMinRoughness * 2.0)
+		{
+			// NOTE(review): unlike the very rough case this one doesn't check g_allSky, so a tile that mixes sky with very smooth
+			// surfaces ends up as mirror. Confirm that this is intended
+			tileClass = kClassMirror;
+		}
+		else
+		{
+			tileClass = kClassNormal;
+		}
+
+		g_classTileMap[svGroupId] = tileClass;
+	}
+}
+#endif
+
 // ===========================================================================
 // SSR                                                                       =
 // ===========================================================================
@@ -106,6 +197,7 @@ Texture2D<Vec4> g_lightBufferRt : register(t5);
 StructuredBuffer<GlobalIlluminationProbe> g_giProbes : register(t6);
 StructuredBuffer<Cluster> g_clusters : register(t7);
 Texture2D<Vec4> g_shadowAtlasTex : register(t8);
+Texture2D<UVec4> g_classTileMap : register(t9);
 
 RWTexture2D<Vec4> g_colorAndPdfTex : register(u0);
 RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
@@ -270,11 +362,41 @@ void bestCandidateToHallucinate(IVec2 svGroupThreadId, IVec2 offset, F32 depth,
 	const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
 	const Vec2 uv = (Vec2(logicalCoord) + 0.5) / Vec2(halfViewportSize.x * 2u, halfViewportSize.y);
 
-	// Read stuff
+	// Fast path 1
+	const U32 tileClass = g_classTileMap[logicalCoord / TILE_SIZE];
+	if(tileClass == kClassSky)
+	{
+		g_colorAndPdfTex[realCoord] = 0.0;
+		g_hitPosAndDepthTex[realCoord] = 0.0;
+		return;
+	}
+
 	const F32 depth = g_depthTex[logicalCoord].x;
-	const Vec4 rt1 = g_gbufferRt1[logicalCoord];
 	const Vec4 rt2 = g_gbufferRt2[logicalCoord];
 	const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
+
+	// Fast path 2
+	if(tileClass == kClassVeryRough)
+	{
+		Vec4 worldPos = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjection, Vec4(uvToNdc(uv), depth, 1.0));
+		worldPos.xyz /= worldPos.w;
+
+		const Vec3 reflDir = reflect(normalize(worldPos.xyz - g_globalRendererConstants.m_cameraPosition), worldNormal);
+
+		Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(logicalCoord.xy + 0.5, depth));
+
+		const Vec3 col = sampleGiProbes<F32>(cluster, g_giProbes, reflDir, worldPos.xyz, g_trilinearClampSampler);
+
+		Vec3 worldHitPos = worldPos + reflDir * 1.0;
+		worldHitPos -= g_globalRendererConstants.m_cameraPosition;
+
+		g_colorAndPdfTex[realCoord] = Vec4(col, 1.0);
+		g_hitPosAndDepthTex[realCoord] = Vec4(worldHitPos, 1.0 - depth);
+		return;
+	}
+
+	// Read stuff
+	const Vec4 rt1 = g_gbufferRt1[logicalCoord];
 	const Vec3 viewNormal = mul(g_globalRendererConstants.m_matrices.m_view, Vec4(worldNormal, 0.0));
 	const F32 roughness = unpackRoughnessFromGBuffer(rt1);
 
@@ -843,6 +965,7 @@ Texture2D<Vec4> g_historyTex : register(t1);
 Texture2D<Vec4> g_momentsHistoryTex : register(t2);
 Texture2D<Vec4> g_motionVectorsTex : register(t3);
 Texture2D<Vec4> g_hitPosTex : register(t4);
+Texture2D<UVec4> g_classTileMap : register(t5);
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 RWTexture2D<Vec4> g_momentsTex : register(u1);
@@ -967,7 +1090,9 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 	const UVec2 coord = min(svDispatchThreadId, textureSize - 1);
 	const Vec2 uv = (Vec2(coord) + 0.5f) / textureSize;
 
-	if(kDisableDenoising)
+	const U32 tileClass = g_classTileMap[coord / TILE_SIZE];
+
+	if(kDisableDenoising || tileClass >= kClassSky)
 	{
 		g_outTex[coord] = g_colorAndDepth[coord];
 		g_momentsTex[coord] = 0.0;
@@ -1003,7 +1128,7 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 	const F32 luminanceSource = computeLuminance(compressedSource);
 	const F32 luminanceHistory = computeLuminance(compressedHistory);
 
-	F32 sourceWeight = 0.1;
+	F32 sourceWeight = kTemporalSourceWeight;
 	F32 historyWeight = 1.0 - sourceWeight;
 	sourceWeight *= 1.0 / (1.0 + luminanceSource);
 	historyWeight *= 1.0 / (1.0 + luminanceHistory);
@@ -1030,6 +1155,7 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 Texture2D<Vec4> g_colorAndDepth : register(t0);
 Texture2D<Vec4> g_momentsTex : register(t1);
 Texture2D<Vec4> g_gbufferRt1 : register(t2);
+Texture2D<UVec4> g_classTileMap : register(t3);
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 
@@ -1071,11 +1197,19 @@ F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
 	const F32 refDepth = rgba.w;
 	const Vec3 centerColor = rgba.xyz;
 
+	const U32 tileClass = g_classTileMap[coord / TILE_SIZE];
+
+	if(kDisableDenoising || tileClass >= kClassSky)
+	{
+		g_outTex[coord] = encodeColorDepthAndSampleCount(centerColor, refDepth, 0u);
+		return;
+	}
+
 	const Vec4 rt1 = g_gbufferRt1[coord];
 	const F32 roughness = unpackRoughnessFromGBuffer<F32>(rt1, 0.0);
 	const F32 sqRoughness = sqrt(roughness);
 
-	if(kDisableDenoising || roughness >= g_consts.m_roughnessCutoffToGiEdges.y)
+	if(roughness >= g_consts.m_roughnessCutoffToGiEdges.y)
 	{
 		g_outTex[coord] = encodeColorDepthAndSampleCount(centerColor, refDepth, 0u);
 		return;
@@ -1122,6 +1256,7 @@ F32 computeVarianceCenter(IVec2 coord, UVec2 textureSize)
 // ===========================================================================
 #if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_BilateralDenoiseVertical)
 Texture2D<Vec4> g_colorAndDepthAndSampleCount : register(t0);
+Texture2D<UVec4> g_classTileMap : register(t1);
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u1);
@@ -1146,7 +1281,9 @@ RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u1);
 	decodeColorDepthAndSampleCount(g_colorAndDepthAndSampleCount[coord], refColor, refDepth, sampleCountu);
 	const F32 sampleCount = sampleCountu;
 
-	if(kDisableDenoising)
+	const U32 tileClass = g_classTileMap[coord / TILE_SIZE];
+
+	if(kDisableDenoising || tileClass >= kClassSky)
 	{
 		g_outTex[coord] = Vec4(refColor, 1.0);
 		return;