2 years ago · ae2918296f
--- a/AnKi/Shaders/RtShadowsDenoise.ankiprog
+++ b/AnKi/Shaders/RtShadowsDenoise.ankiprog
@@ -3,39 +3,35 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				+#pragma anki hlsl
			
 
				+
			
 
				 #pragma anki mutator BLUR_ORIENTATION 0 1
			
 
				 
			
 
				 #pragma anki start comp
			
 
				 
			
 
				-#include <AnKi/Shaders/BilateralFilter.glsl>
			
 
				-#include <AnKi/Shaders/PackFunctions.glsl>
			
 
				-#include <AnKi/Shaders/RtShadows.glsl>
			
 
				-#include <AnKi/Shaders/Functions.glsl>
			
 
				+#include <AnKi/Shaders/BilateralFilter.hlsl>
			
 
				+#include <AnKi/Shaders/PackFunctions.hlsl>
			
 
				+#include <AnKi/Shaders/RtShadows.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
			
 
				 ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
			
 
				 ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
			
 
				 
			
 
				-const UVec2 kWorkgroupSize = UVec2(8u, 8u);
			
 
				-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
			
 
				-
			
 
				-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
			
 
				-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
			
 
				-layout(set = 0, binding = 2) uniform utexture2D u_inTex;
			
 
				-layout(set = 0, binding = 3) uniform texture2D u_depthTex;
			
 
				-layout(set = 0, binding = 4) uniform texture2D u_gbuffer2Tex;
			
 
				-layout(set = 0, binding = 5) uniform texture2D u_momentsTex;
			
 
				-layout(set = 0, binding = 6) uniform texture2D u_historyLengthTex;
			
 
				-layout(set = 0, binding = 7) writeonly uniform uimage2D u_outImg;
			
 
				+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
			
 
				+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
			
 
				+[[vk::binding(2)]] Texture2D<UVec4> g_inTex;
			
 
				+[[vk::binding(3)]] Texture2D g_depthTex;
			
 
				+[[vk::binding(4)]] Texture2D g_gbuffer2Tex;
			
 
				+[[vk::binding(5)]] Texture2D g_momentsTex;
			
 
				+[[vk::binding(6)]] Texture2D g_historyLengthTex;
			
 
				+[[vk::binding(7)]] RWTexture2D<UVec4> g_outImg;
			
 
				 
			
 
				-layout(push_constant, row_major, std140) uniform b_pc
			
 
				-{
			
 
				-	RtShadowsDenoiseUniforms u_unis;
			
 
				-};
			
 
				+[[vk::push_constant]] ConstantBuffer<RtShadowsDenoiseUniforms> g_unis;
			
 
				 
			
 
				 // Transforms Vec4(ndc, depth, 1.0) by invViewProjMat and divides xyz by w.
			
 
				 // The result is named worldPos, so it is presumably a world-space position (confirm).
			
 
				 Vec3 unproject(Vec2 ndc, F32 depth)
			
 
				 {
			
 
				 	// The GLSL operator* form below is replaced by the HLSL mul() intrinsic.
			
 
				-	const Vec4 worldPos4 = u_unis.invViewProjMat * Vec4(ndc, depth, 1.0);
			
 
				+	const Vec4 worldPos4 = mul(g_unis.invViewProjMat, Vec4(ndc, depth, 1.0));
			
 
				 	// Homogeneous divide.
			
 
				 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
			
 
				 	return worldPos;
			
 
				 }
			
@@ -44,8 +40,11 @@ F32 computeVarianceCenter(Vec2 uv)
 
				 {
			
 
				 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
			
 
				 	const I32 radius = 1;
			
 
				-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_momentsTex, 0).xy);
			
 
				-	Vec2 sumMoments = Vec2(0.0);
			
 
				+	Vec2 texSize;
			
 
				+	F32 mipCount;
			
 
				+	g_momentsTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
			
 
				+	const Vec2 texelSize = 1.0 / texSize;
			
 
				+	Vec2 sumMoments = 0.0f;
			
 
				 
			
 
				 	for(I32 yy = -radius; yy <= radius; yy++)
			
 
				 	{
			
@@ -53,40 +52,40 @@ F32 computeVarianceCenter(Vec2 uv)
 
				 		{
			
 
				 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
			
 
				 			const F32 k = kernel[abs(xx)][abs(yy)];
			
 
				-			sumMoments += textureLod(u_momentsTex, u_linearAnyClampSampler, newUv, 0.0).xy * k;
			
 
				+			sumMoments += g_momentsTex.SampleLevel(g_linearAnyClampSampler, newUv, 0.0).xy * k;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	return abs(sumMoments.y - sumMoments.x * sumMoments.x);
			
 
				 }
			
 
				 
			
 
				-void main()
			
 
				+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 {
			
 
				-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kOutImageSize))
			
 
				+	if(any(svDispatchThreadId.xy >= kOutImageSize))
			
 
				 	{
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kOutImageSize);
			
 
				+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
			
 
				 
			
 
				 	// Reference
			
 
				-	const F32 depthCenter = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
			
 
				+	const F32 depthCenter = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
			
 
				 	if(depthCenter == 1.0)
			
 
				 	{
			
 
				-		imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), UVec4(0));
			
 
				+		g_outImg[svDispatchThreadId.xy] = 0;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
			
 
				-	const Vec3 normalCenter = unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, uv, 0.0));
			
 
				+	const Vec3 positionCenter = unproject(uvToNdc(uv), depthCenter);
			
 
				+	const Vec3 normalCenter = unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0));
			
 
				 
			
 
				-	F32 shadowFactors[kMaxRtShadowLayers];
			
 
				-	unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, uv, 0.0), shadowFactors);
			
 
				+	RF32 shadowFactors[kMaxRtShadowLayers];
			
 
				+	unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowFactors);
			
 
				 
			
 
				 	// Decide the amount of blurring
			
 
				 	const F32 varianceCenter = computeVarianceCenter(uv);
			
 
				 	const F32 historyLength =
			
 
				-		textureLod(u_historyLengthTex, u_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
			
 
				+		g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
			
 
				 
			
 
				 	U32 sampleCount;
			
 
				 	if(historyLength < 2.0)
			
@@ -106,7 +105,7 @@ void main()
 
				 		F32 blur = varianceCenter * 100.0;
			
 
				 		blur = min(1.0, blur);
			
 
				 
			
 
				-		const F32 sampleCountf = mix(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
			
 
				+		const F32 sampleCountf = lerp(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
			
 
				 
			
 
				 		sampleCount = U32(sampleCountf);
			
 
				 	}
			
@@ -125,13 +124,13 @@ void main()
 
				 		const Vec2 sampleUv = Vec2(uv.x, uv.y + F32(i) * texelSize.y);
			
 
				 #endif
			
 
				 
			
 
				-		F32 localShadowFactors[kMaxRtShadowLayers];
			
 
				-		unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
			
 
				+		RF32 localShadowFactors[kMaxRtShadowLayers];
			
 
				+		unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
			
 
				 
			
 
				-		const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				-		const Vec3 positionTap = unproject(UV_TO_NDC(sampleUv), depthTap);
			
 
				+		const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				+		const Vec3 positionTap = unproject(uvToNdc(sampleUv), depthTap);
			
 
				 		const Vec3 normalTap =
			
 
				-			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
			
 
				+			unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0));
			
 
				 
			
 
				 		// F32 w = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
			
 
				 		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
			
@@ -152,7 +151,7 @@ void main()
 
				 		shadowFactors[i] /= weight;
			
 
				 	}
			
 
				 
			
 
				-	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), packRtShadows(shadowFactors));
			
 
				+	g_outImg[svDispatchThreadId.xy] = packRtShadows(shadowFactors);
			
 
				 }
			
 
				 
			
 
				 #pragma anki end
			
--- a/AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog
+++ b/AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog
@@ -3,48 +3,44 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				+#pragma anki hlsl
			
 
				+
			
 
				 #pragma anki mutator LAST_PASS 0 1
			
 
				 
			
 
				 #pragma anki start comp
			
 
				 
			
 
				-#include <AnKi/Shaders/RtShadows.glsl>
			
 
				-#include <AnKi/Shaders/BilateralFilter.glsl>
			
 
				-#include <AnKi/Shaders/Functions.glsl>
			
 
				+#include <AnKi/Shaders/RtShadows.hlsl>
			
 
				+#include <AnKi/Shaders/BilateralFilter.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
			
 
				 
			
 
				-const UVec2 kWorkgroupSize = UVec2(8, 8);
			
 
				-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
			
 
				-
			
 
				-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
			
 
				-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
			
 
				-layout(set = 0, binding = 2) uniform texture2D u_depthTex;
			
 
				-layout(set = 0, binding = 3) uniform utexture2D u_shadowsTex;
			
 
				-layout(set = 0, binding = 4) uniform texture2D u_varianceTex;
			
 
				+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
			
 
				+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
			
 
				+[[vk::binding(2)]] Texture2D g_depthTex;
			
 
				+[[vk::binding(3)]] Texture2D<UVec4> g_shadowsTex;
			
 
				+[[vk::binding(4)]] Texture2D g_varianceTex;
			
 
				 
			
 
				-layout(set = 0, binding = 5) uniform uimage2D u_shadowsImage;
			
 
				+[[vk::binding(5)]] RWTexture2D<UVec4> g_shadowsImage;
			
 
				 #if !LAST_PASS
			
 
				-layout(set = 0, binding = 6) uniform image2D u_varianceImage;
			
 
				+[[vk::binding(6)]] RWTexture2D<Vec4> g_varianceImage;
			
 
				 #endif
			
 
				 
			
 
				-layout(push_constant, row_major, std140) uniform b_pc
			
 
				-{
			
 
				-	Mat4 u_invProjMat;
			
 
				-};
			
 
				+[[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
			
 
				 
			
 
				-const I32 kConfolutionRadius = 2;
			
 
				-const F32 kKernelWeights[kConfolutionRadius + 1] = F32[3](1.0, 2.0 / 3.0, 1.0 / 6.0);
			
 
				+constexpr I32 kConfolutionRadius = 2;
			
 
				+constexpr F32 kKernelWeights[kConfolutionRadius + 1] = {1.0, 2.0 / 3.0, 1.0 / 6.0};
			
 
				 
			
 
				 // Converts uv to NDC, transforms Vec4(ndc, depth, 1.0) by the inverse projection matrix
			
 
				 // push constant and divides xyz by w. Per the function name the result is a view-space position.
			
 
				 Vec3 toViewspace(Vec2 uv, F32 depth)
			
 
				 {
			
 
				 	// The UV_TO_NDC macro and operator* are replaced by uvToNdc() and the mul() intrinsic.
			
 
				-	const Vec4 pos4 = u_invProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
			
 
				+	const Vec4 pos4 = mul(g_invProjMat, Vec4(uvToNdc(uv), depth, 1.0));
			
 
				 	// Homogeneous divide.
			
 
				 	const Vec3 pos = pos4.xyz / pos4.w;
			
 
				 	return pos;
			
 
				 }
			
 
				 
			
 
				-F32 computeShadowsLuma(F32 shadowLayers[kMaxRtShadowLayers])
			
 
				+F32 computeShadowsLuma(RF32 shadowLayers[kMaxRtShadowLayers])
			
 
				 {
			
 
				-	F32 l = 0.0;
			
 
				+	RF32 l = 0.0;
			
 
				 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
			
 
				 	{
			
 
				 		l += shadowLayers[i];
			
@@ -56,7 +52,10 @@ F32 computeVarianceCenter(Vec2 uv)
 
				 {
			
 
				 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
			
 
				 	const I32 radius = 1;
			
 
				-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_varianceTex, 0).xy);
			
 
				+	Vec2 texSize;
			
 
				+	F32 mipCount;
			
 
				+	g_varianceTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
			
 
				+	const Vec2 texelSize = 1.0 / texSize;
			
 
				 	F32 sum = 0.0;
			
 
				 
			
 
				 	for(I32 yy = -radius; yy <= radius; yy++)
			
@@ -65,29 +64,29 @@ F32 computeVarianceCenter(Vec2 uv)
 
				 		{
			
 
				 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
			
 
				 			const F32 k = kernel[abs(xx)][abs(yy)];
			
 
				-			sum += textureLod(u_varianceTex, u_linearAnyClampSampler, newUv, 0.0).r * k;
			
 
				+			sum += g_varianceTex.SampleLevel(g_linearAnyClampSampler, newUv, 0.0).r * k;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	return sum;
			
 
				 }
			
 
				 
			
 
				-void main()
			
 
				+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 {
			
 
				-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kFramebufferSize))
			
 
				+	if(any(svDispatchThreadId.xy >= kFramebufferSize))
			
 
				 	{
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kFramebufferSize);
			
 
				+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
			
 
				 
			
 
				-	const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
			
 
				+	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
			
 
				 	if(depth == 1.0)
			
 
				 	{
			
 
				 		// Sky
			
 
				-		imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
			
 
				+		g_shadowsImage[svDispatchThreadId.xy] = 0u;
			
 
				 #if !LAST_PASS
			
 
				-		imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
			
 
				+		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
			
 
				 #endif
			
 
				 		return;
			
 
				 	}
			
@@ -97,21 +96,24 @@ void main()
 
				 	const Vec3 positionCenter = toViewspace(uv, depthCenter);
			
 
				 
			
 
				 	// Read center luma
			
 
				-	F32 shadowLayers[kMaxRtShadowLayers];
			
 
				-	unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, uv, 0.0), shadowLayers);
			
 
				+	RF32 shadowLayers[kMaxRtShadowLayers];
			
 
				+	unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowLayers);
			
 
				 	const F32 refLuma = computeShadowsLuma(shadowLayers);
			
 
				 
			
 
				 	// Center variance
			
 
				 	const F32 varianceCenter = computeVarianceCenter(uv);
			
 
				 
			
 
				 	// Init the sums
			
 
				-	F32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				+	RF32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				 	zeroRtShadowLayers(sumShadowLayers);
			
 
				 	F32 sumVariance = 0.0;
			
 
				 	F32 sumWeight = 0.0;
			
 
				 
			
 
				 	// Convolve
			
 
				-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_shadowsTex, 0).xy);
			
 
				+	Vec2 texSize;
			
 
				+	F32 mipCount;
			
 
				+	g_shadowsTex.GetDimensions(0u, texSize.x, texSize.y, mipCount);
			
 
				+	const Vec2 texelSize = 1.0 / texSize;
			
 
				 	for(I32 offsetx = -kConfolutionRadius; offsetx <= kConfolutionRadius; offsetx++)
			
 
				 	{
			
 
				 		for(I32 offsety = -kConfolutionRadius; offsety <= kConfolutionRadius; offsety++)
			
@@ -119,18 +121,18 @@ void main()
 
				 			const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
			
 
				 
			
 
				 			// Read shadows
			
 
				-			F32 shadowLayers[kMaxRtShadowLayers];
			
 
				-			unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				+			RF32 shadowLayers[kMaxRtShadowLayers];
			
 
				+			unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				 
			
 
				 			// Compute luma weight
			
 
				 			const F32 luma = computeShadowsLuma(shadowLayers);
			
 
				-			const F32 variance = textureLod(u_varianceTex, u_linearAnyClampSampler, sampleUv, 0.0).x;
			
 
				+			const F32 variance = g_varianceTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
			
 
				 			const F32 sigmaL = 4.0;
			
 
				 			const F32 lumaDiff = abs(luma - refLuma);
			
 
				-			const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonf)));
			
 
				+			const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonF32)));
			
 
				 
			
 
				 			// Set the current sample
			
 
				-			const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				+			const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				 			const Vec3 positionTap = toViewspace(sampleUv, depthTap);
			
 
				 
			
 
				 			// Do bilateral
			
@@ -152,7 +154,7 @@ void main()
 
				 	}
			
 
				 
			
 
				 	// Normalize
			
 
				-	sumWeight += kEpsilonf;
			
 
				+	sumWeight += kEpsilonF32;
			
 
				 
			
 
				 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
			
 
				 	{
			
@@ -162,9 +164,9 @@ void main()
 
				 	sumVariance /= (sumWeight * sumWeight);
			
 
				 
			
 
				 	// Store
			
 
				-	imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), packRtShadows(sumShadowLayers));
			
 
				+	g_shadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
			
 
				 #if !LAST_PASS
			
 
				-	imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(sumVariance, 0.0, 0.0, 0.0));
			
 
				+	g_varianceImage[svDispatchThreadId.xy] = Vec4(sumVariance, 0.0, 0.0, 0.0);
			
 
				 #endif
			
 
				 }
			
 
				 #pragma anki end
			
--- a/AnKi/Shaders/RtShadowsSvgfVariance.ankiprog
+++ b/AnKi/Shaders/RtShadowsSvgfVariance.ankiprog
@@ -3,60 +3,56 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				+#pragma anki hlsl
			
 
				+
			
 
				 #pragma anki start comp
			
 
				 
			
 
				-#include <AnKi/Shaders/RtShadows.glsl>
			
 
				-#include <AnKi/Shaders/BilateralFilter.glsl>
			
 
				-#include <AnKi/Shaders/Functions.glsl>
			
 
				+#include <AnKi/Shaders/RtShadows.hlsl>
			
 
				+#include <AnKi/Shaders/BilateralFilter.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
			
 
				 
			
 
				-const UVec2 kWorkgroupSize = UVec2(8, 8);
			
 
				-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
			
 
				-
			
 
				-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
			
 
				-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
			
 
				-layout(set = 0, binding = 2) uniform utexture2D u_shadowsTex;
			
 
				-layout(set = 0, binding = 3) uniform texture2D u_momentsTex;
			
 
				-layout(set = 0, binding = 4) uniform texture2D u_historyLengthTex;
			
 
				-layout(set = 0, binding = 5) uniform texture2D u_depthTex;
			
 
				+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
			
 
				+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
			
 
				+[[vk::binding(2)]] Texture2D<UVec4> g_shadowsTex;
			
 
				+[[vk::binding(3)]] Texture2D g_momentsTex;
			
 
				+[[vk::binding(4)]] Texture2D g_historyLengthTex;
			
 
				+[[vk::binding(5)]] Texture2D g_depthTex;
			
 
				 
			
 
				-layout(set = 0, binding = 6) uniform uimage2D u_shadowsImage;
			
 
				-layout(set = 0, binding = 7) uniform image2D u_varianceImage;
			
 
				+[[vk::binding(6)]] RWTexture2D<UVec4> g_shadowsImage;
			
 
				+[[vk::binding(7)]] RWTexture2D<Vec4> g_varianceImage;
			
 
				 
			
 
				-layout(push_constant, row_major, std140) uniform b_pc
			
 
				-{
			
 
				-	Mat4 u_invProjMat;
			
 
				-};
			
 
				+[[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
			
 
				 
			
 
				-const I32 kConvolutionRadius = 1;
			
 
				+constexpr I32 kConvolutionRadius = 1;
			
 
				 
			
 
				 // Converts uv to NDC, transforms Vec4(ndc, depth, 1.0) by the inverse projection matrix
			
 
				 // push constant and divides xyz by w. Per the function name the result is a view-space position.
			
 
				 Vec3 toViewspace(Vec2 uv, F32 depth)
			
 
				 {
			
 
				 	// The UV_TO_NDC macro and operator* are replaced by uvToNdc() and the mul() intrinsic.
			
 
				-	const Vec4 pos4 = u_invProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
			
 
				+	const Vec4 pos4 = mul(g_invProjMat, Vec4(uvToNdc(uv), depth, 1.0));
			
 
				 	// Homogeneous divide.
			
 
				 	const Vec3 pos = pos4.xyz / pos4.w;
			
 
				 	return pos;
			
 
				 }
			
 
				 
			
 
				-void main()
			
 
				+// Compute entry point, 8x8x1 threads per group. svDispatchThreadId.xy is used as the pixel coordinate.
			
 
				+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 {
			
 
				-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kFramebufferSize))
			
 
				+	// svDispatchThreadId is a UVec3 while kFramebufferSize is a UVec2: compare only .xy so the
			
 
				+	// bounds test does not rely on implicit vector truncation (same form as the other RtShadows passes).
			
 
				+	if(any(svDispatchThreadId.xy >= kFramebufferSize))
			
 
				 	{
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kFramebufferSize);
			
 
				+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
			
 
				 
			
 
				-	const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
			
 
				+	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
			
 
				 	if(depth == 1.0)
			
 
				 	{
			
 
				 		// Sky
			
 
				-		imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
			
 
				-		imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
			
 
				+		g_shadowsImage[svDispatchThreadId.xy] = 0;
			
 
				+		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const F32 historyLength = textureLod(u_historyLengthTex, u_linearAnyClampSampler, uv, 0.0).r;
			
 
				+	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
			
 
				 
			
 
				 	UVec4 outPackedShadowLayers;
			
 
				 	F32 outVariance;
			
@@ -72,9 +68,9 @@ void main()
 
				 		const Vec3 positionCenter = toViewspace(uv, depthCenter);
			
 
				 
			
 
				 		// Init the sums
			
 
				-		Vec2 sumMoments = Vec2(0.0);
			
 
				+		Vec2 sumMoments = 0.0f;
			
 
				 		F32 sumWeight = 0.0;
			
 
				-		F32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				+		RF32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				 		zeroRtShadowLayers(sumShadowLayers);
			
 
				 
			
 
				 		// Convolve
			
@@ -85,18 +81,18 @@ void main()
 
				 				const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
			
 
				 
			
 
				 				// Set the current sample
			
 
				-				const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				+				const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
			
 
				 				const Vec3 positionTap = toViewspace(sampleUv, depthTap);
			
 
				 
			
 
				 				// Do bilateral
			
 
				 				const F32 w = calculateBilateralWeightViewspacePosition(positionCenter, positionTap, 0.5);
			
 
				 
			
 
				 				// Sum
			
 
				-				const Vec2 moments = textureLod(u_momentsTex, u_linearAnyClampSampler, sampleUv, 0.0).xy;
			
 
				+				const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).xy;
			
 
				 				sumMoments += moments * w;
			
 
				 
			
 
				-				F32 shadowLayers[kMaxRtShadowLayers];
			
 
				-				unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				+				RF32 shadowLayers[kMaxRtShadowLayers];
			
 
				+				unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				 				[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
			
 
				 				{
			
 
				 					sumShadowLayers[i] += shadowLayers[i] * w;
			
@@ -106,7 +102,7 @@ void main()
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		sumWeight += kEpsilonf;
			
 
				+		sumWeight += kEpsilonF32;
			
 
				 
			
 
				 		[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
			
 
				 		{
			
@@ -125,14 +121,14 @@ void main()
 
				 	{
			
 
				 		// Stable for more than 4 frames, passthrough
			
 
				 
			
 
				-		outPackedShadowLayers = textureLod(u_shadowsTex, u_nearestAnyClampSampler, uv, 0.0);
			
 
				+		outPackedShadowLayers = g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
			
 
				 
			
 
				-		const Vec2 moments = textureLod(u_momentsTex, u_linearAnyClampSampler, uv, 0.0).xy;
			
 
				+		const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
			
 
				 		outVariance = max(0.0, moments.y - moments.x * moments.x);
			
 
				 	}
			
 
				 
			
 
				 	// Store
			
 
				-	imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), outPackedShadowLayers);
			
 
				-	imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(outVariance, 0.0, 0.0, 0.0));
			
 
				+	g_shadowsImage[svDispatchThreadId.xy] = outPackedShadowLayers;
			
 
				+	g_varianceImage[svDispatchThreadId.xy] = Vec4(outVariance, 0.0, 0.0, 0.0);
			
 
				 }
			
 
				 #pragma anki end
			
--- a/AnKi/Shaders/RtShadowsUpscale.ankiprog
+++ b/AnKi/Shaders/RtShadowsUpscale.ankiprog
@@ -3,54 +3,53 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				+#pragma anki hlsl
			
 
				+
			
 
				 #pragma anki start comp
			
 
				 
			
 
				-#include <AnKi/Shaders/RtShadows.glsl>
			
 
				-#include <AnKi/Shaders/Functions.glsl>
			
 
				-#include <AnKi/Shaders/BilateralFilter.glsl>
			
 
				+#include <AnKi/Shaders/RtShadows.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				+#include <AnKi/Shaders/BilateralFilter.hlsl>
			
 
				 
			
 
				 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
			
 
				 
			
 
				-const UVec2 kWorkgroupSize = UVec2(8u, 8u);
			
 
				-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
			
 
				-
			
 
				-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
			
 
				-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
			
 
				-layout(set = 0, binding = 2) uniform utexture2D u_quarterShadowsTex;
			
 
				-layout(set = 0, binding = 3) uniform uimage2D u_fullShadowsImage;
			
 
				-layout(set = 0, binding = 4) uniform texture2D u_quarterDepthTex;
			
 
				-layout(set = 0, binding = 5) uniform texture2D u_fullDepthTex;
			
 
				+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
			
 
				+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
			
 
				+[[vk::binding(2)]] Texture2D<UVec4> g_quarterShadowsTex;
			
 
				+[[vk::binding(3)]] RWTexture2D<UVec4> g_fullShadowsImage;
			
 
				+[[vk::binding(4)]] Texture2D g_quarterDepthTex;
			
 
				+[[vk::binding(5)]] Texture2D g_fullDepthTex;
			
 
				 
			
 
				-void main()
			
 
				+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 {
			
 
				-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kOutImageSize))
			
 
				+	if(any(svDispatchThreadId.xy >= kOutImageSize))
			
 
				 	{
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kOutImageSize);
			
 
				+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
			
 
				 
			
 
				 	// Reference
			
 
				-	const F32 depthCenter = textureLod(u_fullDepthTex, u_linearAnyClampSampler, uv, 0.0).x;
			
 
				+	const F32 depthCenter = g_fullDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
			
 
				 
			
 
				-	F32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				+	RF32 sumShadowLayers[kMaxRtShadowLayers];
			
 
				 	zeroRtShadowLayers(sumShadowLayers);
			
 
				 
			
 
				 	// Do a bilateral upscale
			
 
				 	const Vec2 texelSize = 1.0 / Vec2(kOutImageSize / 2u);
			
 
				 	const I32 radius = 1;
			
 
				-	F32 sumWeight = kEpsilonf;
			
 
				+	F32 sumWeight = kEpsilonF32;
			
 
				 	for(I32 x = -radius; x <= radius; ++x)
			
 
				 	{
			
 
				 		for(I32 y = -radius; y <= radius; ++y)
			
 
				 		{
			
 
				 			const Vec2 sampleUv = uv + Vec2(x, y) * texelSize;
			
 
				-			const F32 depthTap = textureLod(u_quarterDepthTex, u_linearAnyClampSampler, sampleUv, 0.0).x;
			
 
				+			const F32 depthTap = g_quarterDepthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
			
 
				 
			
 
				 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
			
 
				 
			
 
				-			F32 shadowLayers[kMaxRtShadowLayers];
			
 
				-			unpackRtShadows(textureLod(u_quarterShadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				+			RF32 shadowLayers[kMaxRtShadowLayers];
			
 
				+			unpackRtShadows(g_quarterShadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
			
 
				 
			
 
				 			for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
			
 
				 			{
			
@@ -66,7 +65,7 @@ void main()
 
				 		sumShadowLayers[i] /= sumWeight;
			
 
				 	}
			
 
				 
			
 
				-	imageStore(u_fullShadowsImage, IVec2(gl_GlobalInvocationID.xy), packRtShadows(sumShadowLayers));
			
 
				+	g_fullShadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
			
 
				 }
			
 
				 
			
 
				 #pragma anki end