Browse Source

Convert more RT shaders to HLSL

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
ae2918296f

+ 39 - 40
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -3,39 +3,35 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma anki hlsl
+
 #pragma anki mutator BLUR_ORIENTATION 0 1
 #pragma anki mutator BLUR_ORIENTATION 0 1
 
 
 #pragma anki start comp
 #pragma anki start comp
 
 
-#include <AnKi/Shaders/BilateralFilter.glsl>
-#include <AnKi/Shaders/PackFunctions.glsl>
-#include <AnKi/Shaders/RtShadows.glsl>
-#include <AnKi/Shaders/Functions.glsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
+#include <AnKi/Shaders/PackFunctions.hlsl>
+#include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMinSampleCount, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kMaxSampleCount, 3u);
 
 
-const UVec2 kWorkgroupSize = UVec2(8u, 8u);
-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
-
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform utexture2D u_inTex;
-layout(set = 0, binding = 3) uniform texture2D u_depthTex;
-layout(set = 0, binding = 4) uniform texture2D u_gbuffer2Tex;
-layout(set = 0, binding = 5) uniform texture2D u_momentsTex;
-layout(set = 0, binding = 6) uniform texture2D u_historyLengthTex;
-layout(set = 0, binding = 7) writeonly uniform uimage2D u_outImg;
+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(2)]] Texture2D<UVec4> g_inTex;
+[[vk::binding(3)]] Texture2D g_depthTex;
+[[vk::binding(4)]] Texture2D g_gbuffer2Tex;
+[[vk::binding(5)]] Texture2D g_momentsTex;
+[[vk::binding(6)]] Texture2D g_historyLengthTex;
+[[vk::binding(7)]] RWTexture2D<UVec4> g_outImg;
 
 
-layout(push_constant, row_major, std140) uniform b_pc
-{
-	RtShadowsDenoiseUniforms u_unis;
-};
+[[vk::push_constant]] ConstantBuffer<RtShadowsDenoiseUniforms> g_unis;
 
 
 Vec3 unproject(Vec2 ndc, F32 depth)
 Vec3 unproject(Vec2 ndc, F32 depth)
 {
 {
-	const Vec4 worldPos4 = u_unis.invViewProjMat * Vec4(ndc, depth, 1.0);
+	const Vec4 worldPos4 = mul(g_unis.invViewProjMat, Vec4(ndc, depth, 1.0));
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 	return worldPos;
 	return worldPos;
 }
 }
@@ -44,8 +40,11 @@ F32 computeVarianceCenter(Vec2 uv)
 {
 {
 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
 	const I32 radius = 1;
 	const I32 radius = 1;
-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_momentsTex, 0).xy);
-	Vec2 sumMoments = Vec2(0.0);
+	Vec2 texSize;
+	F32 mipCount;
+	g_momentsTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
+	const Vec2 texelSize = 1.0 / texSize;
+	Vec2 sumMoments = 0.0f;
 
 
 	for(I32 yy = -radius; yy <= radius; yy++)
 	for(I32 yy = -radius; yy <= radius; yy++)
 	{
 	{
@@ -53,40 +52,40 @@ F32 computeVarianceCenter(Vec2 uv)
 		{
 		{
 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
 			const F32 k = kernel[abs(xx)][abs(yy)];
 			const F32 k = kernel[abs(xx)][abs(yy)];
-			sumMoments += textureLod(u_momentsTex, u_linearAnyClampSampler, newUv, 0.0).xy * k;
+			sumMoments += g_momentsTex.SampleLevel(g_linearAnyClampSampler, newUv, 0.0).xy * k;
 		}
 		}
 	}
 	}
 
 
 	return abs(sumMoments.y - sumMoments.x * sumMoments.x);
 	return abs(sumMoments.y - sumMoments.x * sumMoments.x);
 }
 }
 
 
-void main()
+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kOutImageSize))
+	if(any(svDispatchThreadId.xy >= kOutImageSize))
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kOutImageSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
 
 
 	// Reference
 	// Reference
-	const F32 depthCenter = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	const F32 depthCenter = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depthCenter == 1.0)
 	if(depthCenter == 1.0)
 	{
 	{
-		imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), UVec4(0));
+		g_outImg[svDispatchThreadId.xy] = 0;
 		return;
 		return;
 	}
 	}
 
 
-	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
-	const Vec3 normalCenter = unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, uv, 0.0));
+	const Vec3 positionCenter = unproject(uvToNdc(uv), depthCenter);
+	const Vec3 normalCenter = unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0));
 
 
-	F32 shadowFactors[kMaxRtShadowLayers];
-	unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, uv, 0.0), shadowFactors);
+	RF32 shadowFactors[kMaxRtShadowLayers];
+	unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowFactors);
 
 
 	// Decide the amount of blurring
 	// Decide the amount of blurring
 	const F32 varianceCenter = computeVarianceCenter(uv);
 	const F32 varianceCenter = computeVarianceCenter(uv);
 	const F32 historyLength =
 	const F32 historyLength =
-		textureLod(u_historyLengthTex, u_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
+		g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x * kRtShadowsMaxHistoryLength;
 
 
 	U32 sampleCount;
 	U32 sampleCount;
 	if(historyLength < 2.0)
 	if(historyLength < 2.0)
@@ -106,7 +105,7 @@ void main()
 		F32 blur = varianceCenter * 100.0;
 		F32 blur = varianceCenter * 100.0;
 		blur = min(1.0, blur);
 		blur = min(1.0, blur);
 
 
-		const F32 sampleCountf = mix(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
+		const F32 sampleCountf = lerp(F32(kMinSampleCount), F32(kMaxSampleCount), blur);
 
 
 		sampleCount = U32(sampleCountf);
 		sampleCount = U32(sampleCountf);
 	}
 	}
@@ -125,13 +124,13 @@ void main()
 		const Vec2 sampleUv = Vec2(uv.x, uv.y + F32(i) * texelSize.y);
 		const Vec2 sampleUv = Vec2(uv.x, uv.y + F32(i) * texelSize.y);
 #endif
 #endif
 
 
-		F32 localShadowFactors[kMaxRtShadowLayers];
-		unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
+		RF32 localShadowFactors[kMaxRtShadowLayers];
+		unpackRtShadows(g_inTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), localShadowFactors);
 
 
-		const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
-		const Vec3 positionTap = unproject(UV_TO_NDC(sampleUv), depthTap);
+		const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
+		const Vec3 positionTap = unproject(uvToNdc(sampleUv), depthTap);
 		const Vec3 normalTap =
 		const Vec3 normalTap =
-			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
+			unpackNormalFromGBuffer(g_gbuffer2Tex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0));
 
 
 		// F32 w = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
 		// F32 w = calculateBilateralWeighPlane(depthCenter, depthTap, 1.0);
 		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
 		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
@@ -152,7 +151,7 @@ void main()
 		shadowFactors[i] /= weight;
 		shadowFactors[i] /= weight;
 	}
 	}
 
 
-	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), packRtShadows(shadowFactors));
+	g_outImg[svDispatchThreadId.xy] = packRtShadows(shadowFactors);
 }
 }
 
 
 #pragma anki end
 #pragma anki end

+ 44 - 42
AnKi/Shaders/RtShadowsSvgfAtrous.ankiprog

@@ -3,48 +3,44 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma anki hlsl
+
 #pragma anki mutator LAST_PASS 0 1
 #pragma anki mutator LAST_PASS 0 1
 
 
 #pragma anki start comp
 #pragma anki start comp
 
 
-#include <AnKi/Shaders/RtShadows.glsl>
-#include <AnKi/Shaders/BilateralFilter.glsl>
-#include <AnKi/Shaders/Functions.glsl>
+#include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 
 
-const UVec2 kWorkgroupSize = UVec2(8, 8);
-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
-
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform texture2D u_depthTex;
-layout(set = 0, binding = 3) uniform utexture2D u_shadowsTex;
-layout(set = 0, binding = 4) uniform texture2D u_varianceTex;
+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(2)]] Texture2D g_depthTex;
+[[vk::binding(3)]] Texture2D<UVec4> g_shadowsTex;
+[[vk::binding(4)]] Texture2D g_varianceTex;
 
 
-layout(set = 0, binding = 5) uniform uimage2D u_shadowsImage;
+[[vk::binding(5)]] RWTexture2D<UVec4> g_shadowsImage;
 #if !LAST_PASS
 #if !LAST_PASS
-layout(set = 0, binding = 6) uniform image2D u_varianceImage;
+[[vk::binding(6)]] RWTexture2D<Vec4> g_varianceImage;
 #endif
 #endif
 
 
-layout(push_constant, row_major, std140) uniform b_pc
-{
-	Mat4 u_invProjMat;
-};
+[[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
 
 
-const I32 kConfolutionRadius = 2;
-const F32 kKernelWeights[kConfolutionRadius + 1] = F32[3](1.0, 2.0 / 3.0, 1.0 / 6.0);
+constexpr I32 kConfolutionRadius = 2;
+constexpr F32 kKernelWeights[kConfolutionRadius + 1] = {1.0, 2.0 / 3.0, 1.0 / 6.0};
 
 
 Vec3 toViewspace(Vec2 uv, F32 depth)
 Vec3 toViewspace(Vec2 uv, F32 depth)
 {
 {
-	const Vec4 pos4 = u_invProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
+	const Vec4 pos4 = mul(g_invProjMat, Vec4(uvToNdc(uv), depth, 1.0));
 	const Vec3 pos = pos4.xyz / pos4.w;
 	const Vec3 pos = pos4.xyz / pos4.w;
 	return pos;
 	return pos;
 }
 }
 
 
-F32 computeShadowsLuma(F32 shadowLayers[kMaxRtShadowLayers])
+F32 computeShadowsLuma(RF32 shadowLayers[kMaxRtShadowLayers])
 {
 {
-	F32 l = 0.0;
+	RF32 l = 0.0;
 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 	{
 	{
 		l += shadowLayers[i];
 		l += shadowLayers[i];
@@ -56,7 +52,10 @@ F32 computeVarianceCenter(Vec2 uv)
 {
 {
 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
 	const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
 	const I32 radius = 1;
 	const I32 radius = 1;
-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_varianceTex, 0).xy);
+	Vec2 texSize;
+	F32 mipCount;
+	g_varianceTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
+	const Vec2 texelSize = 1.0 / texSize;
 	F32 sum = 0.0;
 	F32 sum = 0.0;
 
 
 	for(I32 yy = -radius; yy <= radius; yy++)
 	for(I32 yy = -radius; yy <= radius; yy++)
@@ -65,29 +64,29 @@ F32 computeVarianceCenter(Vec2 uv)
 		{
 		{
 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
 			const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
 			const F32 k = kernel[abs(xx)][abs(yy)];
 			const F32 k = kernel[abs(xx)][abs(yy)];
-			sum += textureLod(u_varianceTex, u_linearAnyClampSampler, newUv, 0.0).r * k;
+			sum += g_varianceTex.SampleLevel(g_linearAnyClampSampler, newUv, 0.0).r * k;
 		}
 		}
 	}
 	}
 
 
 	return sum;
 	return sum;
 }
 }
 
 
-void main()
+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kFramebufferSize))
+	if(any(svDispatchThreadId.xy >= kFramebufferSize))
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
 
 
-	const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depth == 1.0)
 	if(depth == 1.0)
 	{
 	{
 		// Sky
 		// Sky
-		imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
+		g_shadowsImage[svDispatchThreadId.xy] = 0u;
 #if !LAST_PASS
 #if !LAST_PASS
-		imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
+		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
 #endif
 #endif
 		return;
 		return;
 	}
 	}
@@ -97,21 +96,24 @@ void main()
 	const Vec3 positionCenter = toViewspace(uv, depthCenter);
 	const Vec3 positionCenter = toViewspace(uv, depthCenter);
 
 
 	// Read center luma
 	// Read center luma
-	F32 shadowLayers[kMaxRtShadowLayers];
-	unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, uv, 0.0), shadowLayers);
+	RF32 shadowLayers[kMaxRtShadowLayers];
+	unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowLayers);
 	const F32 refLuma = computeShadowsLuma(shadowLayers);
 	const F32 refLuma = computeShadowsLuma(shadowLayers);
 
 
 	// Center variance
 	// Center variance
 	const F32 varianceCenter = computeVarianceCenter(uv);
 	const F32 varianceCenter = computeVarianceCenter(uv);
 
 
 	// Init the sums
 	// Init the sums
-	F32 sumShadowLayers[kMaxRtShadowLayers];
+	RF32 sumShadowLayers[kMaxRtShadowLayers];
 	zeroRtShadowLayers(sumShadowLayers);
 	zeroRtShadowLayers(sumShadowLayers);
 	F32 sumVariance = 0.0;
 	F32 sumVariance = 0.0;
 	F32 sumWeight = 0.0;
 	F32 sumWeight = 0.0;
 
 
 	// Convolve
 	// Convolve
-	const Vec2 texelSize = 1.0 / Vec2(textureSize(u_shadowsTex, 0).xy);
+	Vec2 texSize;
+	F32 mipCount;
+	g_shadowsTex.GetDimensions(0u, texSize.x, texSize.y, mipCount);
+	const Vec2 texelSize = 1.0 / texSize;
 	for(I32 offsetx = -kConfolutionRadius; offsetx <= kConfolutionRadius; offsetx++)
 	for(I32 offsetx = -kConfolutionRadius; offsetx <= kConfolutionRadius; offsetx++)
 	{
 	{
 		for(I32 offsety = -kConfolutionRadius; offsety <= kConfolutionRadius; offsety++)
 		for(I32 offsety = -kConfolutionRadius; offsety <= kConfolutionRadius; offsety++)
@@ -119,18 +121,18 @@ void main()
 			const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 			const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 
 
 			// Read shadows
 			// Read shadows
-			F32 shadowLayers[kMaxRtShadowLayers];
-			unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
+			RF32 shadowLayers[kMaxRtShadowLayers];
+			unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
 
 
 			// Compute luma weight
 			// Compute luma weight
 			const F32 luma = computeShadowsLuma(shadowLayers);
 			const F32 luma = computeShadowsLuma(shadowLayers);
-			const F32 variance = textureLod(u_varianceTex, u_linearAnyClampSampler, sampleUv, 0.0).x;
+			const F32 variance = g_varianceTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
 			const F32 sigmaL = 4.0;
 			const F32 sigmaL = 4.0;
 			const F32 lumaDiff = abs(luma - refLuma);
 			const F32 lumaDiff = abs(luma - refLuma);
-			const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonf)));
+			const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonF32)));
 
 
 			// Set the current sample
 			// Set the current sample
-			const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
+			const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 			const Vec3 positionTap = toViewspace(sampleUv, depthTap);
 			const Vec3 positionTap = toViewspace(sampleUv, depthTap);
 
 
 			// Do bilateral
 			// Do bilateral
@@ -152,7 +154,7 @@ void main()
 	}
 	}
 
 
 	// Normalize
 	// Normalize
-	sumWeight += kEpsilonf;
+	sumWeight += kEpsilonF32;
 
 
 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 	[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 	{
 	{
@@ -162,9 +164,9 @@ void main()
 	sumVariance /= (sumWeight * sumWeight);
 	sumVariance /= (sumWeight * sumWeight);
 
 
 	// Store
 	// Store
-	imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), packRtShadows(sumShadowLayers));
+	g_shadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
 #if !LAST_PASS
 #if !LAST_PASS
-	imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(sumVariance, 0.0, 0.0, 0.0));
+	g_varianceImage[svDispatchThreadId.xy] = Vec4(sumVariance, 0.0, 0.0, 0.0);
 #endif
 #endif
 }
 }
 #pragma anki end
 #pragma anki end

+ 34 - 38
AnKi/Shaders/RtShadowsSvgfVariance.ankiprog

@@ -3,60 +3,56 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma anki hlsl
+
 #pragma anki start comp
 #pragma anki start comp
 
 
-#include <AnKi/Shaders/RtShadows.glsl>
-#include <AnKi/Shaders/BilateralFilter.glsl>
-#include <AnKi/Shaders/Functions.glsl>
+#include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
 
 
-const UVec2 kWorkgroupSize = UVec2(8, 8);
-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
-
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform utexture2D u_shadowsTex;
-layout(set = 0, binding = 3) uniform texture2D u_momentsTex;
-layout(set = 0, binding = 4) uniform texture2D u_historyLengthTex;
-layout(set = 0, binding = 5) uniform texture2D u_depthTex;
+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(2)]] Texture2D<UVec4> g_shadowsTex;
+[[vk::binding(3)]] Texture2D g_momentsTex;
+[[vk::binding(4)]] Texture2D g_historyLengthTex;
+[[vk::binding(5)]] Texture2D g_depthTex;
 
 
-layout(set = 0, binding = 6) uniform uimage2D u_shadowsImage;
-layout(set = 0, binding = 7) uniform image2D u_varianceImage;
+[[vk::binding(6)]] RWTexture2D<UVec4> g_shadowsImage;
+[[vk::binding(7)]] RWTexture2D<Vec4> g_varianceImage;
 
 
-layout(push_constant, row_major, std140) uniform b_pc
-{
-	Mat4 u_invProjMat;
-};
+[[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
 
 
-const I32 kConvolutionRadius = 1;
+constexpr I32 kConvolutionRadius = 1;
 
 
 Vec3 toViewspace(Vec2 uv, F32 depth)
 Vec3 toViewspace(Vec2 uv, F32 depth)
 {
 {
-	const Vec4 pos4 = u_invProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
+	const Vec4 pos4 = mul(g_invProjMat, Vec4(uvToNdc(uv), depth, 1.0));
 	const Vec3 pos = pos4.xyz / pos4.w;
 	const Vec3 pos = pos4.xyz / pos4.w;
 	return pos;
 	return pos;
 }
 }
 
 
-void main()
+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kFramebufferSize))
+	if(any(svDispatchThreadId >= kFramebufferSize))
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kFramebufferSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
 
 
-	const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 	if(depth == 1.0)
 	if(depth == 1.0)
 	{
 	{
 		// Sky
 		// Sky
-		imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
-		imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
+		g_shadowsImage[svDispatchThreadId.xy] = 0;
+		g_varianceImage[svDispatchThreadId.xy] = 0.0f;
 		return;
 		return;
 	}
 	}
 
 
-	const F32 historyLength = textureLod(u_historyLengthTex, u_linearAnyClampSampler, uv, 0.0).r;
+	const F32 historyLength = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
 
 
 	UVec4 outPackedShadowLayers;
 	UVec4 outPackedShadowLayers;
 	F32 outVariance;
 	F32 outVariance;
@@ -72,9 +68,9 @@ void main()
 		const Vec3 positionCenter = toViewspace(uv, depthCenter);
 		const Vec3 positionCenter = toViewspace(uv, depthCenter);
 
 
 		// Init the sums
 		// Init the sums
-		Vec2 sumMoments = Vec2(0.0);
+		Vec2 sumMoments = 0.0f;
 		F32 sumWeight = 0.0;
 		F32 sumWeight = 0.0;
-		F32 sumShadowLayers[kMaxRtShadowLayers];
+		RF32 sumShadowLayers[kMaxRtShadowLayers];
 		zeroRtShadowLayers(sumShadowLayers);
 		zeroRtShadowLayers(sumShadowLayers);
 
 
 		// Convolve
 		// Convolve
@@ -85,18 +81,18 @@ void main()
 				const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 				const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
 
 
 				// Set the current sample
 				// Set the current sample
-				const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
+				const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
 				const Vec3 positionTap = toViewspace(sampleUv, depthTap);
 				const Vec3 positionTap = toViewspace(sampleUv, depthTap);
 
 
 				// Do bilateral
 				// Do bilateral
 				const F32 w = calculateBilateralWeightViewspacePosition(positionCenter, positionTap, 0.5);
 				const F32 w = calculateBilateralWeightViewspacePosition(positionCenter, positionTap, 0.5);
 
 
 				// Sum
 				// Sum
-				const Vec2 moments = textureLod(u_momentsTex, u_linearAnyClampSampler, sampleUv, 0.0).xy;
+				const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).xy;
 				sumMoments += moments * w;
 				sumMoments += moments * w;
 
 
-				F32 shadowLayers[kMaxRtShadowLayers];
-				unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
+				RF32 shadowLayers[kMaxRtShadowLayers];
+				unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
 				[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 				[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 				{
 				{
 					sumShadowLayers[i] += shadowLayers[i] * w;
 					sumShadowLayers[i] += shadowLayers[i] * w;
@@ -106,7 +102,7 @@ void main()
 			}
 			}
 		}
 		}
 
 
-		sumWeight += kEpsilonf;
+		sumWeight += kEpsilonF32;
 
 
 		[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 		[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 		{
 		{
@@ -125,14 +121,14 @@ void main()
 	{
 	{
 		// Stable for more than 4 frames, passthrough
 		// Stable for more than 4 frames, passthrough
 
 
-		outPackedShadowLayers = textureLod(u_shadowsTex, u_nearestAnyClampSampler, uv, 0.0);
+		outPackedShadowLayers = g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
 
 
-		const Vec2 moments = textureLod(u_momentsTex, u_linearAnyClampSampler, uv, 0.0).xy;
+		const Vec2 moments = g_momentsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).xy;
 		outVariance = max(0.0, moments.y - moments.x * moments.x);
 		outVariance = max(0.0, moments.y - moments.x * moments.x);
 	}
 	}
 
 
 	// Store
 	// Store
-	imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), outPackedShadowLayers);
-	imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(outVariance, 0.0, 0.0, 0.0));
+	g_shadowsImage[svDispatchThreadId.xy] = outPackedShadowLayers;
+	g_varianceImage[svDispatchThreadId.xy] = Vec4(outVariance, 0.0, 0.0, 0.0);
 }
 }
 #pragma anki end
 #pragma anki end

+ 21 - 22
AnKi/Shaders/RtShadowsUpscale.ankiprog

@@ -3,54 +3,53 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma anki hlsl
+
 #pragma anki start comp
 #pragma anki start comp
 
 
-#include <AnKi/Shaders/RtShadows.glsl>
-#include <AnKi/Shaders/Functions.glsl>
-#include <AnKi/Shaders/BilateralFilter.glsl>
+#include <AnKi/Shaders/RtShadows.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kOutImageSize, 0u);
 
 
-const UVec2 kWorkgroupSize = UVec2(8u, 8u);
-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
-
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform utexture2D u_quarterShadowsTex;
-layout(set = 0, binding = 3) uniform uimage2D u_fullShadowsImage;
-layout(set = 0, binding = 4) uniform texture2D u_quarterDepthTex;
-layout(set = 0, binding = 5) uniform texture2D u_fullDepthTex;
+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
+[[vk::binding(2)]] Texture2D<UVec4> g_quarterShadowsTex;
+[[vk::binding(3)]] RWTexture2D<UVec4> g_fullShadowsImage;
+[[vk::binding(4)]] Texture2D g_quarterDepthTex;
+[[vk::binding(5)]] Texture2D g_fullDepthTex;
 
 
-void main()
+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
 {
-	if(skipOutOfBoundsInvocations(kWorkgroupSize, kOutImageSize))
+	if(any(svDispatchThreadId.xy >= kOutImageSize))
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kOutImageSize);
+	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kOutImageSize);
 
 
 	// Reference
 	// Reference
-	const F32 depthCenter = textureLod(u_fullDepthTex, u_linearAnyClampSampler, uv, 0.0).x;
+	const F32 depthCenter = g_fullDepthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
 
 
-	F32 sumShadowLayers[kMaxRtShadowLayers];
+	RF32 sumShadowLayers[kMaxRtShadowLayers];
 	zeroRtShadowLayers(sumShadowLayers);
 	zeroRtShadowLayers(sumShadowLayers);
 
 
 	// Do a bilateral upscale
 	// Do a bilateral upscale
 	const Vec2 texelSize = 1.0 / Vec2(kOutImageSize / 2u);
 	const Vec2 texelSize = 1.0 / Vec2(kOutImageSize / 2u);
 	const I32 radius = 1;
 	const I32 radius = 1;
-	F32 sumWeight = kEpsilonf;
+	F32 sumWeight = kEpsilonF32;
 	for(I32 x = -radius; x <= radius; ++x)
 	for(I32 x = -radius; x <= radius; ++x)
 	{
 	{
 		for(I32 y = -radius; y <= radius; ++y)
 		for(I32 y = -radius; y <= radius; ++y)
 		{
 		{
 			const Vec2 sampleUv = uv + Vec2(x, y) * texelSize;
 			const Vec2 sampleUv = uv + Vec2(x, y) * texelSize;
-			const F32 depthTap = textureLod(u_quarterDepthTex, u_linearAnyClampSampler, sampleUv, 0.0).x;
+			const F32 depthTap = g_quarterDepthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
 
 
 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
 
 
-			F32 shadowLayers[kMaxRtShadowLayers];
-			unpackRtShadows(textureLod(u_quarterShadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
+			RF32 shadowLayers[kMaxRtShadowLayers];
+			unpackRtShadows(g_quarterShadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
 
 
 			for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 			for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
 			{
 			{
@@ -66,7 +65,7 @@ void main()
 		sumShadowLayers[i] /= sumWeight;
 		sumShadowLayers[i] /= sumWeight;
 	}
 	}
 
 
-	imageStore(u_fullShadowsImage, IVec2(gl_GlobalInvocationID.xy), packRtShadows(sumShadowLayers));
+	g_fullShadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
 }
 }
 
 
 #pragma anki end
 #pragma anki end