|
|
@@ -3,48 +3,44 @@
|
|
|
// Code licensed under the BSD License.
|
|
|
// http://www.anki3d.org/LICENSE
|
|
|
|
|
|
+#pragma anki hlsl
|
|
|
+
|
|
|
#pragma anki mutator LAST_PASS 0 1
|
|
|
|
|
|
#pragma anki start comp
|
|
|
|
|
|
-#include <AnKi/Shaders/RtShadows.glsl>
|
|
|
-#include <AnKi/Shaders/BilateralFilter.glsl>
|
|
|
-#include <AnKi/Shaders/Functions.glsl>
|
|
|
+#include <AnKi/Shaders/RtShadows.hlsl>
|
|
|
+#include <AnKi/Shaders/BilateralFilter.hlsl>
|
|
|
+#include <AnKi/Shaders/Functions.hlsl>
|
|
|
|
|
|
ANKI_SPECIALIZATION_CONSTANT_UVEC2(kFramebufferSize, 0u);
|
|
|
|
|
|
-const UVec2 kWorkgroupSize = UVec2(8, 8);
|
|
|
-layout(local_size_x = kWorkgroupSize.x, local_size_y = kWorkgroupSize.y, local_size_z = 1) in;
|
|
|
-
|
|
|
// Shader resources, bindings 0-6. The '-' lines are the old GLSL declarations (u_ prefix) and the '+' lines are
// their HLSL replacements (g_ prefix); the binding indices are the same on both sides.
//   0: sampler used when reading the packed shadows texture
//   1: sampler used when reading the depth and variance textures
//   2: depth texture (main() treats a depth of exactly 1.0 as sky)
//   3: packed shadow layers, unsigned integer texels, decoded with unpackRtShadows()
//   4: variance input, only the first channel is read
//   5: output image for the filtered, re-packed shadow layers
//   6: output image for the filtered variance, declared only when LAST_PASS is 0
-layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
|
|
|
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
|
|
|
-layout(set = 0, binding = 2) uniform texture2D u_depthTex;
|
|
|
-layout(set = 0, binding = 3) uniform utexture2D u_shadowsTex;
|
|
|
-layout(set = 0, binding = 4) uniform texture2D u_varianceTex;
|
|
|
+[[vk::binding(0)]] SamplerState g_nearestAnyClampSampler;
|
|
|
+[[vk::binding(1)]] SamplerState g_linearAnyClampSampler;
|
|
|
+[[vk::binding(2)]] Texture2D g_depthTex;
|
|
|
+[[vk::binding(3)]] Texture2D<UVec4> g_shadowsTex;
|
|
|
+[[vk::binding(4)]] Texture2D g_varianceTex;
|
|
|
|
|
|
-layout(set = 0, binding = 5) uniform uimage2D u_shadowsImage;
|
|
|
+[[vk::binding(5)]] RWTexture2D<UVec4> g_shadowsImage;
|
|
|
#if !LAST_PASS
|
|
|
-layout(set = 0, binding = 6) uniform image2D u_varianceImage;
|
|
|
+[[vk::binding(6)]] RWTexture2D<Vec4> g_varianceImage;
|
|
|
#endif
|
|
|
|
|
|
// Push constant: a single 4x4 matrix, consumed by toViewspace() to transform an NDC position.
// NOTE(review): the removed GLSL block was explicitly row_major/std140 while the HLSL declaration carries no
// layout qualifier - confirm the matrix memory layout seen by the shader is unchanged.
// NOTE(review): ConstantBuffer<T> is normally instantiated with a struct type; confirm the shader compiler in use
// accepts a bare Mat4 here.
-layout(push_constant, row_major, std140) uniform b_pc
|
|
|
-{
|
|
|
- Mat4 u_invProjMat;
|
|
|
-};
|
|
|
+[[vk::push_constant]] ConstantBuffer<Mat4> g_invProjMat;
|
|
|
|
|
|
// Filter footprint. main() loops offsetx/offsety over [-kConfolutionRadius, +kConfolutionRadius], so a radius of 2
// gives a 5x5 window of taps around the centre pixel.
// kKernelWeights holds one weight per distance 0..kConfolutionRadius (1, 2/3, 1/6); presumably it is indexed by the
// absolute tap offset, but its use is not visible in this diff - verify.
// NOTE(review): "Confolution" looks like a misspelling of "Convolution"; a rename would have to touch every use.
-const I32 kConfolutionRadius = 2;
|
|
|
-const F32 kKernelWeights[kConfolutionRadius + 1] = F32[3](1.0, 2.0 / 3.0, 1.0 / 6.0);
|
|
|
+constexpr I32 kConfolutionRadius = 2;
|
|
|
+constexpr F32 kKernelWeights[kConfolutionRadius + 1] = {1.0, 2.0 / 3.0, 1.0 / 6.0};
|
|
|
|
|
|
// Reconstructs a 3D position from a screen UV and a depth-buffer value.
// uv:    texture coordinate of the pixel, converted to NDC by uvToNdc() (UV_TO_NDC() in the old GLSL).
// depth: depth value sampled for that pixel, used as the NDC z component.
// Returns the transformed point after the perspective divide; going by the function and matrix names this is the
// view-space position, assuming g_invProjMat holds the inverse projection matrix.
Vec3 toViewspace(Vec2 uv, F32 depth)
|
|
|
{
|
|
|
 // Build the homogeneous NDC point (w = 1) and transform it by the push-constant matrix. The '-' line is the GLSL
 // operator form, the '+' line is the HLSL mul() form with the matrix kept as the left operand.
- const Vec4 pos4 = u_invProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
|
|
|
+ const Vec4 pos4 = mul(g_invProjMat, Vec4(uvToNdc(uv), depth, 1.0));
|
|
|
 // Perspective divide to get back from homogeneous to 3D coordinates.
 const Vec3 pos = pos4.xyz / pos4.w;
|
|
|
 return pos;
|
|
|
}
|
|
|
|
|
|
-F32 computeShadowsLuma(F32 shadowLayers[kMaxRtShadowLayers])
|
|
|
+F32 computeShadowsLuma(RF32 shadowLayers[kMaxRtShadowLayers])
|
|
|
{
|
|
|
- F32 l = 0.0;
|
|
|
+ RF32 l = 0.0;
|
|
|
[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
|
|
|
{
|
|
|
l += shadowLayers[i];
|
|
|
@@ -56,7 +52,10 @@ F32 computeVarianceCenter(Vec2 uv)
|
|
|
{
|
|
|
const F32 kernel[2][2] = {{1.0 / 4.0, 1.0 / 8.0}, {1.0 / 8.0, 1.0 / 16.0}};
|
|
|
const I32 radius = 1;
|
|
|
- const Vec2 texelSize = 1.0 / Vec2(textureSize(u_varianceTex, 0).xy);
|
|
|
+ Vec2 texSize;
|
|
|
+ F32 mipCount;
|
|
|
+ g_varianceTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
|
|
|
+ const Vec2 texelSize = 1.0 / texSize;
|
|
|
F32 sum = 0.0;
|
|
|
|
|
|
for(I32 yy = -radius; yy <= radius; yy++)
|
|
|
@@ -65,29 +64,29 @@ F32 computeVarianceCenter(Vec2 uv)
|
|
|
{
|
|
|
const Vec2 newUv = uv + Vec2(xx, yy) * texelSize;
|
|
|
const F32 k = kernel[abs(xx)][abs(yy)];
|
|
|
- sum += textureLod(u_varianceTex, u_linearAnyClampSampler, newUv, 0.0).r * k;
|
|
|
+ sum += g_varianceTex.SampleLevel(g_linearAnyClampSampler, newUv, 0.0).r * k;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
return sum;
|
|
|
}
|
|
|
|
|
|
-void main()
|
|
|
+[numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
|
|
|
{
|
|
|
- if(skipOutOfBoundsInvocations(kWorkgroupSize, kFramebufferSize))
|
|
|
+ if(any(svDispatchThreadId.xy >= kFramebufferSize))
|
|
|
{
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(kFramebufferSize);
|
|
|
+ const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(kFramebufferSize);
|
|
|
|
|
|
- const F32 depth = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0).r;
|
|
|
+ const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
|
|
|
if(depth == 1.0)
|
|
|
{
|
|
|
// Sky
|
|
|
- imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), UVec4(0));
|
|
|
+ g_shadowsImage[svDispatchThreadId.xy] = 0u;
|
|
|
#if !LAST_PASS
|
|
|
- imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(0.0));
|
|
|
+ g_varianceImage[svDispatchThreadId.xy] = 0.0f;
|
|
|
#endif
|
|
|
return;
|
|
|
}
|
|
|
@@ -97,21 +96,24 @@ void main()
|
|
|
const Vec3 positionCenter = toViewspace(uv, depthCenter);
|
|
|
|
|
|
// Read center luma
|
|
|
- F32 shadowLayers[kMaxRtShadowLayers];
|
|
|
- unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, uv, 0.0), shadowLayers);
|
|
|
+ RF32 shadowLayers[kMaxRtShadowLayers];
|
|
|
+ unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), shadowLayers);
|
|
|
const F32 refLuma = computeShadowsLuma(shadowLayers);
|
|
|
|
|
|
// Center variance
|
|
|
const F32 varianceCenter = computeVarianceCenter(uv);
|
|
|
|
|
|
// Init the sums
|
|
|
- F32 sumShadowLayers[kMaxRtShadowLayers];
|
|
|
+ RF32 sumShadowLayers[kMaxRtShadowLayers];
|
|
|
zeroRtShadowLayers(sumShadowLayers);
|
|
|
F32 sumVariance = 0.0;
|
|
|
F32 sumWeight = 0.0;
|
|
|
|
|
|
// Convolve
|
|
|
- const Vec2 texelSize = 1.0 / Vec2(textureSize(u_shadowsTex, 0).xy);
|
|
|
+ Vec2 texSize;
|
|
|
+ F32 mipCount;
|
|
|
+ g_shadowsTex.GetDimensions(0u, texSize.x, texSize.y, mipCount);
|
|
|
+ const Vec2 texelSize = 1.0 / texSize;
|
|
|
for(I32 offsetx = -kConfolutionRadius; offsetx <= kConfolutionRadius; offsetx++)
|
|
|
{
|
|
|
for(I32 offsety = -kConfolutionRadius; offsety <= kConfolutionRadius; offsety++)
|
|
|
@@ -119,18 +121,18 @@ void main()
|
|
|
const Vec2 sampleUv = uv + Vec2(offsetx, offsety) * texelSize;
|
|
|
|
|
|
// Read shadows
|
|
|
- F32 shadowLayers[kMaxRtShadowLayers];
|
|
|
- unpackRtShadows(textureLod(u_shadowsTex, u_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
|
|
|
+ RF32 shadowLayers[kMaxRtShadowLayers];
|
|
|
+ unpackRtShadows(g_shadowsTex.SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), shadowLayers);
|
|
|
|
|
|
// Compute luma weight
|
|
|
const F32 luma = computeShadowsLuma(shadowLayers);
|
|
|
- const F32 variance = textureLod(u_varianceTex, u_linearAnyClampSampler, sampleUv, 0.0).x;
|
|
|
+ const F32 variance = g_varianceTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).x;
|
|
|
const F32 sigmaL = 4.0;
|
|
|
const F32 lumaDiff = abs(luma - refLuma);
|
|
|
- const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonf)));
|
|
|
+ const F32 wl = min(1.0, exp(-lumaDiff / (sigmaL * sqrt(varianceCenter + 0.001) + kEpsilonF32)));
|
|
|
|
|
|
// Set the current sample
|
|
|
- const F32 depthTap = textureLod(u_depthTex, u_linearAnyClampSampler, sampleUv, 0.0).r;
|
|
|
+ const F32 depthTap = g_depthTex.SampleLevel(g_linearAnyClampSampler, sampleUv, 0.0).r;
|
|
|
const Vec3 positionTap = toViewspace(sampleUv, depthTap);
|
|
|
|
|
|
// Do bilateral
|
|
|
@@ -152,7 +154,7 @@ void main()
|
|
|
}
|
|
|
|
|
|
// Normalize
|
|
|
- sumWeight += kEpsilonf;
|
|
|
+ sumWeight += kEpsilonF32;
|
|
|
|
|
|
[unroll] for(U32 i = 0u; i < kMaxRtShadowLayers; ++i)
|
|
|
{
|
|
|
@@ -162,9 +164,9 @@ void main()
|
|
|
sumVariance /= (sumWeight * sumWeight);
|
|
|
|
|
|
// Store
|
|
|
- imageStore(u_shadowsImage, IVec2(gl_GlobalInvocationID), packRtShadows(sumShadowLayers));
|
|
|
+ g_shadowsImage[svDispatchThreadId.xy] = packRtShadows(sumShadowLayers);
|
|
|
#if !LAST_PASS
|
|
|
- imageStore(u_varianceImage, IVec2(gl_GlobalInvocationID), Vec4(sumVariance, 0.0, 0.0, 0.0));
|
|
|
+ g_varianceImage[svDispatchThreadId.xy] = Vec4(sumVariance, 0.0, 0.0, 0.0);
|
|
|
#endif
|
|
|
}
|
|
|
#pragma anki end
|