|
|
@@ -5,10 +5,14 @@
|
|
|
|
|
|
// Ground truth ambient occlusion
|
|
|
|
|
|
-#pragma anki mutator SPATIAL_DENOISE_QUALITY 0 1
|
|
|
+#pragma anki 16bit
|
|
|
+
|
|
|
+#pragma anki mutator SPATIAL_DENOISE_SAMPLE_COUNT 3 5 7 9
|
|
|
+#pragma anki mutator DENOISING_QUARTER_RESOLUTION 0 1
|
|
|
|
|
|
#pragma anki technique Ssao vert pixel comp mutators
|
|
|
-#pragma anki technique SsaoSpatialDenoise vert pixel comp
|
|
|
+#pragma anki technique SsaoSpatialDenoiseHorizontal vert pixel comp
|
|
|
+#pragma anki technique SsaoSpatialDenoiseVertical vert pixel comp
|
|
|
#pragma anki technique SsaoTemporalDenoise vert pixel comp
|
|
|
|
|
|
#include <AnKi/Shaders/QuadVert.hlsl>
|
|
|
@@ -26,15 +30,15 @@
|
|
|
Texture2D<Vec4> g_gbufferRt2 : register(t0);
|
|
|
Texture2D<Vec4> g_depthTex : register(t1);
|
|
|
|
|
|
-Texture2D<RVec4> g_noiseTex : register(t2);
|
|
|
+Texture2D<Vec4> g_noiseTex : register(t2);
|
|
|
SamplerState g_trilinearRepeatSampler : register(s0);
|
|
|
SamplerState g_linearAnyClampSampler : register(s1);
|
|
|
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
-RWTexture2D<RVec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
+RWTexture2D<Vec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
# endif
|
|
|
|
|
|
-ANKI_FAST_CONSTANTS(SsaoConstants, g_consts)
|
|
|
+ConstantBuffer<SsaoConstants> g_consts : register(b0);
|
|
|
|
|
|
Vec3 unproject(Vec2 ndc)
|
|
|
{
|
|
|
@@ -48,105 +52,92 @@ Vec4 project(Vec4 p)
|
|
|
p);
|
|
|
}
|
|
|
|
|
|
-RF32 computeFalloff(RF32 len)
|
|
|
+F16 computeFalloff(F16 len)
|
|
|
{
|
|
|
- return sqrt(1.0f - min(1.0f, len / g_consts.m_radius));
|
|
|
+ return sqrt(1.0 - min(1.0, len / F16(g_consts.m_radius)));
|
|
|
}
|
|
|
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
-[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
|
|
|
-# else
|
|
|
-RVec4 main(VertOut input) : SV_TARGET0
|
|
|
-# endif
|
|
|
+HVec4 doWork(Vec2 coord)
|
|
|
{
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
- const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_consts.m_viewportSizef;
|
|
|
-# else
|
|
|
- const UVec2 svDispatchThreadId = input.m_svPosition;
|
|
|
- ANKI_MAYBE_UNUSED(svDispatchThreadId);
|
|
|
- const Vec2 uv = input.m_uv;
|
|
|
-# endif
|
|
|
+ const Vec2 uv = (coord + 0.5) / g_consts.m_viewportSizef;
|
|
|
|
|
|
const Vec2 ndc = uvToNdc(uv);
|
|
|
const F32 depth = g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).r;
|
|
|
const Vec3 Pc = cheapPerspectiveUnprojection(g_consts.m_unprojectionParameters, ndc, depth);
|
|
|
- const RVec3 V = normalize(-Pc); // View vector
|
|
|
+ const HVec3 V = normalize(-Pc); // View vector
|
|
|
|
|
|
// Get noise
|
|
|
-# if 0
|
|
|
- Vec2 noiseTexSize;
|
|
|
- g_noiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
|
|
|
- const RVec2 noiseUv = Vec2(g_consts.m_viewportSizef) / noiseTexSize * uv;
|
|
|
- const RVec2 noise2 = animateBlueNoise(g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).xyz, g_consts.m_frameCount).yx;
|
|
|
+# if 1
|
|
|
+ const HVec2 noise2 = animateBlueNoise(g_noiseTex[UVec2(coord) % 64].xyz, g_consts.m_frameCount).yx;
|
|
|
# else
|
|
|
- const RVec2 noise2 = spatioTemporalNoise(svDispatchThreadId, g_consts.m_frameCount);
|
|
|
+ const HVec2 noise2 = spatioTemporalNoise(coord, g_consts.m_frameCount);
|
|
|
# endif
|
|
|
|
|
|
// Rand slice direction
|
|
|
- const RF32 randAng = noise2.x * kPi;
|
|
|
+ const F16 randAng = noise2.x * kPi;
|
|
|
# if 0
|
|
|
- const RF32 aspect = g_consts.m_viewportSizef.x / g_consts.m_viewportSizef.y;
|
|
|
- const RVec2 dir2d = normalize(Vec2(cos(randAng), sin(randAng)) * Vec2(1.0f, aspect));
|
|
|
+ const F16 aspect = g_consts.m_viewportSizef.x / g_consts.m_viewportSizef.y;
|
|
|
+ const HVec2 dir2d = normalize(Vec2(cos(randAng), sin(randAng)) * Vec2(1.0, aspect));
|
|
|
# else
|
|
|
- const RVec2 dir2d = Vec2(cos(randAng), sin(randAng));
|
|
|
+ const HVec2 dir2d = Vec2(cos(randAng), sin(randAng));
|
|
|
# endif
|
|
|
|
|
|
// Project the view normal to the slice
|
|
|
const Vec3 worldNormal = unpackNormalFromGBuffer(g_gbufferRt2.SampleLevel(g_linearAnyClampSampler, uv, 0.0));
|
|
|
- const RVec3 viewNormal = mul(g_consts.m_viewMat, Vec4(worldNormal, 0.0));
|
|
|
+ const HVec3 viewNormal = mul(g_consts.m_viewMat, Vec4(worldNormal, 0.0));
|
|
|
|
|
|
- const RVec3 directionVec = RVec3(dir2d, 0.0f);
|
|
|
- const RVec3 orthoDirectionVec = directionVec - (dot(directionVec, V) * V);
|
|
|
- const RVec3 axisVec = normalize(cross(orthoDirectionVec, V));
|
|
|
- const RVec3 projectedNormalVec = viewNormal - axisVec * dot(viewNormal, axisVec);
|
|
|
- const RF32 signNorm = (F32)sign(dot(orthoDirectionVec, projectedNormalVec));
|
|
|
- const RF32 projectedNormalVecLength = length(projectedNormalVec);
|
|
|
- const RF32 cosNorm = saturate(dot(projectedNormalVec, V) / projectedNormalVecLength);
|
|
|
- const RF32 n = -signNorm * fastAcos(cosNorm);
|
|
|
+ const HVec3 directionVec = HVec3(dir2d, 0.0);
|
|
|
+ const HVec3 orthoDirectionVec = directionVec - (dot(directionVec, V) * V);
|
|
|
+ const HVec3 axisVec = normalize(cross(orthoDirectionVec, V));
|
|
|
+ const HVec3 projectedNormalVec = viewNormal - axisVec * dot(viewNormal, axisVec);
|
|
|
+ const F16 signNorm = (F32)sign(dot(orthoDirectionVec, projectedNormalVec));
|
|
|
+ const F16 projectedNormalVecLength = length(projectedNormalVec);
|
|
|
+ const F16 cosNorm = saturate(dot(projectedNormalVec, V) / projectedNormalVecLength);
|
|
|
+ const F16 n = -signNorm * fastAcos(cosNorm);
|
|
|
|
|
|
// Find the projected radius
|
|
|
const Vec3 sphereLimit = Pc + Vec3(g_consts.m_radius, 0.0, 0.0);
|
|
|
const Vec4 projSphereLimit = project(Vec4(sphereLimit, 1.0));
|
|
|
const Vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
|
|
|
- const RF32 projRadius = length(projSphereLimit2 - ndc);
|
|
|
+ const F16 projRadius = length(projSphereLimit2 - ndc);
|
|
|
|
|
|
// Compute the inner integral (Slide 54)
|
|
|
const U32 stepCount = max(1u, g_consts.m_sampleCount / 2u);
|
|
|
|
|
|
- const RF32 lowHorizonCos1 = cos(n - kPi / 2.0f);
|
|
|
- const RF32 lowHorizonCos2 = cos(n + kPi / 2.0f);
|
|
|
+ const F16 lowHorizonCos1 = cos(n - kPi / 2.0);
|
|
|
+ const F16 lowHorizonCos2 = cos(n + kPi / 2.0);
|
|
|
|
|
|
- RF32 cosH1 = lowHorizonCos1;
|
|
|
- RF32 cosH2 = lowHorizonCos2;
|
|
|
+ F16 cosH1 = lowHorizonCos1;
|
|
|
+ F16 cosH2 = lowHorizonCos2;
|
|
|
|
|
|
for(U32 i = 0u; i < stepCount; ++i)
|
|
|
{
|
|
|
- const RF32 stepBaseNoise = RF32(i * stepCount) * 0.6180339887498948482;
|
|
|
- const RF32 stepNoise = frac(noise2.y + stepBaseNoise);
|
|
|
- RF32 s = (i + stepNoise) / RF32(stepCount);
|
|
|
+ const F16 stepBaseNoise = F16(i * stepCount) * 0.6180339887498948482;
|
|
|
+ const F16 stepNoise = frac(noise2.y + stepBaseNoise);
|
|
|
+ F16 s = (i + stepNoise) / F16(stepCount);
|
|
|
s *= s;
|
|
|
const Vec2 sampleOffset = dir2d * projRadius * s;
|
|
|
|
|
|
// h1
|
|
|
const Vec3 Ps = unproject(ndc + sampleOffset);
|
|
|
const Vec3 Ds = Ps - Pc;
|
|
|
- const RF32 DsLen = length(Ds);
|
|
|
+ const F16 DsLen = length(Ds);
|
|
|
cosH1 = max(cosH1, lerp(lowHorizonCos1, dot(V, Ds) / DsLen, computeFalloff(DsLen)));
|
|
|
|
|
|
// h2
|
|
|
const Vec3 Pt = unproject(ndc - sampleOffset);
|
|
|
const Vec3 Dt = Pt - Pc;
|
|
|
- const RF32 DtLen = length(Dt);
|
|
|
+ const F16 DtLen = length(Dt);
|
|
|
cosH2 = max(cosH2, lerp(lowHorizonCos2, dot(V, Dt) / DtLen, computeFalloff(DtLen)));
|
|
|
}
|
|
|
|
|
|
// Compute the h1 and h2
|
|
|
- const RF32 h1 = n + max(-fastAcos(cosH1) - n, -kPi / 2);
|
|
|
- const RF32 h2 = n + min(fastAcos(cosH2) - n, kPi / 2);
|
|
|
+ const F16 h1 = n + max(-fastAcos(cosH1) - n, -kPi / 2.0);
|
|
|
+ const F16 h2 = n + min(fastAcos(cosH2) - n, kPi / 2.0);
|
|
|
|
|
|
// Compute the final value (Slide 61)
|
|
|
- RF32 Vd = -cos(2.0f * h1 - n) + cos(n) + 2.0f * h1 * sin(n);
|
|
|
- Vd += -cos(2.0f * h2 - n) + cos(n) + 2.0f * h2 * sin(n);
|
|
|
+ F16 Vd = -cos(2.0 * h1 - n) + cos(n) + 2.0 * h1 * sin(n);
|
|
|
+ Vd += -cos(2.0 * h2 - n) + cos(n) + 2.0 * h2 * sin(n);
|
|
|
Vd *= 0.25;
|
|
|
Vd *= projectedNormalVecLength;
|
|
|
|
|
|
@@ -154,197 +145,186 @@ RVec4 main(VertOut input) : SV_TARGET0
|
|
|
Vd = pow(Vd, g_consts.m_ssaoPower);
|
|
|
|
|
|
// Compute bent normal: see "Algorithm 2 Extension that computes bent normals b."
|
|
|
- const RF32 t0 =
|
|
|
- (6.0f * sin(h1 - n) - sin(3.0f * h1 - n) + 6.0f * sin(h2 - n) - sin(3.0f * h2 - n) + 16.0f * sin(n) - 3.0f * (sin(h1 + n) + sin(h2 + n)))
|
|
|
- / 12.0f;
|
|
|
- const RF32 t1 = (-cos(3.0f * h1 - n) - cos(3.0f * h2 - n) + 8.0f * cos(n) - 3.0f * (cos(h1 + n) + cos(h2 + n))) / 12.0f;
|
|
|
- RVec3 bentNormal = RVec3(-dir2d.x * t0, -dir2d.y * t0, t1);
|
|
|
+ const F16 t0 =
|
|
|
+ (6.0 * sin(h1 - n) - sin(3.0 * h1 - n) + 6.0 * sin(h2 - n) - sin(3.0 * h2 - n) + 16.0 * sin(n) - 3.0 * (sin(h1 + n) + sin(h2 + n))) / 12.0;
|
|
|
+ const F16 t1 = (-cos(3.0 * h1 - n) - cos(3.0 * h2 - n) + 8.0 * cos(n) - 3.0 * (cos(h1 + n) + cos(h2 + n))) / 12.0;
|
|
|
+ HVec3 bentNormal = HVec3(-dir2d.x * t0, -dir2d.y * t0, t1);
|
|
|
bentNormal = normalize(bentNormal);
|
|
|
+ bentNormal = mul(g_consts.m_viewToWorldMat, Vec4(bentNormal, 0.0));
|
|
|
+
|
|
|
+ return HVec4(bentNormal, Vd);
|
|
|
+}
|
|
|
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
- g_bentNormalsAndSsaoStorageTex[svDispatchThreadId] = RVec4(bentNormal, Vd);
|
|
|
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+{
|
|
|
+ const Vec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
|
|
|
+ g_bentNormalsAndSsaoStorageTex[coord] = doWork(coord);
|
|
|
+}
|
|
|
# else
|
|
|
- return RVec4(bentNormal, Vd);
|
|
|
-# endif
|
|
|
+Vec4 main(VertOut input) : SV_TARGET0
|
|
|
+{
|
|
|
+ const Vec2 coord = floor(input.m_svPosition.xy);
|
|
|
+ return doWork(coord);
|
|
|
}
|
|
|
-#endif // ANKI_TECHNIQUE_Ssao && (ANKI_COMPUTE_SHADER || ANKI_PIXEL_SHADER)
|
|
|
+# endif
|
|
|
+#endif
|
|
|
|
|
|
// ===========================================================================
|
|
|
-// SSAO spatial denoise =
|
|
|
+// SSAO temporal denoise =
|
|
|
// ===========================================================================
|
|
|
-#if ANKI_TECHNIQUE_SsaoSpatialDenoise && (ANKI_COMPUTE_SHADER || ANKI_PIXEL_SHADER)
|
|
|
-# include <AnKi/Shaders/BilateralFilter.hlsl>
|
|
|
-# include <AnKi/Shaders/Include/MiscRendererTypes.h>
|
|
|
+#if ANKI_TECHNIQUE_SsaoTemporalDenoise && (ANKI_COMPUTE_SHADER || ANKI_PIXEL_SHADER)
|
|
|
# include <AnKi/Shaders/Functions.hlsl>
|
|
|
+# include <AnKi/Shaders/TonemappingFunctions.hlsl>
|
|
|
+# include <AnKi/Shaders/TonemappingFunctions.hlsl>
|
|
|
+# include <AnKi/Shaders/Include/MiscRendererTypes.h>
|
|
|
|
|
|
SamplerState g_linearAnyClampSampler : register(s0);
|
|
|
-Texture2D<RVec4> g_bentNormalsAndSsaoTex : register(t0);
|
|
|
-Texture2D<Vec4> g_depthTex : register(t1);
|
|
|
+
|
|
|
+Texture2D<Vec4> g_bentNormalsAndSsaoTex : register(t0);
|
|
|
+Texture2D<Vec4> g_historyBentNormalsAndSsaoTex : register(t1);
|
|
|
+Texture2D<Vec4> g_motionVectorsTex : register(t2);
|
|
|
+Texture2D<Vec4> g_historyLengthTex : register(t3);
|
|
|
+
|
|
|
+ConstantBuffer<GlobalRendererConstants> g_globalRendererConsts : register(b0);
|
|
|
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
-RWTexture2D<RVec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
+RWTexture2D<Vec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
# endif
|
|
|
|
|
|
-ANKI_FAST_CONSTANTS(SsaoSpatialDenoiseConstants, g_consts)
|
|
|
-
|
|
|
-F32 computeWeight(F32 depth, F32 refDepth)
|
|
|
+HVec4 doWork(Vec2 coord)
|
|
|
{
|
|
|
- const F32 diff = abs(depth - refDepth);
|
|
|
- return sqrt(1.0 / (0.0003 + diff));
|
|
|
-}
|
|
|
+ Vec2 viewport;
|
|
|
+ g_bentNormalsAndSsaoTex.GetDimensions(viewport.x, viewport.y);
|
|
|
+ const Vec2 uv = (coord + 0.5) / viewport;
|
|
|
|
|
|
-void sampleTex(Vec2 uv, IVec2 offset, F32 refDepth, inout RF32 ssao, inout RVec3 bentNormal, inout F32 weight)
|
|
|
-{
|
|
|
- const F32 linearDepth = linearizeDepthOptimal(g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset).x,
|
|
|
- g_consts.m_linearizeDepthParams.x, g_consts.m_linearizeDepthParams.y);
|
|
|
- const RVec4 bentNormalAndSsao = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, offset);
|
|
|
- const F32 w = computeWeight(refDepth, linearDepth);
|
|
|
- ssao += bentNormalAndSsao.w * w;
|
|
|
- bentNormal += bentNormalAndSsao.xyz * w;
|
|
|
- weight += w;
|
|
|
-}
|
|
|
+ const F16 minBlendFactor = 0.1;
|
|
|
+ const F16 maxBlendFactor = 0.9;
|
|
|
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
-[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
|
|
|
-# else
|
|
|
-RVec4 main(VertOut input) : SV_TARGET0
|
|
|
-# endif
|
|
|
-{
|
|
|
-// Set UVs
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
- Vec2 textureSize;
|
|
|
- U32 mipCount;
|
|
|
- g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
|
|
|
- const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
|
|
|
-# else
|
|
|
- const Vec2 uv = input.m_uv;
|
|
|
-# endif
|
|
|
+ const F16 historyLen = g_historyLengthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x * kMaxHistoryLength;
|
|
|
|
|
|
- // Sample ref
|
|
|
- const RVec4 refBentNormalAndSsao = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0);
|
|
|
- RF32 ssao = refBentNormalAndSsao.w;
|
|
|
- RVec3 bentNormal = refBentNormalAndSsao.xyz;
|
|
|
- const F32 refDepth = linearizeDepthOptimal(g_depthTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x, g_consts.m_linearizeDepthParams.x,
|
|
|
- g_consts.m_linearizeDepthParams.y);
|
|
|
- F32 weight = computeWeight(0.0f, 0.0f); // Highest weight that this function can give
|
|
|
-
|
|
|
- // Sample taps
|
|
|
- sampleTex(uv, IVec2(1, 1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(0, 1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-1, 1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-1, 0), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-1, -1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(0, -1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(1, -1), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(1, 0), refDepth, ssao, bentNormal, weight);
|
|
|
-
|
|
|
-# if SPATIAL_DENOISE_QUALITY == 1
|
|
|
- sampleTex(uv, IVec2(2, 2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(0, 2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-2, 2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-2, 0), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(-2, -2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(0, -2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(2, -2), refDepth, ssao, bentNormal, weight);
|
|
|
- sampleTex(uv, IVec2(2, 0), refDepth, ssao, bentNormal, weight);
|
|
|
+ F16 blendFactor = min(1.0, historyLen / 1.0);
|
|
|
+ blendFactor = lerp(maxBlendFactor, minBlendFactor, blendFactor);
|
|
|
+
|
|
|
+ Vec2 uv2 = uv;
|
|
|
+# if !DENOISING_QUARTER_RESOLUTION
|
|
|
+ uv2 /= 2.0;
|
|
|
# endif
|
|
|
+ HVec4 outColor = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv2, 0.0);
|
|
|
+
|
|
|
+ if(blendFactor > maxBlendFactor * 0.9)
|
|
|
+ {
|
|
|
+ // Don't accumulate
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ const Vec2 mv = g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0);
|
|
|
+ const Vec2 historyUv =
|
|
|
+ uv + mv
|
|
|
+ + (g_globalRendererConsts.m_previousMatrices.m_jitterOffsetNdc - g_globalRendererConsts.m_matrices.m_jitterOffsetNdc) / Vec2(2.0, -2.0);
|
|
|
|
|
|
- ssao /= weight;
|
|
|
- ssao = saturate(ssao);
|
|
|
+ const HVec4 history = g_historyBentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0);
|
|
|
|
|
|
- bentNormal /= weight;
|
|
|
- bentNormal = normalize(bentNormal);
|
|
|
- bentNormal = mul(g_consts.m_viewToWorldMat, Vec4(bentNormal, 0.0f));
|
|
|
+ outColor = lerp(history, outColor, blendFactor);
|
|
|
+ outColor.xyz = normalize(outColor.xyz);
|
|
|
+ }
|
|
|
+
|
|
|
+ return outColor;
|
|
|
+}
|
|
|
|
|
|
- // Write value
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
- g_bentNormalsAndSsaoStorageTex[svDispatchThreadId] = RVec4(bentNormal, ssao);
|
|
|
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+{
|
|
|
+ const Vec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
|
|
|
+ g_bentNormalsAndSsaoStorageTex[coord] = doWork(coord);
|
|
|
+}
|
|
|
# else
|
|
|
- return RVec4(bentNormal, ssao);
|
|
|
-# endif
|
|
|
+Vec4 main(VertOut input) : SV_TARGET0
|
|
|
+{
|
|
|
+ const Vec2 coord = floor(input.m_svPosition.xy);
|
|
|
+ return doWork(coord);
|
|
|
}
|
|
|
+# endif
|
|
|
#endif
|
|
|
|
|
|
// ===========================================================================
|
|
|
-// SSAO temporal denoise =
|
|
|
+// SSAO spatial denoise =
|
|
|
// ===========================================================================
|
|
|
-#if ANKI_TECHNIQUE_SsaoTemporalDenoise && (ANKI_COMPUTE_SHADER || ANKI_PIXEL_SHADER)
|
|
|
+#if(ANKI_TECHNIQUE_SsaoSpatialDenoiseHorizontal || ANKI_TECHNIQUE_SsaoSpatialDenoiseVertical) && (ANKI_COMPUTE_SHADER || ANKI_PIXEL_SHADER)
|
|
|
+# include <AnKi/Shaders/BilateralFilter.hlsl>
|
|
|
+# include <AnKi/Shaders/Include/MiscRendererTypes.h>
|
|
|
# include <AnKi/Shaders/Functions.hlsl>
|
|
|
-# include <AnKi/Shaders/TonemappingFunctions.hlsl>
|
|
|
+# include <AnKi/Shaders/BilateralFilter.hlsl>
|
|
|
|
|
|
SamplerState g_linearAnyClampSampler : register(s0);
|
|
|
-Texture2D<RVec4> g_bentNormalsAndSsaoTex : register(t0);
|
|
|
-Texture2D<RVec4> g_historyBentNormalsAndSsaoTex : register(t1);
|
|
|
-Texture2D<Vec4> g_motionVectorsTex : register(t2);
|
|
|
+Texture2D<Vec4> g_bentNormalsAndSsaoTex : register(t0);
|
|
|
+Texture2D<Vec4> g_depthTex : register(t1);
|
|
|
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
-RWTexture2D<RVec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
+RWTexture2D<Vec4> g_bentNormalsAndSsaoStorageTex : register(u0);
|
|
|
# endif
|
|
|
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
-[numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
|
|
|
-# else
|
|
|
-RVec4 main(VertOut input) : SV_TARGET0
|
|
|
-# endif
|
|
|
+F32 depthWeight(F32 refDepth, F32 sampleDepth)
|
|
|
{
|
|
|
-# if ANKI_COMPUTE_SHADER
|
|
|
- Vec2 textureSize;
|
|
|
- U32 mipCount;
|
|
|
- g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
|
|
|
- const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
|
|
|
-# else
|
|
|
- const Vec2 uv = input.m_uv;
|
|
|
-# endif
|
|
|
+ return calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
|
|
|
+}
|
|
|
|
|
|
- const Vec2 historyUv = uv + g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).xy;
|
|
|
+HVec4 doWork(Vec2 coord, Bool horizontal)
|
|
|
+{
|
|
|
+ Vec2 viewport;
|
|
|
+ g_bentNormalsAndSsaoTex.GetDimensions(viewport.x, viewport.y);
|
|
|
|
|
|
- // Read textures
|
|
|
- RVec4 history = g_historyBentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, historyUv, 0.0f);
|
|
|
- RVec4 current = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f);
|
|
|
+ // Sample ref
|
|
|
+ const F32 refDepth = g_depthTex[coord].x;
|
|
|
+ if(refDepth == 1.0)
|
|
|
+ {
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
|
|
|
- // Remove ghosting by clamping the history color to neighbour's AABB
|
|
|
- const RVec4 near0 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(1, 0));
|
|
|
- const RVec4 near1 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, 1));
|
|
|
- const RVec4 near2 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(-1, 0));
|
|
|
- const RVec4 near3 = g_bentNormalsAndSsaoTex.SampleLevel(g_linearAnyClampSampler, uv, 0, IVec2(0, -1));
|
|
|
+ F32 weightSum = depthWeight(0.0, 0.0); // Highest weight that this function can give
|
|
|
|
|
|
-# if 0
|
|
|
- const RVec4 boxMin = min(current, min4(near0, near1, near2, near3));
|
|
|
- const RVec4 boxMax = max(current, max4(near0, near1, near2, near3));
|
|
|
-# else
|
|
|
- const RVec4 m1 = current + near0 + near1 + near2 + near3;
|
|
|
- const RVec4 m2 = current * current + near0 * near0 + near1 * near1 + near2 * near2 + near3 * near3;
|
|
|
+ HVec4 bentNormalAndSsao = g_bentNormalsAndSsaoTex[coord];
|
|
|
+ bentNormalAndSsao *= weightSum;
|
|
|
|
|
|
- const RVec4 mu = m1 / 5.0;
|
|
|
- const RVec4 sigma = sqrt(m2 / 5.0 - mu * mu);
|
|
|
+ const F32 halfSampleCount = SPATIAL_DENOISE_SAMPLE_COUNT / 2;
|
|
|
+ for(F32 i = -halfSampleCount; i <= halfSampleCount; i += 1.0)
|
|
|
+ {
|
|
|
+ if(i == 0.0)
|
|
|
+ {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
- const F32 varianceClippingGamma = 1.2f;
|
|
|
- const RVec4 boxMin = mu - varianceClippingGamma * sigma;
|
|
|
- const RVec4 boxMax = mu + varianceClippingGamma * sigma;
|
|
|
-# endif
|
|
|
+ Vec2 newCoord = coord + ((horizontal) ? Vec2(i, 0.0) : Vec2(0.0, i));
|
|
|
+ newCoord = clamp(newCoord, 0.0, viewport - 1.0);
|
|
|
|
|
|
- history = clamp(history, boxMin, boxMax);
|
|
|
+ const HVec4 sampleColor = g_bentNormalsAndSsaoTex[newCoord];
|
|
|
+ const F32 sampleDepth = g_depthTex[newCoord].x;
|
|
|
|
|
|
- // Final
|
|
|
- const RF32 kBlendFactor = 0.1f;
|
|
|
+ const F32 weight = depthWeight(refDepth, sampleDepth);
|
|
|
|
|
|
- const F32 lum0 = computeLuminance(current.xyz) * current.w;
|
|
|
- const F32 lum1 = computeLuminance(history.xyz) * history.w;
|
|
|
- const F32 maxLum = 1.0;
|
|
|
+ bentNormalAndSsao += sampleColor * weight;
|
|
|
+ weightSum += weight;
|
|
|
+ }
|
|
|
|
|
|
- RF32 diff = abs(lum0 - lum1) / max(lum0, max(lum1, maxLum + kEpsilonF32));
|
|
|
- diff = 1.0 - diff;
|
|
|
- diff = diff * diff;
|
|
|
- const RF32 feedback = lerp(0.0, kBlendFactor, diff);
|
|
|
+ bentNormalAndSsao /= weightSum;
|
|
|
|
|
|
- RVec4 finalVal = lerp(history, current, feedback);
|
|
|
- finalVal.xyz = normalize(finalVal.xyz);
|
|
|
+ bentNormalAndSsao.w = saturate(bentNormalAndSsao.w);
|
|
|
+ bentNormalAndSsao.xyz = normalize(bentNormalAndSsao.xyz);
|
|
|
+
|
|
|
+ return bentNormalAndSsao;
|
|
|
+}
|
|
|
|
|
|
- // Write value
|
|
|
# if ANKI_COMPUTE_SHADER
|
|
|
- g_bentNormalsAndSsaoStorageTex[svDispatchThreadId] = finalVal;
|
|
|
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+{
|
|
|
+ const Vec2 coord = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
|
|
|
+ g_bentNormalsAndSsaoStorageTex[coord] = doWork(coord, ANKI_TECHNIQUE_SsaoSpatialDenoiseHorizontal);
|
|
|
+}
|
|
|
# else
|
|
|
- return finalVal;
|
|
|
-# endif
|
|
|
+Vec4 main(VertOut input) : SV_TARGET0
|
|
|
+{
|
|
|
+ const Vec2 coord = floor(input.m_svPosition.xy);
|
|
|
+ return doWork(coord, ANKI_TECHNIQUE_SsaoSpatialDenoiseHorizontal);
|
|
|
}
|
|
|
+# endif
|
|
|
#endif
|