|
|
@@ -20,7 +20,7 @@
|
|
|
|
|
|
// Config
|
|
|
constexpr F32 kSpatialUpscalingPcfTexelOffset = 8.0;
|
|
|
-#define SPATIAL_UPSCALING_POISON_KERNEL kPoissonDisk4
|
|
|
+#define SPATIAL_UPSCALING_POISON_KERNEL kPoissonDisk8
|
|
|
constexpr F32 kMaxBilateralSamples = 5.0;
|
|
|
constexpr F32 kGaussianSigma = 0.55;
|
|
|
|
|
|
@@ -76,22 +76,23 @@ ANKI_FAST_CONSTANTS(SsrConstants2, g_consts)
|
|
|
// All calculations in view space
|
|
|
[NumThreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DispatchThreadID)
|
|
|
{
|
|
|
- UVec2 outSize;
|
|
|
- g_colorAndPdfTex.GetDimensions(outSize.x, outSize.y);
|
|
|
+ UVec2 halfViewportSize;
|
|
|
+ g_hitPosAndDepthTex.GetDimensions(halfViewportSize.x, halfViewportSize.y);
|
|
|
|
|
|
- const UVec2 coord = min(svDispatchThreadId, outSize - 1u);
|
|
|
- const Vec2 uv = (Vec2(coord) + 0.5) / Vec2(outSize);
|
|
|
+ const UVec2 realCoord = min(svDispatchThreadId, halfViewportSize - 1u);
|
|
|
+ const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
|
|
|
+ const Vec2 uv = (Vec2(logicalCoord) + 0.5) / Vec2(halfViewportSize.x * 2u, halfViewportSize.y);
|
|
|
|
|
|
- const F32 depth = g_depthTex.SampleLevel(g_trilinearClampSampler, uv, 0.0).x;
|
|
|
+ const F32 depth = g_depthTex[logicalCoord].x;
|
|
|
if(depth == 1.0)
|
|
|
{
|
|
|
- g_colorAndPdfTex[coord] = 0.0;
|
|
|
- g_hitPosAndDepthTex[coord] = 0.0;
|
|
|
+ g_colorAndPdfTex[realCoord] = 0.0;
|
|
|
+ g_hitPosAndDepthTex[realCoord] = 0.0;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- const Vec4 rt1 = g_gbufferRt1[coord];
|
|
|
- const Vec4 rt2 = g_gbufferRt2[coord];
|
|
|
+ const Vec4 rt1 = g_gbufferRt1[logicalCoord];
|
|
|
+ const Vec4 rt2 = g_gbufferRt2[logicalCoord];
|
|
|
|
|
|
const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
|
|
|
const Vec3 viewNormal = mul(g_globalRendererConstants.m_matrices.m_view, Vec4(worldNormal, 0.0));
|
|
|
@@ -101,7 +102,7 @@ ANKI_FAST_CONSTANTS(SsrConstants2, g_consts)
|
|
|
const Vec3 viewPos = cheapPerspectiveUnprojection(g_globalRendererConstants.m_matrices.m_unprojectionParameters, ndc, depth);
|
|
|
|
|
|
// Noise
|
|
|
- const UVec3 seed = rand3DPCG16(UVec3(coord, g_globalRendererConstants.m_frame % 8u));
|
|
|
+ const UVec3 seed = rand3DPCG16(UVec3(logicalCoord, g_globalRendererConstants.m_frame % 8u));
|
|
|
const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
|
|
|
|
|
|
// Compute refl
|
|
|
@@ -190,15 +191,15 @@ ANKI_FAST_CONSTANTS(SsrConstants2, g_consts)
|
|
|
|
|
|
pdf = max(0.0, pdf) * ssrAttenuation;
|
|
|
|
|
|
- g_colorAndPdfTex[coord] = Vec4(outColor, pdf);
|
|
|
- g_hitPosAndDepthTex[coord] = Vec4(worldHitPos, 1.0 - depth); // Store depth in reverse for better precision
|
|
|
+ g_colorAndPdfTex[realCoord] = Vec4(outColor, pdf);
|
|
|
+ g_hitPosAndDepthTex[realCoord] = Vec4(worldHitPos, 1.0 - depth); // Store depth in reverse for better precision
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
U32 writePos;
|
|
|
InterlockedAdd(g_raygenIndirectArgs[0].m_threadGroupCountX, 1u, writePos);
|
|
|
|
|
|
- g_pixelsFailedSsr[writePos] = (coord.x << 16u) | coord.y;
|
|
|
+ g_pixelsFailedSsr[writePos] = (realCoord.x << 16u) | realCoord.y;
|
|
|
}
|
|
|
}
|
|
|
#endif
|
|
|
@@ -219,34 +220,28 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
|
|
|
[shader("raygeneration")] void main()
|
|
|
{
|
|
|
- UVec2 outSize;
|
|
|
- g_colorAndPdfTex.GetDimensions(outSize.x, outSize.y);
|
|
|
+ UVec2 halfViewportSize;
|
|
|
+ g_hitPosAndDepthTex.GetDimensions(halfViewportSize.x, halfViewportSize.y);
|
|
|
|
|
|
const U32 pixel = g_pixelsFailedSsr[DispatchRaysIndex().x];
|
|
|
- const UVec2 coord = UVec2(pixel >> 16u, pixel & 0xFFFFu);
|
|
|
-
|
|
|
- const F32 depth = g_depthTex[coord].x;
|
|
|
- if(depth == 1.0)
|
|
|
- {
|
|
|
- g_colorAndPdfTex[coord] = 0.0;
|
|
|
- g_hitPosAndDepthTex[coord] = 0.0;
|
|
|
- return;
|
|
|
- }
|
|
|
+ const UVec2 realCoord = UVec2(pixel >> 16u, pixel & 0xFFFFu);
|
|
|
+ const UVec2 logicalCoord = UVec2(realCoord.x * 2u + (realCoord.y & 1u), realCoord.y);
|
|
|
|
|
|
- const Vec4 rt1 = g_gbufferRt1[coord];
|
|
|
- const Vec4 rt2 = g_gbufferRt2[coord];
|
|
|
+ const F32 depth = g_depthTex[logicalCoord].x;
|
|
|
+ const Vec4 rt1 = g_gbufferRt1[logicalCoord];
|
|
|
+ const Vec4 rt2 = g_gbufferRt2[logicalCoord];
|
|
|
|
|
|
const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
|
|
|
const F32 roughness = unpackRoughnessFromGBuffer(rt1);
|
|
|
|
|
|
- const Vec2 ndc = uvToNdc((Vec2(coord) + 0.5) / Vec2(outSize));
|
|
|
+ const Vec2 ndc = uvToNdc((Vec2(logicalCoord) + 0.5) / Vec2(halfViewportSize.x * 2u, halfViewportSize.y));
|
|
|
const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
|
|
|
const Vec3 worldPos = v4.xyz / v4.w;
|
|
|
|
|
|
const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
|
|
|
|
|
|
// Noise
|
|
|
- const UVec3 seed = rand3DPCG16(UVec3(coord, g_globalRendererConstants.m_frame % 8u));
|
|
|
+ const UVec3 seed = rand3DPCG16(UVec3(logicalCoord, g_globalRendererConstants.m_frame % 8u));
|
|
|
const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
|
|
|
|
|
|
// Compute refl
|
|
|
@@ -327,11 +322,11 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
const Vec3 diffC = diffuseLobe(payload.m_diffuseColor);
|
|
|
outColor += diffC * dirLight.m_diffuseColor * lambert * shadow;
|
|
|
|
|
|
- g_colorAndPdfTex[coord] = Vec4(outColor, max(0.0, pdf));
|
|
|
+ g_colorAndPdfTex[realCoord] = Vec4(outColor, max(0.0, pdf));
|
|
|
|
|
|
// Move it with camera to avoid precision issues since it's stored in fp16
|
|
|
// Store depth in reverse for better precision
|
|
|
- g_hitPosAndDepthTex[coord] = Vec4(hitPos - g_globalRendererConstants.m_cameraPosition, 1.0 - depth);
|
|
|
+ g_hitPosAndDepthTex[realCoord] = Vec4(hitPos - g_globalRendererConstants.m_cameraPosition, 1.0 - depth);
|
|
|
}
|
|
|
#endif // ANKI_RAY_GEN_SHADER
|
|
|
|
|
|
@@ -363,29 +358,87 @@ RWTexture2D<Vec4> g_hitPosTex : register(u1);
|
|
|
|
|
|
ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
|
|
|
|
|
|
-# define NUM_THREADS 64u
|
|
|
+groupshared Vec4 g_colorAndPdf[4][8];
|
|
|
+groupshared Vec4 g_hitPosAndDepth[4][8];
|
|
|
+
|
|
|
+// Checkerboard classification of a full resolution pixel. A pixel is "white" when its X and Y coordinates have the same
+// parity. White pixels are the ones that contain data populated by the previous passes
+Bool isCheckerboardWhite(UVec2 coord)
+{
+	// The lowest bit of the XOR is zero only when both coordinates are even or both are odd
+	const U32 parityMismatch = (coord.x ^ coord.y) & 1u;
+	return parityMismatch == 0u;
+}
|
|
|
+
|
|
|
+// Accumulate one neighbouring checkerboard-white sample, read from group shared memory, into the running sums that are
+// used to reconstruct a checkerboard-black pixel.
+// svGroupThreadId: Position of the neighbour inside the 8x8 thread group, in full resolution pixels. Positions that
+//                  fall outside the group are ignored because their data is not available in shared memory
+// refDepth: Depth of the pixel that is being reconstructed
+// color, pdf, hitPos: Weighted sums. The neighbour's contribution is added in place
+// sumWeight: Sum of the weights, incremented in place. The caller is expected to divide the sums by it
+void reconstructCheckerboardBlack(IVec2 svGroupThreadId, F32 refDepth, inout Vec3 color, inout F32 pdf, inout Vec3 hitPos, inout F32 sumWeight)
+{
+	// Signed comparison so that negative offsets are rejected explicitly instead of through unsigned wrap-around
+	if(any(svGroupThreadId < 0) || any(svGroupThreadId > 7))
+	{
+		return;
+	}
+
+	// The shared arrays are [4][8]: half width, full height. They are written as [x / 2][y] so only X is halved
+	svGroupThreadId.x /= 2;
+
+	const Vec4 neighbourColorAndPdf = g_colorAndPdf[svGroupThreadId.x][svGroupThreadId.y];
+	const Vec4 neighbourHitPosAndDepth = g_hitPosAndDepth[svGroupThreadId.x][svGroupThreadId.y];
+
+	// xyz holds the hit position, the depth of the neighbour lives in w
+	const F32 weight = calculateBilateralWeightDepth(refDepth, neighbourHitPosAndDepth.w, 1.0);
+
+	color += neighbourColorAndPdf.xyz * weight;
+	pdf += neighbourColorAndPdf.w * weight;
+	hitPos += neighbourHitPosAndDepth.xyz * weight;
+
+	sumWeight += weight;
+}
|
|
|
|
|
|
[NumThreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DispatchThreadID, UVec2 svGroupThreadId : SV_GROUPTHREADID,
|
|
|
U32 svGroupIndex : SV_GROUPINDEX)
|
|
|
{
|
|
|
- UVec2 outSize;
|
|
|
- g_colorAndPdfTex.GetDimensions(outSize.x, outSize.y);
|
|
|
+ UVec2 viewportSize;
|
|
|
+ g_colorAndPdfTex.GetDimensions(viewportSize.x, viewportSize.y);
|
|
|
+ const UVec2 halfViewportSize = UVec2(viewportSize.x / 2u, viewportSize.y);
|
|
|
|
|
|
- const UVec2 coord = min(svDispatchThreadId, outSize - 1);
|
|
|
+ const UVec2 coord = min(svDispatchThreadId, viewportSize - 1u);
|
|
|
+ const UVec2 checkerboardCoord = UVec2(coord.x / 2u, coord.y);
|
|
|
|
|
|
- Vec4 rgba = g_colorAndPdfTex[coord];
|
|
|
- const Vec3 color = rgba.xyz;
|
|
|
- const F32 pdf = rgba.w;
|
|
|
+ const F32 refDepth = g_depthTex[coord];
|
|
|
|
|
|
- const F32 depth = g_depthTex[coord];
|
|
|
- if(depth == 1.0)
|
|
|
+ Vec3 refColor = 0.0;
|
|
|
+ F32 refPdf = 0.0;
|
|
|
+ Vec3 refHitPos = 0.0;
|
|
|
+ if(isCheckerboardWhite(coord))
|
|
|
+ {
|
|
|
+ // Dump pixel data to shared memory to be used to reconstruct other pixels
|
|
|
+ const Vec4 rgba = g_colorAndPdfTex[checkerboardCoord];
|
|
|
+ refColor = rgba.xyz;
|
|
|
+ refPdf = rgba.w;
|
|
|
+ g_colorAndPdf[svGroupThreadId.x / 2u][svGroupThreadId.y] = rgba;
|
|
|
+
|
|
|
+ refHitPos = g_hitPosAndDepthTex[checkerboardCoord].xyz;
|
|
|
+ g_hitPosAndDepth[svGroupThreadId.x / 2u][svGroupThreadId.y] = Vec4(refHitPos, refDepth);
|
|
|
+ }
|
|
|
+
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ if(!isCheckerboardWhite(coord))
|
|
|
{
|
|
|
- g_denoisedTex[svDispatchThreadId] = 0.0;
|
|
|
+ // Reconstruct missing pixel
|
|
|
+ const IVec2 svGroupThreadIdi = svGroupThreadId;
|
|
|
+
|
|
|
+ F32 sumWeight = 0.001;
|
|
|
+ reconstructCheckerboardBlack(svGroupThreadIdi + IVec2(-1, 0), refDepth, refColor, refPdf, refHitPos, sumWeight);
|
|
|
+ reconstructCheckerboardBlack(svGroupThreadIdi + IVec2(+1, 0), refDepth, refColor, refPdf, refHitPos, sumWeight);
|
|
|
+ reconstructCheckerboardBlack(svGroupThreadIdi + IVec2(0, +1), refDepth, refColor, refPdf, refHitPos, sumWeight);
|
|
|
+ reconstructCheckerboardBlack(svGroupThreadIdi + IVec2(0, -1), refDepth, refColor, refPdf, refHitPos, sumWeight);
|
|
|
+
|
|
|
+ refColor /= sumWeight;
|
|
|
+ refPdf /= sumWeight;
|
|
|
+ refHitPos /= sumWeight;
|
|
|
+ }
|
|
|
+
|
|
|
+ if(refDepth == 1.0)
|
|
|
+ {
|
|
|
+ g_denoisedTex[coord] = 0.0;
|
|
|
+ g_hitPosTex[coord] = 0.0;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- const Vec2 ndc = uvToNdc((Vec2(coord) + 0.5) / Vec2(outSize));
|
|
|
- const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
|
|
|
+ const Vec2 ndc = uvToNdc((Vec2(coord) + 0.5) / Vec2(viewportSize));
|
|
|
+ const Vec4 v4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, refDepth, 1.0));
|
|
|
const Vec3 worldPos = v4.xyz / v4.w;
|
|
|
|
|
|
const Vec3 viewDir = normalize(g_globalRendererConstants.m_cameraPosition - worldPos);
|
|
|
@@ -399,24 +452,24 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
|
|
|
|
|
|
if(roughness <= kMinRoughness + kEpsilonF32)
|
|
|
{
|
|
|
- outColor = color;
|
|
|
- newHitPos = g_hitPosAndDepthTex[coord].xyz + g_globalRendererConstants.m_cameraPosition;
|
|
|
+ outColor = refColor;
|
|
|
+ newHitPos = refHitPos + g_globalRendererConstants.m_cameraPosition;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
const Vec4 rt2 = g_gbufferRt2[coord];
|
|
|
const Vec3 worldNormal = unpackNormalFromGBuffer(rt2);
|
|
|
|
|
|
- const UVec3 seed = rand3DPCG16(UVec3(svDispatchThreadId, g_globalRendererConstants.m_frame % 8u));
|
|
|
+ const UVec3 seed = rand3DPCG16(UVec3(coord, g_globalRendererConstants.m_frame % 8u));
|
|
|
const Vec2 randFactors = hammersleyRandom16(g_globalRendererConstants.m_frame % 64u, 64u, seed);
|
|
|
|
|
|
const F32 sinTheta = sin(randFactors.x * 2.0 * kPi);
|
|
|
const F32 cosTheta = cos(randFactors.x * 2.0 * kPi);
|
|
|
|
|
|
const F32 sampleCount = ARRAY_SIZE(SPATIAL_UPSCALING_POISON_KERNEL) + 1.0;
|
|
|
- F32 avgLuma = computeLuminance(color) / sampleCount;
|
|
|
- outColor = color;
|
|
|
- F32 weightSum = pdf;
|
|
|
+ F32 avgLuma = computeLuminance(refColor) / sampleCount;
|
|
|
+ outColor = refColor;
|
|
|
+ F32 weightSum = refPdf;
|
|
|
for(U32 i = 0u; i < ARRAY_SIZE(SPATIAL_UPSCALING_POISON_KERNEL); ++i)
|
|
|
{
|
|
|
const Vec2 diskPoint = SPATIAL_UPSCALING_POISON_KERNEL[i];
|
|
|
@@ -426,17 +479,19 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
|
|
|
rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
|
|
|
rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;
|
|
|
|
|
|
+ rotatedDiskPoint.x /= 2.0; // Adjust because the input textures are in half width
|
|
|
+
|
|
|
// Offset calculation
|
|
|
- const IVec2 newCoord = clamp(IVec2(coord) + rotatedDiskPoint * kSpatialUpscalingPcfTexelOffset, 0, outSize - 1);
|
|
|
+ const IVec2 newCoord = clamp(IVec2(checkerboardCoord) + rotatedDiskPoint * kSpatialUpscalingPcfTexelOffset, 0, halfViewportSize - 1u);
|
|
|
|
|
|
- rgba = g_hitPosAndDepthTex[newCoord];
|
|
|
+ const Vec4 rgba = g_hitPosAndDepthTex[newCoord];
|
|
|
const F32 sampleDepth = 1.0 - rgba.w;
|
|
|
const Vec3 hitPos = rgba.xyz + g_globalRendererConstants.m_cameraPosition;
|
|
|
|
|
|
const Vec3 reflectedDir = normalize(hitPos - worldPos);
|
|
|
const F32 pdf = pdfVndfIsotropic(reflectedDir, viewDir, alpha, worldNormal);
|
|
|
|
|
|
- const F32 weight = pdf * calculateBilateralWeightDepth(depth, sampleDepth, 1.0);
|
|
|
+ const F32 weight = pdf * calculateBilateralWeightDepth(refDepth, sampleDepth, 1.0);
|
|
|
|
|
|
if(weight > 0.001)
|
|
|
{
|
|
|
@@ -450,8 +505,16 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- outColor /= weightSum;
|
|
|
- newHitPos /= weightSum;
|
|
|
+ if(weightSum > 0.001)
|
|
|
+ {
|
|
|
+ outColor /= weightSum;
|
|
|
+ newHitPos /= weightSum;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ outColor = 0.0;
|
|
|
+ newHitPos = g_globalRendererConstants.m_cameraPosition;
|
|
|
+ }
|
|
|
|
|
|
// Remove fireflies
|
|
|
const F32 luma = computeLuminance(outColor);
|
|
|
@@ -461,7 +524,7 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- g_denoisedTex[coord] = Vec4(outColor, 1.0 - depth); // Store depth in reverse for better precision
|
|
|
+ g_denoisedTex[coord] = Vec4(outColor, 1.0 - refDepth); // Store depth in reverse for better precision
|
|
|
g_hitPosTex[coord] = Vec4(newHitPos - g_globalRendererConstants.m_cameraPosition, 0.0);
|
|
|
}
|
|
|
#endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_SpatialDenoise
|