|
|
@@ -7,7 +7,7 @@
|
|
|
|
|
|
#pragma anki mutator RAYS_PER_PROBE_PER_FRAME 32 64
|
|
|
#pragma anki mutator GPU_WAVE_SIZE 16 32 64
|
|
|
-#pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 10
|
|
|
+#pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 10 11 12
|
|
|
#pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
|
|
|
|
|
|
#pragma anki technique RtMaterialFetch rgen mutators
|
|
|
@@ -31,11 +31,11 @@
|
|
|
#define CLIPMAP_VOLUME 1
|
|
|
#include <AnKi/Shaders/RtMaterialFetch.hlsl>
|
|
|
|
|
|
-constexpr Vec3 kIndirectDiffuseClipmapForwardOffset = Vec3(10.0, 5.0, 10.0); // In meters
|
|
|
+constexpr Vec3 kIndirectDiffuseClipmapForwardOffset = Vec3(20.0, 2.0, 20.0); // In meters
|
|
|
|
|
|
void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, out Vec3 aabbMin, out Vec3 aabbMax)
|
|
|
{
|
|
|
- const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
|
|
|
+ const Vec3 offset = normalize(lookDir) * kIndirectDiffuseClipmapForwardOffset * (clipmap.m_index + 1);
|
|
|
cameraPos += offset;
|
|
|
|
|
|
const Vec3 halfSize = clipmap.m_size * 0.5;
|
|
|
@@ -47,7 +47,7 @@ void computeClipmapBounds(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, out Vec
|
|
|
|
|
|
F32 computeClipmapFade(Clipmap clipmap, Vec3 cameraPos, Vec3 lookDir, Vec3 worldPos)
|
|
|
{
|
|
|
- const Vec3 offset = normalize(Vec3(lookDir.x, 0.0, lookDir.z)) * kIndirectDiffuseClipmapForwardOffset;
|
|
|
+ const Vec3 offset = normalize(lookDir) * kIndirectDiffuseClipmapForwardOffset * (clipmap.m_index + 1);
|
|
|
|
|
|
cameraPos += offset;
|
|
|
|
|
|
@@ -176,7 +176,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
}
|
|
|
|
|
|
// Store result
|
|
|
- const F16 kMaxDist = sqrt(kMaxF16 - kEpsilonF16); // Make sure the square root doesn't overflow F16
|
|
|
+ const F32 kMaxDist = 1000.0; // Chose something small and make sure its square doesn't overflow F16
|
|
|
g_lightResultTex[UVec2(probeIdx, sampleIdx)] = HVec4(radiance, min(rayT, kMaxDist));
|
|
|
}
|
|
|
#endif // ANKI_RAY_GEN_SHADER
|
|
|
@@ -252,7 +252,7 @@ groupshared U32 g_invalideRayCount;
|
|
|
// Read the result from RT
|
|
|
const HVec4 comp = TEX(g_rtResultTex, UVec2(probeIdx, sampleIdx));
|
|
|
HVec3 radiance = comp.xyz;
|
|
|
- const HVec2 moments = HVec2(comp.w, square(comp.w));
|
|
|
+ const Vec2 moments = Vec2(comp.w, square(comp.w));
|
|
|
if(all(radiance == HVec3(1.0, 0.0, 1.0)))
|
|
|
{
|
|
|
InterlockedAdd(g_invalideRayCount, 1);
|
|
|
@@ -265,7 +265,7 @@ groupshared U32 g_invalideRayCount;
|
|
|
const UVec2 octCoord = min(octUv * RADIANCE_OCTAHEDRON_MAP_SIZE, RADIANCE_OCTAHEDRON_MAP_SIZE - 1);
|
|
|
|
|
|
HVec3 avgRadiance = 0.0;
|
|
|
- HVec2 avgMoments = 0.0;
|
|
|
+ Vec2 avgMoments = 0.0;
|
|
|
U32 iterationCount = 0;
|
|
|
do
|
|
|
{
|
|
|
@@ -286,7 +286,7 @@ groupshared U32 g_invalideRayCount;
|
|
|
const HVec3 prevValue = TEX(g_radianceVolume, actualVolumeTexCoord).xyz;
|
|
|
avgRadiance = lerp(prevValue, radiance, blendFactor);
|
|
|
|
|
|
- const HVec2 prevValue2 = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
|
|
|
+ const Vec2 prevValue2 = TEX(g_distanceMomentsVolume, actualVolumeTexCoord).xy;
|
|
|
avgMoments = lerp(prevValue2, moments, blendFactor);
|
|
|
}
|
|
|
else
|
|
|
@@ -348,7 +348,7 @@ groupshared U32 g_invalideRayCount;
|
|
|
|
|
|
// Search the nearby texels
|
|
|
HVec3 otherRadiance = HVec3(1.0, 0.0, 1.0);
|
|
|
- HVec2 otherMoments = HVec2(0.0, 0.0);
|
|
|
+ Vec2 otherMoments = 0.0;
|
|
|
for(I32 y = -1; y <= 1; ++y)
|
|
|
{
|
|
|
for(I32 x = -1; x <= 1; ++x)
|
|
|
@@ -405,6 +405,137 @@ groupshared U32 g_invalideRayCount;
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+// ===========================================================================
|
|
|
+// ComputeIrradiance =
|
|
|
+// ===========================================================================
|
|
|
+#if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
|
|
|
+
|
|
|
+struct Consts
|
|
|
+{
|
|
|
+ U32 m_clipmapIdx;
|
|
|
+ U32 m_padding1;
|
|
|
+ U32 m_padding2;
|
|
|
+ U32 m_padding3;
|
|
|
+};
|
|
|
+ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
+
|
|
|
+constexpr U32 kThreadCount = GPU_WAVE_SIZE;
|
|
|
+
|
|
|
+Texture3D<Vec4> g_radianceVolume : register(t0);
|
|
|
+
|
|
|
+RWTexture3D<Vec4> g_irradianceVolume : register(u0);
|
|
|
+
|
|
|
+ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
|
|
|
+
|
|
|
+groupshared Vec3 g_irradianceResults[kThreadCount];
|
|
|
+
|
|
|
+[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+{
|
|
|
+ const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
|
|
|
+ const U32 probeIdx = svGroupId.x;
|
|
|
+ const UVec2 irradianceTexel = svGroupId.yz;
|
|
|
+
|
|
|
+ // Compute input radiance coordinates
|
|
|
+ UVec3 radianceTexelCoordStart;
|
|
|
+ unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, radianceTexelCoordStart.z,
|
|
|
+ radianceTexelCoordStart.y, radianceTexelCoordStart.x);
|
|
|
+ radianceTexelCoordStart = radianceTexelCoordStart.xzy;
|
|
|
+ radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
+ radianceTexelCoordStart.xy += 1;
|
|
|
+
|
|
|
+ // Compute irradiance
|
|
|
+ Vec2 octUv = Vec2(irradianceTexel);
|
|
|
+ octUv += 0.5;
|
|
|
+ octUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const Vec3 dir = octahedronDecode(octUv);
|
|
|
+
|
|
|
+ const U32 radianceTexelCount = RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount;
|
|
|
+
|
|
|
+ Vec3 irradiance = 0.0;
|
|
|
+ for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
|
|
|
+ {
|
|
|
+ Vec2 octUv = Vec2(pixel % RADIANCE_OCTAHEDRON_MAP_SIZE, pixel / RADIANCE_OCTAHEDRON_MAP_SIZE);
|
|
|
+ octUv += 0.5;
|
|
|
+ octUv /= RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+
|
|
|
+ const Vec3 sampleDir = octahedronDecode(octUv);
|
|
|
+
|
|
|
+ const F32 lambert = dot(dir, sampleDir);
|
|
|
+ if(lambert <= kEpsilonF32)
|
|
|
+ {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ UVec3 coord = radianceTexelCoordStart;
|
|
|
+ coord.x += pixel % RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
+ coord.y += pixel / RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
+
|
|
|
+ const Vec3 radiance = TEX(g_radianceVolume, coord).xyz;
|
|
|
+
|
|
|
+ const F32 sampleCount = square(F32(RADIANCE_OCTAHEDRON_MAP_SIZE)) / 2.0;
|
|
|
+ irradiance += radiance * lambert / sampleCount;
|
|
|
+ }
|
|
|
+
|
|
|
+ g_irradianceResults[svGroupIndex] = irradiance;
|
|
|
+
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ [loop] for(U32 s = kThreadCount / 2u; s > 0u; s >>= 1u)
|
|
|
+ {
|
|
|
+ if(svGroupIndex < s)
|
|
|
+ {
|
|
|
+ g_irradianceResults[svGroupIndex] += g_irradianceResults[svGroupIndex + s];
|
|
|
+ }
|
|
|
+
|
|
|
+# if ANKI_PLATFORM_MOBILE
|
|
|
+ if(s > WaveGetLaneCount())
|
|
|
+ {
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+ }
|
|
|
+# else
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+# endif
|
|
|
+ }
|
|
|
+
|
|
|
+ if(svGroupIndex == 0)
|
|
|
+ {
|
|
|
+ irradiance = g_irradianceResults[0] * k2Pi;
|
|
|
+
|
|
|
+ // Compute the texel coord to write the output
|
|
|
+ UVec3 irradianceTexelCoord;
|
|
|
+ unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, irradianceTexelCoord.z,
|
|
|
+ irradianceTexelCoord.y, irradianceTexelCoord.x);
|
|
|
+ irradianceTexelCoord = irradianceTexelCoord.xzy;
|
|
|
+ irradianceTexelCoord.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
+ irradianceTexelCoord.xy += 1;
|
|
|
+ irradianceTexelCoord.x += irradianceTexel.x;
|
|
|
+ irradianceTexelCoord.y += irradianceTexel.y;
|
|
|
+
|
|
|
+ TEX(g_irradianceVolume, irradianceTexelCoord).xyz = irradiance;
|
|
|
+
|
|
|
+ // Write the borders
|
|
|
+ UVec3 volumeTexCoord;
|
|
|
+ unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, volumeTexCoord.z, volumeTexCoord.y,
|
|
|
+ volumeTexCoord.x);
|
|
|
+ volumeTexCoord = volumeTexCoord.xzy;
|
|
|
+
|
|
|
+ IVec2 borders[3];
|
|
|
+ const IVec2 octCoord = IVec2(irradianceTexel);
|
|
|
+ const U32 borderCount = octahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoord, borders);
|
|
|
+ for(U32 i = 0; i < borderCount; ++i)
|
|
|
+ {
|
|
|
+ IVec3 actualVolumeTexCoord;
|
|
|
+ actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1;
|
|
|
+ actualVolumeTexCoord.xy += borders[i];
|
|
|
+ actualVolumeTexCoord.z = volumeTexCoord.z;
|
|
|
+
|
|
|
+ TEX(g_irradianceVolume, actualVolumeTexCoord).xyz = irradiance;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
// ===========================================================================
|
|
|
// Test =
|
|
|
// ===========================================================================
|
|
|
@@ -424,7 +555,7 @@ ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0)
|
|
|
|
|
|
SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
|
|
|
-[NumThreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DispatchThreadID)
|
|
|
+[NumThreads(8, 8, 1)] void main(COMPUTE_ARGS)
|
|
|
{
|
|
|
UVec2 viewportSize;
|
|
|
g_outTex.GetDimensions(viewportSize.x, viewportSize.y);
|
|
|
@@ -434,21 +565,14 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- const Vec3 normal = unpackNormalFromGBuffer(g_gbufferRt2[svDispatchThreadId]);
|
|
|
+ const Vec3 normal = unpackNormalFromGBuffer(g_gbufferRt2[svDispatchThreadId.xy]);
|
|
|
|
|
|
- const F32 depth = g_depthTex[svDispatchThreadId].r;
|
|
|
- const Vec2 uv = Vec2(svDispatchThreadId) / Vec2(viewportSize);
|
|
|
+ const F32 depth = g_depthTex[svDispatchThreadId.xy].r;
|
|
|
+ const Vec2 uv = Vec2(svDispatchThreadId.xy) / Vec2(viewportSize);
|
|
|
const Vec2 ndc = uvToNdc(uv);
|
|
|
const Vec4 worldPos4 = mul(g_globalRendererConstants.m_matrices.m_invertedViewProjectionJitter, Vec4(ndc, depth, 1.0));
|
|
|
Vec3 worldPos = worldPos4.xyz / worldPos4.w;
|
|
|
|
|
|
- if(1)
|
|
|
- {
|
|
|
- const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[0];
|
|
|
- const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
|
|
|
- worldPos += normal * min3(probeSize) * 0.1;
|
|
|
- }
|
|
|
-
|
|
|
// Rand
|
|
|
UVec2 noiseTexSize;
|
|
|
g_blueNoiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
|
|
|
@@ -458,11 +582,11 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
|
|
|
const Mat3x4 cameraTrf = g_globalRendererConstants.m_matrices.m_cameraTransform;
|
|
|
const Vec3 lookDir = -Vec3(cameraTrf.m_row0[2], cameraTrf.m_row1[2], cameraTrf.m_row2[2]);
|
|
|
-# if 0
|
|
|
+# if 1
|
|
|
const U16 clipmapIdx = findClipmapOnPosition(g_globalRendererConstants.m_indirectDiffuseClipmaps, g_globalRendererConstants.m_cameraPosition,
|
|
|
lookDir, worldPos, noise);
|
|
|
# else
|
|
|
- U16 clipmapIdx = 0;
|
|
|
+ U16 clipmapIdx = 1;
|
|
|
const F32 fade = computeClipmapFade2(g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx], g_globalRendererConstants.m_cameraPosition,
|
|
|
lookDir, worldPos);
|
|
|
if(fade < 1.0)
|
|
|
@@ -474,19 +598,19 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
# if 0
|
|
|
if(clipmapIdx == 0)
|
|
|
{
|
|
|
- g_outTex[svDispatchThreadId] = Vec4(1, 0, 0, 0);
|
|
|
+ g_outTex[svDispatchThreadId.xy] = Vec4(1, 0, 0, 0);
|
|
|
}
|
|
|
else if(clipmapIdx == 1)
|
|
|
{
|
|
|
- g_outTex[svDispatchThreadId] = Vec4(0, 1, 0, 0);
|
|
|
+ g_outTex[svDispatchThreadId.xy] = Vec4(0, 1, 0, 0);
|
|
|
}
|
|
|
else if(clipmapIdx == 2)
|
|
|
{
|
|
|
- g_outTex[svDispatchThreadId] = Vec4(0, 0, 1, 0);
|
|
|
+ g_outTex[svDispatchThreadId.xy] = Vec4(0, 0, 1, 0);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- g_outTex[svDispatchThreadId] = Vec4(1, 0, 1, 0);
|
|
|
+ g_outTex[svDispatchThreadId.xy] = Vec4(1, 0, 1, 0);
|
|
|
}
|
|
|
|
|
|
return;
|
|
|
@@ -494,16 +618,18 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
|
|
|
if(clipmapIdx >= kIndirectDiffuseClipmapCount)
|
|
|
{
|
|
|
- g_outTex[svDispatchThreadId] = 0.0;
|
|
|
+ g_outTex[svDispatchThreadId.xy] = 0.0;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
// Some calculations are in the real volume size and some in some fake one that doesn't include the octahedron and the y and z are swapped
|
|
|
|
|
|
- const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx]; // TODO: Dynamically indexing cbuffer
|
|
|
+ const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx];
|
|
|
const Vec3 probeSize = clipmap.m_size / clipmap.m_probeCounts;
|
|
|
const Vec3 fakeVolumeSize = clipmap.m_probeCounts; // Volume size without the octahedron
|
|
|
|
|
|
+ const Vec3 biasedWorldPos = worldPos + normal * min3(probeSize) * 0.1;
|
|
|
+
|
|
|
F32 octahedronSize = 0.0;
|
|
|
Vec3 realVolumeSize;
|
|
|
{
|
|
|
@@ -522,11 +648,11 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
distMomentsOctSize -= 2.0; // The border
|
|
|
}
|
|
|
|
|
|
- const Vec3 samplePointUvw = frac(worldPos / clipmap.m_size);
|
|
|
+ const Vec3 samplePointUvw = frac(biasedWorldPos / clipmap.m_size);
|
|
|
const Vec3 icoord = floor(samplePointUvw * fakeVolumeSize - 0.5);
|
|
|
const Vec3 fcoord = frac(samplePointUvw * fakeVolumeSize - 0.5);
|
|
|
|
|
|
- const Vec3 firstProbePosition = floor((worldPos - probeSize / 2.0) / probeSize) * probeSize + probeSize / 2.0;
|
|
|
+ const Vec3 firstProbePosition = floor((biasedWorldPos - probeSize / 2.0) / probeSize) * probeSize + probeSize / 2.0;
|
|
|
|
|
|
F32 weightSum = 0.0;
|
|
|
Vec3 irradiance = 0.0;
|
|
|
@@ -561,14 +687,14 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
Vec3 uvw = coords.xzy;
|
|
|
uvw.xy *= distMomentsOctSize + 2.0;
|
|
|
uvw.xy += 1.0;
|
|
|
- uvw.xy += octahedronEncode(normalize(worldPos - probePosition)) * distMomentsOctSize;
|
|
|
+ uvw.xy += octahedronEncode(normalize(biasedWorldPos - probePosition)) * distMomentsOctSize;
|
|
|
uvw.z += 0.5;
|
|
|
uvw /= distMomentsRealVolumeSize;
|
|
|
- const HVec2 distMoments = g_distanceMomentsVolumes[NonUniformResourceIndex(clipmapIdx)].SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
|
|
|
+ const Vec2 distMoments = g_distanceMomentsVolumes[NonUniformResourceIndex(clipmapIdx)].SampleLevel(g_linearAnyRepeatSampler, uvw, 0.0);
|
|
|
|
|
|
const F32 variance = abs(distMoments.x * distMoments.x - distMoments.y);
|
|
|
|
|
|
- const F32 posToProbeDist = length(worldPos - probePosition);
|
|
|
+ const F32 posToProbeDist = length(biasedWorldPos - probePosition);
|
|
|
F32 chebyshevWeight = 1.0;
|
|
|
if(posToProbeDist > distMoments.x) // occluded
|
|
|
{
|
|
|
@@ -577,10 +703,10 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
chebyshevWeight = variance / (variance + (v * v));
|
|
|
|
|
|
// Increase the contrast in the weight
|
|
|
- chebyshevWeight = max((chebyshevWeight * chebyshevWeight * chebyshevWeight), 0.0);
|
|
|
+ chebyshevWeight = max((chebyshevWeight * chebyshevWeight * chebyshevWeight), 0.05);
|
|
|
}
|
|
|
|
|
|
- w *= max(0.05, chebyshevWeight);
|
|
|
+ w *= chebyshevWeight;
|
|
|
}
|
|
|
|
|
|
// Compute the actual coords
|
|
|
@@ -605,138 +731,7 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
irradiance = 0.0;
|
|
|
}
|
|
|
|
|
|
- g_outTex[svDispatchThreadId] = Vec4(irradiance, 0.0);
|
|
|
-}
|
|
|
-#endif
|
|
|
-
|
|
|
-// ===========================================================================
|
|
|
-// ComputeIrradiance =
|
|
|
-// ===========================================================================
|
|
|
-#if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
|
|
|
-
|
|
|
-struct Consts
|
|
|
-{
|
|
|
- U32 m_clipmapIdx;
|
|
|
- U32 m_padding1;
|
|
|
- U32 m_padding2;
|
|
|
- U32 m_padding3;
|
|
|
-};
|
|
|
-ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
-
|
|
|
-constexpr U32 kThreadCount = GPU_WAVE_SIZE;
|
|
|
-
|
|
|
-Texture3D<Vec4> g_radianceVolume : register(t0);
|
|
|
-
|
|
|
-RWTexture3D<Vec4> g_irradianceVolume : register(u0);
|
|
|
-
|
|
|
-ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
|
|
|
-
|
|
|
-groupshared Vec3 g_irradianceResults[kThreadCount];
|
|
|
-
|
|
|
-[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
-{
|
|
|
- const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
|
|
|
- const U32 probeIdx = svGroupId.x;
|
|
|
- const UVec2 irradianceTexel = svGroupId.yz;
|
|
|
-
|
|
|
- // Compute input radiance coordinates
|
|
|
- UVec3 radianceTexelCoordStart;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, radianceTexelCoordStart.z,
|
|
|
- radianceTexelCoordStart.y, radianceTexelCoordStart.x);
|
|
|
- radianceTexelCoordStart = radianceTexelCoordStart.xzy;
|
|
|
- radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
- radianceTexelCoordStart.xy += 1;
|
|
|
-
|
|
|
- // Compute irradiance
|
|
|
- Vec2 octUv = Vec2(irradianceTexel);
|
|
|
- octUv += 0.5;
|
|
|
- octUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
- const Vec3 dir = octahedronDecode(octUv);
|
|
|
-
|
|
|
- const U32 radianceTexelCount = RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
- const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount;
|
|
|
-
|
|
|
- Vec3 irradiance = 0.0;
|
|
|
- for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
|
|
|
- {
|
|
|
- Vec2 octUv = Vec2(pixel % RADIANCE_OCTAHEDRON_MAP_SIZE, pixel / RADIANCE_OCTAHEDRON_MAP_SIZE);
|
|
|
- octUv += 0.5;
|
|
|
- octUv /= RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
-
|
|
|
- const Vec3 sampleDir = octahedronDecode(octUv);
|
|
|
-
|
|
|
- const F32 lambert = dot(dir, sampleDir);
|
|
|
- if(lambert <= kEpsilonF32)
|
|
|
- {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- UVec3 coord = radianceTexelCoordStart;
|
|
|
- coord.x += pixel % RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
- coord.y += pixel / RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
-
|
|
|
- const Vec3 radiance = TEX(g_radianceVolume, coord).xyz;
|
|
|
-
|
|
|
- const F32 sampleCount = square(F32(RADIANCE_OCTAHEDRON_MAP_SIZE)) / 2.0;
|
|
|
- irradiance += radiance * lambert / sampleCount;
|
|
|
- }
|
|
|
-
|
|
|
- g_irradianceResults[svGroupIndex] = irradiance;
|
|
|
-
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
-
|
|
|
- [loop] for(U32 s = kThreadCount / 2u; s > 0u; s >>= 1u)
|
|
|
- {
|
|
|
- if(svGroupIndex < s)
|
|
|
- {
|
|
|
- g_irradianceResults[svGroupIndex] += g_irradianceResults[svGroupIndex + s];
|
|
|
- }
|
|
|
-
|
|
|
-# if ANKI_PLATFORM_MOBILE
|
|
|
- if(s > WaveGetLaneCount())
|
|
|
- {
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
- }
|
|
|
-# else
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
-# endif
|
|
|
- }
|
|
|
-
|
|
|
- if(svGroupIndex == 0)
|
|
|
- {
|
|
|
- irradiance = g_irradianceResults[0] * k2Pi;
|
|
|
-
|
|
|
- // Compute the texel coord to write the output
|
|
|
- UVec3 irradianceTexelCoord;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, irradianceTexelCoord.z,
|
|
|
- irradianceTexelCoord.y, irradianceTexelCoord.x);
|
|
|
- irradianceTexelCoord = irradianceTexelCoord.xzy;
|
|
|
- irradianceTexelCoord.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
- irradianceTexelCoord.xy += 1;
|
|
|
- irradianceTexelCoord.x += irradianceTexel.x;
|
|
|
- irradianceTexelCoord.y += irradianceTexel.y;
|
|
|
-
|
|
|
- TEX(g_irradianceVolume, irradianceTexelCoord).xyz = irradiance;
|
|
|
-
|
|
|
- // Write the borders
|
|
|
- UVec3 volumeTexCoord;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, volumeTexCoord.z, volumeTexCoord.y,
|
|
|
- volumeTexCoord.x);
|
|
|
- volumeTexCoord = volumeTexCoord.xzy;
|
|
|
-
|
|
|
- IVec2 borders[3];
|
|
|
- const IVec2 octCoord = IVec2(irradianceTexel);
|
|
|
- const U32 borderCount = octahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoord, borders);
|
|
|
- for(U32 i = 0; i < borderCount; ++i)
|
|
|
- {
|
|
|
- IVec3 actualVolumeTexCoord;
|
|
|
- actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1;
|
|
|
- actualVolumeTexCoord.xy += borders[i];
|
|
|
- actualVolumeTexCoord.z = volumeTexCoord.z;
|
|
|
-
|
|
|
- TEX(g_irradianceVolume, actualVolumeTexCoord).xyz = irradiance;
|
|
|
- }
|
|
|
- }
|
|
|
+ g_outTex[svDispatchThreadId.xy] = Vec4(irradiance, 0.0);
|
|
|
}
|
|
|
#endif
|
|
|
|