|
|
@@ -527,127 +527,138 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
// ===========================================================================
|
|
|
#if NOT_ZERO(ANKI_TECHNIQUE_ComputeIrradiance)
|
|
|
|
|
|
-struct Consts
|
|
|
-{
|
|
|
- U32 m_clipmapIdx;
|
|
|
- U32 m_padding1;
|
|
|
- U32 m_padding2;
|
|
|
- U32 m_padding3;
|
|
|
-};
|
|
|
-ANKI_FAST_CONSTANTS(Consts, g_consts)
|
|
|
-
|
|
|
-constexpr U32 kThreadCount = GPU_WAVE_SIZE;
|
|
|
+constexpr U32 kThreadCount = min(32, GPU_WAVE_SIZE); // Keep it at or below 32 to avoid threads doing no work
|
|
|
|
|
|
-Texture3D<Vec4> g_radianceVolume : register(t0);
|
|
|
+Texture3D<Vec4> g_radianceVolumes[kIndirectDiffuseClipmapCount] : register(t0);
|
|
|
|
|
|
-RWTexture3D<Vec4> g_irradianceVolume : register(u0);
|
|
|
+RWTexture3D<Vec4> g_irradianceVolumes[kIndirectDiffuseClipmapCount] : register(u0);
|
|
|
|
|
|
ConstantBuffer<GlobalRendererConstants> g_globalRendererConstants : register(b0);
|
|
|
|
|
|
-groupshared Vec3 g_irradianceResults[kThreadCount];
|
|
|
+SamplerState g_linearAnyRepeatSampler : register(s0);
|
|
|
|
|
|
-[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+groupshared U64Vec3 g_irradianceResults[IRRADIANCE_OCTAHEDRON_MAP_SIZE][IRRADIANCE_OCTAHEDRON_MAP_SIZE];
|
|
|
+
|
|
|
+void InterlockedAddColor(U32 x, U32 y, Vec3 color)
|
|
|
{
|
|
|
- const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[g_consts.m_clipmapIdx];
|
|
|
- const U32 probeIdx = svGroupId.x;
|
|
|
- const UVec2 irradianceTexel = svGroupId.yz;
|
|
|
-
|
|
|
- // Compute input radiance coordinates
|
|
|
- UVec3 radianceTexelCoordStart;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, radianceTexelCoordStart.z,
|
|
|
- radianceTexelCoordStart.y, radianceTexelCoordStart.x);
|
|
|
- radianceTexelCoordStart = radianceTexelCoordStart.xzy;
|
|
|
- radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
- radianceTexelCoordStart.xy += 1;
|
|
|
+ [unroll] for(U32 i = 0; i < 3; ++i)
|
|
|
+ {
|
|
|
+ const F32 fracPart = frac(color[i]);
|
|
|
+ const F32 intPart = color[i] - fracPart;
|
|
|
|
|
|
- // Compute irradiance
|
|
|
- Vec2 octUv = Vec2(irradianceTexel);
|
|
|
- octUv += 0.5;
|
|
|
- octUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
- const Vec3 dir = octahedronDecode(octUv);
|
|
|
+ U64 val = U64(intPart) << U64(32);
|
|
|
+ val |= U64(fracPart * 10000.0);
|
|
|
+ InterlockedAdd(g_irradianceResults[y][x][i], val);
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- const U32 radianceTexelCount = RADIANCE_OCTAHEDRON_MAP_SIZE * RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
- const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount;
|
|
|
+Vec3 decodeAtomicColor(U32 x, U32 y)
|
|
|
+{
|
|
|
+ Vec3 output;
|
|
|
+ [unroll] for(U32 i = 0; i < 3; ++i)
|
|
|
+ {
|
|
|
+ const U64 val = g_irradianceResults[y][x][i];
|
|
|
|
|
|
- Vec3 irradiance = 0.0;
|
|
|
- for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
|
|
|
+ output[i] = F32(val >> U64(32));
|
|
|
+ output[i] += F32(val & U64(kMaxU32)) / 10000.0;
|
|
|
+ }
|
|
|
+ return output;
|
|
|
+}
|
|
|
+
|
|
|
+// The group services a single probe. Every thread reads a radiance value and bins it to the appropriate irradiance pixel
|
|
|
+[NumThreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
|
|
|
+{
|
|
|
+ const U32 clipmapIdx = svGroupId.x / g_globalRendererConstants.m_indirectDiffuseClipmaps[0].m_probeCounts[0];
|
|
|
+ const UVec3 probeId = UVec3(svGroupId.x % g_globalRendererConstants.m_indirectDiffuseClipmaps[0].m_probeCounts[0], svGroupId.y, svGroupId.z);
|
|
|
+ const Clipmap clipmap = g_globalRendererConstants.m_indirectDiffuseClipmaps[clipmapIdx];
|
|
|
+
|
|
|
+ // Zero shared memory
|
|
|
+ const U32 irradianceTexelCount = IRRADIANCE_OCTAHEDRON_MAP_SIZE * IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const U32 irradiancePixelsPerThread = (irradianceTexelCount + kThreadCount - 1) / kThreadCount;
|
|
|
+ for(U32 pixel = svGroupIndex * irradiancePixelsPerThread; pixel < min(irradianceTexelCount, (svGroupIndex + 1) * irradiancePixelsPerThread);
|
|
|
+ ++pixel)
|
|
|
{
|
|
|
- Vec2 octUv = Vec2(pixel % RADIANCE_OCTAHEDRON_MAP_SIZE, pixel / RADIANCE_OCTAHEDRON_MAP_SIZE);
|
|
|
- octUv += 0.5;
|
|
|
- octUv /= RADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const U32 x = pixel % IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const U32 y = pixel / IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
|
|
|
- const Vec3 sampleDir = octahedronDecode(octUv);
|
|
|
+ g_irradianceResults[y][x] = 0;
|
|
|
+ }
|
|
|
|
|
|
- const F32 lambert = dot(dir, sampleDir);
|
|
|
- if(lambert <= kEpsilonF32)
|
|
|
- {
|
|
|
- continue;
|
|
|
- }
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
|
|
|
- UVec3 coord = radianceTexelCoordStart;
|
|
|
- coord.x += pixel % RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
- coord.y += pixel / RADIANCE_OCTAHEDRON_MAP_SIZE + 1;
|
|
|
+ // Iterate the radiance pixels of this thread and bin them to the irradiance texel. Use bilinear filtering to reduce the sample count
|
|
|
+ UVec3 radianceTexelCoordStart = probeId.xzy;
|
|
|
+ radianceTexelCoordStart.xy *= RADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
+ radianceTexelCoordStart.xy += 1;
|
|
|
+ const Vec3 radianceVolumeSize = clipmap.m_probeCounts.xzy * Vec3(RADIANCE_OCTAHEDRON_MAP_SIZE + 2, RADIANCE_OCTAHEDRON_MAP_SIZE + 2, 1.0);
|
|
|
+ const Vec3 radianceTexelStartUv = (Vec3(radianceTexelCoordStart) + Vec3(0.0, 0.0, 0.5)) / radianceVolumeSize;
|
|
|
|
|
|
- const Vec3 radiance = TEX(g_radianceVolume, coord).xyz;
|
|
|
+ const U32 halfRadianceOctMapSize = RADIANCE_OCTAHEDRON_MAP_SIZE / 2;
|
|
|
+ const U32 radianceTexelCount = square(halfRadianceOctMapSize);
|
|
|
+ const U32 radiancePixelsPerThread = (radianceTexelCount + kThreadCount - 1) / kThreadCount;
|
|
|
+ for(U32 pixel = svGroupIndex * radiancePixelsPerThread; pixel < min(radianceTexelCount, (svGroupIndex + 1) * radiancePixelsPerThread); ++pixel)
|
|
|
+ {
|
|
|
+ Vec2 radianceOctUv = Vec2(pixel % halfRadianceOctMapSize, pixel / halfRadianceOctMapSize);
|
|
|
+ radianceOctUv += 0.5;
|
|
|
+ radianceOctUv /= halfRadianceOctMapSize;
|
|
|
|
|
|
- const F32 sampleCount = square(F32(RADIANCE_OCTAHEDRON_MAP_SIZE)) / 2.0;
|
|
|
- irradiance += radiance * lambert / sampleCount;
|
|
|
- }
|
|
|
+ const Vec3 sampleDir = octahedronDecode(radianceOctUv);
|
|
|
|
|
|
- g_irradianceResults[svGroupIndex] = irradiance;
|
|
|
+ Vec3 uv = radianceTexelStartUv;
|
|
|
+ uv.xy += radianceOctUv * RADIANCE_OCTAHEDRON_MAP_SIZE / radianceVolumeSize.xy;
|
|
|
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
+ const Vec3 radiance = g_radianceVolumes[clipmapIdx].SampleLevel(g_linearAnyRepeatSampler, uv, 0.0).xyz;
|
|
|
|
|
|
- [loop] for(U32 s = kThreadCount / 2u; s > 0u; s >>= 1u)
|
|
|
- {
|
|
|
- if(svGroupIndex < s)
|
|
|
+ for(U32 irradiancePixelX = 0; irradiancePixelX < IRRADIANCE_OCTAHEDRON_MAP_SIZE; ++irradiancePixelX)
|
|
|
{
|
|
|
- g_irradianceResults[svGroupIndex] += g_irradianceResults[svGroupIndex + s];
|
|
|
- }
|
|
|
+ for(U32 irradiancePixelY = 0; irradiancePixelY < IRRADIANCE_OCTAHEDRON_MAP_SIZE; ++irradiancePixelY)
|
|
|
+ {
|
|
|
+ Vec2 irradianceOctUv = Vec2(irradiancePixelX, irradiancePixelY);
|
|
|
+ irradianceOctUv += 0.5;
|
|
|
+ irradianceOctUv /= IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const Vec3 dir = octahedronDecode(irradianceOctUv);
|
|
|
|
|
|
-# if ANKI_PLATFORM_MOBILE
|
|
|
- if(s > WaveGetLaneCount())
|
|
|
- {
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
+ const F32 lambert = dot(dir, sampleDir);
|
|
|
+ if(lambert <= kEpsilonF32)
|
|
|
+ {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ const F32 sampleCount = radianceTexelCount / 2.0;
|
|
|
+ InterlockedAddColor(irradiancePixelX, irradiancePixelY, radiance * lambert / sampleCount);
|
|
|
+ }
|
|
|
}
|
|
|
-# else
|
|
|
- GroupMemoryBarrierWithGroupSync();
|
|
|
-# endif
|
|
|
}
|
|
|
|
|
|
- if(svGroupIndex == 0)
|
|
|
+ GroupMemoryBarrierWithGroupSync();
|
|
|
+
|
|
|
+ // Write the irradiance
|
|
|
+ for(U32 pixel = svGroupIndex * irradiancePixelsPerThread; pixel < min(irradianceTexelCount, (svGroupIndex + 1) * irradiancePixelsPerThread);
|
|
|
+ ++pixel)
|
|
|
{
|
|
|
- irradiance = g_irradianceResults[0] * k2Pi;
|
|
|
+ const U32 x = pixel % IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
+ const U32 y = pixel / IRRADIANCE_OCTAHEDRON_MAP_SIZE;
|
|
|
|
|
|
- // Compute the texel coord to write the output
|
|
|
- UVec3 irradianceTexelCoord;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, irradianceTexelCoord.z,
|
|
|
- irradianceTexelCoord.y, irradianceTexelCoord.x);
|
|
|
- irradianceTexelCoord = irradianceTexelCoord.xzy;
|
|
|
+ UVec3 irradianceTexelCoord = probeId.xzy;
|
|
|
irradianceTexelCoord.xy *= IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2;
|
|
|
irradianceTexelCoord.xy += 1;
|
|
|
- irradianceTexelCoord.x += irradianceTexel.x;
|
|
|
- irradianceTexelCoord.y += irradianceTexel.y;
|
|
|
+ const IVec3 irradianceTexelCoordStart = irradianceTexelCoord;
|
|
|
+ irradianceTexelCoord.x += x;
|
|
|
+ irradianceTexelCoord.y += y;
|
|
|
|
|
|
- TEX(g_irradianceVolume, irradianceTexelCoord).xyz = irradiance;
|
|
|
+ const Vec3 irradiance = decodeAtomicColor(x, y) * k2Pi;
|
|
|
+ TEX(g_irradianceVolumes[clipmapIdx], irradianceTexelCoord).xyz = irradiance;
|
|
|
|
|
|
// Write the borders
|
|
|
- UVec3 volumeTexCoord;
|
|
|
- unflatten3dArrayIndex(clipmap.m_probeCounts.z, clipmap.m_probeCounts.y, clipmap.m_probeCounts.x, probeIdx, volumeTexCoord.z, volumeTexCoord.y,
|
|
|
- volumeTexCoord.x);
|
|
|
- volumeTexCoord = volumeTexCoord.xzy;
|
|
|
-
|
|
|
IVec2 borders[3];
|
|
|
- const IVec2 octCoord = IVec2(irradianceTexel);
|
|
|
+ const IVec2 octCoord = IVec2(x, y);
|
|
|
const U32 borderCount = octahedronBorder(IRRADIANCE_OCTAHEDRON_MAP_SIZE, octCoord, borders);
|
|
|
for(U32 i = 0; i < borderCount; ++i)
|
|
|
{
|
|
|
- IVec3 actualVolumeTexCoord;
|
|
|
- actualVolumeTexCoord.xy = octCoord + volumeTexCoord * (IRRADIANCE_OCTAHEDRON_MAP_SIZE + 2) + 1;
|
|
|
- actualVolumeTexCoord.xy += borders[i];
|
|
|
- actualVolumeTexCoord.z = volumeTexCoord.z;
|
|
|
+ IVec3 actualVolumeTexCoord = irradianceTexelCoordStart;
|
|
|
+ actualVolumeTexCoord.xy += octCoord + borders[i];
|
|
|
|
|
|
- TEX(g_irradianceVolume, actualVolumeTexCoord).xyz = irradiance;
|
|
|
+ TEX(g_irradianceVolumes[clipmapIdx], actualVolumeTexCoord).xyz = irradiance;
|
|
|
}
|
|
|
}
|
|
|
}
|