// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

// Compute the irradiance given a light shading result. The irradiance will be stored in an ambient dice.

#pragma anki mutator THREDGROUP_SIZE_SQRT 8 16 32
#pragma anki mutator STORE_LOCATION 0 1 // 0: in a 3D texture, 1: In an SSBO
#pragma anki mutator SECOND_BOUNCE 0 1

#pragma anki technique comp

// NOTE(review): the include targets and the template arguments of the resource declarations below are reproduced
// exactly as they appear in the reviewed listing. Confirm them against the repository
#include
#include
#include

#define DEBUG_MODE 0 // 0: disable, 1: different color per dice face, 2: different color per cell

// Total number of threads in the (square) threadgroup
constexpr U32 kThreadgroupSize = U32(THREDGROUP_SIZE_SQRT) * U32(THREDGROUP_SIZE_SQRT);

SamplerState g_nearestAnyClampSampler : register(s0);
TextureCube g_lightShadingTexCube : register(t0);

#if SECOND_BOUNCE == 1
TextureCube g_gbufferTex[3u] : register(t1);
#endif

#if STORE_LOCATION == 0
RWTexture3D g_irradianceVolume : register(u0);

struct Constants
{
	IVec3 m_volumeTexel; // Texel of the volume that receives the 1st dice face
	I32 m_nextTexelOffsetInU; // Offset in U between two consecutive dice faces
};
ANKI_FAST_CONSTANTS(Constants, g_consts)
#else
struct BufferOut
{
	Vec4 m_val[6u]; // One value per dice face
};

RWStructuredBuffer g_irradianceDisceResults : register(u0);
#endif

// s_integrationResults holds one partial sum per wave and per face, so it is sized for the smallest wave
constexpr U32 kMinWaveSize = 8u;
groupshared Vec3 s_integrationResults[6u][kThreadgroupSize / kMinWaveSize]; // In cube coords
groupshared U32 s_waveIndexInsideThreadGroup;

// Sample the light shading cubemap at the texel that the current thread maps to on the given face.
Vec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
{
	const Vec2 uv = (Vec2(svGroupThreadId.x, svGroupThreadId.y) + 0.5) / F32(THREDGROUP_SIZE_SQRT);
	const Vec3 cubeUvw = getCubemapDirection(uv, face);
	return g_lightShadingTexCube.SampleLevel(g_nearestAnyClampSampler, cubeUvw, 0.0).rgb;
}

// Sum the per-thread face values over the whole threadgroup. Once this returns s_integrationResults[f][0] holds the
// threadgroup total of face f and it is visible to every thread. It contains group barriers so all the threads of
// the threadgroup need to call it.
void reduceFacesAcrossThreadgroup(Vec3 faceValues[6u], U32 svGroupIndex, U32 waveIndexInsideThreadGroup, U32 wavesPerThreadGroup)
{
	// Subgroup reduce
	[unroll] for(U32 f = 0u; f < 6u; ++f)
	{
		faceValues[f] = WaveActiveSum(faceValues[f]);
	}

	// One lane per wave publishes the wave's partial sums
	[branch] if(WaveIsFirstLane())
	{
		[unroll] for(U32 f = 0u; f < 6u; ++f)
		{
			s_integrationResults[f][waveIndexInsideThreadGroup] = faceValues[f];
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Workgroup reduce
	[branch] if(svGroupIndex == 0u)
	{
		[unroll] for(U32 f = 0u; f < 6u; ++f)
		{
			for(U32 i = 1u; i < wavesPerThreadGroup; ++i)
			{
				s_integrationResults[f][0] += s_integrationResults[f][i];
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();
}

[numthreads(THREDGROUP_SIZE_SQRT, THREDGROUP_SIZE_SQRT, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
{
	U32 wavesPerThreadGroup;
	U32 waveIndexInsideThreadGroup;
	ANKI_COMPUTE_WAVE_INDEX_INSIDE_THREADGROUP(svGroupIndex, s_waveIndexInsideThreadGroup, waveIndexInsideThreadGroup, wavesPerThreadGroup);

	const F32 threadgroupSizeSqrtf = F32(THREDGROUP_SIZE_SQRT);

	// Compute the NDC used in cubeCoordSolidAngle
	const Vec2 faceUv = (Vec2(svGroupThreadId.xy) + 0.5) / threadgroupSizeSqrtf;

	// The solid angle depends only on the thread's position inside the face, not on the face itself
	const F32 solidAngle = cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);

	// Compute result for a pixel
	Vec3 resultFaces[6u];
	for(U32 f = 0u; f < 6u; ++f)
	{
		// Get the direction of the dice face
		const Vec3 diceDir = getCubemapDirection(0.5, f) * Vec3(1.0, 1.0, -1.0);

		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);

		// Compute integral part
		const F32 lambert = max(0.0, dot(r, diceDir));
		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
		const Vec3 irradiance = lightShading * lambert * solidAngle;

		// Store
		resultFaces[f] = irradiance;
	}

	reduceFacesAcrossThreadgroup(resultFaces, svGroupIndex, waveIndexInsideThreadGroup, wavesPerThreadGroup);

#if SECOND_BOUNCE == 1
	// Copy the 1st bounce dice to locals and wait for every thread to do the same. The 2nd reduction overwrites
	// s_integrationResults[f][0], so without this barrier a wave that finishes early could overwrite values that
	// another wave is still reading
	Vec3 firstBounceDice[6u];
	[unroll] for(U32 f = 0u; f < 6u; ++f)
	{
		firstBounceDice[f] = s_integrationResults[f][0];
	}

	GroupMemoryBarrierWithGroupSync();

	// Initialize again for the 2nd bounce
	for(U32 f = 0u; f < 6u; ++f)
	{
		// Get the direction of the dice face
		const Vec3 diceDir = getCubemapDirection(0.5, f) * Vec3(1.0, 1.0, -1.0);

		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);

		// Compute integral part
		const F32 lambert = max(0.0, dot(r, diceDir));

		// Read the gbuffer
		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
		GbufferInfo gbuffer = (GbufferInfo)0;
		unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
								g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);

		// Sample irradiance
		Vec3 firstBounceIrradiance = sampleAmbientDice(firstBounceDice[0], firstBounceDice[1], firstBounceDice[2], firstBounceDice[3],
													   firstBounceDice[4], firstBounceDice[5], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
		firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;

		// Compute 2nd bounce
		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
		const Vec3 irradiance = (firstBounceIrradiance + lightShading * lambert) * solidAngle;

		// Store
		resultFaces[f] = irradiance;
	}

	reduceFacesAcrossThreadgroup(resultFaces, svGroupIndex, waveIndexInsideThreadGroup, wavesPerThreadGroup);
#endif

	// Store the results. The first 6 threads write one dice face each
	if(svGroupIndex < 6u)
	{
		const U32 f = svGroupIndex;

#if DEBUG_MODE == 0
		Vec3 irradiance = s_integrationResults[f][0];
		const Vec3 toStoreValue = irradiance;
#elif DEBUG_MODE == 1
		const Vec3 toStoreValue = colorPerCubeFace(f);
#else
		// This mode reads g_irradianceVolume and g_consts which are declared only when STORE_LOCATION is 0
		UVec3 volumeSize;
		g_irradianceVolume.GetDimensions(volumeSize.x, volumeSize.y, volumeSize.z);

		const UVec3 subvolumeSize = UVec3(volumeSize.x / 6u, volumeSize.y, volumeSize.z);
		const U32 cellIdx =
			g_consts.m_volumeTexel.z * subvolumeSize.x * subvolumeSize.y + g_consts.m_volumeTexel.y * subvolumeSize.x + g_consts.m_volumeTexel.x;
		const F32 heatmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
		const Vec3 toStoreValue = heatmap(heatmapFactor);
#endif

#if STORE_LOCATION == 0
		const UVec3 storeUvw =
			UVec3(g_consts.m_volumeTexel.x + I32(f) * g_consts.m_nextTexelOffsetInU, g_consts.m_volumeTexel.y, g_consts.m_volumeTexel.z);
		g_irradianceVolume[storeUvw] = Vec4(toStoreValue, 0.0);
#else
		g_irradianceDisceResults[0].m_val[f] = toStoreValue.xyzx;
#endif
	}
}