IrradianceDice.ankiprog 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. // Compute the irradiance given a light shading result. The irradiance will be stored in an ambient dice.
  6. #pragma anki mutator THREDGROUP_SIZE_SQRT 8 16 32
  7. #pragma anki mutator STORE_LOCATION 0 1 // 0: in a 3D texture, 1: In an SSBO
  8. #pragma anki mutator SECOND_BOUNCE 0 1
  9. #pragma anki technique comp
  10. #include <AnKi/Shaders/Functions.hlsl>
  11. #include <AnKi/Shaders/PackFunctions.hlsl>
  12. #include <AnKi/Shaders/LightFunctions.hlsl>
  // Debug output selector. 0: disable, 1: different color per dice face, 2: different color per cell
  #define DEBUG_MODE 0

  // Number of threads in one threadgroup. The group is square: THREDGROUP_SIZE_SQRT threads per side.
  constexpr U32 kThreadgroupSize = U32(THREDGROUP_SIZE_SQRT) * U32(THREDGROUP_SIZE_SQRT);

  // Inputs: the sampler and the cube texture that holds the light shading result to integrate
  SamplerState g_nearestAnyClampSampler : register(s0);
  TextureCube<Vec4> g_lightShadingTexCube : register(t0);

  #if SECOND_BOUNCE == 1
  // The three GBuffer cube textures, only bound when the 2nd bounce is computed
  TextureCube<Vec4> g_gbufferTex[3u] : register(t1);
  #endif
  #if STORE_LOCATION == 0

  // Output: the 3D texture that receives the six dice face values
  RWTexture3D<Vec4> g_irradianceVolume : register(u0);

  struct Constants
  {
      // Texel that receives dice face 0
      IVec3 m_volumeTexel;

      // Offset along X between the texels of two consecutive dice faces
      I32 m_nextTexelOffsetInU;
  };
  ANKI_FAST_CONSTANTS(Constants, g_consts)

  #else

  struct BufferOut
  {
      // One value per dice face
      Vec4 m_val[6u];
  };

  // Output: element 0 of this buffer receives the six dice face values
  RWStructuredBuffer<BufferOut> g_irradianceDisceResults : register(u0);

  #endif
  // Used to size the per-wave scratch below: kThreadgroupSize / kMinWaveSize slots per face.
  // NOTE(review): this assumes a wave never has fewer than 8 lanes, confirm against the supported hardware.
  constexpr U32 kMinWaveSize = 8u;

  // Per-face partial sums, one slot per wave. Slot 0 ends up holding the threadgroup total. In cube coords
  groupshared Vec3 s_integrationResults[6u][kThreadgroupSize / kMinWaveSize];

  // Shared scratch handed to ANKI_COMPUTE_WAVE_INDEX_INSIDE_THREADGROUP
  groupshared U32 s_waveIndexInsideThreadGroup;
  // Read the light shading cube texture for the calling thread.
  // face: The cube face to sample.
  // svGroupThreadId: The thread's ID inside the threadgroup. Its XY selects the texel, there is one thread per texel.
  // Returns the RGB of mip 0 at the direction getCubemapDirection() gives for the center of that texel.
  Vec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
  {
      // Offset by half a texel to land on the texel center, then normalize by the face size
      const Vec2 texelCenter = Vec2(svGroupThreadId.xy) + 0.5;
      const Vec3 sampleDir = getCubemapDirection(texelCenter / F32(THREDGROUP_SIZE_SQRT), face);

      const Vec4 texel = g_lightShadingTexCube.SampleLevel(g_nearestAnyClampSampler, sampleDir, 0.0);
      return texel.rgb;
  }
  // Sum the per-thread face values over the whole threadgroup. When it returns all threads can read the totals from s_integrationResults[f][0].
  // Every thread of the threadgroup must call this because it contains threadgroup barriers.
  void reduceFacesAcrossThreadgroup(Vec3 faceValues[6u], U32 svGroupIndex, U32 waveIndexInsideThreadGroup, U32 wavesPerThreadGroup)
  {
      // Subgroup reduce
      [unroll] for(U32 f = 0u; f < 6u; ++f)
      {
          faceValues[f] = WaveActiveSum(faceValues[f]);
      }

      // A single lane per wave publishes the partial sums of its wave
      [branch] if(WaveIsFirstLane())
      {
          [unroll] for(U32 f = 0u; f < 6u; ++f)
          {
              s_integrationResults[f][waveIndexInsideThreadGroup] = faceValues[f];
          }
      }

      GroupMemoryBarrierWithGroupSync();

      // Workgroup reduce: thread 0 folds the partial sums of the other waves into slot 0
      [branch] if(svGroupIndex == 0u)
      {
          [unroll] for(U32 f = 0u; f < 6u; ++f)
          {
              for(U32 i = 1u; i < wavesPerThreadGroup; ++i)
              {
                  s_integrationResults[f][0] += s_integrationResults[f][i];
              }
          }
      }

      // Make the totals visible to the whole threadgroup
      GroupMemoryBarrierWithGroupSync();
  }

  // Entry point. A single threadgroup integrates the light shading cube texture into the six values of an ambient dice. Every thread handles one
  // texel of each cube face, the contributions are summed over the threadgroup and the first six threads store one face each.
  // svGroupThreadId: The thread's ID inside the threadgroup. Its XY is the texel coordinate inside a face.
  // svGroupIndex: The flattened index of the thread inside the threadgroup.
  [numthreads(THREDGROUP_SIZE_SQRT, THREDGROUP_SIZE_SQRT, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
  {
      U32 wavesPerThreadGroup;
      U32 waveIndexInsideThreadGroup;
      ANKI_COMPUTE_WAVE_INDEX_INSIDE_THREADGROUP(svGroupIndex, s_waveIndexInsideThreadGroup, waveIndexInsideThreadGroup, wavesPerThreadGroup);

      const F32 threadgroupSizeSqrtf = F32(THREDGROUP_SIZE_SQRT);

      // Compute the NDC used in cubeCoordSolidAngle
      const Vec2 faceUv = (Vec2(svGroupThreadId.xy) + 0.5) / threadgroupSizeSqrtf;

      // The solid angle term has no dependency on the face so compute it once
      const F32 solidAngle = cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);

      // Compute result for a pixel
      Vec3 resultFaces[6u];
      for(U32 f = 0u; f < 6u; ++f)
      {
          // Get the direction of the dice face
          const Vec3 diceDir = getCubemapDirection(0.5, f) * Vec3(1.0, 1.0, -1.0);

          const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);

          // Compute integral part
          const F32 lambert = max(0.0, dot(r, diceDir));
          const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);

          // Store
          resultFaces[f] = lightShading * lambert * solidAngle;
      }

      reduceFacesAcrossThreadgroup(resultFaces, svGroupIndex, waveIndexInsideThreadGroup, wavesPerThreadGroup);

  #if SECOND_BOUNCE == 1
      // Keep a private copy of the first bounce result. The reduction of the 2nd bounce writes to s_integrationResults[f][0] again, so all threads
      // need to be done reading it before any wave is allowed to overwrite it. Without the barrier a fast wave can clobber the values while slower
      // waves are still reading them
      Vec3 firstBounceDice[6u];
      [unroll] for(U32 f = 0u; f < 6u; ++f)
      {
          firstBounceDice[f] = s_integrationResults[f][0];
      }

      GroupMemoryBarrierWithGroupSync();

      // Initialize again for the 2nd bounce
      for(U32 f = 0u; f < 6u; ++f)
      {
          // Get the direction of the dice face
          const Vec3 diceDir = getCubemapDirection(0.5, f) * Vec3(1.0, 1.0, -1.0);

          // The GBuffer is sampled with the unflipped direction, the integral uses the Z flipped one
          const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
          const Vec3 r = gbufferUv * Vec3(1.0, 1.0, -1.0);

          // Compute integral part
          const F32 lambert = max(0.0, dot(r, diceDir));

          // Read the gbuffer
          GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
          unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
                                  g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
                                  g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);

          // Sample irradiance
          Vec3 firstBounceIrradiance = sampleAmbientDice<F32>(firstBounceDice[0], firstBounceDice[1], firstBounceDice[2], firstBounceDice[3],
                                                              firstBounceDice[4], firstBounceDice[5], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
          firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;

          // Compute 2nd bounce
          const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);

          // Store
          resultFaces[f] = (firstBounceIrradiance + lightShading * lambert) * solidAngle;
      }

      reduceFacesAcrossThreadgroup(resultFaces, svGroupIndex, waveIndexInsideThreadGroup, wavesPerThreadGroup);
  #endif

      // Store the results. The first 6 threads write one dice face each
      if(svGroupIndex < 6u)
      {
          const U32 f = svGroupIndex;

  #if DEBUG_MODE == 0
          const Vec3 toStoreValue = s_integrationResults[f][0];
  #elif DEBUG_MODE == 1
          const Vec3 toStoreValue = colorPerCubeFace(f);
  #else
  #    if STORE_LOCATION != 0
  #        error DEBUG_MODE 2 reads g_irradianceVolume and g_consts which are only declared when STORE_LOCATION is 0
  #    endif
          UVec3 volumeSize;
          g_irradianceVolume.GetDimensions(volumeSize.x, volumeSize.y, volumeSize.z);

          // The 6 faces of a cell are laid out along X so a single face occupies 1/6 of the width
          const UVec3 subvolumeSize = UVec3(volumeSize.x / 6u, volumeSize.y, volumeSize.z);

          const U32 cellIdx =
              g_consts.m_volumeTexel.z * subvolumeSize.x * subvolumeSize.y + g_consts.m_volumeTexel.y * subvolumeSize.x + g_consts.m_volumeTexel.x;
          const F32 heatmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
          const Vec3 toStoreValue = heatmap(heatmapFactor);
  #endif

  #if STORE_LOCATION == 0
          const UVec3 storeUvw =
              UVec3(g_consts.m_volumeTexel.x + I32(f) * g_consts.m_nextTexelOffsetInU, g_consts.m_volumeTexel.y, g_consts.m_volumeTexel.z);
          g_irradianceVolume[storeUvw] = Vec4(toStoreValue, 0.0);
  #else
          g_irradianceDisceResults[0].m_val[f] = toStoreValue.xyzx;
  #endif
      }
  }