ClusteredShadingFunctions.hlsl 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Shaders/LightFunctions.hlsl>
  /// Debugging function. Counts the objects referenced by a cluster and maps the occupancy to a heatmap color.
  /// @param cluster The cluster whose object masks are counted.
  /// @param objectTypeMask Bitmask built from (1u << GpuSceneNonRenderableObjectType) values. Selects which object types contribute.
  /// @param maxObjectOverride When non-zero it replaces the sum of the per-type maximums as the normalization denominator.
  /// @return heatmap() evaluated on count/denominator, with the ratio clamped to at most 1.0.
  Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask, U32 maxObjectOverride = 0)
  {
      U32 maxObjects = 0u;
      I32 count = 0;

      if((objectTypeMask & (1u << (U32)GpuSceneNonRenderableObjectType::kLight)) != 0u)
      {
          maxObjects += kMaxVisibleLights;
          for(U32 i = 0; i < kMaxVisibleLights / 32; ++i)
          {
              // The point and spot masks are ORed before counting, so a bit set in both is counted once
              count += I32(countbits(cluster.m_pointLightsMask[i] | cluster.m_spotLightsMask[i]));
          }
      }

      if((objectTypeMask & (1u << (U32)GpuSceneNonRenderableObjectType::kDecal)) != 0u)
      {
          maxObjects += kMaxVisibleDecals;
          for(U32 i = 0; i < kMaxVisibleDecals / 32; ++i)
          {
              count += I32(countbits(cluster.m_decalsMask[i]));
          }
      }

      if((objectTypeMask & (1u << (U32)GpuSceneNonRenderableObjectType::kFogDensityVolume)) != 0u)
      {
          maxObjects += kMaxVisibleFogDensityVolumes;
          count += I32(countbits(cluster.m_fogDensityVolumesMask));
      }

      if((objectTypeMask & (1u << (U32)GpuSceneNonRenderableObjectType::kReflectionProbe)) != 0u)
      {
          maxObjects += kMaxVisibleReflectionProbes;
          count += I32(countbits(cluster.m_reflectionProbesMask));
      }

      if((objectTypeMask & (1u << (U32)GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe)) != 0u)
      {
          maxObjects += kMaxVisibleGlobalIlluminationProbes;
          count += I32(countbits(cluster.m_giProbesMask));
      }

      // If no object type was selected and there is no override the denominator would be zero and the division would
      // evaluate 0/0. Clamp it to 1 so that an empty selection yields a factor of 0
      const U32 denominator = max((maxObjectOverride > 0) ? maxObjectOverride : maxObjects, 1u);
      const F32 factor = min(1.0, F32(count) / F32(denominator));
      return heatmap(factor);
  }
  /// Computes the index of the Z split that a depth value belongs to.
  /// The index is linearizeDepth(n, f, depth) * zSplitCount, which simplifies to 1 / (a + b / depth)
  /// where a = (n - f) / (n * zSplitCount) and b = f / (n * zSplitCount).
  /// @param depth The depth value to classify.
  /// @param zSplitCount The number of Z splits. The returned index never exceeds zSplitCount - 1.
  /// @param a The precomputed "a" term of the equation above.
  /// @param b The precomputed "b" term of the equation above.
  U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
  {
      const F32 denom = a + b / depth;
      const U32 splitIdx = (U32)(1.0 / denom);
      const U32 lastSplit = zSplitCount - 1u;
      return min(lastSplit, splitIdx);
  }
  /// Converts a fragment coordinate to the index of the tile that contains it.
  /// @param fragCoord The XY fragment coordinate. It is divided by kClusteredShadingTileSize to get the tile's 2D coordinate.
  /// @param tileCountX The number of tiles per row. Tiles are indexed row by row.
  /// @return tile.y * tileCountX + tile.x
  U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileCountX)
  {
      const Vec2 tileCoord = fragCoord / F32(kClusteredShadingTileSize);
      const UVec2 tile = UVec2(tileCoord);
      return tile.x + tile.y * tileCountX;
  }
  /// Combines a tile cluster and a Z split cluster into one cluster by ANDing every object mask of the two.
  /// @tparam kDynamicallyUniform When true, every ANDed mask is additionally ORed across the active lanes of the wave using
  ///                             WaveActiveBitOr, so all those lanes end up with the same masks.
  /// @param tileCluster The cluster of the screen tile.
  /// @param zCluster The cluster of the Z split.
  /// @return The merged cluster.
  template<Bool kDynamicallyUniform = false>
  Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
  {
      Cluster merged;

      // Keep only the objects that are present in both clusters
      [unroll] for(U32 w = 0; w < kMaxVisibleLights / 32; ++w)
      {
          merged.m_pointLightsMask[w] = tileCluster.m_pointLightsMask[w] & zCluster.m_pointLightsMask[w];
          merged.m_spotLightsMask[w] = tileCluster.m_spotLightsMask[w] & zCluster.m_spotLightsMask[w];
      }

      [unroll] for(U32 w = 0; w < kMaxVisibleDecals / 32; ++w)
      {
          merged.m_decalsMask[w] = tileCluster.m_decalsMask[w] & zCluster.m_decalsMask[w];
      }

      merged.m_fogDensityVolumesMask = tileCluster.m_fogDensityVolumesMask & zCluster.m_fogDensityVolumesMask;
      merged.m_reflectionProbesMask = tileCluster.m_reflectionProbesMask & zCluster.m_reflectionProbesMask;
      merged.m_giProbesMask = tileCluster.m_giProbesMask & zCluster.m_giProbesMask;

      // kDynamicallyUniform is a template constant, this branch is the same for the whole wave
      if(kDynamicallyUniform)
      {
          // Union the masks of all active lanes
          [unroll] for(U32 w = 0; w < kMaxVisibleLights / 32; ++w)
          {
              merged.m_pointLightsMask[w] = WaveActiveBitOr(merged.m_pointLightsMask[w]);
              merged.m_spotLightsMask[w] = WaveActiveBitOr(merged.m_spotLightsMask[w]);
          }

          [unroll] for(U32 w = 0; w < kMaxVisibleDecals / 32; ++w)
          {
              merged.m_decalsMask[w] = WaveActiveBitOr(merged.m_decalsMask[w]);
          }

          merged.m_fogDensityVolumesMask = WaveActiveBitOr(merged.m_fogDensityVolumesMask);
          merged.m_reflectionProbesMask = WaveActiveBitOr(merged.m_reflectionProbesMask);
          merged.m_giProbesMask = WaveActiveBitOr(merged.m_giProbesMask);
      }

      return merged;
  }
  /// Fetches the tile cluster and the Z split cluster of a fragment and merges them with mergeClusters().
  /// @tparam kDynamicallyUniform Forwarded to mergeClusters().
  /// @param clusters The buffer that holds the clusters. Tile clusters are read at their tile index, Z split clusters are read
  ///                 at their split index offset by m_tileCounts.x * m_tileCounts.y.
  /// @param consts Provides the tile counts, the Z split count and the two Z split constants.
  /// @param fragCoord XY selects the tile, Z is the depth that selects the Z split.
  template<Bool kDynamicallyUniform = false>
  Cluster getClusterFragCoord(StructuredBuffer<Cluster> clusters, GlobalRendererConstants consts, Vec3 fragCoord)
  {
      const U32 tileIdx = computeTileClusterIndexFragCoord(fragCoord.xy, consts.m_tileCounts.x);
      const U32 zSplitIdx = computeZSplitClusterIndex(fragCoord.z, consts.m_zSplitCount, consts.m_zSplitMagic.x, consts.m_zSplitMagic.y);

      const Cluster tileCluster = clusters[tileIdx];
      const Cluster zCluster = clusters[zSplitIdx + consts.m_tileCounts.x * consts.m_tileCounts.y];

      return mergeClusters<kDynamicallyUniform>(tileCluster, zCluster);
  }
  /// Extracts the next point light from the cluster. Scans the 32bit words of m_pointLightsMask in order, takes the bit reported
  /// by firstbitlow2() from the first non-zero word (presumably the lowest set bit) and toggles it off in the cluster.
  /// @param cluster The cluster to iterate. Its point light mask is modified.
  /// @return The index of the light (bit index + 32 * word index) or kMaxU32 if all the words are zero.
  U32 iteratePointLights(inout Cluster cluster)
  {
      for(U32 word = 0; word < kMaxVisibleLights / 32; ++word)
      {
          if(cluster.m_pointLightsMask[word] == 0)
          {
              continue;
          }

          const U32 bit = (U32)firstbitlow2(cluster.m_pointLightsMask[word]);
          cluster.m_pointLightsMask[word] ^= 1u << bit;
          return word * 32 + bit;
      }

      // Nothing left
      return kMaxU32;
  }
  /// Extracts the next spot light from the cluster. Scans the 32bit words of m_spotLightsMask in order, takes the bit reported
  /// by firstbitlow2() from the first non-zero word (presumably the lowest set bit) and toggles it off in the cluster.
  /// @param cluster The cluster to iterate. Its spot light mask is modified.
  /// @return The index of the light (bit index + 32 * word index) or kMaxU32 if all the words are zero.
  U32 iterateSpotLights(inout Cluster cluster)
  {
      for(U32 word = 0; word < kMaxVisibleLights / 32; ++word)
      {
          if(cluster.m_spotLightsMask[word] == 0)
          {
              continue;
          }

          const U32 bit = (U32)firstbitlow2(cluster.m_spotLightsMask[word]);
          cluster.m_spotLightsMask[word] ^= 1u << bit;
          return word * 32 + bit;
      }

      // Nothing left
      return kMaxU32;
  }
  /// Extracts the next decal from the cluster. Scans the 32bit words of m_decalsMask in order, takes the bit reported by
  /// firstbitlow2() from the first non-zero word (presumably the lowest set bit) and toggles it off in the cluster.
  /// @param cluster The cluster to iterate. Its decal mask is modified.
  /// @return The index of the decal (bit index + 32 * word index) or kMaxU32 if all the words are zero.
  U32 iterateDecals(inout Cluster cluster)
  {
      for(U32 word = 0; word < kMaxVisibleDecals / 32; ++word)
      {
          if(cluster.m_decalsMask[word] == 0)
          {
              continue;
          }

          const U32 bit = (U32)firstbitlow2(cluster.m_decalsMask[word]);
          cluster.m_decalsMask[word] ^= 1u << bit;
          return word * 32 + bit;
      }

      // Nothing left
      return kMaxU32;
  }
  /// Samples the reflection probes referenced by the cluster's m_reflectionProbesMask.
  /// @tparam T The scalar type of the result.
  /// @param cluster The cluster. Taken by value, the caller's copy is not modified.
  /// @param probes The buffer that the bits of the mask index into.
  /// @param reflDir The reflection direction. It's given to intersectProbe() to compute the cubemap lookup vector.
  /// @param worldPos The world position of the shaded point.
  /// @param reflTexLod The LOD given to SampleLevel().
  /// @param trilinearClampSampler The sampler used for the cubemap lookups.
  /// @return -1.0 in all components if the mask is empty. Otherwise the color of the single probe or the weighted average of all
  ///         the probes of the mask.
  template<typename T>
  vector<T, 3> sampleReflectionProbes(Cluster cluster, StructuredBuffer<ReflectionProbe> probes, Vec3 reflDir, Vec3 worldPos, T reflTexLod,
                                      SamplerState trilinearClampSampler)
  {
      vector<T, 3> result;
      const U32 activeProbes = countbits(cluster.m_reflectionProbesMask);

      if(activeProbes == 0)
      {
          // No probes. Negative output tells it apart from a sampled color
          result = -1.0;
      }
      else if(WaveActiveAllTrue(activeProbes == 1))
      {
          // Every lane that reached this point has exactly one probe. Skip the blending
          // NOTE(review): The test doesn't establish that all the lanes have the *same* probe but the texture is fetched without
          // the NonUniformIndex variant. Presumably the callers pass a wave-uniform cluster. TODO confirm
          const ReflectionProbe singleProbe = probes[firstbitlow2(cluster.m_reflectionProbesMask)];

          Vec3 lookupDir = intersectProbe(worldPos, reflDir, singleProbe.m_aabbMin, singleProbe.m_aabbMax, singleProbe.m_position);
          lookupDir.z = -lookupDir.z;

          result = getBindlessTextureCubeVec4(singleProbe.m_cubeTexture).SampleLevel(trilinearClampSampler, lookupDir, reflTexLod).rgb;
      }
      else
      {
          // Some lane has multiple probes. Accumulate the weighted colors and normalize at the end
          result = 0.0;
          T weightSum = 0.001; // Starts a bit above zero so the final division is never by zero

          U32 remaining = cluster.m_reflectionProbesMask;
          [loop] while(remaining != 0u)
          {
              // Pop a probe off the mask
              const U32 probeIdx = U32(firstbitlow2(remaining));
              remaining &= ~(1u << probeIdx);
              const ReflectionProbe probe = probes[probeIdx];

              // Sample the cubemap of the probe
              Vec3 lookupDir = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
              lookupDir.z = -lookupDir.z;
              const vector<T, 3> radiance =
                  getBindlessTextureNonUniformIndexCubeVec4(probe.m_cubeTexture).SampleLevel(trilinearClampSampler, lookupDir, reflTexLod).rgb;

              // Weight it. The 0.2 is presumably the fade distance
              const T weight = computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, 0.2);
              result += radiance * weight;
              weightSum += weight;
          }

          result /= weightSum;
      }

      return result;
  }
  /// Samples the global illumination probes referenced by the cluster's m_giProbesMask.
  /// @tparam T The scalar type of the result.
  /// @param cluster The cluster. Taken by value, the caller's copy is not modified.
  /// @param probes The buffer that the bits of the mask index into.
  /// @param normal The normal that is given to sampleGlobalIllumination().
  /// @param worldPos The world position of the shaded point.
  /// @param trilinearClampSampler The sampler used for the volume texture lookups.
  /// @return 0.0 in all components if the mask is empty. Otherwise the result of the single probe or the weighted average of all
  ///         the probes of the mask.
  template<typename T>
  vector<T, 3> sampleGiProbes(Cluster cluster, StructuredBuffer<GlobalIlluminationProbe> probes, Vec3 normal, Vec3 worldPos,
                              SamplerState trilinearClampSampler)
  {
      vector<T, 3> result;
      const U32 activeProbes = countbits(cluster.m_giProbesMask);

      if(activeProbes == 0)
      {
          // No probes, no indirect light
          result = 0.0;
      }
      else if(WaveActiveAllTrue(activeProbes == 1))
      {
          // Every lane that reached this point has exactly one probe. Skip the blending
          // NOTE(review): The test doesn't establish that all the lanes have the *same* probe but the texture is fetched without
          // the NonUniformIndex variant. Presumably the callers pass a wave-uniform cluster. TODO confirm
          const GlobalIlluminationProbe singleProbe = probes[firstbitlow2(cluster.m_giProbesMask)];

          result = sampleGlobalIllumination<T>(worldPos, normal, singleProbe, getBindlessTexture3DVec4(singleProbe.m_volumeTexture),
                                               trilinearClampSampler);
      }
      else
      {
          // Some lane has multiple probes. Accumulate the weighted results and normalize at the end
          result = 0.0;
          T weightSum = 0.001; // Starts a bit above zero so the final division is never by zero

          U32 remaining = cluster.m_giProbesMask;
          [loop] while(remaining != 0u)
          {
              // Pop a probe off the mask
              const U32 probeIdx = U32(firstbitlow2(remaining));
              remaining &= ~(1u << probeIdx);
              const GlobalIlluminationProbe probe = probes[probeIdx];

              // Sample the volume of the probe
              const vector<T, 3> irradiance = sampleGlobalIllumination<T>(
                  worldPos, normal, probe, getBindlessTextureNonUniformIndex3DVec4(probe.m_volumeTexture), trilinearClampSampler);

              // Weight it using the probe's own fade distance
              const F32 weight = computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, probe.m_fadeDistance);
              result += irradiance * weight;
              weightSum += weight;
          }

          result /= weightSum;
      }

      return result;
  }