// ClusterBinning.ankiprog
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
  6. ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
  7. ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
  8. ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3u);
  9. ANKI_SPECIALIZATION_CONSTANT_UVEC2(RENDERING_SIZE, 4u);
  10. #pragma anki start comp
  11. #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
  12. #include <AnKi/Shaders/Common.glsl>
  13. #include <AnKi/Shaders/CollisionFunctions.glsl>
  14. const U32 WORKGROUP_SIZE = 64u;
  15. layout(local_size_x = WORKGROUP_SIZE) in;
  16. layout(set = 0, binding = 0, scalar) uniform b_unis
  17. {
  18. ClusteredShadingUniforms u_unis;
  19. };
  20. layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters
  21. {
  22. Cluster u_clusters[];
  23. };
  24. layout(set = 0, binding = 2, scalar) uniform b_pointLights
  25. {
  26. PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
  27. };
  28. layout(set = 0, binding = 3, scalar) uniform b_spotLights
  29. {
  30. SpotLightBinning u_spotLights[MAX_VISIBLE_SPOT_LIGHTS];
  31. };
  32. layout(set = 0, binding = 4, scalar) uniform b_reflectionProbes
  33. {
  34. ReflectionProbe u_reflectionProbes[MAX_VISIBLE_REFLECTION_PROBES];
  35. };
  36. layout(set = 0, binding = 5, scalar) uniform b_giProbes
  37. {
  38. GlobalIlluminationProbe u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
  39. };
  40. layout(set = 0, binding = 6, scalar) uniform b_fogVolumes
  41. {
  42. FogDensityVolume u_fogVolumes[MAX_VISIBLE_FOG_DENSITY_VOLUMES];
  43. };
  44. layout(set = 0, binding = 7, scalar) uniform b_decals
  45. {
  46. Decal u_decals[MAX_VISIBLE_DECALS];
  47. };
  48. const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
  49. // DX Sample locations
  50. const U32 SAMPLE_COUNT = 4u;
  51. #define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(TILE_SIZE))
  52. UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
  53. #undef LOCATION
  54. // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
  55. const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
  56. shared U64 s_tileMasks[TILES_PER_WORKGROUP];
  57. // A mask for each Z split for a specific clusterer object
  58. shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
  59. Bool isPointLight()
  60. {
  61. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_POINT_LIGHT];
  62. }
  63. Bool isSpotLight()
  64. {
  65. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_SPOT_LIGHT];
  66. }
  67. Bool isDecal()
  68. {
  69. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_DECAL];
  70. }
  71. Bool isFogVolume()
  72. {
  73. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME];
  74. }
  75. Bool isReflectionProbe()
  76. {
  77. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_REFLECTION_PROBE];
  78. }
  79. Bool isGiProbe()
  80. {
  81. return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE];
  82. }
  83. void main()
  84. {
  85. const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
  86. const U32 localTileIdx = gl_LocalInvocationIndex / SAMPLE_COUNT;
  87. const U32 sampleIdx = gl_GlobalInvocationID.x % SAMPLE_COUNT;
  88. const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
  89. if(tileIdx >= TILE_COUNT)
  90. {
  91. // Early exit
  92. return;
  93. }
  94. const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);
  95. // This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
  96. const UVec2 pixel = tileXY * TILE_SIZE + SAMPLE_LOCATIONS[sampleIdx];
  97. const Vec2 uv = Vec2(pixel) / Vec2(RENDERING_SIZE);
  98. const Vec2 ndc = UV_TO_NDC(uv);
  99. // Unproject the sample in view space
  100. const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
  101. const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
  102. // Create the ray that will test the clusterer objects
  103. const Vec3 rayOrigin = u_unis.m_cameraPosition;
  104. const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
  105. // Zero shared memory
  106. s_tileMasks[localTileIdx] = 0ul;
  107. const U32 splitsPerInvocation = max(1u, Z_SPLIT_COUNT / WORKGROUP_SIZE);
  108. for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
  109. i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
  110. {
  111. s_zSplitMasks[i] = 0ul;
  112. }
  113. memoryBarrierShared();
  114. barrier();
  115. // Do collision
  116. F32 t0, t1;
  117. U32 objectArrayIdx;
  118. Bool collides;
  119. // Point light
  120. if(isPointLight())
  121. {
  122. objectArrayIdx = clustererObjectIdx;
  123. const PointLight light = u_pointLights[objectArrayIdx];
  124. collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
  125. }
  126. // Spot light
  127. else if(isSpotLight())
  128. {
  129. objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_SPOT_LIGHT - 1u];
  130. const SpotLightBinning light = u_spotLights[objectArrayIdx];
  131. t0 = 10000.0;
  132. t1 = -10000.0;
  133. // Iterate all triangles
  134. const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
  135. U32 hits = 0u;
  136. U32 idx = 0u;
  137. do
  138. {
  139. const Vec3 v0 = light.m_edgePoints[indices[idx + 0u]];
  140. const Vec3 v1 = light.m_edgePoints[indices[idx + 1u]];
  141. const Vec3 v2 = light.m_edgePoints[indices[idx + 2u]];
  142. F32 t, u, v;
  143. const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);
  144. if(localCollides)
  145. {
  146. t0 = min(t0, t);
  147. t1 = max(t1, t);
  148. ++hits;
  149. }
  150. idx += 3u;
  151. } while(hits < 2u && idx < 6u * 3u);
  152. if(hits == 1u)
  153. {
  154. t0 = 0.0;
  155. }
  156. collides = (hits != 0u);
  157. }
  158. // Decal
  159. else if(isDecal())
  160. {
  161. objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_DECAL - 1u];
  162. const Decal decal = u_decals[objectArrayIdx];
  163. collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
  164. }
  165. // Fog volume
  166. else if(isFogVolume())
  167. {
  168. objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME - 1u];
  169. const FogDensityVolume vol = u_fogVolumes[objectArrayIdx];
  170. if(vol.m_isBox != 0u)
  171. {
  172. collides =
  173. testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, t0, t1);
  174. }
  175. else
  176. {
  177. collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter,
  178. sqrt(vol.m_aabbMaxOrSphereRadiusSquared.x), t0, t1);
  179. }
  180. }
  181. // Reflection probe
  182. else if(isReflectionProbe())
  183. {
  184. objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_REFLECTION_PROBE - 1u];
  185. const ReflectionProbe probe = u_reflectionProbes[objectArrayIdx];
  186. collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
  187. }
  188. // GI probe
  189. else
  190. {
  191. objectArrayIdx =
  192. clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE - 1u];
  193. const GlobalIlluminationProbe probe = u_giProbes[objectArrayIdx];
  194. collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
  195. }
  196. // Update the masks
  197. if(collides)
  198. {
  199. // Set the tile
  200. const U64 mask = 1ul << U64(objectArrayIdx);
  201. atomicOr(s_tileMasks[localTileIdx], mask);
  202. // Compute and set the Z splits
  203. const Vec3 hitpointA = rayDir * t0 + rayOrigin;
  204. const Vec3 hitpointB = rayDir * t1 + rayOrigin;
  205. const F32 distFromNearPlaneA =
  206. testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
  207. const F32 distFromNearPlaneB =
  208. testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
  209. F32 minDistFromNearPlane;
  210. F32 maxDistFromNearPlane;
  211. if(distFromNearPlaneA < distFromNearPlaneB)
  212. {
  213. minDistFromNearPlane = distFromNearPlaneA;
  214. maxDistFromNearPlane = distFromNearPlaneB;
  215. }
  216. else
  217. {
  218. minDistFromNearPlane = distFromNearPlaneB;
  219. maxDistFromNearPlane = distFromNearPlaneA;
  220. }
  221. const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
  222. const I32 endZSplit =
  223. clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
  224. for(I32 i = startZSplit; i <= endZSplit; ++i)
  225. {
  226. atomicOr(s_zSplitMasks[i], mask);
  227. }
  228. }
  229. // Sync
  230. memoryBarrierShared();
  231. barrier();
  232. // First sample writes the tile
  233. if(sampleIdx == 0u && s_tileMasks[localTileIdx] != 0ul)
  234. {
  235. if(isPointLight())
  236. {
  237. atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
  238. }
  239. else if(isSpotLight())
  240. {
  241. atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
  242. }
  243. else if(isDecal())
  244. {
  245. atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
  246. }
  247. else if(isFogVolume())
  248. {
  249. atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
  250. }
  251. else if(isReflectionProbe())
  252. {
  253. atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
  254. }
  255. else
  256. {
  257. atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
  258. }
  259. }
  260. // All invocations write at least one Z split
  261. for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
  262. i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
  263. {
  264. if(s_zSplitMasks[i] != 0ul)
  265. {
  266. if(isPointLight())
  267. {
  268. atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
  269. }
  270. else if(isSpotLight())
  271. {
  272. atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
  273. }
  274. else if(isDecal())
  275. {
  276. atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
  277. }
  278. else if(isFogVolume())
  279. {
  280. atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
  281. }
  282. else if(isReflectionProbe())
  283. {
  284. atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
  285. }
  286. else
  287. {
  288. atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
  289. }
  290. }
  291. }
  292. }
  293. #pragma anki end