ClusterBinning.ankiprog 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  // Specialization constants consumed by the binning shader. TILE_SIZE scales the per-tile sample offsets,
  // TILE_COUNT_X/TILE_COUNT_Y give the tile grid dimensions, Z_SPLIT_COUNT sizes the Z split array and RENDERING_SIZE
  // converts pixels to UVs.
  // NOTE(review): the second argument is presumably the specialization constant ID - confirm against the macro.
  ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
  ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
  ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
  ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3u);
  ANKI_SPECIALIZATION_CONSTANT_UVEC2(RENDERING_SIZE, 4u);
  10. #pragma anki start comp
  11. #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
  12. #include <AnKi/Shaders/Common.glsl>
  13. #include <AnKi/Shaders/CollisionFunctions.glsl>
  // Every workgroup is a 1D strip of WORKGROUP_SIZE invocations along X
  const U32 WORKGROUP_SIZE = 64u;
  layout(local_size_x = WORKGROUP_SIZE) in;

  // Clustered shading uniforms: matrices, camera position, near plane and the per-type object counts
  layout(set = 0, binding = 0, scalar) uniform b_unis
  {
      ClusteredShadingUniforms u_unis;
  };

  // Output clusters. main() indexes [0, TILE_COUNT) by tile and [TILE_COUNT, TILE_COUNT + Z_SPLIT_COUNT) by Z split
  layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters
  {
      Cluster u_clusters[];
  };

  // The visible clusterer objects, one uniform block per object type
  layout(set = 0, binding = 2, scalar) uniform b_pointLights
  {
      PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
  };

  layout(set = 0, binding = 3, scalar) uniform b_spotLights
  {
      SpotLightBinning u_spotLights[MAX_VISIBLE_SPOT_LIGHTS];
  };

  layout(set = 0, binding = 4, scalar) uniform b_reflectionProbes
  {
      ReflectionProbe u_reflectionProbes[MAX_VISIBLE_REFLECTION_PROBES];
  };

  layout(set = 0, binding = 5, scalar) uniform b_giProbes
  {
      GlobalIlluminationProbe u_giProbes[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
  };

  layout(set = 0, binding = 6, scalar) uniform b_fogVolumes
  {
      FogDensityVolume u_fogVolumes[MAX_VISIBLE_FOG_DENSITY_VOLUMES];
  };

  layout(set = 0, binding = 7, scalar) uniform b_decals
  {
      Decal u_decals[MAX_VISIBLE_DECALS];
  };

  // Total number of screen tiles
  const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;

  // Every tile is tested with SAMPLE_COUNT rays. The offsets follow the DX sample locations: they are expressed in
  // 1/16ths in the [-8, 8) range and LOCATION() remaps them to a pixel offset inside a tile of TILE_SIZE pixels
  const U32 SAMPLE_COUNT = 4u;
  #define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(TILE_SIZE))
  UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](
      LOCATION(-2, -6),
      LOCATION(6, -2),
      LOCATION(-6, 2),
      LOCATION(2, 6));
  #undef LOCATION

  // One mask per tile covered by this workgroup. It accumulates the hits of the tile's samples against the clusterer
  // object this workgroup is processing
  const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
  shared ExtendedClusterObjectMask s_tileMasks[TILES_PER_WORKGROUP];

  // One mask per Z split for the clusterer object this workgroup is processing
  shared ExtendedClusterObjectMask s_zSplitMasks[Z_SPLIT_COUNT];
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the point
  /// light entry of u_unis.m_objectCountsUpTo.
  Bool isPointLight()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_POINT_LIGHT];
  }
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the spot light
  /// entry of u_unis.m_objectCountsUpTo. main() only calls it after isPointLight() has returned false.
  Bool isSpotLight()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_SPOT_LIGHT];
  }
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the decal entry
  /// of u_unis.m_objectCountsUpTo. main() only calls it after the light checks have returned false.
  Bool isDecal()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_DECAL];
  }
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the fog density
  /// volume entry of u_unis.m_objectCountsUpTo. main() only calls it after the light and decal checks have returned
  /// false.
  Bool isFogVolume()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME];
  }
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the reflection
  /// probe entry of u_unis.m_objectCountsUpTo. main() only calls it after the light, decal and fog volume checks have
  /// returned false.
  Bool isReflectionProbe()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_REFLECTION_PROBE];
  }
  /// Returns true if the object index of this invocation (the Y of the global invocation ID) is below the global
  /// illumination probe entry of u_unis.m_objectCountsUpTo.
  Bool isGiProbe()
  {
      const U32 objectIdx = gl_GlobalInvocationID.y;
      return objectIdx < u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE];
  }
  /// ORs a mask into the cluster at clusterIdx, picking the mask member that matches the type of the clusterer object
  /// this invocation works on. The fog volume and probe members receive the mask converted to U32, the rest receive
  /// it unchanged.
  void writeMaskToCluster(U32 clusterIdx, ExtendedClusterObjectMask mask)
  {
      if(isPointLight())
      {
          atomicOr(u_clusters[clusterIdx].m_pointLightsMask, mask);
      }
      else if(isSpotLight())
      {
          atomicOr(u_clusters[clusterIdx].m_spotLightsMask, mask);
      }
      else if(isDecal())
      {
          atomicOr(u_clusters[clusterIdx].m_decalsMask, mask);
      }
      else if(isFogVolume())
      {
          atomicOr(u_clusters[clusterIdx].m_fogDensityVolumesMask, U32(mask));
      }
      else if(isReflectionProbe())
      {
          atomicOr(u_clusters[clusterIdx].m_reflectionProbesMask, U32(mask));
      }
      else
      {
          atomicOr(u_clusters[clusterIdx].m_giProbesMask, U32(mask));
      }
  }

  /// Bins one clusterer object into the tile and Z split clusters. The X of the global invocation ID selects a
  /// (tile, sample) pair and the Y selects the clusterer object. Every invocation casts a ray from the camera through
  /// its sample, tests it against the object and, on a hit, sets the object's bit in the shared tile mask and in the
  /// shared masks of the Z splits that the hit range covers. After a barrier the shared masks are ORed into u_clusters.
  void main()
  {
      // Invocations past the last sample must still reach the barriers. Clamp them to the last valid sample: clamping
      // to TILE_COUNT * SAMPLE_COUNT would give them tileIdx == TILE_COUNT and sampleIdx == 0, which casts an
      // off-screen ray and writes its result to u_clusters[TILE_COUNT], the cluster of the first Z split
      const U32 globalInvocationIDX = min(gl_GlobalInvocationID.x, TILE_COUNT * SAMPLE_COUNT - 1u);
      const U32 tileIdx = globalInvocationIDX / SAMPLE_COUNT;
      const U32 sampleIdx = globalInvocationIDX % SAMPLE_COUNT;
      const U32 localTileIdx = gl_LocalInvocationIndex / SAMPLE_COUNT;
      const U32 clustererObjectIdx = gl_GlobalInvocationID.y;

      const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);

      // This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
      const UVec2 pixel = tileXY * TILE_SIZE + SAMPLE_LOCATIONS[sampleIdx];
      const Vec2 uv = Vec2(pixel) / Vec2(RENDERING_SIZE);
      const Vec2 ndc = UV_TO_NDC(uv);

      // Unproject the sample to world space, at NDC depth 1.0
      const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
      const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;

      // Create the ray that will test the clusterer objects
      const Vec3 rayOrigin = u_unis.m_cameraPosition;
      const Vec3 rayDir = normalize(farWorldPos - rayOrigin);

      // Zero shared memory. All samples of a tile store the same zero to the tile's mask
      s_tileMasks[localTileIdx] = ExtendedClusterObjectMask(0);

      // Round up so that the workgroup covers every Z split even when Z_SPLIT_COUNT is not a multiple of
      // WORKGROUP_SIZE. The loops below additionally stop at Z_SPLIT_COUNT
      const U32 splitsPerInvocation = (Z_SPLIT_COUNT + WORKGROUP_SIZE - 1u) / WORKGROUP_SIZE;
      for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
          i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
      {
          s_zSplitMasks[i] = ExtendedClusterObjectMask(0);
      }

      memoryBarrierShared();
      barrier();

      // Do collision. t0 and t1 are the ray distances of the entry and exit points and are only read on a hit
      F32 t0, t1;
      U32 objectArrayIdx;
      Bool collides;

      // Point light
      if(isPointLight())
      {
          objectArrayIdx = clustererObjectIdx;
          const PointLight light = u_pointLights[objectArrayIdx];
          collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
      }
      // Spot light
      else if(isSpotLight())
      {
          objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_SPOT_LIGHT - 1u];
          const SpotLightBinning light = u_spotLights[objectArrayIdx];

          t0 = 10000.0;
          t1 = -10000.0;

          // Iterate all triangles built from the 5 edge points. Stop as soon as 2 of them are hit
          const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
          U32 hits = 0u;
          U32 idx = 0u;
          do
          {
              const Vec3 v0 = light.m_edgePoints[indices[idx + 0u]];
              const Vec3 v1 = light.m_edgePoints[indices[idx + 1u]];
              const Vec3 v2 = light.m_edgePoints[indices[idx + 2u]];

              F32 t, u, v;
              const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);

              if(localCollides)
              {
                  t0 = min(t0, t);
                  t1 = max(t1, t);
                  ++hits;
              }

              idx += 3u;
          } while(hits < 2u && idx < 6u * 3u);

          // A single hit makes the range start at the ray origin
          // NOTE(review): presumably because the camera is then inside the light's volume - confirm
          if(hits == 1u)
          {
              t0 = 0.0;
          }

          collides = (hits != 0u);
      }
      // Decal
      else if(isDecal())
      {
          objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_DECAL - 1u];
          const Decal decal = u_decals[objectArrayIdx];
          collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
      }
      // Fog volume
      else if(isFogVolume())
      {
          objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME - 1u];
          const FogDensityVolume vol = u_fogVolumes[objectArrayIdx];

          if(vol.m_isBox != 0u)
          {
              collides =
                  testRayAabb(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter, vol.m_aabbMaxOrSphereRadiusSquared, t0, t1);
          }
          else
          {
              collides = testRaySphere(rayOrigin, rayDir, vol.m_aabbMinOrSphereCenter,
                                       sqrt(vol.m_aabbMaxOrSphereRadiusSquared.x), t0, t1);
          }
      }
      // Reflection probe
      else if(isReflectionProbe())
      {
          objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_REFLECTION_PROBE - 1u];
          const ReflectionProbe probe = u_reflectionProbes[objectArrayIdx];
          collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
      }
      // GI probe
      else
      {
          objectArrayIdx =
              clustererObjectIdx - u_unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE - 1u];
          const GlobalIlluminationProbe probe = u_giProbes[objectArrayIdx];
          collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
      }

      // Update the masks
      if(collides)
      {
          // Set the tile. The bit position is the index of the object inside the array of its own type
          const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
                                                 << ExtendedClusterObjectMask(objectArrayIdx);
          atomicOr(s_tileMasks[localTileIdx], mask);

          // Compute and set the Z splits
          const Vec3 hitpointA = rayDir * t0 + rayOrigin;
          const Vec3 hitpointB = rayDir * t1 + rayOrigin;
          const F32 distFromNearPlaneA =
              testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
          const F32 distFromNearPlaneB =
              testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);

          // The 2 hit points are not guaranteed to be ordered by their distance from the near plane
          const F32 minDistFromNearPlane = min(distFromNearPlaneA, distFromNearPlaneB);
          const F32 maxDistFromNearPlane = max(distFromNearPlaneA, distFromNearPlaneB);

          // A start beyond the last split leaves the loop empty because the end is clamped to the last split
          const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
          const I32 endZSplit =
              clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
          for(I32 i = startZSplit; i <= endZSplit; ++i)
          {
              atomicOr(s_zSplitMasks[i], mask);
          }
      }

      // Sync
      memoryBarrierShared();
      barrier();

      // First sample writes the tile
      if(sampleIdx == 0u && s_tileMasks[localTileIdx] != ExtendedClusterObjectMask(0))
      {
          writeMaskToCluster(tileIdx, s_tileMasks[localTileIdx]);
      }

      // The Z splits are distributed to the invocations of the workgroup. Their clusters live after the tile clusters
      for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
          i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
      {
          if(s_zSplitMasks[i] != ExtendedClusterObjectMask(0))
          {
              writeMaskToCluster(TILE_COUNT + i, s_zSplitMasks[i]);
          }
      }
  }
  290. #pragma anki end