ClusterBinning.ankiprog 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. // Performs cluster binning. The dispatch's threadcount X is the tileCount*sampleCount/numthreads and the Y is the number of visible objects
  6. #pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
  7. #pragma anki technique Setup comp mutators
  8. #pragma anki technique Binning comp
  9. #pragma anki technique PackVisibles comp
  10. #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
  11. #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
  12. // ===========================================================================
  13. // Setup =
  14. // ===========================================================================
  #if NOT_ZERO(ANKI_TECHNIQUE_Setup)

  // One list of visible object indices per object type. main() reads element 0 of each list as the number of visible objects
  StructuredBuffer<U32> g_visibleIndices[(U32)GpuSceneNonRenderableObjectType::kCount] : register(t0);

  // This has a size of 2*GpuSceneNonRenderableObjectType::kCount. The first GpuSceneNonRenderableObjectType::kCount elements are for the cluster
  // binning dispatches and the rest GpuSceneNonRenderableObjectType::kCount for the packing dispatches
  RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u0);

  struct Constants
  {
      U32 m_tileCount;

      U32 m_padding1;
      U32 m_padding2;
      U32 m_padding3;
  };
  ANKI_FAST_CONSTANTS(Constants, g_consts)

  // Samples per tile. The Binning technique in this file declares its own kSampleCount with the same value
  constexpr U32 kSampleCount = 8;

  // Values used to round the indirect threadgroup counts up. The Binning and PackVisibles techniques in this file both use 64 threads per group
  constexpr U32 kClusterBinningThreadgroupSize = 64;
  constexpr U32 kPackVisiblesThreadgroupSize = 64;

  # define THREADGROUP_SIZE 16

  // Writes the indirect dispatch arguments of the binning and packing passes, one thread per argument slot
  [numthreads(THREADGROUP_SIZE, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
  {
      const U32 typeCount = (U32)GpuSceneNonRenderableObjectType::kCount;
      if(svDispatchThreadId >= typeCount * 2)
      {
          // Skip remaining threads
          return;
      }

      // Slots [0, typeCount) belong to cluster binning, slots [typeCount, 2*typeCount) to packing
      const Bool isPackingSlot = svDispatchThreadId >= typeCount;
      const U32 typeIdx = (isPackingSlot) ? svDispatchThreadId - typeCount : svDispatchThreadId;

      // Never dispatch for more objects than the clusterer supports
      const U32 visibleCount = min(kMaxVisibleClusteredObjects[typeIdx], g_visibleIndices[NonUniformResourceIndex(typeIdx)][0]);

      DispatchIndirectArgs args;
      if(isPackingSlot)
      {
          // Packing: one thread per visible object
          args.m_threadGroupCountX = (visibleCount + kPackVisiblesThreadgroupSize - 1) / kPackVisiblesThreadgroupSize;
          args.m_threadGroupCountY = 1;
      }
      else
      {
          // Binning: X covers every sample of every tile, Y is the visible object
          args.m_threadGroupCountX = (g_consts.m_tileCount * kSampleCount + kClusterBinningThreadgroupSize - 1) / kClusterBinningThreadgroupSize;
          args.m_threadGroupCountY = visibleCount;
      }
      args.m_threadGroupCountZ = 1;

      g_indirectArgs[svDispatchThreadId] = args;
  }
  #endif
  63. // ===========================================================================
  64. // Binning =
  65. // ===========================================================================
  #if NOT_ZERO(ANKI_TECHNIQUE_Binning)

  // Select the GPU scene struct that matches the object type of this permutation
  # if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
  typedef GpuSceneLight GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
  typedef GpuSceneDecal GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
  typedef GpuSceneFogDensityVolume GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
  typedef GpuSceneReflectionProbe GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
  typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
  # else
  # error See file
  # endif

  struct ClusterBinningConstants
  {
      Vec3 m_cameraOrigin;
      F32 m_zSplitCountOverFrustumLength;

      Vec2 m_renderingSize;
      U32 m_tileCountX;
      U32 m_tileCount;

      Vec4 m_nearPlaneWorld;

      I32 m_zSplitCountMinusOne;
      I32 m_padding0;
      I32 m_padding1;
      I32 m_padding2;

      Mat4 m_invertedViewProjMat;
  };
  ANKI_FAST_CONSTANTS(ClusterBinningConstants, g_consts)

  StructuredBuffer<U32> g_visibleObjectIds : register(t0); // 1st index is the count and then the indices to the g_objects
  StructuredBuffer<GpuSceneType> g_objects : register(t1);

  // main() indexes this with a tile index in [0, m_tileCount) for the tile clusters and with m_tileCount + zSplit for the Z split clusters
  RWStructuredBuffer<Cluster> g_clusters : register(u0);

  # define THREADGROUP_SIZE 64

  // ALMOST like DX Sample locations (https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels)
  // The Setup technique in this file sizes the dispatch with its own kSampleCount, which has the same value
  constexpr U32 kSampleCount = 8u;
  # define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kClusteredShadingTileSize))
  constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1, 3), LOCATION(5, 1),  LOCATION(-6, -6),
                                                    LOCATION(-6, 6), LOCATION(-7, -1), LOCATION(6, 7), LOCATION(7, -7)};
  # undef LOCATION

  // Tests a ray against the volume of a clustered object. Returns true if the ray hits it, in which case t0 and t1 hold the distances along the
  // ray of the two hit points
  Bool testRayClusteredObject(Vec3 rayOrigin, Vec3 rayDir, GpuSceneType obj, out F32 t0, out F32 t1)
  {
      Bool collides;

  # if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
      if(obj.m_isPointLight)
      {
          collides = testRaySphere(rayOrigin, rayDir, obj.m_position, obj.m_radius, t0, t1);
      }
      else
      {
          // Spot light
          t0 = 10000.0;
          t1 = -10000.0;

          // The spot light is a pyramid: the light position is the apex (vertex 0) and the 4 edge points form the base. The first 4 triangles
          // are the sides and the last 2 are the base
          const U32 kTriangleCount = 6u;
          const U32 indices[kTriangleCount * 3u] = {0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u};
          const Vec3 edgePoints[5u] = {obj.m_position, obj.m_edgePoints[0].xyz, obj.m_edgePoints[1].xyz, obj.m_edgePoints[2].xyz,
                                       obj.m_edgePoints[3].xyz};

          // Iterate all triangles but stop as soon as 2 hits are found
          U32 hits = 0u;
          U32 idx = 0u;
          do
          {
              const Vec3 v0 = edgePoints[indices[idx + 0u]];
              const Vec3 v1 = edgePoints[indices[idx + 1u]];
              const Vec3 v2 = edgePoints[indices[idx + 2u]];

              F32 t, u, v;
              const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);

              if(localCollides)
              {
                  t0 = min(t0, t);
                  t1 = max(t1, t);
                  ++hits;
              }

              idx += 3u;
          } while(hits < 2u && idx < kTriangleCount * 3u);

          if(hits == 1u)
          {
              // With a single hit the covered range starts at the ray origin (presumably the origin is inside the pyramid)
              t0 = 0.0;
          }

          collides = (hits != 0u);
      }
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
      collides = testRaySphere(rayOrigin, rayDir, obj.m_sphereCenter, obj.m_sphereRadius, t0, t1);
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
      if(obj.m_isBox != 0u)
      {
          collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius, t0, t1);
      }
      else
      {
          collides = testRaySphere(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius.x, t0, t1);
      }
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE \
      || OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
      collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMin, obj.m_aabbMax, t0, t1);
  # else
  # error See file
  # endif

      return collides;
  }

  // Atomically ORs mask into the mask of g_clusters[clusterIdx] that corresponds to the object type of this permutation. maskArrayIdx selects
  // the U32 of the mask for the types whose mask is an array
  void setObjectBitInCluster(U32 clusterIdx, GpuSceneType obj, U32 maskArrayIdx, U32 mask)
  {
      ANKI_MAYBE_UNUSED(obj);
      ANKI_MAYBE_UNUSED(maskArrayIdx);

  # if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
      if(obj.m_isPointLight)
      {
          InterlockedOr(g_clusters[clusterIdx].m_pointLightsMask[maskArrayIdx], mask);
      }
      else
      {
          InterlockedOr(g_clusters[clusterIdx].m_spotLightsMask[maskArrayIdx], mask);
      }
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
      InterlockedOr(g_clusters[clusterIdx].m_decalsMask[maskArrayIdx], mask);
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
      InterlockedOr(g_clusters[clusterIdx].m_fogDensityVolumesMask, mask);
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
      InterlockedOr(g_clusters[clusterIdx].m_reflectionProbesMask, mask);
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
      InterlockedOr(g_clusters[clusterIdx].m_giProbesMask, mask);
  # else
  # error See file
  # endif
  }

  // Bins one visible object against one sample of one tile. svDispatchThreadId.x encodes the tile and the sample (tileIdx * kSampleCount +
  // sampleIdx) and svDispatchThreadId.y is the index of the visible object
  [numthreads(THREADGROUP_SIZE, 1, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
  {
      // The dispatch is rounded up to whole threadgroups so there may be surplus threads. Clamp them to the last valid sample. Clamping to the
      // sample count itself would give tileIdx == m_tileCount, which in g_clusters is the first Z split cluster and not a tile
      const U32 dispatchThreadIdX = min(svDispatchThreadId.x, g_consts.m_tileCount * kSampleCount - 1u);
      const U32 tileIdx = dispatchThreadIdX / kSampleCount;
      const U32 sampleIdx = dispatchThreadIdX % kSampleCount;
      const U32 visibleObjectIdx = svDispatchThreadId.y;
      ANKI_ASSERT(visibleObjectIdx < kMaxVisibleClusteredObjects[OBJECT_TYPE]);

      const UVec2 tileXY = UVec2(tileIdx % g_consts.m_tileCountX, tileIdx / g_consts.m_tileCountX);

      // This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
      const UVec2 pixel = tileXY * kClusteredShadingTileSize + kSampleLocations[sampleIdx];
      const Vec2 uv = Vec2(pixel) / g_consts.m_renderingSize;
      const Vec2 ndc = uvToNdc(uv);

      // Unproject the sample to world space
      const Vec4 farWorldPos4 = mul(g_consts.m_invertedViewProjMat, Vec4(ndc, 1.0, 1.0));
      const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;

      // Create the ray that will test the clusterer objects
      const Vec3 rayOrigin = g_consts.m_cameraOrigin;
      const Vec3 rayDir = normalize(farWorldPos - rayOrigin);

      // Do collision. The +1 skips the count that is stored in the 1st element of g_visibleObjectIds
      const GpuSceneType obj = g_objects[g_visibleObjectIds[visibleObjectIdx + 1]];
      F32 t0, t1;
      const Bool collides = testRayClusteredObject(rayOrigin, rayDir, obj, t0, t1);
      if(!collides)
      {
          return;
      }

      // Update the masks. Every visible object owns one bit
      const U32 mask = 1u << (visibleObjectIdx % 32);
      const U32 maskArrayIdx = visibleObjectIdx / 32;

      // Set the tile
      setObjectBitInCluster(tileIdx, obj, maskArrayIdx, mask);

      // Compute and set the Z splits
      const Vec3 hitpointA = rayDir * t0 + rayOrigin;
      const Vec3 hitpointB = rayDir * t1 + rayOrigin;
      const F32 distFromNearPlaneA = testPlanePoint(g_consts.m_nearPlaneWorld.xyz, g_consts.m_nearPlaneWorld.w, hitpointA);
      const F32 distFromNearPlaneB = testPlanePoint(g_consts.m_nearPlaneWorld.xyz, g_consts.m_nearPlaneWorld.w, hitpointB);

      const Bool aIsNearer = distFromNearPlaneA < distFromNearPlaneB;
      const F32 minDistFromNearPlane = (aIsNearer) ? distFromNearPlaneA : distFromNearPlaneB;
      const F32 maxDistFromNearPlane = (aIsNearer) ? distFromNearPlaneB : distFromNearPlaneA;

      // If the whole range is past the last split then startZSplit > endZSplit and the loop doesn't run
      const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
      const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
      for(I32 i = startZSplit; i <= endZSplit; ++i)
      {
          // The Z split clusters are stored after the tile clusters
          setObjectBitInCluster(g_consts.m_tileCount + i, obj, maskArrayIdx, mask);
      }
  }
  #endif
  253. // ===========================================================================
  254. // PackVisibles =
  255. // ===========================================================================
  #if NOT_ZERO(ANKI_TECHNIQUE_PackVisibles)

  // Select the input (GPU scene) and the output (clustered shading) struct for the object type of this permutation
  # if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
  typedef LightUnion ClusteredType;
  typedef GpuSceneLight GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
  typedef Decal ClusteredType;
  typedef GpuSceneDecal GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
  typedef FogDensityVolume ClusteredType;
  typedef GpuSceneFogDensityVolume GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
  typedef ReflectionProbe ClusteredType;
  typedef GpuSceneReflectionProbe GpuSceneType;
  # elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
  typedef GlobalIlluminationProbe ClusteredType;
  typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
  # else
  # error See file
  # endif

  StructuredBuffer<GpuSceneType> g_inBuffer : register(t0);
  RWStructuredBuffer<ClusteredType> g_outBuffer : register(u0);

  // main() reads element 0 as the number of visible objects and the following elements as indices into g_inBuffer
  StructuredBuffer<U32> g_visibles : register(t1);

  # define THREAD_GROUP_SIZE 64

  // Copies the visible objects into g_outBuffer so that they are tightly packed. One thread per visible object
  [numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
  {
      const U32 outSlot = svDispatchThreadId.x;
      const U32 packedCount = min(g_visibles[0], kMaxVisibleClusteredObjects[OBJECT_TYPE]);

      if(outSlot < packedCount)
      {
          // The +1 skips the count in the 1st element
          const U32 srcIdx = g_visibles[outSlot + 1];

  # if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
          // Lights are converted member by member into the light union
          const GpuSceneType src = g_inBuffer[srcIdx];

          ClusteredType dst = (ClusteredType)0;
          dst.m_position = src.m_position;
          dst.m_radius = src.m_radius;
          dst.m_diffuseColor = src.m_diffuseColor;

          if(src.m_isPointLight)
          {
              dst.m_lightType = 0;
          }
          else
          {
              dst.m_lightType = 1;
          }

          dst.m_shadow = src.m_shadow;
          dst.m_innerCos = src.m_innerCos;
          dst.m_outerCos = src.m_outerCos;
          dst.m_direction = src.m_direction;
          dst.m_shadowAtlasTileScale = src.m_spotLightMatrixOrPointLightUvViewports[0].z; // Scale should be the same for all

          for(U32 vec = 0; vec < 6; ++vec)
          {
              dst.m_spotLightMatrixOrPointLightUvViewports[vec] = src.m_spotLightMatrixOrPointLightUvViewports[vec];
          }

          g_outBuffer[outSlot] = dst;
  # else
          // The rest of the types are copied as they are
          g_outBuffer[outSlot] = g_inBuffer[srcIdx];
  # endif
      }
  }
  #endif