LightCulling.azsl 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <scenesrg_all.srgi>
  9. #include <viewsrg_all.srgi>
  10. // Perform light culling on a compute shader
  11. #include <Atom/RPI/Math.azsli>
  12. #include <Atom/Features/LightCulling/LightCullingShared.azsli>
  13. #include <Atom/Features/PBR/Lights/LightStructures.azsli>
  14. #include <Atom/Features/Decals/DecalStructures.azsli>
// Per-pass inputs and outputs of the light culling compute shader.
ShaderResourceGroup PassSrg : SRG_PerPass
{
    // Tile-grid constants.
    struct LightCullingConstants
    {
        // BuildAabb uses this as the UV size of one tile.
        float2 m_gridPixel;
        // BuildAabb adds this to a tile's minimum UV to get the tile-center UV.
        float2 m_gridHalfPixel;
        // Passed to GetLightListIndex when addressing m_lightList.
        uint m_gridWidth;
        uint m_padding0;
        uint m_padding1;
        uint m_padding2;
    };
    LightCullingConstants m_constantData;
    // Source light data
    StructuredBuffer<SimplePointLight> m_simplePointLights;
    StructuredBuffer<SimpleSpotLight> m_simpleSpotLights;
    StructuredBuffer<PointLight> m_pointLights;
    StructuredBuffer<DiskLight> m_diskLights;
    StructuredBuffer<CapsuleLight> m_capsuleLights;
    StructuredBuffer<QuadLight> m_quadLights;
    // Number of entries to cull from each of the buffers above.
    uint m_simplePointLightCount;
    uint m_simpleSpotLightCount;
    uint m_pointLightCount;
    uint m_diskLightCount;
    uint m_capsuleLightCount;
    uint m_quadLightCount;
    // Produced by the LightCullingTilePrepare pass. Contains depth min/max and mask data (a bit set for each location where opaque geo was found)
    Texture2D<uint4> m_tileLightData;
    // Destination light data
    // Packed (index + bin mask) entries, one End of Group marker after each culling stage.
    RWStructuredBuffer<uint> m_lightList;
    // Per-tile running total returned by the last WriteCullingDataToMainMemory call in MainCS.
    RWTexture2D<uint> m_lightCount;
    // Source decal data and the number of decals to cull.
    StructuredBuffer<Decal> m_decals;
    uint m_decalCount;
}
// Number of entries the thread group has appended during the current culling stage.
// Incremented atomically, so it can exceed the size of shared_lightIndices (see SortDecals).
groupshared uint shared_lightCount;
// Packed (index + bin mask) entries gathered by the thread group during the current culling stage.
groupshared uint shared_lightIndices[TILE_DIM_X * TILE_DIM_Y];
  50. bool IsVectorPointingTowardsEye(const float3 dir)
  51. {
  52. return (dir.z * RH_COORD_SYSTEM_REVERSE) < 0;
  53. }
  54. float3 WorldToView_Point(float3 p)
  55. {
  56. float3 result = mul(ViewSrg::m_viewMatrix, float4(p, 1.0)).xyz;
  57. return result;
  58. }
  59. float3 WorldToView_Vector(float3 v)
  60. {
  61. float3 result = mul((float3x3)ViewSrg::m_viewMatrix, v);
  62. return result;
  63. }
  64. bool TestSphereVsAabbInvSqrt(float3 sphereCenter, float invSphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
  65. {
  66. float3 delta = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
  67. float d2 = dot(delta, delta);
  68. return d2 * invSphereRadiusSq < 1.0f;
  69. }
  70. bool TestSphereVsAabb(float3 sphereCenter, float sphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
  71. {
  72. float3 delta = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
  73. float d2 = dot(delta, delta);
  74. return d2 < sphereRadiusSq;
  75. }
  76. // Note that this function isn't precise. It will have false positives due to being simplified for speed.
  77. // Function origin and description: https://bartwronski.com/2017/04/13/cull-that-cone/
  78. bool TestSphereVsCone(float3 spherePos, float sphereRadius, float3 origin, float3 forward, float cosa, float size)
  79. {
  80. float3 V = spherePos - origin;
  81. float V1len = dot(V, forward);
  82. bool backOk = V1len >= -sphereRadius;
  83. bool frontOk = V1len <= sphereRadius + size;
  84. float rsina = rsqrt(1 - cosa * cosa);
  85. float VlenSq = dot(V, V);
  86. float distanceClosestPoint = rsina * cosa * sqrt(max(0.0, VlenSq - V1len * V1len)) - V1len;
  87. bool angleOk = distanceClosestPoint <= sphereRadius* rsina;
  88. return angleOk && backOk && frontOk;
  89. }
  90. uint NextPowerTwo(uint x)
  91. {
  92. // https://wickedengine.net/2018/01/05/next-power-of-two-in-hlsl/
  93. return 2u << firstbithigh(max(1, x) - 1);
  94. }
  95. uint GetSortKey(Decal decal)
  96. {
  97. return (decal.m_sortKeyPacked & 0xFF);
  98. }
  99. bool AreDecalsOutofOrder(uint packedIndexLeft, uint packedIndexRight)
  100. {
  101. uint leftIndex = Light_GetIndex(packedIndexLeft);
  102. uint rightIndex = Light_GetIndex(packedIndexRight);
  103. Decal leftDecal = PassSrg::m_decals[leftIndex];
  104. Decal rightDecal = PassSrg::m_decals[rightIndex];
  105. uint leftSortIndex = GetSortKey(leftDecal);
  106. uint rightSortIndex = GetSortKey(rightDecal);
  107. if (leftSortIndex == rightSortIndex)
  108. {
  109. return leftIndex > rightIndex;
  110. }
  111. else
  112. {
  113. return leftSortIndex > rightSortIndex;
  114. }
  115. }
  116. void SortDecals(uint groupIndex)
  117. {
  118. // Note that shared_lightCount can exceed the array size if too many decals intersect the tile, so clamp it here.
  119. uint numArray = min(TILE_DIM_X * TILE_DIM_Y, shared_lightCount);
  120. uint numArrayPowerOfTwo = NextPowerTwo(numArray);
  121. // Bitonic sort code from AMD: https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11
  122. // AMD / MIT License is contained in the same directory as this file
  123. // subArraySize = 2,4,8,16,etc...
  124. for (uint subArraySize = 2; subArraySize <= numArrayPowerOfTwo; subArraySize = subArraySize * 2)
  125. {
  126. // compareDist = (subArraySize / 2), (subArraySize / 4), ... 32, 16, 8, 4, 2, 1
  127. for (uint compareDist = subArraySize >> 1; compareDist > 0; compareDist = compareDist >> 1)
  128. {
  129. // This code from AMD very cleverly computes the locations of two different array indices to compare.
  130. // The pattern that is produced from this is identical to: https://en.wikipedia.org/wiki/Bitonic_sorter#Alternative_representation
  131. // Essentially creating larger monotonic sequences and then merging them together. (Two monotonic sequences together is a bitonic)
  132. uint index_low = groupIndex & (compareDist - 1);
  133. uint index_high = 2 * (groupIndex - index_low);
  134. uint index0 = index_high + index_low;
  135. uint index1 = compareDist == subArraySize >> 1 ? index_high + (2 * compareDist - 1) - index_low : index_high + compareDist + index_low;
  136. if (index0 < numArray && index1 < numArray)
  137. {
  138. bool areDecalsOutOrder = AreDecalsOutofOrder(shared_lightIndices[index0], shared_lightIndices[index1]);
  139. if (areDecalsOutOrder)
  140. {
  141. uint uTemp = shared_lightIndices[index0];
  142. shared_lightIndices[index0] = shared_lightIndices[index1];
  143. shared_lightIndices[index1] = uTemp;
  144. }
  145. }
  146. GroupMemoryBarrierWithGroupSync();
  147. }
  148. }
  149. }
  150. void MarkLightAsVisibleInSharedMemory(uint lightIndex, uint inside)
  151. {
  152. uint sharedLightIndex;
  153. InterlockedAdd(shared_lightCount, 1, sharedLightIndex);
  154. sharedLightIndex = min(sharedLightIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
  155. shared_lightIndices[sharedLightIndex] = PackLightIndexWithBinMask(lightIndex, inside);
  156. }
  157. void CopySharedLightsToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
  158. {
  159. if( groupIndex < shared_lightCount )
  160. {
  161. uint offset = min(lightCount + groupIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
  162. uint index = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, offset);
  163. PassSrg::m_lightList[index] = shared_lightIndices[groupIndex];
  164. }
  165. }
  166. // Return the minz and maxz of this light in view space
  167. float2 ComputePointLightMinMaxZ(float lightRadius, float3 lightPosition)
  168. {
  169. float2 minmax = lightPosition.z + lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
  170. return minmax;
  171. }
  172. float2 ComputeSimpleSpotLightMinMax(SimpleSpotLight light, float3 lightPosition)
  173. {
  174. float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared);
  175. float2 minmax = lightPosition.z + lightRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
  176. return minmax;
  177. }
  178. // Return the minz and maxz of this quad light in view space
  179. // Quad light must be double sided
  180. float2 ComputeQuadLightMinMaxZ_DoubleSided(QuadLight light, float3 lightPosition)
  181. {
  182. const float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared);
  183. const float2 minmax = lightPosition.z + lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
  184. return minmax;
  185. }
  186. // Return the minz and maxz of this quad light in view space
  187. // Quad light must be single sided
  188. float2 ComputeQuadLightMinMaxZ_SingleSided(QuadLight light, float3 lightPosition, float3 lightDirection)
  189. {
  190. // [GFX TODO][ATOM-6170] We can compute a tighter bounds with single sided lights by bringing in one of bounds
  191. return ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
  192. }
  193. float2 ComputeDiskLightMinMax(DiskLight light, float3 lightPosition)
  194. {
  195. float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset;
  196. float2 minmax = lightPosition.z + lightRadius * float2(-1, 1) * RH_COORD_SYSTEM_REVERSE;
  197. return minmax;
  198. }
  199. float2 ComputeCapsuleLightMinMax(CapsuleLight light, float3 lightPosition, float lightFalloffRadius)
  200. {
  201. float offsetZ = abs(WorldToView_Vector(light.m_direction).z * light.m_length * 0.5f) + lightFalloffRadius;
  202. float nearZ = lightPosition.z - offsetZ * RH_COORD_SYSTEM_REVERSE;
  203. float farZ = lightPosition.z + offsetZ * RH_COORD_SYSTEM_REVERSE;
  204. return float2(nearZ, farZ);
  205. }
  206. void CullDecals(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents, float2 tile_center_uv)
  207. {
  208. for (uint decalIndex = groupIndex ; decalIndex < PassSrg::m_decalCount ; decalIndex += TILE_DIM_X * TILE_DIM_Y)
  209. {
  210. Decal decal = PassSrg::m_decals[decalIndex];
  211. float3 decalPosition = WorldToView_Point(decal.m_position);
  212. // just wrapping a bounding sphere around a cube for now to get a minor perf boost. i.e. the sphere radius is sqrt(x*x + y*y + z*z)
  213. // ATOM-4224 - try AABB-AABB
  214. float boundingSphereRadiusSqr = dot(decal.m_halfSize, decal.m_halfSize);
  215. bool potentiallyIntersects = TestSphereVsAabb(decalPosition, boundingSphereRadiusSqr, aabb_center, aabb_extents);
  216. if (potentiallyIntersects)
  217. {
  218. uint inside = 0;
  219. float2 minmax = ComputePointLightMinMaxZ(sqrt(boundingSphereRadiusSqr), decalPosition);
  220. if (IsObjectInsideTile(tileLightData, minmax, inside))
  221. {
  222. MarkLightAsVisibleInSharedMemory(decalIndex, inside);
  223. }
  224. }
  225. }
  226. }
  227. void CullPointLight(uint lightIndex, float3 lightPosition, float invLightRadius, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  228. {
  229. lightPosition = WorldToView_Point(lightPosition);
  230. bool potentiallyIntersects = TestSphereVsAabbInvSqrt(lightPosition, invLightRadius, aabb_center, aabb_extents);
  231. if (potentiallyIntersects)
  232. {
  233. // Implement and profile fine-grained light culling testing
  234. // ATOM-3732
  235. uint inside = 0;
  236. float2 minmax = ComputePointLightMinMaxZ(rsqrt(invLightRadius), lightPosition);
  237. if (IsObjectInsideTile(tileLightData, minmax, inside))
  238. {
  239. MarkLightAsVisibleInSharedMemory(lightIndex, inside);
  240. }
  241. }
  242. }
  243. void CullSimplePointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  244. {
  245. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_simplePointLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  246. {
  247. SimplePointLight light = PassSrg::m_simplePointLights[lightIndex];
  248. CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
  249. }
  250. }
  251. void CullPointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  252. {
  253. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_pointLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  254. {
  255. PointLight light = PassSrg::m_pointLights[lightIndex];
  256. CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
  257. }
  258. }
  259. void CullSimpleSpotLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  260. {
  261. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_simpleSpotLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  262. {
  263. SimpleSpotLight light = PassSrg::m_simpleSpotLights[lightIndex];
  264. float3 lightPosition = WorldToView_Point(light.m_position);
  265. float3 lightDirection = WorldToView_Vector(light.m_direction);
  266. bool potentiallyIntersects = TestSphereVsCone(aabb_center, length(aabb_extents), lightPosition, lightDirection, light.m_cosOuterConeAngle, rsqrt(light.m_invAttenuationRadiusSquared));
  267. if (potentiallyIntersects)
  268. {
  269. // Implement and profile fine-grained light culling testing
  270. // ATOM-3732
  271. uint inside = 0;
  272. float2 minmax = ComputeSimpleSpotLightMinMax(light, lightPosition);
  273. if (IsObjectInsideTile(tileLightData, minmax, inside))
  274. {
  275. MarkLightAsVisibleInSharedMemory(lightIndex, inside);
  276. }
  277. }
  278. }
  279. }
  280. void CullDiskLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  281. {
  282. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_diskLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  283. {
  284. DiskLight light = PassSrg::m_diskLights[lightIndex];
  285. float3 lightPosition = WorldToView_Point(light.m_position - light.m_bulbPositionOffset * light.m_direction);
  286. float lightRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_diskRadius;
  287. float lightRadiusSqr = lightRadius * lightRadius;
  288. float aabbRadius = length(aabb_extents);
  289. float3 lightDirection = WorldToView_Vector(light.m_direction);
  290. bool potentiallyIntersects;
  291. if ((light.m_flags & DiskLightFlag::UseConeAngle) > 0)
  292. {
  293. potentiallyIntersects = TestSphereVsCone(aabb_center, length(aabb_extents), lightPosition, lightDirection, light.m_cosOuterConeAngle, rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset);
  294. }
  295. else
  296. {
  297. potentiallyIntersects = TestSphereVsAabb(lightPosition, lightRadiusSqr, aabb_center, aabb_extents);
  298. if (potentiallyIntersects)
  299. {
  300. // Only one side is visible, check that we are above the hemisphere
  301. float3 toAABBCenter = aabb_center - lightPosition;
  302. float distanceToLightPlane = dot(lightDirection, toAABBCenter);
  303. potentiallyIntersects = distanceToLightPlane >= -aabbRadius;
  304. }
  305. }
  306. if (potentiallyIntersects)
  307. {
  308. // Implement and profile fine-grained light culling testing
  309. // ATOM-3732
  310. uint inside = 0;
  311. float2 minmax = ComputeDiskLightMinMax(light, lightPosition);
  312. if (IsObjectInsideTile(tileLightData, minmax, inside))
  313. {
  314. MarkLightAsVisibleInSharedMemory(lightIndex, inside);
  315. }
  316. }
  317. }
  318. }
  319. void CullCapsuleLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  320. {
  321. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_capsuleLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  322. {
  323. CapsuleLight light = PassSrg::m_capsuleLights[lightIndex];
  324. float3 lightMiddleWorld = light.m_startPoint + light.m_direction * light.m_length * 0.5f;
  325. float3 lightMiddleView = WorldToView_Point(lightMiddleWorld);
  326. float lightFalloffRadius = rsqrt(light.m_invAttenuationRadiusSquared);
  327. float lightConservativeBoundingRadius = lightFalloffRadius + light.m_length * 0.5f;
  328. bool potentiallyIntersects = TestSphereVsAabb(lightMiddleView, lightConservativeBoundingRadius * lightConservativeBoundingRadius, aabb_center, aabb_extents);
  329. if (potentiallyIntersects)
  330. {
  331. // Implement and profile fine-grained light culling testing
  332. // ATOM-3732
  333. uint inside = 0;
  334. float2 minmax = ComputeCapsuleLightMinMax(light, lightMiddleView, lightFalloffRadius);
  335. if (IsObjectInsideTile(tileLightData, minmax, inside))
  336. {
  337. MarkLightAsVisibleInSharedMemory(lightIndex, inside);
  338. }
  339. }
  340. }
  341. }
  342. void CullQuadLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
  343. {
  344. // Implement and profile fine-grained light culling testing
  345. // ATOM-3732
  346. for (uint lightIndex = groupIndex ; lightIndex < PassSrg::m_quadLightCount ; lightIndex += TILE_DIM_X * TILE_DIM_Y)
  347. {
  348. const QuadLight light = PassSrg::m_quadLights[lightIndex];
  349. const float3 lightPosition = WorldToView_Point(light.m_position);
  350. bool potentiallyIntersects = TestSphereVsAabbInvSqrt(lightPosition, light.m_invAttenuationRadiusSquared, aabb_center, aabb_extents);
  351. if (potentiallyIntersects)
  352. {
  353. float2 minmaxz;
  354. const bool singleSided = (light.m_flags & QuadLightFlag::EmitsBothDirections) == 0;
  355. if (singleSided)
  356. {
  357. // Only one side is visible, check that we are above the hemisphere
  358. const float3 leftDir = light.m_leftDir;
  359. const float3 upDir = light.m_upDir;
  360. const float3 lightDirection = WorldToView_Vector(cross(leftDir, upDir));
  361. const float3 toAABBCenter = aabb_center - lightPosition;
  362. const float distanceToLightPlane = dot(lightDirection, toAABBCenter);
  363. const float aabbRadius = length(aabb_extents);
  364. const bool aboveHemisphere = distanceToLightPlane >= -aabbRadius;
  365. if (aboveHemisphere)
  366. {
  367. minmaxz = ComputeQuadLightMinMaxZ_SingleSided(light, lightPosition, lightDirection);
  368. }
  369. else
  370. {
  371. potentiallyIntersects = false;
  372. }
  373. }
  374. else
  375. {
  376. minmaxz = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
  377. }
  378. uint inside = 0;
  379. if (potentiallyIntersects && IsObjectInsideTile(tileLightData, minmaxz, inside))
  380. {
  381. MarkLightAsVisibleInSharedMemory(lightIndex, inside);
  382. }
  383. }
  384. }
  385. }
  386. uint WriteEndOfGroup(uint lightCount, uint3 groupID)
  387. {
  388. uint lightsAfter = lightCount + shared_lightCount;
  389. uint end = PackLightIndexWithBinMask(NVLC_END_OF_GROUP, NVLC_ALL_BIN_BITS);
  390. uint offset = min(lightCount + shared_lightCount, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
  391. uint index = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, offset);
  392. PassSrg::m_lightList[index] = end;
  393. lightsAfter++;
  394. return lightsAfter;
  395. }
  396. void ClearSharedLightCount(uint groupIndex)
  397. {
  398. if( groupIndex == 0 )
  399. {
  400. shared_lightCount = 0;
  401. }
  402. }
  403. void ClearSharedLightCountWithDoubleBarrier(uint groupIndex)
  404. {
  405. GroupMemoryBarrierWithGroupSync();
  406. ClearSharedLightCount(groupIndex);
  407. GroupMemoryBarrierWithGroupSync();
  408. }
  409. float2 ReadDepthCloseFar(uint3 groupID)
  410. {
  411. float2 depthCloseFar = asfloat(PassSrg::m_tileLightData[groupID.xy].xy);
  412. return depthCloseFar;
  413. }
  414. TileLightData ReadTileLightData(uint3 groupID)
  415. {
  416. uint4 packedData = PassSrg::m_tileLightData[groupID.xy];
  417. return Tile_UnpackData(packedData);
  418. }
  419. uint WriteCullingDataToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
  420. {
  421. GroupMemoryBarrierWithGroupSync();
  422. CopySharedLightsToMainMemory(lightCount, groupIndex, groupID );
  423. lightCount = WriteEndOfGroup(lightCount, groupID);
  424. return lightCount;
  425. }
  426. // Converts depth in view space to depth buffer depth
  427. // It is used when depth is in view space and we want to use it as depth for projection matrix
  428. float ViewSpaceToDepthBuffer(float depth)
  429. {
  430. float a = ViewSrg::m_projectionMatrix[2][3];
  431. float b = ViewSrg::m_projectionMatrix[2][2];
  432. return a / depth - b;
  433. }
  434. float3 GetViewSpacePosition(float2 tileUv, float depth)
  435. {
  436. // Map UV from [0, 1] to NDC [-1, 1]
  437. tileUv = tileUv * 2.0f - 1.0f;
  438. float4 clipPos = float4(tileUv.x, -tileUv.y, depth, 1.0f);
  439. float4 viewPos = mul(ViewSrg::m_projectionMatrixInverse, clipPos);
  440. return viewPos.xyz / viewPos.w;
  441. }
  442. // This function builds an AABB in view space that encompasses the tile
  443. // The AABB is built by taking the four corners of the tile and the center of the tile, and transforming them to view space
  444. void BuildAabb(TileLightData tileLightData, uint2 tileId, out float3 aabbCenter, out float3 aabbExtents, out float2 tileCenterUv)
  445. {
  446. float2 tileUvMin = float2(tileId) * PassSrg::m_constantData.m_gridPixel;
  447. float2 tileUvMax = tileUvMin + PassSrg::m_constantData.m_gridPixel;
  448. tileCenterUv = tileUvMin + PassSrg::m_constantData.m_gridHalfPixel;
  449. const float depthMin = ViewSpaceToDepthBuffer(-tileLightData.zNear);;
  450. const float depthMax = ViewSpaceToDepthBuffer(-tileLightData.zFar);
  451. float3 pt0 = GetViewSpacePosition(tileUvMin, depthMin);
  452. float3 pt1 = GetViewSpacePosition(tileUvMin, depthMax);
  453. float3 pt2 = GetViewSpacePosition(tileUvMax, depthMin);
  454. float3 pt3 = GetViewSpacePosition(tileUvMax, depthMax);
  455. float3 aabbMin = min(min(pt0, pt1), min(pt2, pt3));
  456. float3 aabbMax = max(max(pt0, pt1), max(pt2, pt3));
  457. aabbCenter = (aabbMin + aabbMax) * 0.5f;
  458. aabbExtents = (aabbMax - aabbMin);
  459. }
// This shader is invoked with one thread group per on-screen tile
  461. // e.g. if the screen resolution is 1920x1080, with 16x16 tiles, there will be 120x68 tiles (and 120x68 thread groups)
  462. // Each thread-group is dedicated to culling all lights against that screen-tile.
  463. // It might be worth splitting this compute shader into several shaders, one per light type.
  464. // Each thread will read one light, determine if it is visible, write it to shared memory, then move onto the next light until
  465. // all lights are processed
  466. // After all lights visibility is computed, it will write them back from shared memory to GPU memory
  467. // This will write out the following:
  468. // Point light index << 16 | bitmask contains which bits the light is present in
  469. // Point light index << 16 | bitmask contains which bits the light is present in
  470. // Point light index << 16 | bitmask contains which bits the light is present in
  471. // ...
  472. // End of Group
  473. // Disk light index << 16 | bitmask contains which bits the light is present in
  474. // Disk light index << 16 | bitmask contains which bits the light is present in
  475. // Disk light index << 16 | bitmask contains which bits the light is present in
  476. // ...
  477. // End of Group
  478. // i.e. for each 32-bit UINT, it contains the 16 bit light index + 16-bit binning information
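// (the same pattern repeats for every culling stage, in the order MainCS runs them: decals, simple point lights,
// simple spot lights, point lights, disk lights, capsule lights, quad lights)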
  480. // Note! This isn't consumed by the forward shader. This light list will be further processed by the LightCullingRemap shader, producing a LightListRemapped buffer
  481. // that is more optimal for consumption by the forward shader.
  482. [numthreads(TILE_DIM_X, TILE_DIM_Y, 1)]
  483. void MainCS(
  484. uint3 dispatchThreadID : SV_DispatchThreadID,
  485. uint3 groupID : SV_GroupID,
  486. uint groupIndex : SV_GroupIndex)
  487. {
  488. ClearSharedLightCount(groupIndex);
  489. uint lightCount = 0;
  490. TileLightData tileLightData = ReadTileLightData(groupID);
  491. float2 tileCenterUv;
  492. float3 aabb_center, aabb_extents;
  493. BuildAabb(tileLightData, groupID.xy, aabb_center, aabb_extents, tileCenterUv);
  494. GroupMemoryBarrierWithGroupSync();
  495. CullDecals(groupIndex, tileLightData, aabb_center, aabb_extents, tileCenterUv);
  496. GroupMemoryBarrierWithGroupSync();
  497. SortDecals(groupIndex);
  498. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  499. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  500. CullSimplePointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  501. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  502. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  503. CullSimpleSpotLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  504. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  505. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  506. CullPointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  507. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  508. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  509. CullDiskLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  510. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  511. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  512. CullCapsuleLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  513. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  514. ClearSharedLightCountWithDoubleBarrier(groupIndex);
  515. CullQuadLights(groupIndex, tileLightData, aabb_center, aabb_extents);
  516. lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID );
  517. if (groupIndex == 0)
  518. {
  519. PassSrg::m_lightCount[groupID.xy] = lightCount;
  520. }
  521. }