GpuVisibilityAccelerationStructures.ankiprog 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma anki technique Visibility comp
  6. #pragma anki technique ZeroRemainingInstances comp
  7. #include <AnKi/Shaders/Common.hlsl>
  8. #include <AnKi/Shaders/Include/GpuSceneTypes.h>
  9. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  10. #include <AnKi/Shaders/Include/MiscRendererTypes.h>
  11. #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
  12. // ===========================================================================
  13. // Visibility =
  14. // ===========================================================================
  #if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_Visibility)

  // Visibility technique.
  //
  // One thread is launched per element of g_renderableBoundingVolumes. A volume is kept when its bounding sphere
  // passes testSphereSphereCollision() against (g_consts.m_pointOfTest, g_consts.m_testRadius) and the mesh LOD
  // picked for it has a non-zero BLAS address. Kept volumes are appended to g_visibleInstances and
  // g_visibleRenderables through an atomic counter. The last threadgroup to finish publishes the final count,
  // resets the counters and fills in two sets of indirect dispatch arguments.

  // Read-only views of the GPU scene
  StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes : register(t0);
  StructuredBuffer<GpuSceneRenderable> g_renderables : register(t1);
  StructuredBuffer<GpuSceneMeshLod> g_meshLods : register(t2);
  StructuredBuffer<Mat3x4> g_transforms : register(t3);

  // Output instances. The element count of this buffer is the maximum number of instances that can be written
  RWStructuredBuffer<AccelerationStructureInstance> g_visibleInstances : register(u0);

  // Element 0 receives the visible count, the entry of instance i lives at element i + 1
  RWStructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(u1);

  // Element 0 counts appended instances, element 1 counts finished threadgroups. Both are zeroed by the last
  // threadgroup
  globallycoherent RWStructuredBuffer<U32> g_counterBuffer : register(u2);

  // 2 elements. Element 0 holds the args of the ZeroRemainingInstances dispatch, element 1 the args for the SBT
  // build dispatches
  RWStructuredBuffer<DispatchIndirectArgs> g_nextDispatchIndirectArgs : register(u3);

  ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)

  #define NUMTHREADS 64

  [numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
  {
      const U32 boundingVolumeCount = getStructuredBufferElementCount(g_renderableBoundingVolumes);
      const U32 instanceCapacity = getStructuredBufferElementCount(g_visibleInstances);
      const U32 boundingVolumeIdx = svDispatchThreadId;

      // Out-of-range threads are only flagged, not returned early, so that every thread of the group still reaches
      // the group sync further down
      Bool accepted = boundingVolumeIdx < boundingVolumeCount;

      // Sphere vs sphere test around the point of interest
      GpuSceneRenderableBoundingVolume bvol;
      Vec3 center = 0.0;
      if(accepted)
      {
          bvol = SBUFF(g_renderableBoundingVolumes, boundingVolumeIdx);
          center = (bvol.m_aabbMin + bvol.m_aabbMax) * 0.5f;
          accepted = testSphereSphereCollision(center, bvol.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
      }

      if(accepted)
      {
          // Pick the LOD. Start from the lowest detail and only refine it for volumes that pass the camera frustum
          // test, so that those use distance based LODs
          U32 lod = 2u;
          if(frustumTest(g_consts.m_clipPlanes, center, bvol.m_sphereRadius))
          {
              const F32 lodDistance = length(center - g_consts.m_pointOfTest) - bvol.m_sphereRadius;
              if(lodDistance < g_consts.m_maxLodDistances[0])
              {
                  lod = 0u;
              }
              else if(lodDistance < g_consts.m_maxLodDistances[1])
              {
                  lod = 1u;
              }
          }

          const U32 renderableIdx = bvol.m_renderableIndex;
          const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
          const GpuSceneMeshLod meshLod = SBUFF(g_meshLods, renderable.m_meshLodsIndex + lod);

          // A zero address means there is no BLAS for this LOD, nothing to emit in that case
          const Bool hasBlas = (meshLod.m_blasAddress.x != 0 || meshLod.m_blasAddress.y != 0);
          if(hasBlas)
          {
              // Uniform scale + translation built from the mesh LOD's position scale/translation
              Mat3x4 dequantizationTrf;
              dequantizationTrf.m_row0 = Vec4(meshLod.m_positionScale, 0.0f, 0.0f, meshLod.m_positionTranslation.x);
              dequantizationTrf.m_row1 = Vec4(0.0f, meshLod.m_positionScale, 0.0f, meshLod.m_positionTranslation.y);
              dequantizationTrf.m_row2 = Vec4(0.0f, 0.0f, meshLod.m_positionScale, meshLod.m_positionTranslation.z);

              const Mat3x4 worldTrf = SBUFF(g_transforms, renderable.m_worldTransformsIndex);
              const Mat3x4 instanceTrf = combineTransformations(worldTrf, dequantizationTrf);

              // Reserve an output slot. The counter keeps growing even when the output is full, it gets clamped when
              // the final count is published
              U32 slot;
              InterlockedAdd(SBUFF(g_counterBuffer, 0), 1, slot);

              if(slot < instanceCapacity)
              {
                  AccelerationStructureInstance instance;
                  instance.m_transform = instanceTrf;
                  instance.m_accelerationStructureAddress = meshLod.m_blasAddress;
                  instance.m_mask = meshLod.m_tlasInstanceMask;
                  instance.m_flags = kAccellerationStructureFlagTriangleFrontCounterlockwise;
                  // Both of the following are truncated to their low 24 bits
                  instance.m_instanceCustomIndex = renderable.m_diffuseColor & 0x00FFFFFFu;
                  instance.m_instanceShaderBindingTableRecordOffset = slot & 0x00FFFFFFu;
                  SBUFF(g_visibleInstances, slot) = instance;

                  // +1 because element 0 is reserved for the count. LOD goes to the 2 top bits
                  SBUFF(g_visibleRenderables, slot + 1).m_lod_2bit_renderableIndex_30bit = (lod << 30u) | renderableIdx;
              }
          }
      }

      // Wait for all threads of this group, and for their memory operations, before thread 0 reports the group as
      // finished
      AllMemoryBarrierWithGroupSync();

      if(svGroupIndex == 0)
      {
          U32 groupsFinishedBefore;
          InterlockedAdd(SBUFF(g_counterBuffer, 1), 1, groupsFinishedBefore);

          const U32 totalGroupCount = (boundingVolumeCount + NUMTHREADS - 1) / NUMTHREADS;
          if(groupsFinishedBefore + 1 == totalGroupCount)
          {
              // This is the last threadgroup of the dispatch: publish the results
              const U32 visibleCount = min(SBUFF(g_counterBuffer, 0), instanceCapacity);
              SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit = visibleCount;

              // Leave the counters zeroed
              SBUFF(g_counterBuffer, 0) = 0;
              SBUFF(g_counterBuffer, 1) = 0;

              // Args of the ZeroRemainingInstances dispatch: one thread per unused instance slot
              const U32 unusedCount = instanceCapacity - visibleCount;
              DispatchIndirectArgs zeroRemainingArgs;
              zeroRemainingArgs.m_threadGroupCountX = (unusedCount + NUMTHREADS - 1) / NUMTHREADS;
              zeroRemainingArgs.m_threadGroupCountY = 1;
              zeroRemainingArgs.m_threadGroupCountZ = 1;
              SBUFF(g_nextDispatchIndirectArgs, 0) = zeroRemainingArgs;

              // Args for the SBT build dispatches: one thread per visible instance
              DispatchIndirectArgs sbtBuildArgs = zeroRemainingArgs;
              sbtBuildArgs.m_threadGroupCountX = (visibleCount + NUMTHREADS - 1) / NUMTHREADS;
              SBUFF(g_nextDispatchIndirectArgs, 1) = sbtBuildArgs;
          }
      }
  }
  #endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_Visibility
  132. // ===========================================================================
  133. // ZeroRemainingInstances =
  134. // ===========================================================================
  #if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_ZeroRemainingInstances)

  // ZeroRemainingInstances technique.
  //
  // Zero-fills the elements of g_instances that come after the visible ones. One thread handles one element.

  // Only element 0 is read here, it holds the number of visible instances
  StructuredBuffer<U32> g_visibleRenderableIndices : register(t0);

  // The instances buffer. Its element count is the maximum number of instances
  RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);

  #define NUMTHREADS 64

  [numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
  {
      const U32 maxInstances = getStructuredBufferElementCount(g_instances);
      const U32 visibleInstances = g_visibleRenderableIndices[0];
      ANKI_ASSERT(maxInstances >= visibleInstances);

      // Number of trailing elements that need clearing
      const U32 unusedInstances = maxInstances - visibleInstances;
      if(svDispatchThreadId >= unusedInstances)
      {
          // Surplus thread of the last threadgroup, nothing to clear
          return;
      }

      // The unused range starts right after the visible instances
      SBUFF(g_instances, visibleInstances + svDispatchThreadId) = (AccelerationStructureInstance)0;
  }
  #endif