GpuVisibilityNonRenderables.ankiprog 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma anki mutator HZB_TEST 0 1
  6. #pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
  7. #pragma anki mutator CPU_FEEDBACK 0 1
  8. #pragma anki skip_mutation CPU_FEEDBACK 1 OBJECT_TYPE 1
  9. #pragma anki skip_mutation CPU_FEEDBACK 1 OBJECT_TYPE 2
  10. #pragma anki technique comp
  11. #include <AnKi/Shaders/Common.hlsl>
  12. #include <AnKi/Shaders/Include/GpuSceneTypes.h>
  13. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  14. #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
  // Expose the GPU scene struct that corresponds to the OBJECT_TYPE mutator value as ObjectType, so the rest of
  // the shader can be written once for every object kind. A value that matches none of the listed macros falls
  // through to GpuSceneGlobalIlluminationProbe.
  15. #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
  16. typedef GpuSceneLight ObjectType;
  17. #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
  18. typedef GpuSceneDecal ObjectType;
  19. #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
  20. typedef GpuSceneFogDensityVolume ObjectType;
  21. #elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
  22. typedef GpuSceneReflectionProbe ObjectType;
  23. #else
  24. typedef GpuSceneGlobalIlluminationProbe ObjectType;
  25. #endif
  // Objects to test. main() reads the element at its dispatch thread index.
  26. StructuredBuffer<ObjectType> g_objects : register(t0);
  27. #if HZB_TEST
  // Texture and sampler that main() hands to cullHzb(). Only bound when the HZB_TEST mutator is enabled.
  28. Texture2D g_hzbTex : register(t1);
  29. SamplerState g_nearestAnyClampSampler : register(s0);
  30. #endif
  31. RWStructuredBuffer<U32> g_visibleIndices : register(u0); // 1st element is the count. What follows are the indices of the objects that passed the tests
  // Supplies m_clipPlanes and m_viewProjectionMat to main().
  32. ANKI_FAST_CONSTANTS(GpuVisibilityNonRenderableConstants, g_consts)
  // Running counters. main() increments them atomically and the last threadgroup to finish zeroes them again.
  33. RWStructuredBuffer<GpuVisibilityNonRenderablesCounters> g_counterBuffer : register(u1);
  34. #if CPU_FEEDBACK
  35. // 1st element is a count. What follows is an array of pairs: a UUID followed by a component array index.
  36. RWStructuredBuffer<U32> g_cpuFeedbackBuffer : register(u2);
  37. #endif
  // Bounding sphere of a light, packed as (center.xyz, radius): m_position goes to xyz and m_radius to w.
  Vec4 getSphere(GpuSceneLight l)
  {
      const Vec4 sphere = Vec4(l.m_position, l.m_radius);
      return sphere;
  }
  // Bounding sphere of a decal, packed as (center.xyz, radius): m_sphereCenter goes to xyz and m_sphereRadius to w.
  Vec4 getSphere(GpuSceneDecal l)
  {
      const Vec4 sphere = Vec4(l.m_sphereCenter, l.m_sphereRadius);
      return sphere;
  }
  // Bounding sphere of a fog density volume, packed as (center.xyz, radius).
  // The two m_aabb*OrSphere* fields are interpreted according to m_isBox:
  //  - box:    they are the AABB min and max; the sphere is centered on the box and reaches its max corner.
  //  - sphere: they are the sphere center and, in .x, the sphere radius.
  Vec4 getSphere(GpuSceneFogDensityVolume l)
  {
      if(!l.m_isBox)
      {
          // Already a sphere, just repack it
          return Vec4(l.m_aabbMinOrSphereCenter, l.m_aabbMaxOrSphereRadius.x);
      }

      const Vec3 boxCenter = (l.m_aabbMinOrSphereCenter + l.m_aabbMaxOrSphereRadius) / 2.0f;
      return Vec4(boxCenter, length(l.m_aabbMaxOrSphereRadius - boxCenter));
  }
  // Bounding sphere of a reflection probe, packed as (center.xyz, radius).
  // The sphere is centered on the probe's AABB and its radius is the distance from that center to m_aabbMax.
  Vec4 getSphere(GpuSceneReflectionProbe l)
  {
      const Vec3 boxCenter = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
      return Vec4(boxCenter, length(boxCenter - l.m_aabbMax));
  }
  // Bounding sphere of a global illumination probe, packed as (center.xyz, radius).
  // The sphere is centered on the probe's AABB and its radius is the distance from that center to m_aabbMax.
  Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
  {
      Vec4 sphere;
      sphere.xyz = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
      sphere.w = length(sphere.xyz - l.m_aabbMax);
      return sphere;
  }
  #define NUMTHREADS 64

  // Visibility test for non-renderable objects. Each thread handles the g_objects element at its dispatch thread
  // index:
  //  1. Frustum-test the object's bounding sphere against g_consts.m_clipPlanes.
  //  2. If HZB_TEST is enabled, additionally test the sphere's enclosing AABB with cullHzb().
  //  3. Append the index of every surviving object to g_visibleIndices (slot 0 is reserved for the count).
  //  4. If CPU_FEEDBACK is enabled, append (m_uuid, m_componentArrayIndex) of surviving objects that have
  //     m_cpuFeedback set to g_cpuFeedbackBuffer (slot 0 is reserved for the count).
  //  5. The threadgroup that bumps m_threadgroupCount last publishes the final counts and zeroes the counters.
  //
  // svDispatchThreadId: index of the object this thread tests.
  // svGroupIndex: index of the thread inside its threadgroup; thread 0 does the per-group bookkeeping.
  [numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
  {
      U32 objectCount, unused;
      g_objects.GetDimensions(objectCount, unused);

      // Threads past the end of the buffer have nothing to test. They can't return early because every thread of
      // the group has to reach the barrier further down
      Bool skip = (svDispatchThreadId >= objectCount);

      // Frustum test
      //
      Vec4 sphere = 0.0;
      if(!skip)
      {
          sphere = getSphere(g_objects[svDispatchThreadId]);
          skip = !frustumTest(g_consts.m_clipPlanes, sphere.xyz, sphere.w);
      }

  #if HZB_TEST
      // HZB test, done on the AABB that encloses the bounding sphere
      //
      if(!skip)
      {
          const Vec3 aabbMin = sphere.xyz - sphere.w;
          const Vec3 aabbMax = sphere.xyz + sphere.w;

          Vec2 minNdc, maxNdc;
          F32 aabbMinDepth;
          projectAabb(aabbMin, aabbMax, g_consts.m_viewProjectionMat, minNdc, maxNdc, aabbMinDepth);

          if(cullHzb(minNdc, maxNdc, aabbMinDepth, g_hzbTex, g_nearestAnyClampSampler))
          {
              skip = true;
          }
      }
  #endif

      // Add the object
      //
      if(!skip)
      {
          U32 idx;
          InterlockedAdd(g_counterBuffer[0].m_visibleObjectCount, 1, idx);
          g_visibleIndices[idx + 1] = svDispatchThreadId; // +1 because element 0 holds the count
      }

      // Give feedback to the CPU
      //
  #if CPU_FEEDBACK
      // Only read g_objects once we know the thread maps to a surviving object. Threads past the end of the buffer
      // always have skip == true, so this keeps them from indexing g_objects out of bounds
      if(!skip)
      {
          if(g_objects[svDispatchThreadId].m_cpuFeedback)
          {
              U32 idx;
              InterlockedAdd(g_counterBuffer[0].m_feedbackObjectCount, 1, idx);

              // Pairs start at element 1 because element 0 holds the count
              g_cpuFeedbackBuffer[idx * 2 + 1] = g_objects[svDispatchThreadId].m_uuid;
              g_cpuFeedbackBuffer[idx * 2 + 2] = g_objects[svDispatchThreadId].m_componentArrayIndex;
          }
      }
  #endif

      // Sync to make sure all the atomic ops have finished before the following code reads them
      AllMemoryBarrierWithGroupSync();

      // Store the counters to the actual buffers
      //
      if(svGroupIndex == 0)
      {
          U32 threadgroupIdx;
          InterlockedAdd(g_counterBuffer[0].m_threadgroupCount, 1, threadgroupIdx);

          // NOTE(review): the expected group count is derived from objectCount, so this assumes the dispatch
          // launches exactly ceil(objectCount / NUMTHREADS) threadgroups - confirm against the caller
          const U32 threadgroupCount = (objectCount + NUMTHREADS - 1) / NUMTHREADS;
          const Bool lastThreadgroupExecuting = (threadgroupIdx + 1 == threadgroupCount);

          if(lastThreadgroupExecuting)
          {
              // Publish the final counts into element 0 of the output buffers, then zero the counters
              // (presumably so that the next dispatch that uses g_counterBuffer starts from zero)
              g_visibleIndices[0] = g_counterBuffer[0].m_visibleObjectCount;
              g_counterBuffer[0].m_visibleObjectCount = 0;

  #if CPU_FEEDBACK
              g_cpuFeedbackBuffer[0] = g_counterBuffer[0].m_feedbackObjectCount;
              g_counterBuffer[0].m_feedbackObjectCount = 0;
  #endif

              g_counterBuffer[0].m_threadgroupCount = 0;
          }
      }
  }