GpuVisibility.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Renderer/RendererObject.h>
  7. #include <AnKi/Renderer/Utils/Readback.h>
  8. #include <AnKi/Resource/RenderingKey.h>
  9. namespace anki {
  10. class InstanceRange
  11. {
  12. friend class GpuVisibility;
  13. public:
  14. U32 getFirstInstance() const
  15. {
  16. ANKI_ASSERT(isValid());
  17. return m_firstInstance;
  18. }
  19. U32 getInstanceCount() const
  20. {
  21. ANKI_ASSERT(isValid());
  22. return m_instanceCount;
  23. }
  24. Bool isValid() const
  25. {
  26. return m_instanceCount > 0;
  27. }
  28. private:
  29. U32 m_firstInstance = 0;
  30. U32 m_instanceCount = 0;
  31. };
  32. class BaseGpuVisibilityInput
  33. {
  34. public:
  35. CString m_passesName;
  36. RenderingTechnique m_technique = RenderingTechnique::kCount;
  37. Vec3 m_lodReferencePoint = Vec3(kMaxF32);
  38. Array<F32, kMaxLodCount - 1> m_lodDistances = {};
  39. RenderGraphBuilder* m_rgraph = nullptr;
  40. Bool m_gatherAabbIndices = false; // For debug draw.
  41. Bool m_hashVisibles = false; // Create a hash for the visible renderables.
  42. Bool m_limitMemory = false; // Use less memory but you pay some cost scheduling the work.
  43. };
  44. class FrustumGpuVisibilityInput : public BaseGpuVisibilityInput
  45. {
  46. public:
  47. Mat4 m_viewProjectionMatrix;
  48. /// The size of the viewport the visibility results will be used on. Used to kill objects that don't touch the sampling positions.
  49. UVec2 m_viewportSize;
  50. const RenderTargetHandle* m_hzbRt = nullptr; // Optional.
  51. Bool m_twoPhaseOcclusionCulling = false; // If it's false then it's only a single phase. Only applies when meshlet rendering is enabled.
  52. };
  53. class DistanceGpuVisibilityInput : public BaseGpuVisibilityInput
  54. {
  55. public:
  56. Vec3 m_pointOfTest = Vec3(0.0f);
  57. F32 m_testRadius = 1.0f;
  58. };
  59. class GpuVisibilityOutput
  60. {
  61. friend class GpuVisibility;
  62. public:
  63. BufferHandle m_dependency; // Just expose one handle for depedencies. No need to track all buffers. Wait on it using indirect draw usage.
  64. class
  65. {
  66. public:
  67. BufferView m_perDrawDataBuffer; // An array of GpuScenePerDraw.
  68. BufferView m_mdiDrawCountsBuffer; // An array of U32, one for each render state bucket (even those that use task/mesh flow).
  69. BufferView m_drawIndexedIndirectArgsBuffer; // Array of DrawIndexedIndirectArgs or DrawIndirectArgs.
  70. // One for each bucket. It's an index to the m_perDrawDataBuffer. It basically gives the offset to the m_perDrawDataBuffer for each bucket
  71. BufferView m_firstPerDrawBuffer;
  72. // Defines the element sub-ranges in the m_drawIndexedIndirectArgsBuffer per render state bucket.
  73. WeakArray<InstanceRange> m_bucketIndirectArgsRanges;
  74. } m_legacy; // Legacy vertex shading.
  75. class
  76. {
  77. public:
  78. BufferView m_dispatchMeshIndirectArgsBuffer; // H/W meshlet rendering array of DispatchIndirectArgs, one for each render state bucket.
  79. BufferView m_drawIndirectArgs; // S/W meshlet rendering array of DrawIndirectArgs, one for each state bucket.
  80. BufferView m_meshletInstancesBuffer;
  81. BufferView m_firstMeshletBuffer; // For H/W meshlet rendering. Points to the first meshlet in the m_meshletInstancesBuffer. One per bucket.
  82. } m_mesh; // S/W or H/W meshlet rendering.
  83. // [Optional] Indices to the AABB buffer (LodAndGpuSceneRenderableBoundingVolumeIndex). The 1st element is the count
  84. BufferView m_visibleAaabbIndicesBuffer;
  85. BufferView m_visiblesHashBuffer; // [Optional] A hash of the visible objects. Used to conditionaly not perform shadow randering.
  86. Bool containsDrawcalls() const
  87. {
  88. return m_dependency.isValid();
  89. }
  90. private:
  91. class
  92. {
  93. public:
  94. BufferView m_meshletsFailedHzb;
  95. BufferView m_counters;
  96. BufferView m_meshletPrefixSums;
  97. BufferView m_gpuVisIndirectDispatchArgs;
  98. } m_stage1And2Mem; // Output of the 2nd (or 1st) stage that will be used in the 3rd
  99. class
  100. {
  101. public:
  102. BufferView m_indirectDrawArgs;
  103. BufferView m_dispatchMeshIndirectArgs;
  104. BufferView m_meshletInstances;
  105. } m_stage3Mem; // Output of the 3rd stage.
  106. };
  107. /// Performs GPU visibility for some pass.
  108. class GpuVisibility : public RendererObject
  109. {
  110. public:
  111. Error init();
  112. /// Perform frustum visibility testing.
  113. /// @note Not thread-safe.
  114. void populateRenderGraph(FrustumGpuVisibilityInput& in, GpuVisibilityOutput& out)
  115. {
  116. ANKI_ASSERT(in.m_viewProjectionMatrix != Mat4::getZero());
  117. ANKI_ASSERT(in.m_viewportSize != UVec2(0u));
  118. populateRenderGraphInternal(false, in, out);
  119. }
  120. /// Perform the optional stage 3: 2nd phase of the 2-phase occlusion culling.
  121. /// @note Not thread-safe.
  122. void populateRenderGraphStage3(FrustumGpuVisibilityInput& in, GpuVisibilityOutput& out);
  123. /// Perform simple distance-based visibility testing.
  124. /// @note Not thread-safe.
  125. void populateRenderGraph(DistanceGpuVisibilityInput& in, GpuVisibilityOutput& out)
  126. {
  127. populateRenderGraphInternal(true, in, out);
  128. }
  129. private:
  130. ShaderProgramResourcePtr m_1stStageProg;
  131. Array5d<ShaderProgramPtr, 2, 2, 2, 2, 2> m_frustumGrProgs;
  132. Array4d<ShaderProgramPtr, 2, 2, 2, 2> m_distGrProgs;
  133. ShaderProgramResourcePtr m_2ndStageProg;
  134. ShaderProgramPtr m_gatherGrProg;
  135. Array4d<ShaderProgramPtr, 2, 2, 2, 2> m_meshletGrProgs;
  136. class
  137. {
  138. public:
  139. class
  140. {
  141. public:
  142. BufferView m_visibleRenderables;
  143. BufferView m_visibleMeshlets;
  144. } m_stage1;
  145. class
  146. {
  147. public:
  148. BufferView m_perDraw;
  149. BufferView m_drawIndexedIndirectArgs;
  150. } m_stage2Legacy;
  151. class
  152. {
  153. public:
  154. BufferView m_meshletInstances;
  155. BufferView m_meshletsFailedHzb;
  156. } m_stage2Meshlet;
  157. class
  158. {
  159. public:
  160. BufferView m_meshletInstances;
  161. } m_stage3;
  162. U64 m_frameIdx = kMaxU64;
  163. BufferHandle m_dep;
  164. } m_persistentMemory;
  165. MultiframeReadbackToken m_outOfMemoryReadback;
  166. BufferView m_outOfMemoryReadbackBuffer;
  167. void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
  168. };
  169. class GpuVisibilityNonRenderablesInput
  170. {
  171. public:
  172. CString m_passesName;
  173. GpuSceneNonRenderableObjectType m_objectType = GpuSceneNonRenderableObjectType::kCount;
  174. Mat4 m_viewProjectionMat;
  175. RenderGraphBuilder* m_rgraph = nullptr;
  176. const RenderTargetHandle* m_hzbRt = nullptr; // Optional.
  177. BufferView m_cpuFeedbackBuffer; // Optional.
  178. };
  179. class GpuVisibilityNonRenderablesOutput
  180. {
  181. public:
  182. BufferHandle m_visiblesBufferHandle; // Buffer handle holding the visible objects. Used for tracking. No need to track all buffers.
  183. BufferView m_visiblesBuffer;
  184. };
  185. // GPU visibility of lights, probes etc.
  186. class GpuVisibilityNonRenderables : public RendererObject
  187. {
  188. public:
  189. Error init();
  190. void populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out);
  191. private:
  192. ShaderProgramResourcePtr m_prog;
  193. Array3d<ShaderProgramPtr, 2, U32(GpuSceneNonRenderableObjectType::kCount), 2> m_grProgs;
  194. static constexpr U32 kInitialCounterArraySize = 32;
  195. BufferHandle m_counterBufferZeroingHandle;
  196. BufferPtr m_counterBuffer; // A buffer containing multiple counters for atomic operations.
  197. U64 m_lastFrameIdx = kMaxU64;
  198. U32 m_counterBufferOffset = 0;
  199. };
  200. class GpuVisibilityAccelerationStructuresInput
  201. {
  202. public:
  203. CString m_passesName;
  204. Vec3 m_lodReferencePoint = Vec3(kMaxF32);
  205. Array<F32, kMaxLodCount - 1> m_lodDistances = {};
  206. Vec3 m_pointOfTest = Vec3(kMaxF32);
  207. F32 m_testRadius = kMaxF32;
  208. Mat4 m_viewProjectionMatrix;
  209. RenderGraphBuilder* m_rgraph = nullptr;
  210. void validate() const
  211. {
  212. ANKI_ASSERT(m_passesName.getLength() > 0);
  213. ANKI_ASSERT(m_lodReferencePoint.x != kMaxF32);
  214. ANKI_ASSERT(m_lodReferencePoint == m_pointOfTest && "For now these should be the same");
  215. ANKI_ASSERT(m_testRadius != kMaxF32);
  216. ANKI_ASSERT(m_viewProjectionMatrix != Mat4());
  217. ANKI_ASSERT(m_rgraph);
  218. }
  219. };
  220. class GpuVisibilityAccelerationStructuresOutput
  221. {
  222. public:
  223. BufferHandle m_dependency; // Some handle to track dependencies. No need to track every buffer.
  224. BufferView m_instancesBuffer; // Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
  225. BufferView m_renderablesBuffer; // AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of indices to renderables.
  226. BufferView m_buildSbtIndirectArgsBuffer; // The DispatchIndirectArgs for the SBT dispatches.
  227. };
  228. // Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.
  229. class GpuVisibilityAccelerationStructures : public RendererObject
  230. {
  231. public:
  232. Error init();
  233. void pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in, GpuVisibilityAccelerationStructuresOutput& out);
  234. private:
  235. ShaderProgramResourcePtr m_visibilityProg;
  236. ShaderProgramPtr m_visibilityGrProg;
  237. ShaderProgramPtr m_zeroRemainingInstancesGrProg;
  238. BufferPtr m_counterBuffer; // A buffer containing multiple counters for atomic operations.
  239. #if ANKI_ASSERTIONS_ENABLED
  240. U64 m_lastFrameIdx = kMaxU64;
  241. #endif
  242. };
  243. class GpuVisibilityLocalLightsInput
  244. {
  245. public:
  246. UVec3 m_cellCounts;
  247. Vec3 m_cellSize;
  248. Vec3 m_cameraPosition;
  249. Vec3 m_lookDirection;
  250. U32 m_lightIndexListSize = 0; // The number of light indices to store.
  251. CString m_passesName = "GpuVisibilityLocalLights";
  252. RenderGraphBuilder* m_rgraph = nullptr;
  253. };
  254. class GpuVisibilityLocalLightsOutput
  255. {
  256. public:
  257. BufferHandle m_dependency; // Some handle to track dependencies. No need to track every buffer.
  258. BufferView m_lightIndexOffsetsPerCellBuffer; // One offset to the m_lightIndexBuffer. One offset per cell.
  259. BufferView m_lightIndexCountsPerCellBuffer; // Number of lights per cell.
  260. BufferView m_lightIndexListBuffer; // Contains indexes to the GPU scene lights array.
  261. // The volume of the grid.
  262. Vec3 m_lightGridMin;
  263. Vec3 m_lightGridMax;
  264. };
  265. // Gathers the local lights around the camera to a grid.
  266. class GpuVisibilityLocalLights : public RendererObject
  267. {
  268. public:
  269. Error init();
  270. void populateRenderGraph(GpuVisibilityLocalLightsInput& in, GpuVisibilityLocalLightsOutput& out);
  271. private:
  272. static constexpr F32 kForwardBias = 4.0f;
  273. ShaderProgramResourcePtr m_visibilityProg;
  274. ShaderProgramPtr m_setupGrProg;
  275. ShaderProgramPtr m_countGrProg;
  276. ShaderProgramPtr m_prefixSumGrProg;
  277. ShaderProgramPtr m_fillGrProg;
  278. };
  279. } // end namespace anki