GpuVisibility.cpp

// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Renderer/Utils/GpuVisibility.h>
#include <AnKi/Renderer/Renderer.h>
#include <AnKi/Scene/RenderStateBucket.h>
#include <AnKi/Scene/GpuSceneArray.h>
#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
#include <AnKi/Collision/Functions.h>
#include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
#include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
#include <AnKi/Core/StatsSet.h>
#include <AnKi/Core/CVarSet.h>

namespace anki {

constexpr U32 kMaxVisibleObjects = 30 * 1024;
constexpr U32 kMaxVisiblePrimitives = 40'000'000;

constexpr U32 kMaxVisibleMeshlets = kMaxVisiblePrimitives / kMaxPrimitivesPerMeshlet;
constexpr PtrSize kMaxMeshletMemory = kMaxVisibleMeshlets * sizeof(GpuSceneMeshletInstance);

constexpr U32 kVisibleMaxMeshletGroups = max(kMaxVisibleObjects, (kMaxVisibleMeshlets + kMeshletGroupSize - 1) / kMeshletGroupSize);
constexpr PtrSize kMaxMeshletGroupMemory = kVisibleMaxMeshletGroups * sizeof(GpuSceneMeshletGroupInstance);
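
// Budget arithmetic sketch (hypothetical values, assuming kMaxPrimitivesPerMeshlet == 128 and
// kMeshletGroupSize == 32; check the engine headers for the real constants): kMaxVisibleMeshlets =
// 40'000'000 / 128 = 312'500, and ceil(312'500 / 32) = 9'766 groups, which is below kMaxVisibleObjects
// (30'720), so the max() above keeps the group budget at the object budget.
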
static NumericCVar<PtrSize> g_maxMeshletMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletMemoryPerTest", kMaxMeshletMemory, 1_KB, 100_MB,
	"Max memory that will be allocated per GPU occlusion test for storing meshlets");

static NumericCVar<PtrSize> g_maxMeshletGroupMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletGroupMemoryPerTest", kMaxMeshletGroupMemory, 1_KB,
	100_MB, "Max memory that will be allocated per GPU occlusion test for storing meshlet groups");

static StatCounter g_gpuVisMemoryAllocatedStatVar(StatCategory::kRenderer, "GPU visibility mem",
	StatFlag::kBytes | StatFlag::kMainThreadUpdates | StatFlag::kZeroEveryFrame);

static BufferOffsetRange allocateTransientGpuMem(PtrSize size)
{
	BufferOffsetRange out = {};
	if(size)
	{
		g_gpuVisMemoryAllocatedStatVar.increment(size);
		out = GpuVisibleTransientMemoryPool::getSingleton().allocate(size);
	}

	return out;
}
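
// Note that the helper above hands out memory from the frame-transient pool and the stat counter is
// declared with StatFlag::kZeroEveryFrame, so the "GPU visibility mem" stat reads as bytes allocated
// this frame, not as a running total.
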
Error GpuVisibility::init()
{
	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
		{
			for(MutatorValue genHash = 0; genHash < 2; ++genHash)
			{
				for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
				{
					ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
												 {{"HZB_TEST", hzb},
												  {"DISTANCE_TEST", 0},
												  {"GATHER_AABBS", gatherAabbs},
												  {"HASH_VISIBLES", genHash},
												  {"GATHER_TYPE", gatherType + 1}},
												 m_prog, m_frustumGrProgs[hzb][gatherAabbs][genHash][gatherType]));
				}
			}
		}
	}

	for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
	{
		for(MutatorValue genHash = 0; genHash < 2; ++genHash)
		{
			for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
											 {{"HZB_TEST", 0},
											  {"DISTANCE_TEST", 1},
											  {"GATHER_AABBS", gatherAabbs},
											  {"HASH_VISIBLES", genHash},
											  {"GATHER_TYPE", gatherType + 1}},
											 m_prog, m_distGrProgs[gatherAabbs][genHash][gatherType]));
			}
		}
	}

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}}, m_meshletCullingProg,
									 m_meshletCullingGrProgs[hzb]));
	}

	return Error::kNone;
}
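
// Variant count for the loops above: 2 (HZB_TEST) * 2 (GATHER_AABBS) * 2 (HASH_VISIBLES) * 3
// (GATHER_TYPE) = 24 frustum programs, plus 2 * 2 * 3 = 12 distance programs and 2 meshlet-culling
// programs, i.e. 38 pipeline variants built up front.
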
void GpuVisibility::computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket)
{
	ANKI_ASSERT(perBucket.getSize() == RenderStateBucketContainer::getSingleton().getBucketCount(t));

	U32 totalMeshletCount = 0;
	U32 totalMeshletGroupCount = 0;
	U32 totalRenderableCount = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		if(meshletCount)
		{
			totalMeshletCount += meshletCount;
			totalMeshletGroupCount += meshletGroupCount;
		}
		else
		{
			totalRenderableCount += userCount;
		}
	});

	const U32 maxVisibleMeshlets = min(U32(g_maxMeshletMemoryPerTest.get() / sizeof(GpuSceneMeshletInstance)), totalMeshletCount);
	const U32 maxVisibleMeshletGroups = min(U32(g_maxMeshletGroupMemoryPerTest.get() / sizeof(GpuSceneMeshletGroupInstance)), totalMeshletGroupCount);
	const U32 maxVisibleRenderables = min(kMaxVisibleObjects, totalRenderableCount);

	total = {};
	U32 bucketCount = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		MemoryRequirements& bucket = perBucket[bucketCount++];

		// Use U64 because some of the intermediate products overflow a U32
		if(meshletCount)
		{
			ANKI_ASSERT(meshletGroupCount > 0);
			ANKI_ASSERT(totalMeshletCount > 0);
			bucket.m_meshletInstanceCount = max(1u, U32(U64(meshletCount) * maxVisibleMeshlets / totalMeshletCount));
			ANKI_ASSERT(totalMeshletGroupCount > 0);
			bucket.m_meshletGroupInstanceCount = max(1u, U32(U64(meshletGroupCount) * maxVisibleMeshletGroups / totalMeshletGroupCount));
		}
		else if(userCount > 0)
		{
			ANKI_ASSERT(totalRenderableCount > 0);
			bucket.m_renderableInstanceCount = max(1u, U32(U64(userCount) * maxVisibleRenderables / totalRenderableCount));
		}

		total.m_meshletInstanceCount += bucket.m_meshletInstanceCount;
		total.m_meshletGroupInstanceCount += bucket.m_meshletGroupInstanceCount;
		total.m_renderableInstanceCount += bucket.m_renderableInstanceCount;
	});
}
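
// Worked example of the proportional split above, using hypothetical numbers: if totalMeshletCount =
// 1'000'000 and the memory CVar caps maxVisibleMeshlets at 312'500, a bucket holding 100'000 meshlets
// gets max(1, U64(100'000) * 312'500 / 1'000'000) = 31'250 meshlet instances. The max(1u, ...) keeps
// every non-empty bucket addressable even when its share rounds down to zero.
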
void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
{
	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);

	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
	{
		// Early exit. Nothing is visible, give the caller an empty output
		out = {};
		return;
	}

	RenderGraphDescription& rgraph = *in.m_rgraph;

	class DistanceTestData
	{
	public:
		Vec3 m_pointOfTest;
		F32 m_testRadius;
	};

	class FrustumTestData
	{
	public:
		RenderTargetHandle m_hzbRt;
		Mat4 m_viewProjMat;
		UVec2 m_finalRenderTargetSize;
	};

	FrustumTestData* frustumTestData = nullptr;
	DistanceTestData* distTestData = nullptr;
	if(distanceBased)
	{
		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
		distTestData->m_pointOfTest = din.m_pointOfTest;
		distTestData->m_testRadius = din.m_testRadius;
	}
	else
	{
		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
		frustumTestData->m_finalRenderTargetSize = fin.m_viewportSize;
	}

	// Allocate memory
	const Bool firstCallInFrame = m_runCtx.m_frameIdx != getRenderer().getFrameCount();
	if(firstCallInFrame)
	{
		// First call in the frame. Initialize the run context
		m_runCtx.m_frameIdx = getRenderer().getFrameCount();
		m_runCtx.m_populateRenderGraphCallCount = 0;
		m_runCtx.m_populateRenderGraphMeshletRenderingCallCount = 0;

		// Calculate the memory requirements
		MemoryRequirements maxTotalMemReq;
		WeakArray<MemoryRequirements> bucketsMemReqs;
		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
		{
			const U32 tBucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(t);
			if(tBucketCount == 0)
			{
				continue;
			}

			newArray<MemoryRequirements>(getRenderer().getFrameMemoryPool(), tBucketCount, bucketsMemReqs);
			computeGpuVisibilityMemoryRequirements(t, m_runCtx.m_totalMemRequirements[t], bucketsMemReqs);
			maxTotalMemReq = maxTotalMemReq.max(m_runCtx.m_totalMemRequirements[t]);

			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_renderableInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletGroupInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletInstanceRanges[t]);

			U32 renderablesFirstInstance = 0, groupsFirstInstance = 0, meshletsFirstInstance = 0;
			for(U32 i = 0; i < tBucketCount; ++i)
			{
				m_runCtx.m_renderableInstanceRanges[t][i].m_firstInstance = renderablesFirstInstance;
				m_runCtx.m_renderableInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_renderableInstanceCount;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_firstInstance = groupsFirstInstance;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletGroupInstanceCount;
				m_runCtx.m_meshletInstanceRanges[t][i].m_firstInstance = meshletsFirstInstance;
				m_runCtx.m_meshletInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletInstanceCount;

				renderablesFirstInstance += bucketsMemReqs[i].m_renderableInstanceCount;
				groupsFirstInstance += bucketsMemReqs[i].m_meshletGroupInstanceCount;
				meshletsFirstInstance += bucketsMemReqs[i].m_meshletInstanceCount;
			}
		}

		// Allocate the persistent memory
		for(PersistentMemory& mem : m_runCtx.m_persistentMem)
		{
			mem = {};
			mem.m_drawIndexedIndirectArgsBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs));
			mem.m_renderableInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance));
			mem.m_meshletGroupsInstancesBuffer =
				allocateTransientGpuMem(maxTotalMemReq.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance));

			mem.m_bufferDepedency = rgraph.importBuffer(BufferUsageBit::kNone, (mem.m_drawIndexedIndirectArgsBuffer.m_buffer)
																				   ? mem.m_drawIndexedIndirectArgsBuffer
																				   : mem.m_meshletGroupsInstancesBuffer);
		}

		if(getRenderer().runSoftwareMeshletRendering())
		{
			// Allocate as well, because a meshlet-rendering pass will need it later in the frame
			for(PersistentMemoryMeshletRendering& mem : m_runCtx.m_persistentMeshletRenderingMem)
			{
				mem = {};
				mem.m_meshletInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));
				mem.m_bufferDepedency = rgraph.importBuffer(BufferUsageBit::kNone, mem.m_meshletInstancesBuffer);
			}
		}
	}

	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
	const MemoryRequirements& req = m_runCtx.m_totalMemRequirements[in.m_technique];
	const PersistentMemory& mem = m_runCtx.m_persistentMem[m_runCtx.m_populateRenderGraphCallCount++ % m_runCtx.m_persistentMem.getSize()];

	out.m_legacy.m_drawIndexedIndirectArgsBuffer = mem.m_drawIndexedIndirectArgsBuffer;
	out.m_legacy.m_drawIndexedIndirectArgsBuffer.m_range = req.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs);
	out.m_legacy.m_renderableInstancesBuffer = mem.m_renderableInstancesBuffer;
	out.m_legacy.m_renderableInstancesBuffer.m_range = req.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance);
	out.m_legacy.m_mdiDrawCountsBuffer = allocateTransientGpuMem(sizeof(U32) * bucketCount);

	out.m_mesh.m_meshletGroupInstancesBuffer = mem.m_meshletGroupsInstancesBuffer;
	out.m_mesh.m_meshletGroupInstancesBuffer.m_range = req.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance);
	out.m_mesh.m_taskShaderIndirectArgsBuffer = allocateTransientGpuMem(bucketCount * sizeof(DispatchIndirectArgs));

	if(in.m_hashVisibles)
	{
		out.m_visiblesHashBuffer = allocateTransientGpuMem(sizeof(GpuVisibilityHash));
	}

	if(in.m_gatherAabbIndices)
	{
		out.m_visibleAaabbIndicesBuffer =
			allocateTransientGpuMem((RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) + 1) * sizeof(U32));
	}

	// Set the instance sub-ranges
	out.m_legacy.m_bucketRenderableInstanceRanges = m_runCtx.m_renderableInstanceRanges[in.m_technique];
	out.m_mesh.m_bucketMeshletGroupInstanceRanges = m_runCtx.m_meshletGroupInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle zeroStuffDependency = rgraph.importBuffer(BufferUsageBit::kNone, out.m_legacy.m_mdiDrawCountsBuffer);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kTransferDestination);

		pass.setWork([out](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(out.m_legacy.m_mdiDrawCountsBuffer, 0);
			cmdb.popDebugMarker();

			if(out.m_mesh.m_taskShaderIndirectArgsBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Task shader indirect args", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_mesh.m_taskShaderIndirectArgsBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visiblesHashBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Visibles hash", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visiblesHashBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visibleAaabbIndicesBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Visible AABB indices", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visibleAaabbIndicesBuffer.m_buffer, out.m_visibleAaabbIndicesBuffer.m_offset, sizeof(U32), 0);
				cmdb.popDebugMarker();
			}
		});
	}
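
	// Note on the zeroing pass above: the visibility shader is expected to bump the MDI counts, task
	// shader args and hash atomically (an assumption about GpuVisibility.ankiprogbin's behavior), so
	// every counter has to start the frame at zero. Only the first U32 (the count) of the AABB-index
	// buffer needs clearing, since the indices themselves get overwritten.
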
	// Set the out dependency. Use one of the big buffers
	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavComputeWrite);

	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
	{
		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
				  technique = in.m_technique, out](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const Bool gatherAabbIndices = out.m_visibleAaabbIndicesBuffer.m_buffer != nullptr;
		const Bool genHash = out.m_visiblesHashBuffer.m_buffer != nullptr;

		U32 gatherType = 0;
		if(out.m_mesh.m_meshletGroupInstancesBuffer.m_range > 0)
		{
			gatherType |= 2u;
		}

		if(out.m_legacy.m_renderableInstancesBuffer.m_range > 0)
		{
			gatherType |= 1u;
		}
		ANKI_ASSERT(gatherType != 0);
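
		// gatherType is a 2-bit mask (bit 0: legacy MDI draws, bit 1: meshlet groups) that maps straight
		// onto the GATHER_TYPE mutator, which init() compiled with values 1..3; hence the "- 1u" when
		// indexing the program arrays below.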
		if(frustumTestData)
		{
			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash][gatherType - 1u].get());
		}
		else
		{
			cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash][gatherType - 1u].get());
		}

		BufferOffsetRange aabbsBuffer;
		U32 aabbCount = 0;
		switch(technique)
		{
		case RenderingTechnique::kGBuffer:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kDepth:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kForward:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
			break;
		default:
			ANKI_ASSERT(0);
		}

		cmdb.bindUavBuffer(0, 0, aabbsBuffer);
		cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 3, GpuSceneBuffer::getSingleton().getBufferOffsetRange());

		if(gatherType & 1u)
		{
			cmdb.bindUavBuffer(0, 4, out.m_legacy.m_renderableInstancesBuffer);
			cmdb.bindUavBuffer(0, 5, out.m_legacy.m_drawIndexedIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 6, out.m_legacy.m_mdiDrawCountsBuffer);
		}

		if(gatherType & 2u)
		{
			cmdb.bindUavBuffer(0, 7, out.m_mesh.m_taskShaderIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 8, out.m_mesh.m_meshletGroupInstancesBuffer);
		}

		const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
		UVec2* instanceRanges = allocateAndBindUav<UVec2>(cmdb, 0, 9, bucketCount);
		for(U32 i = 0; i < bucketCount; ++i)
		{
			const Bool legacyBucket = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount > 0;
			if(legacyBucket)
			{
				instanceRanges[i].x() = m_runCtx.m_renderableInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount;
			}
			else
			{
				instanceRanges[i].x() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_instanceCount;
			}
		}

		if(frustumTestData)
		{
			FrustumGpuVisibilityConstants* unis = allocateAndBindConstants<FrustumGpuVisibilityConstants>(cmdb, 0, 10);

			Array<Plane, 6> planes;
			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			ANKI_ASSERT(kMaxLodCount == 3);
			unis->m_maxLodDistances[0] = lodDistances[0];
			unis->m_maxLodDistances[1] = lodDistances[1];
			unis->m_maxLodDistances[2] = kMaxF32;
			unis->m_maxLodDistances[3] = kMaxF32;

			unis->m_lodReferencePoint = lodReferencePoint;
			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;
			unis->m_finalRenderTargetSize = Vec2(frustumTestData->m_finalRenderTargetSize);

			if(frustumTestData->m_hzbRt.isValid())
			{
				rpass.bindColorTexture(0, 11, frustumTestData->m_hzbRt);
				cmdb.bindSampler(0, 12, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}
		}
		else
		{
			DistanceGpuVisibilityConstants unis;
			unis.m_pointOfTest = distTestData->m_pointOfTest;
			unis.m_testRadius = distTestData->m_testRadius;

			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			unis.m_lodReferencePoint = lodReferencePoint;
			cmdb.setPushConstants(&unis, sizeof(unis));
		}

		if(gatherAabbIndices)
		{
			cmdb.bindUavBuffer(0, 13, out.m_visibleAaabbIndicesBuffer);
		}

		if(genHash)
		{
			cmdb.bindUavBuffer(0, 14, out.m_visiblesHashBuffer);
		}

		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
	});
}
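
// Hypothetical call site for the function above (a sketch; it assumes GpuVisibility.h exposes a thin
// populateRenderGraph(FrustumGpuVisibilityInput&, GpuVisibilityOutput&) wrapper that forwards here with
// distanceBased == false):
//
//   FrustumGpuVisibilityInput visIn;
//   visIn.m_passesName = "GBuffer";
//   visIn.m_technique = RenderingTechnique::kGBuffer;
//   visIn.m_rgraph = &rgraph;
//   visIn.m_viewProjectionMatrix = viewProjMat;
//   visIn.m_viewportSize = UVec2(1920, 1080);
//   visIn.m_lodReferencePoint = cameraWorldPos;
//   GpuVisibilityOutput visOut;
//   getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
//   // visOut.m_legacy / visOut.m_mesh now reference the GPU-filled instance buffers.
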
void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
{
	RenderGraphDescription& rgraph = *in.m_rgraph;

	if(in.m_taskShaderIndirectArgsBuffer.m_buffer == nullptr) [[unlikely]]
	{
		// Early exit
		return;
	}

	// Allocate memory
	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);

	const PersistentMemoryMeshletRendering& mem =
		m_runCtx.m_persistentMeshletRenderingMem[m_runCtx.m_populateRenderGraphMeshletRenderingCallCount++
												 % m_runCtx.m_persistentMeshletRenderingMem.getSize()];

	out.m_drawIndirectArgsBuffer = allocateTransientGpuMem(sizeof(DrawIndirectArgs) * bucketCount);
	out.m_meshletInstancesBuffer = mem.m_meshletInstancesBuffer;
	out.m_meshletInstancesBuffer.m_range = m_runCtx.m_totalMemRequirements[in.m_technique].m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance);
	out.m_bucketMeshletInstanceRanges = m_runCtx.m_meshletInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle indirectArgsDep = rgraph.importBuffer(BufferUsageBit::kNone, out.m_drawIndirectArgsBuffer);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kTransferDestination);

		pass.setWork([drawIndirectArgsBuffer = out.m_drawIndirectArgsBuffer](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;
			cmdb.pushDebugMarker("Draw indirect args", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(drawIndirectArgsBuffer, 0);
			cmdb.popDebugMarker();
		});
	}

	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);

	pass.setWork([this, hzbRt = in.m_hzbRt, viewProjMat = in.m_viewProjectionMatrix, camTrf = in.m_cameraTransform, viewportSize = in.m_viewportSize,
				  computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out, meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const U32 bucketCount = out.m_bucketMeshletInstanceRanges.getSize();
		for(U32 i = 0; i < bucketCount; ++i)
		{
			if(out.m_bucketMeshletInstanceRanges[i].m_instanceCount == 0)
			{
				continue;
			}

			const Bool hasHzb = hzbRt.isValid();
			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb].get());

			cmdb.bindUavBuffer(0, 0, meshletGroupInstancesBuffer);
			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 3, GpuSceneBuffer::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 4, UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 5, out.m_drawIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 6, out.m_meshletInstancesBuffer);

			if(hasHzb)
			{
				rpass.bindColorTexture(0, 7, hzbRt);
				cmdb.bindSampler(0, 8, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}

			class Consts
			{
			public:
				Mat4 m_viewProjectionMatrix;
				Vec3 m_cameraPos;
				U32 m_firstDrawArg;
				Vec2 m_viewportSizef;
				U32 m_firstMeshletGroup;
				U32 m_firstMeshlet;
				U32 m_meshletCount;
				U32 m_padding1;
				U32 m_padding2;
				U32 m_padding3;
			} consts;

			consts.m_viewProjectionMatrix = viewProjMat;
			consts.m_cameraPos = camTrf.getTranslationPart().xyz();
			consts.m_firstDrawArg = i;
			consts.m_viewportSizef = Vec2(viewportSize);
			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();
			cmdb.setPushConstants(&consts, sizeof(consts));

			cmdb.dispatchComputeIndirect(computeIndirectArgs.m_buffer, computeIndirectArgs.m_offset + i * sizeof(DispatchIndirectArgs));
		}
	});
}
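
// Note that each per-bucket dispatch above is sized indirectly: it reads the DispatchIndirectArgs slot
// that the first visibility stage presumably accumulated into m_taskShaderIndirectArgsBuffer, so
// meshlet culling scales with what actually survived, without a CPU readback.
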
Error GpuVisibilityNonRenderables::init()
{
	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
		{
			for(MutatorValue cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin",
											 {{"HZB_TEST", hzb}, {"OBJECT_TYPE", MutatorValue(type)}, {"CPU_FEEDBACK", cpuFeedback}}, m_prog,
											 m_grProgs[hzb][type][cpuFeedback]));
			}
		}
	}

	return Error::kNone;
}
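
// Assuming the five object types handled in the switch below cover the whole of
// GpuSceneNonRenderableObjectType, init() builds 2 (HZB_TEST) * 5 (OBJECT_TYPE) * 2 (CPU_FEEDBACK) = 20
// program variants.
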
void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
{
	ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
	RenderGraphDescription& rgraph = *in.m_rgraph;

	U32 objCount = 0;
	switch(in.m_objectType)
	{
	case GpuSceneNonRenderableObjectType::kLight:
		objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kDecal:
		objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
		objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
		objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kReflectionProbe:
		objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	if(objCount == 0)
	{
		U32* count;
		out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), count);
		*count = 0;
		out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);
		return;
	}

	if(in.m_cpuFeedbackBuffer.m_buffer)
	{
		ANKI_ASSERT(in.m_cpuFeedbackBuffer.m_range == sizeof(U32) * (objCount * 2 + 1));
	}

	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
	if(firstRunInFrame)
	{
		// First run in this frame, do some bookkeeping
		m_lastFrameIdx = getRenderer().getFrameCount();
		m_counterBufferOffset = 0;
		m_counterBufferZeroingHandle = {};
	}

	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visible object count and 1 for objects with feedback
	const U32 counterBufferElementSize =
		getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_uavBufferBindOffsetAlignment, U32(kCountersPerDispatch * sizeof(U32)));

	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
	{
		// Counter buffer not created or not big enough, create a new one
		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
		buffInit.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);

		m_counterBufferZeroingHandle = rgraph.importBuffer(m_counterBuffer.get(), buffInit.m_usage, 0, kMaxPtrSize);

		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GpuVisibilityNonRenderablesClearCounterBuffer");
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);
		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
			rgraph.m_commandBuffer->fillBuffer(counterBuffer.get(), 0, kMaxPtrSize, 0);
		});

		m_counterBufferOffset = 0;
	}
	else if(!firstRunInFrame)
	{
		m_counterBufferOffset += counterBufferElementSize;
	}
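
	// Slot math for the counters above (assuming m_uavBufferBindOffsetAlignment == 256, a hypothetical
	// but common value): the 3 U32 counters need 12 bytes, which getAlignedRoundUp() pads to a 256-byte
	// element, so every dispatch in the frame binds its own correctly aligned, non-overlapping sub-range
	// of the counter buffer.
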
	// Allocate memory for the result
	out.m_visiblesBuffer = allocateTransientGpuMem((objCount + 1) * sizeof(U32));
	out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);

	// Create the renderpass
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);
	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kUavComputeWrite);

	if(in.m_hzbRt)
	{
		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
	{
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kUavComputeRead | BufferUsageBit::kUavComputeWrite);
	}
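
	// Note: the lambda below always binds m_grProgs[0][...], i.e. the HZB_TEST == 0 variant, even when
	// in.m_hzbRt was provided; the texture dependency above is declared but the HZB-testing program is
	// never selected here.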
	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
				  visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
				  objCount](RenderPassWorkContext& rgraph) {
		CommandBuffer& cmdb = *rgraph.m_commandBuffer;

		const Bool needsFeedback = feedbackBuffer.m_buffer != nullptr;

		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());

		BufferOffsetRange objBuffer;
		switch(objType)
		{
		case GpuSceneNonRenderableObjectType::kLight:
			objBuffer = GpuSceneArrays::Light::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kDecal:
			objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kFogDensityVolume:
			objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
			objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kReflectionProbe:
			objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferOffsetRange();
			break;
		default:
			ANKI_ASSERT(0);
		}
		cmdb.bindUavBuffer(0, 0, objBuffer);

		GpuVisibilityNonRenderableConstants unis;
		Array<Plane, 6> planes;
		extractClipPlanes(viewProjectionMat, planes);
		for(U32 i = 0; i < 6; ++i)
		{
			unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
		}
		cmdb.setPushConstants(&unis, sizeof(unis));

		rgraph.bindUavBuffer(0, 1, visibleIndicesBuffHandle);
		cmdb.bindUavBuffer(0, 2, counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch);

		if(needsFeedback)
		{
			cmdb.bindUavBuffer(0, 3, feedbackBuffer.m_buffer, feedbackBuffer.m_offset, feedbackBuffer.m_range);
		}

		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
	});
}

Error GpuVisibilityAccelerationStructures::init()
{
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", m_visibilityProg, m_visibilityGrProg));
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprogbin", m_zeroRemainingInstancesProg,
								 m_zeroRemainingInstancesGrProg));

	BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
	inf.m_size = sizeof(U32) * 2;
	inf.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
	m_counterBuffer = GrManager::getSingleton().newBuffer(inf);

	zeroBuffer(m_counterBuffer.get());

	return Error::kNone;
}

void GpuVisibilityAccelerationStructures::populateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
															  GpuVisibilityAccelerationStructuresOutput& out)
{
	in.validate();
	RenderGraphDescription& rgraph = *in.m_rgraph;

#if ANKI_ASSERTIONS_ENABLED
	ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
	m_lastFrameIdx = getRenderer().getFrameCount();
#endif

	// Allocate the transient buffers
	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();

	out.m_instancesBuffer = allocateTransientGpuMem(aabbCount * sizeof(AccelerationStructureInstance));
	out.m_someBufferHandle = rgraph.importBuffer(BufferUsageBit::kUavComputeWrite, out.m_instancesBuffer);

	out.m_renderableIndicesBuffer = allocateTransientGpuMem((aabbCount + 1) * sizeof(U32));

	const BufferOffsetRange zeroInstancesDispatchArgsBuff = allocateTransientGpuMem(sizeof(DispatchIndirectArgs));

	// Create vis pass
	{
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);
		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_visibilityGrProg.get());

			GpuVisibilityAccelerationStructuresConstants unis;
			Array<Plane, 6> planes;
			extractClipPlanes(viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			unis.m_pointOfTest = pointOfTest;
			unis.m_testRadius = testRadius;

			ANKI_ASSERT(kMaxLodCount == 3);
			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			cmdb.setPushConstants(&unis, sizeof(unis));

			cmdb.bindUavBuffer(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 3, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
			cmdb.bindUavBuffer(0, 4, instancesBuff);
			cmdb.bindUavBuffer(0, 5, indicesBuff);
			cmdb.bindUavBuffer(0, 6, m_counterBuffer.get(), 0, sizeof(U32) * 2);
			cmdb.bindUavBuffer(0, 7, zeroInstancesDispatchArgsBuff);

			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
		});
	}

	// Zero remaining instances
	{
		Array<Char, 64> passName;
		snprintf(passName.getBegin(), sizeof(passName), "%s: Zero remaining instances", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
					  indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());
			cmdb.bindUavBuffer(0, 0, indicesBuff);
			cmdb.bindUavBuffer(0, 1, instancesBuff);
			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff.m_buffer, zeroInstancesDispatchArgsBuff.m_offset);
		});
	}
}
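
// The second pass exists because a TLAS build consumes a fixed-size instance array: the visibility
// shader is assumed to append surviving instances and write a DispatchIndirectArgs into
// zeroInstancesDispatchArgsBuff, sized so that the zero-remaining program can null out every instance
// slot past the visible count.
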
} // end namespace anki