// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Renderer/Utils/GpuVisibility.h>
#include <AnKi/Renderer/Renderer.h>
#include <AnKi/Scene/RenderStateBucket.h>
#include <AnKi/Scene/GpuSceneArray.h>
#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
#include <AnKi/Collision/Functions.h>
#include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
#include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
#include <AnKi/Core/StatsSet.h>
#include <AnKi/Core/CVarSet.h>

namespace anki {

constexpr U32 kMaxVisibleObjects = 30 * 1024;

constexpr U32 kMaxVisiblePrimitives = 40'000'000;
constexpr U32 kMaxVisibleMeshlets = kMaxVisiblePrimitives / kMaxPrimitivesPerMeshlet;
constexpr PtrSize kMaxMeshletMemory = kMaxVisibleMeshlets * sizeof(GpuSceneMeshletInstance);

constexpr U32 kVisibleMaxMeshletGroups = max(kMaxVisibleObjects, (kMaxVisibleMeshlets + kMeshletGroupSize - 1) / kMeshletGroupSize);
constexpr PtrSize kMaxMeshletGroupMemory = kVisibleMaxMeshletGroups * sizeof(GpuSceneMeshletGroupInstance);
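
// Worked example of the budget derivation above (illustrative numbers, not the engine's actual values of
// kMaxPrimitivesPerMeshlet or the instance struct sizes, which are defined elsewhere): with
// kMaxPrimitivesPerMeshlet == 128, kMaxVisibleMeshlets would be 40'000'000 / 128 = 312'500 meshlets, and with a
// 32-byte GpuSceneMeshletInstance the default kMaxMeshletMemory would come to roughly 10 MB per occlusion test.
// The CVars below let users override these defaults at runtime.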

static NumericCVar<PtrSize> g_maxMeshletMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletMemoryPerTest", kMaxMeshletMemory, 1_KB, 100_MB,
	"Max memory that will be allocated per GPU occlusion test for storing meshlets");

static NumericCVar<PtrSize> g_maxMeshletGroupMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletGroupMemoryPerTest", kMaxMeshletGroupMemory, 1_KB,
	100_MB, "Max memory that will be allocated per GPU occlusion test for storing meshlet groups");

static StatCounter g_gpuVisMemoryAllocatedStatVar(StatCategory::kRenderer, "GPU visibility mem",
	StatFlag::kBytes | StatFlag::kMainThreadUpdates | StatFlag::kZeroEveryFrame);

static BufferView allocateTransientGpuMem(PtrSize size)
{
	BufferView out = {};

	if(size)
	{
		g_gpuVisMemoryAllocatedStatVar.increment(size);
		out = GpuVisibleTransientMemoryPool::getSingleton().allocate(size);
	}

	return out;
}
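
// Usage note: allocations from the pool above are frame-transient, so callers re-allocate every frame, e.g.
//   BufferView counts = allocateTransientGpuMem(bucketCount * sizeof(U32)); // Valid until the frame ends
// A zero size is allowed and simply yields an invalid BufferView, which the rest of this file checks with isValid().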

Error GpuVisibility::init()
{
	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
		{
			for(MutatorValue genHash = 0; genHash < 2; ++genHash)
			{
				for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
				{
					ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
						{{"HZB_TEST", hzb},
						 {"DISTANCE_TEST", 0},
						 {"GATHER_AABBS", gatherAabbs},
						 {"HASH_VISIBLES", genHash},
						 {"GATHER_TYPE", gatherType + 1}},
						m_prog, m_frustumGrProgs[hzb][gatherAabbs][genHash][gatherType]));
				}
			}
		}
	}

	for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
	{
		for(MutatorValue genHash = 0; genHash < 2; ++genHash)
		{
			for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
					{{"HZB_TEST", 0},
					 {"DISTANCE_TEST", 1},
					 {"GATHER_AABBS", gatherAabbs},
					 {"HASH_VISIBLES", genHash},
					 {"GATHER_TYPE", gatherType + 1}},
					m_prog, m_distGrProgs[gatherAabbs][genHash][gatherType]));
			}
		}
	}

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(MutatorValue passthrough = 0; passthrough < 2; ++passthrough)
		{
			ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}, {"PASSTHROUGH", passthrough}},
				m_meshletCullingProg, m_meshletCullingGrProgs[hzb][passthrough]));
		}
	}

	return Error::kNone;
}
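
// The loops above pre-build every mutation of the visibility programs so that no shader compilation happens at render
// time. A sketch of how a variant gets picked later (it mirrors the indexing used further down in this file):
//   cmdb.bindShaderProgram(m_frustumGrProgs[hasHzb][gatherAabbs][genHash][gatherType - 1].get());
// Note that GATHER_TYPE 0 is never compiled: the mutator is always set to gatherType + 1 (values 1 to 3), hence the
// "- 1" when indexing the program arrays.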

void GpuVisibility::computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket)
{
	ANKI_ASSERT(perBucket.getSize() == RenderStateBucketContainer::getSingleton().getBucketCount(t));

	// Gather the totals of the technique
	U32 totalMeshletCount = 0;
	U32 totalMeshletGroupCount = 0;
	U32 totalRenderableCount = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		if(meshletCount)
		{
			totalMeshletCount += meshletCount;
			totalMeshletGroupCount += meshletGroupCount;
		}
		else
		{
			totalRenderableCount += userCount;
		}
	});

	// Clamp the totals to the per-test budgets
	const U32 maxVisibleMeshlets = min(U32(g_maxMeshletMemoryPerTest.get() / sizeof(GpuSceneMeshletInstance)), totalMeshletCount);
	const U32 maxVisibleMeshletGroups = min(U32(g_maxMeshletGroupMemoryPerTest.get() / sizeof(GpuSceneMeshletGroupInstance)), totalMeshletGroupCount);
	const U32 maxVisibleRenderables = min(kMaxVisibleObjects, totalRenderableCount);

	// Distribute the budgets to the buckets proportionally to their size
	total = {};
	U32 bucketCount = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		MemoryRequirements& bucket = perBucket[bucketCount++];

		// Use U64 because some of the intermediate products below would overflow 32 bits
		if(meshletCount)
		{
			ANKI_ASSERT(meshletGroupCount > 0);

			ANKI_ASSERT(totalMeshletCount > 0);
			bucket.m_meshletInstanceCount = max(1u, U32(U64(meshletCount) * maxVisibleMeshlets / totalMeshletCount));

			ANKI_ASSERT(totalMeshletGroupCount > 0);
			bucket.m_meshletGroupInstanceCount = max(1u, U32(U64(meshletGroupCount) * maxVisibleMeshletGroups / totalMeshletGroupCount));
		}
		else if(userCount > 0)
		{
			ANKI_ASSERT(totalRenderableCount > 0);
			bucket.m_renderableInstanceCount = max(1u, U32(U64(userCount) * maxVisibleRenderables / totalRenderableCount));
		}

		total.m_meshletInstanceCount += bucket.m_meshletInstanceCount;
		total.m_meshletGroupInstanceCount += bucket.m_meshletGroupInstanceCount;
		total.m_renderableInstanceCount += bucket.m_renderableInstanceCount;
	});
}
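
// Worked example of the proportional split above (illustrative numbers): say a technique has two meshlet buckets with
// 600'000 and 200'000 meshlets and the budget caps maxVisibleMeshlets at 400'000. The buckets then get
// 600'000 * 400'000 / 800'000 = 300'000 and 200'000 * 400'000 / 800'000 = 100'000 meshlet instances respectively, and
// the max(1u, ...) guard keeps a tiny bucket from rounding down to zero instances.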

void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
{
	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);

	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
	{
		// Early exit. Clear the output so callers see invalid buffers
		out = {};
		return;
	}

	RenderGraphDescription& rgraph = *in.m_rgraph;

	class DistanceTestData
	{
	public:
		Vec3 m_pointOfTest;
		F32 m_testRadius;
	};

	class FrustumTestData
	{
	public:
		RenderTargetHandle m_hzbRt;
		Mat4 m_viewProjMat;
		UVec2 m_finalRenderTargetSize;
	};

	FrustumTestData* frustumTestData = nullptr;
	DistanceTestData* distTestData = nullptr;

	if(distanceBased)
	{
		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
		distTestData->m_pointOfTest = din.m_pointOfTest;
		distTestData->m_testRadius = din.m_testRadius;
	}
	else
	{
		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
		frustumTestData->m_finalRenderTargetSize = fin.m_viewportSize;
	}

	// Allocate memory
	const Bool firstCallInFrame = m_runCtx.m_frameIdx != getRenderer().getFrameCount();
	if(firstCallInFrame)
	{
		// First call in frame. Init stuff
		m_runCtx.m_frameIdx = getRenderer().getFrameCount();
		m_runCtx.m_populateRenderGraphCallCount = 0;
		m_runCtx.m_populateRenderGraphMeshletRenderingCallCount = 0;

		// Calc memory requirements
		MemoryRequirements maxTotalMemReq;
		WeakArray<MemoryRequirements> bucketsMemReqs;
		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
		{
			const U32 tBucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(t);
			if(tBucketCount == 0)
			{
				continue;
			}

			newArray<MemoryRequirements>(getRenderer().getFrameMemoryPool(), tBucketCount, bucketsMemReqs);

			computeGpuVisibilityMemoryRequirements(t, m_runCtx.m_totalMemRequirements[t], bucketsMemReqs);

			maxTotalMemReq = maxTotalMemReq.max(m_runCtx.m_totalMemRequirements[t]);

			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_renderableInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletGroupInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletInstanceRanges[t]);

			U32 renderablesFirstInstance = 0, groupsFirstInstance = 0, meshletsFirstInstance = 0;
			for(U32 i = 0; i < tBucketCount; ++i)
			{
				m_runCtx.m_renderableInstanceRanges[t][i].m_firstInstance = renderablesFirstInstance;
				m_runCtx.m_renderableInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_renderableInstanceCount;

				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_firstInstance = groupsFirstInstance;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletGroupInstanceCount;

				m_runCtx.m_meshletInstanceRanges[t][i].m_firstInstance = meshletsFirstInstance;
				m_runCtx.m_meshletInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletInstanceCount;

				renderablesFirstInstance += bucketsMemReqs[i].m_renderableInstanceCount;
				groupsFirstInstance += bucketsMemReqs[i].m_meshletGroupInstanceCount;
				meshletsFirstInstance += bucketsMemReqs[i].m_meshletInstanceCount;
			}
		}

		// Allocate persistent memory
		for(PersistentMemory& mem : m_runCtx.m_persistentMem)
		{
			mem = {};

			mem.m_drawIndexedIndirectArgsBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs));
			mem.m_renderableInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance));
			mem.m_meshletGroupsInstancesBuffer =
				allocateTransientGpuMem(maxTotalMemReq.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance));

			mem.m_bufferDepedency = rgraph.importBuffer(
				(mem.m_drawIndexedIndirectArgsBuffer.isValid()) ? mem.m_drawIndexedIndirectArgsBuffer : mem.m_meshletGroupsInstancesBuffer,
				BufferUsageBit::kNone);
		}

		if(getRenderer().runSoftwareMeshletRendering())
		{
			// Because someone will need it later
			for(PersistentMemoryMeshletRendering& mem : m_runCtx.m_persistentMeshletRenderingMem)
			{
				mem = {};

				mem.m_meshletInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));

				mem.m_bufferDepedency = rgraph.importBuffer(mem.m_meshletInstancesBuffer, BufferUsageBit::kNone);
			}
		}
	}
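
	// Note on the "persistent" memory above: the buffers still come from the frame-transient pool, but a small pool of
	// them is rotated between populateRenderGraph calls within the frame, i.e.
	//   m_runCtx.m_persistentMem[m_runCtx.m_populateRenderGraphCallCount++ % m_runCtx.m_persistentMem.getSize()]
	// so that back-to-back visibility tests in the same frame do not stomp on each other's results.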

	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
	const MemoryRequirements& req = m_runCtx.m_totalMemRequirements[in.m_technique];
	const PersistentMemory& mem = m_runCtx.m_persistentMem[m_runCtx.m_populateRenderGraphCallCount++ % m_runCtx.m_persistentMem.getSize()];

	out.m_legacy.m_drawIndexedIndirectArgsBuffer =
		(req.m_renderableInstanceCount)
			? BufferView(mem.m_drawIndexedIndirectArgsBuffer).setRange(req.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs))
			: BufferView();
	out.m_legacy.m_renderableInstancesBuffer =
		(req.m_renderableInstanceCount)
			? BufferView(mem.m_renderableInstancesBuffer).setRange(req.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance))
			: BufferView();
	out.m_legacy.m_mdiDrawCountsBuffer = allocateTransientGpuMem(sizeof(U32) * bucketCount);

	out.m_mesh.m_meshletGroupInstancesBuffer =
		(req.m_meshletGroupInstanceCount)
			? BufferView(mem.m_meshletGroupsInstancesBuffer).setRange(req.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance))
			: BufferView();
	out.m_mesh.m_taskShaderIndirectArgsBuffer = allocateTransientGpuMem(bucketCount * sizeof(DispatchIndirectArgs));

	if(in.m_hashVisibles)
	{
		out.m_visiblesHashBuffer = allocateTransientGpuMem(sizeof(GpuVisibilityHash));
	}

	if(in.m_gatherAabbIndices)
	{
		out.m_visibleAaabbIndicesBuffer =
			allocateTransientGpuMem((RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) + 1) * sizeof(U32));
	}

	// Set instance sub-ranges
	out.m_legacy.m_bucketRenderableInstanceRanges = m_runCtx.m_renderableInstanceRanges[in.m_technique];
	out.m_mesh.m_bucketMeshletGroupInstanceRanges = m_runCtx.m_meshletGroupInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle zeroStuffDependency = rgraph.importBuffer(out.m_legacy.m_mdiDrawCountsBuffer, BufferUsageBit::kNone);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kTransferDestination);

		pass.setWork([out](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(out.m_legacy.m_mdiDrawCountsBuffer, 0);
			cmdb.popDebugMarker();

			if(out.m_mesh.m_taskShaderIndirectArgsBuffer.isValid())
			{
				cmdb.pushDebugMarker("Task shader indirect args", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_mesh.m_taskShaderIndirectArgsBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visiblesHashBuffer.isValid())
			{
				cmdb.pushDebugMarker("Visibles hash", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visiblesHashBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visibleAaabbIndicesBuffer.isValid())
			{
				cmdb.pushDebugMarker("Visible AABB indices", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(BufferView(out.m_visibleAaabbIndicesBuffer).setRange(sizeof(U32)), 0);
				cmdb.popDebugMarker();
			}
		});
	}
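
	// The zeroing above runs as its own pass so the render graph can schedule it before the main visibility dispatch
	// through zeroStuffDependency. Note that for the visible-AABB-index buffer only the leading U32 (the count) is
	// cleared; the indices that follow are presumably always written before being read.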

	// Set the out dependency. Use one of the big buffers.
	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
	pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(out.m_dependency, BufferUsageBit::kStorageComputeWrite);

	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
	{
		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
				  technique = in.m_technique, out](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const Bool gatherAabbIndices = out.m_visibleAaabbIndicesBuffer.isValid();
		const Bool genHash = out.m_visiblesHashBuffer.isValid();

		U32 gatherType = 0;
		if(out.m_mesh.m_meshletGroupInstancesBuffer.isValid())
		{
			gatherType |= 2u;
		}
		if(out.m_legacy.m_renderableInstancesBuffer.isValid())
		{
			gatherType |= 1u;
		}
		ANKI_ASSERT(gatherType != 0);
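
		// gatherType is a 2-bit mask: bit 0 enables the legacy (multi-draw-indirect) path and bit 1 the meshlet-group
		// path, so a value of 3 gathers for both in a single dispatch. The GATHER_TYPE mutator was only compiled for
		// values 1 to 3 (never 0), which is why the program arrays below are indexed with gatherType - 1.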

		if(frustumTestData)
		{
			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash][gatherType - 1u].get());
		}
		else
		{
			cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash][gatherType - 1u].get());
		}

		BufferView aabbsBuffer;
		U32 aabbCount = 0;
		switch(technique)
		{
		case RenderingTechnique::kGBuffer:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kDepth:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kForward:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
			break;
		default:
			ANKI_ASSERT(0);
		}

		cmdb.bindStorageBuffer(0, 0, aabbsBuffer);
		cmdb.bindStorageBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(0, 4, GpuSceneBuffer::getSingleton().getBufferView());

		if(gatherType & 1u)
		{
			cmdb.bindStorageBuffer(0, 5, out.m_legacy.m_renderableInstancesBuffer);
			cmdb.bindStorageBuffer(0, 6, out.m_legacy.m_drawIndexedIndirectArgsBuffer);
			cmdb.bindStorageBuffer(0, 7, out.m_legacy.m_mdiDrawCountsBuffer);
		}

		if(gatherType & 2u)
		{
			cmdb.bindStorageBuffer(0, 8, out.m_mesh.m_taskShaderIndirectArgsBuffer);
			cmdb.bindStorageBuffer(0, 9, out.m_mesh.m_meshletGroupInstancesBuffer);
		}

		const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
		UVec2* instanceRanges = allocateAndBindStorageBuffer<UVec2>(cmdb, 0, 10, bucketCount);
		for(U32 i = 0; i < bucketCount; ++i)
		{
			const Bool legacyBucket = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount > 0;
			if(legacyBucket)
			{
				instanceRanges[i].x() = m_runCtx.m_renderableInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount;
			}
			else
			{
				instanceRanges[i].x() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_instanceCount;
			}
		}

		if(frustumTestData)
		{
			FrustumGpuVisibilityUniforms* unis = allocateAndBindConstants<FrustumGpuVisibilityUniforms>(cmdb, 0, 11);

			Array<Plane, 6> planes;
			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			ANKI_ASSERT(kMaxLodCount == 3);
			unis->m_maxLodDistances[0] = lodDistances[0];
			unis->m_maxLodDistances[1] = lodDistances[1];
			unis->m_maxLodDistances[2] = kMaxF32;
			unis->m_maxLodDistances[3] = kMaxF32;

			unis->m_lodReferencePoint = lodReferencePoint;
			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;
			unis->m_finalRenderTargetSize = Vec2(frustumTestData->m_finalRenderTargetSize);

			if(frustumTestData->m_hzbRt.isValid())
			{
				rpass.bindColorTexture(0, 12, frustumTestData->m_hzbRt);
				cmdb.bindSampler(0, 13, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}
		}
		else
		{
			DistanceGpuVisibilityUniforms unis;
			unis.m_pointOfTest = distTestData->m_pointOfTest;
			unis.m_testRadius = distTestData->m_testRadius;

			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			unis.m_lodReferencePoint = lodReferencePoint;

			cmdb.setPushConstants(&unis, sizeof(unis));
		}

		if(gatherAabbIndices)
		{
			cmdb.bindStorageBuffer(0, 14, out.m_visibleAaabbIndicesBuffer);
		}

		if(genHash)
		{
			cmdb.bindStorageBuffer(0, 15, out.m_visiblesHashBuffer);
		}

		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
	});
}

void GpuVisibility::populateRenderGraphMeshletInternal(Bool passthrough, BaseGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
{
	RenderGraphDescription& rgraph = *in.m_rgraph;

	if(!in.m_taskShaderIndirectArgsBuffer.isValid()) [[unlikely]]
	{
		// Early exit
		return;
	}

	class NonPassthrough
	{
	public:
		Mat4 m_viewProjectionMatrix;
		Mat3x4 m_cameraTransform;
		UVec2 m_viewportSize;
		RenderTargetHandle m_hzbRt;
	}* nonPassthroughData = nullptr;

	if(!passthrough)
	{
		GpuMeshletVisibilityInput& nonPassthroughIn = static_cast<GpuMeshletVisibilityInput&>(in);
		nonPassthroughData = newInstance<NonPassthrough>(getRenderer().getFrameMemoryPool());
		nonPassthroughData->m_viewProjectionMatrix = nonPassthroughIn.m_viewProjectionMatrix;
		nonPassthroughData->m_cameraTransform = nonPassthroughIn.m_cameraTransform;
		nonPassthroughData->m_viewportSize = nonPassthroughIn.m_viewportSize;
		nonPassthroughData->m_hzbRt = nonPassthroughIn.m_hzbRt;
	}
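
	// In the passthrough variant the shader presumably skips all per-meshlet culling and only expands meshlet groups
	// into meshlet instances. That is why the work callback further down feeds it an identity view-projection matrix,
	// a zero camera position and a zero viewport instead of real camera data.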

	// Allocate memory
	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);

	const PersistentMemoryMeshletRendering& mem =
		m_runCtx.m_persistentMeshletRenderingMem[m_runCtx.m_populateRenderGraphMeshletRenderingCallCount++
												 % m_runCtx.m_persistentMeshletRenderingMem.getSize()];

	out.m_drawIndirectArgsBuffer = allocateTransientGpuMem(sizeof(DrawIndirectArgs) * bucketCount);
	out.m_meshletInstancesBuffer =
		BufferView(mem.m_meshletInstancesBuffer)
			.setRange(m_runCtx.m_totalMemRequirements[in.m_technique].m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));

	out.m_bucketMeshletInstanceRanges = m_runCtx.m_meshletInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle indirectArgsDep = rgraph.importBuffer(out.m_drawIndirectArgsBuffer, BufferUsageBit::kNone);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kTransferDestination);

		pass.setWork([drawIndirectArgsBuffer = out.m_drawIndirectArgsBuffer](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;
			cmdb.pushDebugMarker("Draw indirect args", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(drawIndirectArgsBuffer, 0);
			cmdb.popDebugMarker();
		});
	}

	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);

	pass.setWork([this, nonPassthroughData, computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out,
				  meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const U32 bucketCount = out.m_bucketMeshletInstanceRanges.getSize();
		for(U32 i = 0; i < bucketCount; ++i)
		{
			if(out.m_bucketMeshletInstanceRanges[i].m_instanceCount == 0)
			{
				continue;
			}

			const Bool hasHzb = (nonPassthroughData) ? nonPassthroughData->m_hzbRt.isValid() : false;
			const Bool isPassthrough = (nonPassthroughData == nullptr);
			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb][isPassthrough].get());

			cmdb.bindStorageBuffer(0, 0, meshletGroupInstancesBuffer);
			cmdb.bindStorageBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 4, UnifiedGeometryBuffer::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 5, out.m_drawIndirectArgsBuffer);
			cmdb.bindStorageBuffer(0, 6, out.m_meshletInstancesBuffer);

			if(hasHzb)
			{
				rpass.bindColorTexture(0, 7, nonPassthroughData->m_hzbRt);
				cmdb.bindSampler(0, 8, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}

			class Consts
			{
			public:
				Mat4 m_viewProjectionMatrix;
				Vec3 m_cameraPos;
				U32 m_firstDrawArg;
				Vec2 m_viewportSizef;
				U32 m_firstMeshletGroup;
				U32 m_firstMeshlet;
				U32 m_meshletCount;
				U32 m_padding1;
				U32 m_padding2;
				U32 m_padding3;
			} consts;

			consts.m_viewProjectionMatrix = (!isPassthrough) ? nonPassthroughData->m_viewProjectionMatrix : Mat4::getIdentity();
			consts.m_cameraPos = (!isPassthrough) ? nonPassthroughData->m_cameraTransform.getTranslationPart().xyz() : Vec3(0.0f);
			consts.m_firstDrawArg = i;
			consts.m_viewportSizef = (!isPassthrough) ? Vec2(nonPassthroughData->m_viewportSize) : Vec2(0.0f);
			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();
			cmdb.setPushConstants(&consts, sizeof(consts));

			cmdb.dispatchComputeIndirect(
				BufferView(computeIndirectArgs).incrementOffset(i * sizeof(DispatchIndirectArgs)).setRange(sizeof(DispatchIndirectArgs)));
		}
	});
}

Error GpuVisibilityNonRenderables::init()
{
	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
		{
			for(MutatorValue cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin",
					{{"HZB_TEST", hzb}, {"OBJECT_TYPE", MutatorValue(type)}, {"CPU_FEEDBACK", cpuFeedback}}, m_prog,
					m_grProgs[hzb][type][cpuFeedback]));
			}
		}
	}

	return Error::kNone;
}

void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
{
	ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
	RenderGraphDescription& rgraph = *in.m_rgraph;

	U32 objCount = 0;
	switch(in.m_objectType)
	{
	case GpuSceneNonRenderableObjectType::kLight:
		objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kDecal:
		objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
		objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
		objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kReflectionProbe:
		objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	if(objCount == 0)
	{
		U32* count;
		out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), count);
		*count = 0;
		out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);
		return;
	}

	if(in.m_cpuFeedbackBuffer.isValid())
	{
		ANKI_ASSERT(in.m_cpuFeedbackBuffer.getRange() == sizeof(U32) * (objCount * 2 + 1));
	}

	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
	if(firstRunInFrame)
	{
		// 1st run in this frame, do some bookkeeping
		m_lastFrameIdx = getRenderer().getFrameCount();
		m_counterBufferOffset = 0;
		m_counterBufferZeroingHandle = {};
	}

	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visible object count and 1 for objects with feedback
	const U32 counterBufferElementSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment,
														   U32(kCountersPerDispatch * sizeof(U32)));
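
	// Illustrative arithmetic for the round-up above (the real alignment comes from the device capabilities): with
	// 3 counters the raw element size is 12 bytes, so on a device with a 256-byte storage-buffer offset alignment each
	// dispatch gets a 256-byte slot in the counter buffer, letting the slots be bound at distinct aligned offsets.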

	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
	{
		// Counter buffer not created or not big enough, create a new one
		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
		buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageComputeRead | BufferUsageBit::kTransferDestination;
		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);

		m_counterBufferZeroingHandle = rgraph.importBuffer(BufferView(m_counterBuffer.get()), buffInit.m_usage);

		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GpuVisibilityNonRenderablesClearCounterBuffer");
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);
		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
			rgraph.m_commandBuffer->fillBuffer(BufferView(counterBuffer.get()), 0);
		});

		m_counterBufferOffset = 0;
	}
	else if(!firstRunInFrame)
	{
		m_counterBufferOffset += counterBufferElementSize;
	}
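
	// The counter buffer grows geometrically: when the next slot would overflow it, a buffer of twice the old size is
	// created and zeroed via a one-off clear pass; otherwise each dispatch after the first in a frame simply advances
	// m_counterBufferOffset to the next aligned slot. Since there is no per-frame clear, the shader presumably resets
	// its slot's counters once it is done with them.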

	// Allocate memory for the result
	out.m_visiblesBuffer = allocateTransientGpuMem((objCount + 1) * sizeof(U32));
	out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);

	// Create the renderpass
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);
	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
	pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kStorageComputeWrite);

	if(in.m_hzbRt)
	{
		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
	{
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kStorageComputeRead | BufferUsageBit::kStorageComputeWrite);
	}

	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
				  visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
				  objCount](RenderPassWorkContext& rgraph) {
		CommandBuffer& cmdb = *rgraph.m_commandBuffer;

		const Bool needsFeedback = feedbackBuffer.isValid();

		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());

		BufferView objBuffer;
		switch(objType)
		{
		case GpuSceneNonRenderableObjectType::kLight:
			objBuffer = GpuSceneArrays::Light::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kDecal:
			objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kFogDensityVolume:
			objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
			objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kReflectionProbe:
			objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferView();
			break;
		default:
			ANKI_ASSERT(0);
		}
		cmdb.bindStorageBuffer(0, 0, objBuffer);

		GpuVisibilityNonRenderableUniforms unis;
		Array<Plane, 6> planes;
		extractClipPlanes(viewProjectionMat, planes);
		for(U32 i = 0; i < 6; ++i)
		{
			unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
		}
		cmdb.setPushConstants(&unis, sizeof(unis));

		rgraph.bindStorageBuffer(0, 1, visibleIndicesBuffHandle);
		cmdb.bindStorageBuffer(0, 2, BufferView(counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch));

		if(needsFeedback)
		{
			cmdb.bindStorageBuffer(0, 3, feedbackBuffer);
		}

		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
	});
}

Error GpuVisibilityAccelerationStructures::init()
{
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", m_visibilityProg, m_visibilityGrProg));
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprogbin", m_zeroRemainingInstancesProg,
		m_zeroRemainingInstancesGrProg));

	BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
	inf.m_size = sizeof(U32) * 2;
	inf.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageComputeRead | BufferUsageBit::kTransferDestination;
	m_counterBuffer = GrManager::getSingleton().newBuffer(inf);

	zeroBuffer(m_counterBuffer.get());

	return Error::kNone;
}

void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
															  GpuVisibilityAccelerationStructuresOutput& out)
{
	in.validate();
	RenderGraphDescription& rgraph = *in.m_rgraph;

#if ANKI_ASSERTIONS_ENABLED
	ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
	m_lastFrameIdx = getRenderer().getFrameCount();
#endif

	// Allocate the transient buffers
	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();

	out.m_instancesBuffer = allocateTransientGpuMem(aabbCount * sizeof(AccelerationStructureInstance));
	out.m_someBufferHandle = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kStorageComputeWrite);

	out.m_renderableIndicesBuffer = allocateTransientGpuMem((aabbCount + 1) * sizeof(U32));

	const BufferView zeroInstancesDispatchArgsBuff = allocateTransientGpuMem(sizeof(DispatchIndirectArgs));
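
	// zeroInstancesDispatchArgsBuff is written by the visibility dispatch below (binding 7) and then consumed as the
	// indirect arguments of the "zero remaining instances" pass, so the second pass only has to touch the tail of the
	// instance array that the visibility pass left unwritten.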

	// Create vis pass
	{
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);

		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kStorageComputeWrite);

		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_visibilityGrProg.get());

			GpuVisibilityAccelerationStructuresUniforms unis;
			Array<Plane, 6> planes;
			extractClipPlanes(viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			unis.m_pointOfTest = pointOfTest;
			unis.m_testRadius = testRadius;

			ANKI_ASSERT(kMaxLodCount == 3);
			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			cmdb.setPushConstants(&unis, sizeof(unis));

			cmdb.bindStorageBuffer(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(0, 4, instancesBuff);
			cmdb.bindStorageBuffer(0, 5, indicesBuff);
			cmdb.bindStorageBuffer(0, 6, BufferView(m_counterBuffer.get(), 0, sizeof(U32) * 2));
			cmdb.bindStorageBuffer(0, 7, zeroInstancesDispatchArgsBuff);

			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
		});
	}

	// Zero remaining instances
	{
		Array<Char, 64> passName;
		snprintf(passName.getBegin(), sizeof(passName), "%s: Zero remaining instances", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kStorageComputeWrite);

		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
					  indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());

			cmdb.bindStorageBuffer(0, 0, indicesBuff);
			cmdb.bindStorageBuffer(0, 1, instancesBuff);

			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff);
		});
	}
}

} // end namespace anki