GpuVisibility.cpp

// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Renderer/Utils/GpuVisibility.h>
#include <AnKi/Renderer/Renderer.h>
#include <AnKi/Scene/RenderStateBucket.h>
#include <AnKi/Scene/GpuSceneArray.h>
#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
#include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
#include <AnKi/Collision/Functions.h>
#include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
#include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
#include <AnKi/Core/StatsSet.h>
#include <AnKi/Core/CVarSet.h>
namespace anki {

constexpr U32 kMaxVisibleObjects = 30 * 1024;

constexpr U32 kMaxVisiblePrimitives = 40'000'000;
constexpr U32 kMaxVisibleMeshlets = kMaxVisiblePrimitives / kMaxPrimitivesPerMeshlet;
constexpr PtrSize kMaxMeshletMemory = kMaxVisibleMeshlets * sizeof(GpuSceneMeshletInstance);

constexpr U32 kVisibleMaxMeshletGroups = max(kMaxVisibleObjects, (kMaxVisibleMeshlets + kMeshletGroupSize - 1) / kMeshletGroupSize);
constexpr PtrSize kMaxMeshletGroupMemory = kVisibleMaxMeshletGroups * sizeof(GpuSceneMeshletGroupInstance);

static NumericCVar<PtrSize> g_maxMeshletMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletMemoryPerTest", kMaxMeshletMemory, 1_KB, 100_MB,
	"Max memory that will be allocated per GPU occlusion test for storing meshlets");

static NumericCVar<PtrSize> g_maxMeshletGroupMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletGroupMemoryPerTest", kMaxMeshletGroupMemory, 1_KB,
	100_MB, "Max memory that will be allocated per GPU occlusion test for storing meshlet groups");

static StatCounter g_gpuVisMemoryAllocatedStatVar(StatCategory::kRenderer, "GPU visibility mem",
	StatFlag::kBytes | StatFlag::kMainThreadUpdates | StatFlag::kZeroEveryFrame);
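
// Allocate per-frame transient GPU memory for visibility results, tracking the total in the renderer stats.
// A zero size returns a null range.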
static BufferOffsetRange allocateTransientGpuMem(PtrSize size)
{
	BufferOffsetRange out = {};

	if(size)
	{
		g_gpuVisMemoryAllocatedStatVar.increment(size);
		out = GpuVisibleTransientMemoryPool::getSingleton().allocate(size);
	}

	return out;
}
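
// Compile all the program variants up front: every combination of HZB testing, AABB gathering, visible-hash generation
// and gather type (legacy renderables, meshlet groups or both) for the frustum and distance tests, plus the meshlet
// culling program with and without HZB and passthrough.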
Error GpuVisibility::init()
{
	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
		{
			for(MutatorValue genHash = 0; genHash < 2; ++genHash)
			{
				for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
				{
					ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
						{{"HZB_TEST", hzb},
						 {"DISTANCE_TEST", 0},
						 {"GATHER_AABBS", gatherAabbs},
						 {"HASH_VISIBLES", genHash},
						 {"GATHER_TYPE", gatherType + 1}},
						m_prog, m_frustumGrProgs[hzb][gatherAabbs][genHash][gatherType]));
				}
			}
		}
	}

	for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
	{
		for(MutatorValue genHash = 0; genHash < 2; ++genHash)
		{
			for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
					{{"HZB_TEST", 0},
					 {"DISTANCE_TEST", 1},
					 {"GATHER_AABBS", gatherAabbs},
					 {"HASH_VISIBLES", genHash},
					 {"GATHER_TYPE", gatherType + 1}},
					m_prog, m_distGrProgs[gatherAabbs][genHash][gatherType]));
			}
		}
	}

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(MutatorValue passthrough = 0; passthrough < 2; ++passthrough)
		{
			ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}, {"PASSTHROUGH", passthrough}},
				m_meshletCullingProg, m_meshletCullingGrProgs[hzb][passthrough]));
		}
	}

	return Error::kNone;
}
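
// Split the per-test memory budgets (renderable instances, meshlet groups, meshlets) across the technique's
// render-state buckets, proportionally to each bucket's share of the totals. Every non-empty bucket gets at least one
// instance so it can never starve completely.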
void GpuVisibility::computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket)
{
	ANKI_ASSERT(perBucket.getSize() == RenderStateBucketContainer::getSingleton().getBucketCount(t));

	U32 totalMeshletCount = 0;
	U32 totalMeshletGroupCount = 0;
	U32 totalRenderableCount = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		if(meshletCount)
		{
			totalMeshletCount += meshletCount;
			totalMeshletGroupCount += meshletGroupCount;
		}
		else
		{
			totalRenderableCount += userCount;
		}
	});

	const U32 maxVisibleMeshlets = min(U32(g_maxMeshletMemoryPerTest.get() / sizeof(GpuSceneMeshletInstance)), totalMeshletCount);
	const U32 maxVisibleMeshletGroups = min(U32(g_maxMeshletGroupMemoryPerTest.get() / sizeof(GpuSceneMeshletGroupInstance)), totalMeshletGroupCount);
	const U32 maxVisibleRenderables = min(kMaxVisibleObjects, totalRenderableCount);

	total = {};
	U32 bucketIdx = 0;
	RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
		MemoryRequirements& bucket = perBucket[bucketIdx++];

		// Use U64 because some of the intermediate expressions overflow 32 bits
		if(meshletCount)
		{
			ANKI_ASSERT(meshletGroupCount > 0);

			ANKI_ASSERT(totalMeshletCount > 0);
			bucket.m_meshletInstanceCount = max(1u, U32(U64(meshletCount) * maxVisibleMeshlets / totalMeshletCount));

			ANKI_ASSERT(totalMeshletGroupCount > 0);
			bucket.m_meshletGroupInstanceCount = max(1u, U32(U64(meshletGroupCount) * maxVisibleMeshletGroups / totalMeshletGroupCount));
		}
		else if(userCount > 0)
		{
			ANKI_ASSERT(totalRenderableCount > 0);
			bucket.m_renderableInstanceCount = max(1u, U32(U64(userCount) * maxVisibleRenderables / totalRenderableCount));
		}

		total.m_meshletInstanceCount += bucket.m_meshletInstanceCount;
		total.m_meshletGroupInstanceCount += bucket.m_meshletGroupInstanceCount;
		total.m_renderableInstanceCount += bucket.m_renderableInstanceCount;
	});
}
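
// Common implementation behind the frustum and distance entry points: allocates the per-frame result buffers, records
// a pass that zeroes the counter buffers and then records the culling dispatch itself.
//
// A minimal usage sketch from a caller's point of view (hypothetical values; accessor names are illustrative and the
// public populateRenderGraph() wrapper is assumed to be the one the header declares on top of this):
//
//   FrustumGpuVisibilityInput in;
//   in.m_passesName = "GBuffer vis";
//   in.m_technique = RenderingTechnique::kGBuffer;
//   in.m_viewProjectionMatrix = viewProjMat;
//   in.m_lodReferencePoint = cameraPos;
//   in.m_viewportSize = UVec2(1920, 1080);
//   in.m_rgraph = &rgraph;
//   GpuVisibilityOutput visOut;
//   getRenderer().getGpuVisibility().populateRenderGraph(in, visOut);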
void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
{
	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);

	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
	{
		// Early exit with an empty output
		out = {};
		return;
	}
	RenderGraphDescription& rgraph = *in.m_rgraph;

	class DistanceTestData
	{
	public:
		Vec3 m_pointOfTest;
		F32 m_testRadius;
	};

	class FrustumTestData
	{
	public:
		RenderTargetHandle m_hzbRt;
		Mat4 m_viewProjMat;
		UVec2 m_finalRenderTargetSize;
	};

	FrustumTestData* frustumTestData = nullptr;
	DistanceTestData* distTestData = nullptr;
	if(distanceBased)
	{
		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
		distTestData->m_pointOfTest = din.m_pointOfTest;
		distTestData->m_testRadius = din.m_testRadius;
	}
	else
	{
		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
		frustumTestData->m_finalRenderTargetSize = fin.m_viewportSize;
	}
	// Allocate memory
	const Bool firstCallInFrame = m_runCtx.m_frameIdx != getRenderer().getFrameCount();
	if(firstCallInFrame)
	{
		// First call in the frame. Initialize the per-frame state
		m_runCtx.m_frameIdx = getRenderer().getFrameCount();
		m_runCtx.m_populateRenderGraphCallCount = 0;
		m_runCtx.m_populateRenderGraphMeshletRenderingCallCount = 0;

		// Calculate the memory requirements
		MemoryRequirements maxTotalMemReq;
		WeakArray<MemoryRequirements> bucketsMemReqs;
		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
		{
			const U32 tBucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(t);
			if(tBucketCount == 0)
			{
				continue;
			}

			newArray<MemoryRequirements>(getRenderer().getFrameMemoryPool(), tBucketCount, bucketsMemReqs);
			computeGpuVisibilityMemoryRequirements(t, m_runCtx.m_totalMemRequirements[t], bucketsMemReqs);
			maxTotalMemReq = maxTotalMemReq.max(m_runCtx.m_totalMemRequirements[t]);

			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_renderableInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletGroupInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletInstanceRanges[t]);

			U32 renderablesFirstInstance = 0, groupsFirstInstance = 0, meshletsFirstInstance = 0;
			for(U32 i = 0; i < tBucketCount; ++i)
			{
				m_runCtx.m_renderableInstanceRanges[t][i].m_firstInstance = renderablesFirstInstance;
				m_runCtx.m_renderableInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_renderableInstanceCount;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_firstInstance = groupsFirstInstance;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletGroupInstanceCount;
				m_runCtx.m_meshletInstanceRanges[t][i].m_firstInstance = meshletsFirstInstance;
				m_runCtx.m_meshletInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletInstanceCount;

				renderablesFirstInstance += bucketsMemReqs[i].m_renderableInstanceCount;
				groupsFirstInstance += bucketsMemReqs[i].m_meshletGroupInstanceCount;
				meshletsFirstInstance += bucketsMemReqs[i].m_meshletInstanceCount;
			}
		}

		// Allocate the persistent memory
		for(PersistentMemory& mem : m_runCtx.m_persistentMem)
		{
			mem = {};

			mem.m_drawIndexedIndirectArgsBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs));
			mem.m_renderableInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance));
			mem.m_meshletGroupsInstancesBuffer =
				allocateTransientGpuMem(maxTotalMemReq.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance));

			mem.m_bufferDepedency = rgraph.importBuffer(
				BufferUsageBit::kNone,
				(mem.m_drawIndexedIndirectArgsBuffer.m_buffer) ? mem.m_drawIndexedIndirectArgsBuffer : mem.m_meshletGroupsInstancesBuffer);
		}

		if(getRenderer().runSoftwareMeshletRendering())
		{
			// Allocate this as well because a meshlet culling pass will need it later
			for(PersistentMemoryMeshletRendering& mem : m_runCtx.m_persistentMeshletRenderingMem)
			{
				mem = {};
				mem.m_meshletInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));
				mem.m_bufferDepedency = rgraph.importBuffer(BufferUsageBit::kNone, mem.m_meshletInstancesBuffer);
			}
		}
	}
	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
	const MemoryRequirements& req = m_runCtx.m_totalMemRequirements[in.m_technique];
	const PersistentMemory& mem = m_runCtx.m_persistentMem[m_runCtx.m_populateRenderGraphCallCount++ % m_runCtx.m_persistentMem.getSize()];

	out.m_legacy.m_drawIndexedIndirectArgsBuffer = mem.m_drawIndexedIndirectArgsBuffer;
	out.m_legacy.m_drawIndexedIndirectArgsBuffer.m_range = req.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs);
	out.m_legacy.m_renderableInstancesBuffer = mem.m_renderableInstancesBuffer;
	out.m_legacy.m_renderableInstancesBuffer.m_range = req.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance);
	out.m_legacy.m_mdiDrawCountsBuffer = allocateTransientGpuMem(sizeof(U32) * bucketCount);
	out.m_mesh.m_meshletGroupInstancesBuffer = mem.m_meshletGroupsInstancesBuffer;
	out.m_mesh.m_meshletGroupInstancesBuffer.m_range = req.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance);
	out.m_mesh.m_taskShaderIndirectArgsBuffer = allocateTransientGpuMem(bucketCount * sizeof(DispatchIndirectArgs));

	if(in.m_hashVisibles)
	{
		out.m_visiblesHashBuffer = allocateTransientGpuMem(sizeof(GpuVisibilityHash));
	}

	if(in.m_gatherAabbIndices)
	{
		out.m_visibleAaabbIndicesBuffer =
			allocateTransientGpuMem((RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) + 1) * sizeof(U32));
	}

	// Set instance sub-ranges
	out.m_legacy.m_bucketRenderableInstanceRanges = m_runCtx.m_renderableInstanceRanges[in.m_technique];
	out.m_mesh.m_bucketMeshletGroupInstanceRanges = m_runCtx.m_meshletGroupInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle zeroStuffDependency = rgraph.importBuffer(BufferUsageBit::kNone, out.m_legacy.m_mdiDrawCountsBuffer);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kTransferDestination);

		pass.setWork([out](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(out.m_legacy.m_mdiDrawCountsBuffer, 0);
			cmdb.popDebugMarker();

			if(out.m_mesh.m_taskShaderIndirectArgsBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Task shader indirect args", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_mesh.m_taskShaderIndirectArgsBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visiblesHashBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Visibles hash", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visiblesHashBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visibleAaabbIndicesBuffer.m_buffer)
			{
				cmdb.pushDebugMarker("Visible AABB indices", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visibleAaabbIndicesBuffer.m_buffer, out.m_visibleAaabbIndicesBuffer.m_offset, sizeof(U32), 0);
				cmdb.popDebugMarker();
			}
		});
	}
	// Set the out dependency. Use one of the big buffers.
	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavComputeWrite);

	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
	{
		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
				  technique = in.m_technique, out](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const Bool gatherAabbIndices = out.m_visibleAaabbIndicesBuffer.m_buffer != nullptr;
		const Bool genHash = out.m_visiblesHashBuffer.m_buffer != nullptr;
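
		// gatherType is a 2-bit mask: bit 0 means emitting legacy per-renderable instances and indirect draws, bit 1
		// means emitting meshlet groups for the meshlet path. It selects the GATHER_TYPE program variant (stored at
		// index gatherType - 1).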
		U32 gatherType = 0;
		if(out.m_mesh.m_meshletGroupInstancesBuffer.m_range > 0)
		{
			gatherType |= 2u;
		}
		if(out.m_legacy.m_renderableInstancesBuffer.m_range > 0)
		{
			gatherType |= 1u;
		}
		ANKI_ASSERT(gatherType != 0);

		if(frustumTestData)
		{
			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash][gatherType - 1u].get());
		}
		else
		{
			cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash][gatherType - 1u].get());
		}

		BufferOffsetRange aabbsBuffer;
		U32 aabbCount = 0;
		switch(technique)
		{
		case RenderingTechnique::kGBuffer:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kDepth:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kForward:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferOffsetRange();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
			break;
		default:
			ANKI_ASSERT(0);
		}

		cmdb.bindUavBuffer(0, 0, aabbsBuffer);
		cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 3, GpuSceneBuffer::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 4, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());

		if(gatherType & 1u)
		{
			cmdb.bindUavBuffer(0, 5, out.m_legacy.m_renderableInstancesBuffer);
			cmdb.bindUavBuffer(0, 6, out.m_legacy.m_drawIndexedIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 7, out.m_legacy.m_mdiDrawCountsBuffer);
		}

		if(gatherType & 2u)
		{
			cmdb.bindUavBuffer(0, 8, out.m_mesh.m_taskShaderIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 9, out.m_mesh.m_meshletGroupInstancesBuffer);
		}

		const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
		UVec2* instanceRanges = allocateAndBindUav<UVec2>(cmdb, 0, 10, bucketCount);
		for(U32 i = 0; i < bucketCount; ++i)
		{
			const Bool legacyBucket = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount > 0;
			if(legacyBucket)
			{
				instanceRanges[i].x() = m_runCtx.m_renderableInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount;
			}
			else
			{
				instanceRanges[i].x() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_instanceCount;
			}
		}

		if(frustumTestData)
		{
			FrustumGpuVisibilityConstants* unis = allocateAndBindConstants<FrustumGpuVisibilityConstants>(cmdb, 0, 11);

			Array<Plane, 6> planes;
			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			ANKI_ASSERT(kMaxLodCount == 3);
			unis->m_maxLodDistances[0] = lodDistances[0];
			unis->m_maxLodDistances[1] = lodDistances[1];
			unis->m_maxLodDistances[2] = kMaxF32;
			unis->m_maxLodDistances[3] = kMaxF32;

			unis->m_lodReferencePoint = lodReferencePoint;
			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;
			unis->m_finalRenderTargetSize = Vec2(frustumTestData->m_finalRenderTargetSize);

			if(frustumTestData->m_hzbRt.isValid())
			{
				rpass.bindColorTexture(0, 12, frustumTestData->m_hzbRt);
				cmdb.bindSampler(0, 13, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}
		}
		else
		{
			DistanceGpuVisibilityConstants unis;
			unis.m_pointOfTest = distTestData->m_pointOfTest;
			unis.m_testRadius = distTestData->m_testRadius;

			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			unis.m_lodReferencePoint = lodReferencePoint;

			cmdb.setPushConstants(&unis, sizeof(unis));
		}

		if(gatherAabbIndices)
		{
			cmdb.bindUavBuffer(0, 14, out.m_visibleAaabbIndicesBuffer);
		}

		if(genHash)
		{
			cmdb.bindUavBuffer(0, 15, out.m_visiblesHashBuffer);
		}

		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
	});
}
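
// Second-stage visibility for the software meshlet path: consumes the meshlet groups that survived the first stage
// (one indirect dispatch per render-state bucket) and writes per-meshlet instances plus DrawIndirectArgs.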
void GpuVisibility::populateRenderGraphMeshletInternal(Bool passthrough, BaseGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
{
	RenderGraphDescription& rgraph = *in.m_rgraph;

	if(in.m_taskShaderIndirectArgsBuffer.m_buffer == nullptr) [[unlikely]]
	{
		// Early exit
		return;
	}

	class NonPassthrough
	{
	public:
		Mat4 m_viewProjectionMatrix;
		Mat3x4 m_cameraTransform;
		UVec2 m_viewportSize;
		RenderTargetHandle m_hzbRt;
	}* nonPassthroughData = nullptr;

	if(!passthrough)
	{
		GpuMeshletVisibilityInput& nonPassthroughIn = static_cast<GpuMeshletVisibilityInput&>(in);
		nonPassthroughData = newInstance<NonPassthrough>(getRenderer().getFrameMemoryPool());
		nonPassthroughData->m_viewProjectionMatrix = nonPassthroughIn.m_viewProjectionMatrix;
		nonPassthroughData->m_cameraTransform = nonPassthroughIn.m_cameraTransform;
		nonPassthroughData->m_viewportSize = nonPassthroughIn.m_viewportSize;
		nonPassthroughData->m_hzbRt = nonPassthroughIn.m_hzbRt;
	}

	// Allocate memory
	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);

	const PersistentMemoryMeshletRendering& mem =
		m_runCtx.m_persistentMeshletRenderingMem[m_runCtx.m_populateRenderGraphMeshletRenderingCallCount++
												 % m_runCtx.m_persistentMeshletRenderingMem.getSize()];

	out.m_drawIndirectArgsBuffer = allocateTransientGpuMem(sizeof(DrawIndirectArgs) * bucketCount);
	out.m_meshletInstancesBuffer = mem.m_meshletInstancesBuffer;
	out.m_meshletInstancesBuffer.m_range = m_runCtx.m_totalMemRequirements[in.m_technique].m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance);
	out.m_bucketMeshletInstanceRanges = m_runCtx.m_meshletInstanceRanges[in.m_technique];

	// Zero some stuff
	const BufferHandle indirectArgsDep = rgraph.importBuffer(BufferUsageBit::kNone, out.m_drawIndirectArgsBuffer);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis zero: %s", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kTransferDestination);

		pass.setWork([drawIndirectArgsBuffer = out.m_drawIndirectArgsBuffer](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;
			cmdb.pushDebugMarker("Draw indirect args", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(drawIndirectArgsBuffer, 0);
			cmdb.popDebugMarker();
		});
	}

	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	Array<Char, 128> passName;
	snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU meshlet vis: %s", in.m_passesName.cstr());
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

	pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kUavComputeWrite);
	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);

	pass.setWork([this, nonPassthroughData, computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out,
				  meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const U32 bucketCount = out.m_bucketMeshletInstanceRanges.getSize();
		for(U32 i = 0; i < bucketCount; ++i)
		{
			if(out.m_bucketMeshletInstanceRanges[i].m_instanceCount == 0)
			{
				continue;
			}

			const Bool hasHzb = (nonPassthroughData) ? nonPassthroughData->m_hzbRt.isValid() : false;
			const Bool isPassthrough = (nonPassthroughData == nullptr);
			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb][isPassthrough].get());

			cmdb.bindUavBuffer(0, 0, meshletGroupInstancesBuffer);
			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 4, UnifiedGeometryBuffer::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 5, out.m_drawIndirectArgsBuffer);
			cmdb.bindUavBuffer(0, 6, out.m_meshletInstancesBuffer);

			if(hasHzb)
			{
				rpass.bindColorTexture(0, 7, nonPassthroughData->m_hzbRt);
				cmdb.bindSampler(0, 8, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}

			class Consts
			{
			public:
				Mat4 m_viewProjectionMatrix;
				Vec3 m_cameraPos;
				U32 m_firstDrawArg;
				Vec2 m_viewportSizef;
				U32 m_firstMeshletGroup;
				U32 m_firstMeshlet;
				U32 m_meshletCount;
				U32 m_padding1;
				U32 m_padding2;
				U32 m_padding3;
			} consts;

			consts.m_viewProjectionMatrix = (!isPassthrough) ? nonPassthroughData->m_viewProjectionMatrix : Mat4::getIdentity();
			consts.m_cameraPos = (!isPassthrough) ? nonPassthroughData->m_cameraTransform.getTranslationPart().xyz() : Vec3(0.0f);
			consts.m_firstDrawArg = i;
			consts.m_viewportSizef = (!isPassthrough) ? Vec2(nonPassthroughData->m_viewportSize) : Vec2(0.0f);
			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();
			cmdb.setPushConstants(&consts, sizeof(consts));

			cmdb.dispatchComputeIndirect(computeIndirectArgs.m_buffer, computeIndirectArgs.m_offset + i * sizeof(DispatchIndirectArgs));
		}
	});
}
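
// Compile the non-renderable visibility variants: one per object type, with and without HZB testing and CPU feedback.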
Error GpuVisibilityNonRenderables::init()
{
	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));

	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
	{
		for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
		{
			for(MutatorValue cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
			{
				ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin",
					{{"HZB_TEST", hzb}, {"OBJECT_TYPE", MutatorValue(type)}, {"CPU_FEEDBACK", cpuFeedback}}, m_prog,
					m_grProgs[hzb][type][cpuFeedback]));
			}
		}
	}

	return Error::kNone;
}
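
// Frustum-test the non-renderable objects of the GPU scene (lights, decals, probes etc) and write the indices of the
// visible ones into a transient buffer. A persistent counter buffer provides the atomics each dispatch needs.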
void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
{
	ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
	RenderGraphDescription& rgraph = *in.m_rgraph;

	U32 objCount = 0;
	switch(in.m_objectType)
	{
	case GpuSceneNonRenderableObjectType::kLight:
		objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kDecal:
		objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
		objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
		objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kReflectionProbe:
		objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	if(objCount == 0)
	{
		U32* count;
		out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), count);
		*count = 0;
		out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);
		return;
	}

	if(in.m_cpuFeedbackBuffer.m_buffer)
	{
		ANKI_ASSERT(in.m_cpuFeedbackBuffer.m_range == sizeof(U32) * (objCount * 2 + 1));
	}

	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
	if(firstRunInFrame)
	{
		// 1st run in this frame, do some bookkeeping
		m_lastFrameIdx = getRenderer().getFrameCount();
		m_counterBufferOffset = 0;
		m_counterBufferZeroingHandle = {};
	}

	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visible object count and 1 for objects with feedback
	const U32 counterBufferElementSize =
		getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_uavBufferBindOffsetAlignment, U32(kCountersPerDispatch * sizeof(U32)));
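
	// Each dispatch gets its own properly aligned slot of counters inside a persistent buffer. If the buffer doesn't
	// exist yet, or the next slot would overflow it, create a new one (double the old size) and record a pass that
	// zeroes it.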
	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
	{
		// Counter buffer not created or not big enough, create a new one
		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
		buffInit.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);

		m_counterBufferZeroingHandle = rgraph.importBuffer(m_counterBuffer.get(), buffInit.m_usage, 0, kMaxPtrSize);

		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GpuVisibilityNonRenderablesClearCounterBuffer");
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);
		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rpass) {
			rpass.m_commandBuffer->fillBuffer(counterBuffer.get(), 0, kMaxPtrSize, 0);
		});

		m_counterBufferOffset = 0;
	}
	else if(!firstRunInFrame)
	{
		m_counterBufferOffset += counterBufferElementSize;
	}

	// Allocate memory for the result
	out.m_visiblesBuffer = allocateTransientGpuMem((objCount + 1) * sizeof(U32));
	out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);

	// Create the renderpass
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kUavComputeWrite);

	if(in.m_hzbRt)
	{
		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
	{
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kUavComputeRead | BufferUsageBit::kUavComputeWrite);
	}

	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
				  visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
				  objCount](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const Bool needsFeedback = feedbackBuffer.m_buffer != nullptr;

		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());

		BufferOffsetRange objBuffer;
		switch(objType)
		{
		case GpuSceneNonRenderableObjectType::kLight:
			objBuffer = GpuSceneArrays::Light::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kDecal:
			objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kFogDensityVolume:
			objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
			objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kReflectionProbe:
			objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferOffsetRange();
			break;
		default:
			ANKI_ASSERT(0);
		}
		cmdb.bindUavBuffer(0, 0, objBuffer);

		GpuVisibilityNonRenderableConstants unis;
		Array<Plane, 6> planes;
		extractClipPlanes(viewProjectionMat, planes);
		for(U32 i = 0; i < 6; ++i)
		{
			unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
		}
		cmdb.setPushConstants(&unis, sizeof(unis));

		rpass.bindUavBuffer(0, 1, visibleIndicesBuffHandle);
		cmdb.bindUavBuffer(0, 2, counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch);

		if(needsFeedback)
		{
			cmdb.bindUavBuffer(0, 3, feedbackBuffer.m_buffer, feedbackBuffer.m_offset, feedbackBuffer.m_range);
		}

		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
	});
}
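
// Load the two programs of the acceleration-structure visibility: the distance test that gathers AS instances, and the
// indirect pass that zeroes the instance slots the test left unused. Also create the small counter buffer the shaders
// use for atomics.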
Error GpuVisibilityAccelerationStructures::init()
{
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", m_visibilityProg, m_visibilityGrProg));
	ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprogbin", m_zeroRemainingInstancesProg,
		m_zeroRemainingInstancesGrProg));

	BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
	inf.m_size = sizeof(U32) * 2;
	inf.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
	m_counterBuffer = GrManager::getSingleton().newBuffer(inf);

	zeroBuffer(m_counterBuffer.get());

	return Error::kNone;
}
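
// Distance-based visibility for ray tracing: one pass gathers the AccelerationStructureInstance and renderable index
// of everything within the test radius, then a second indirect pass zeroes the instance slots that remained unused.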
void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
															  GpuVisibilityAccelerationStructuresOutput& out)
{
	in.validate();
	RenderGraphDescription& rgraph = *in.m_rgraph;

#if ANKI_ASSERTIONS_ENABLED
	ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
	m_lastFrameIdx = getRenderer().getFrameCount();
#endif

	// Allocate the transient buffers
	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();

	out.m_instancesBuffer = allocateTransientGpuMem(aabbCount * sizeof(AccelerationStructureInstance));
	out.m_someBufferHandle = rgraph.importBuffer(BufferUsageBit::kUavComputeWrite, out.m_instancesBuffer);

	out.m_renderableIndicesBuffer = allocateTransientGpuMem((aabbCount + 1) * sizeof(U32));

	const BufferOffsetRange zeroInstancesDispatchArgsBuff = allocateTransientGpuMem(sizeof(DispatchIndirectArgs));

	// Create the visibility pass
	{
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);

		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.bindShaderProgram(m_visibilityGrProg.get());

			GpuVisibilityAccelerationStructuresConstants unis;
			Array<Plane, 6> planes;
			extractClipPlanes(viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			unis.m_pointOfTest = pointOfTest;
			unis.m_testRadius = testRadius;

			ANKI_ASSERT(kMaxLodCount == 3);
			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			cmdb.setPushConstants(&unis, sizeof(unis));

			cmdb.bindUavBuffer(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 2, GpuSceneArrays::MeshLod::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 3, GpuSceneArrays::Transform::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 4, instancesBuff);
			cmdb.bindUavBuffer(0, 5, indicesBuff);
			cmdb.bindUavBuffer(0, 6, m_counterBuffer.get(), 0, sizeof(U32) * 2);
			cmdb.bindUavBuffer(0, 7, zeroInstancesDispatchArgsBuff);

			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
		});
	}

	// Zero the remaining instances
	{
		Array<Char, 64> passName;
		snprintf(passName.getBegin(), sizeof(passName), "%s: Zero remaining instances", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
					  indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());

			cmdb.bindUavBuffer(0, 0, indicesBuff);
			cmdb.bindUavBuffer(0, 1, instancesBuff);

			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff.m_buffer, zeroInstancesDispatchArgsBuff.m_offset);
		});
	}
}

} // end namespace anki