GpuVisibility.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/Utils/GpuVisibility.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Scene/RenderStateBucket.h>
  8. #include <AnKi/Scene/GpuSceneArray.h>
  9. #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
  10. #include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
  11. #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
  12. #include <AnKi/Collision/Functions.h>
  13. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  14. #include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
  15. #include <AnKi/Core/StatsSet.h>
  16. #include <AnKi/Core/CVarSet.h>
namespace anki {

// Hard upper bounds for the per-frame GPU visibility results. These drive the default sizes of the
// transient buffers that the visibility compute passes write their results into.
constexpr U32 kMaxVisibleObjects = 30 * 1024;
constexpr U32 kMaxVisiblePrimitives = 40'000'000;
constexpr U32 kMaxVisibleMeshlets = kMaxVisiblePrimitives / kMaxPrimitivesPerMeshlet;
constexpr PtrSize kMaxMeshletMemory = kMaxVisibleMeshlets * sizeof(GpuSceneMeshletInstance);
// Meshlets are culled in groups of kMeshletGroupSize (rounded up). The max() keeps the group budget at
// least as large as the plain-object budget.
constexpr U32 kVisibleMaxMeshletGroups = max(kMaxVisibleObjects, (kMaxVisibleMeshlets + kMeshletGroupSize - 1) / kMeshletGroupSize);
constexpr PtrSize kMaxMeshletGroupMemory = kVisibleMaxMeshletGroups * sizeof(GpuSceneMeshletGroupInstance);

// Runtime-tunable memory budgets (CVars). The constants above only provide the defaults.
static NumericCVar<PtrSize> g_maxMeshletMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletMemoryPerTest", kMaxMeshletMemory, 1_KB, 100_MB,
													  "Max memory that will be allocated per GPU occlusion test for storing meshlets");
static NumericCVar<PtrSize> g_maxMeshletGroupMemoryPerTest(CVarSubsystem::kRenderer, "MaxMeshletGroupMemoryPerTest", kMaxMeshletGroupMemory, 1_KB,
														   100_MB,
														   "Max memory that will be allocated per GPU occlusion test for storing meshlet groups");

// Counts the bytes of transient visibility memory allocated this frame (kZeroEveryFrame resets it).
static StatCounter g_gpuVisMemoryAllocatedStatVar(StatCategory::kRenderer, "GPU visibility mem",
												  StatFlag::kBytes | StatFlag::kMainThreadUpdates | StatFlag::kZeroEveryFrame);
  31. static BufferView allocateTransientGpuMem(PtrSize size)
  32. {
  33. BufferView out = {};
  34. if(size)
  35. {
  36. g_gpuVisMemoryAllocatedStatVar.increment(size);
  37. out = GpuVisibleTransientMemoryPool::getSingleton().allocate(size);
  38. }
  39. return out;
  40. }
  41. Error GpuVisibility::init()
  42. {
  43. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  44. {
  45. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  46. {
  47. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  48. {
  49. for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
  50. {
  51. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
  52. {{"HZB_TEST", hzb},
  53. {"DISTANCE_TEST", 0},
  54. {"GATHER_AABBS", gatherAabbs},
  55. {"HASH_VISIBLES", genHash},
  56. {"GATHER_TYPE", gatherType + 1}},
  57. m_prog, m_frustumGrProgs[hzb][gatherAabbs][genHash][gatherType]));
  58. }
  59. }
  60. }
  61. }
  62. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  63. {
  64. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  65. {
  66. for(MutatorValue gatherType = 0; gatherType < 3; ++gatherType)
  67. {
  68. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
  69. {{"HZB_TEST", 0},
  70. {"DISTANCE_TEST", 1},
  71. {"GATHER_AABBS", gatherAabbs},
  72. {"HASH_VISIBLES", genHash},
  73. {"GATHER_TYPE", gatherType + 1}},
  74. m_prog, m_distGrProgs[gatherAabbs][genHash][gatherType]));
  75. }
  76. }
  77. }
  78. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  79. {
  80. for(MutatorValue passthrough = 0; passthrough < 2; ++passthrough)
  81. {
  82. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}, {"PASSTHROUGH", passthrough}},
  83. m_meshletCullingProg, m_meshletCullingGrProgs[hzb][passthrough]));
  84. }
  85. }
  86. return Error::kNone;
  87. }
  88. void GpuVisibility::computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket)
  89. {
  90. ANKI_ASSERT(perBucket.getSize() == RenderStateBucketContainer::getSingleton().getBucketCount(t));
  91. U32 totalMeshletCount = 0;
  92. U32 totalMeshletGroupCount = 0;
  93. U32 totalRenderableCount = 0;
  94. RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
  95. if(meshletCount)
  96. {
  97. totalMeshletCount += meshletCount;
  98. totalMeshletGroupCount += meshletGroupCount;
  99. }
  100. else
  101. {
  102. totalRenderableCount += userCount;
  103. }
  104. });
  105. const U32 maxVisibleMeshlets = min(U32(g_maxMeshletMemoryPerTest.get() / sizeof(GpuSceneMeshletInstance)), totalMeshletCount);
  106. const U32 maxVisibleMeshletGroups = min(U32(g_maxMeshletGroupMemoryPerTest.get() / sizeof(GpuSceneMeshletGroupInstance)), totalMeshletGroupCount);
  107. const U32 maxVisibleRenderables = min(kMaxVisibleObjects, totalRenderableCount);
  108. total = {};
  109. U32 bucketCount = 0;
  110. RenderStateBucketContainer::getSingleton().iterateBuckets(t, [&](const RenderStateInfo&, U32 userCount, U32 meshletGroupCount, U32 meshletCount) {
  111. MemoryRequirements& bucket = perBucket[bucketCount++];
  112. // Use U64 cause some expressions are overflowing
  113. if(meshletCount)
  114. {
  115. ANKI_ASSERT(meshletGroupCount > 0);
  116. ANKI_ASSERT(totalMeshletCount > 0);
  117. bucket.m_meshletInstanceCount = max(1u, U32(U64(meshletCount) * maxVisibleMeshlets / totalMeshletCount));
  118. ANKI_ASSERT(totalMeshletGroupCount > 0);
  119. bucket.m_meshletGroupInstanceCount = max(1u, U32(U64(meshletGroupCount) * maxVisibleMeshletGroups / totalMeshletGroupCount));
  120. }
  121. else if(userCount > 0)
  122. {
  123. ANKI_ASSERT(totalRenderableCount > 0);
  124. bucket.m_renderableInstanceCount = max(1u, U32(U64(userCount) * maxVisibleRenderables / totalRenderableCount));
  125. }
  126. total.m_meshletInstanceCount += bucket.m_meshletInstanceCount;
  127. total.m_meshletGroupInstanceCount += bucket.m_meshletGroupInstanceCount;
  128. total.m_renderableInstanceCount += bucket.m_renderableInstanceCount;
  129. });
  130. }
/// Build the render-graph passes that perform GPU visibility for one view.
/// @param distanceBased True to cull by distance (in must be a DistanceGpuVisibilityInput), false to
///                      cull by frustum (in must be a FrustumGpuVisibilityInput).
/// @param in  Culling parameters. Consumed; reset on the early-exit path (see note below).
/// @param out Receives the buffers/dependencies that the visibility pass will write.
void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
{
	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);

	if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
	{
		// Early exit
		// NOTE(review): this resets the INPUT struct and leaves `out` untouched; presumably `out` is
		// default-constructed by the caller. Confirm `in = {}` (rather than `out = {}`) is intended.
		in = {};
		return;
	}

	RenderGraphBuilder& rgraph = *in.m_rgraph;

	// Per-mode data, allocated from frame memory so the setWork() lambdas below can capture pointers.
	class DistanceTestData
	{
	public:
		Vec3 m_pointOfTest;
		F32 m_testRadius;
	};

	class FrustumTestData
	{
	public:
		RenderTargetHandle m_hzbRt;
		Mat4 m_viewProjMat;
		UVec2 m_finalRenderTargetSize;
	};

	FrustumTestData* frustumTestData = nullptr;
	DistanceTestData* distTestData = nullptr;

	if(distanceBased)
	{
		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
		distTestData->m_pointOfTest = din.m_pointOfTest;
		distTestData->m_testRadius = din.m_testRadius;
	}
	else
	{
		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
		frustumTestData->m_finalRenderTargetSize = fin.m_viewportSize;
	}

	// Allocate memory
	const Bool firstCallInFrame = m_runCtx.m_frameIdx != getRenderer().getFrameCount();
	if(firstCallInFrame)
	{
		// First call in frame. Init stuff
		m_runCtx.m_frameIdx = getRenderer().getFrameCount();
		m_runCtx.m_populateRenderGraphCallCount = 0;
		m_runCtx.m_populateRenderGraphMeshletRenderingCallCount = 0;

		// Calc memory requirements for every raster technique and carve the per-bucket instance
		// sub-ranges out of the per-technique totals.
		MemoryRequirements maxTotalMemReq;
		WeakArray<MemoryRequirements> bucketsMemReqs;
		for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
		{
			const U32 tBucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(t);
			if(tBucketCount == 0)
			{
				continue;
			}

			newArray<MemoryRequirements>(getRenderer().getFrameMemoryPool(), tBucketCount, bucketsMemReqs);
			computeGpuVisibilityMemoryRequirements(t, m_runCtx.m_totalMemRequirements[t], bucketsMemReqs);
			// Keep the component-wise max across techniques: the persistent buffers below are shared.
			maxTotalMemReq = maxTotalMemReq.max(m_runCtx.m_totalMemRequirements[t]);

			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_renderableInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletGroupInstanceRanges[t]);
			newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), tBucketCount, m_runCtx.m_meshletInstanceRanges[t]);
			// Prefix-sum the per-bucket counts into [firstInstance, instanceCount) ranges.
			U32 renderablesFirstInstance = 0, groupsFirstInstance = 0, meshletsFirstInstance = 0;
			for(U32 i = 0; i < tBucketCount; ++i)
			{
				m_runCtx.m_renderableInstanceRanges[t][i].m_firstInstance = renderablesFirstInstance;
				m_runCtx.m_renderableInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_renderableInstanceCount;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_firstInstance = groupsFirstInstance;
				m_runCtx.m_meshletGroupInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletGroupInstanceCount;
				m_runCtx.m_meshletInstanceRanges[t][i].m_firstInstance = meshletsFirstInstance;
				m_runCtx.m_meshletInstanceRanges[t][i].m_instanceCount = bucketsMemReqs[i].m_meshletInstanceCount;

				renderablesFirstInstance += bucketsMemReqs[i].m_renderableInstanceCount;
				groupsFirstInstance += bucketsMemReqs[i].m_meshletGroupInstanceCount;
				meshletsFirstInstance += bucketsMemReqs[i].m_meshletInstanceCount;
			}
		}

		// Allocate persistent memory: a small ring of buffer sets, cycled by call index so that
		// back-to-back visibility tests in the same frame don't alias each other's output.
		for(PersistentMemory& mem : m_runCtx.m_persistentMem)
		{
			mem = {};
			mem.m_drawIndexedIndirectArgsBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs));
			mem.m_renderableInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance));
			mem.m_meshletGroupsInstancesBuffer =
				allocateTransientGpuMem(maxTotalMemReq.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance));

			// Import one of the buffers as the render-graph dependency token for the whole set.
			mem.m_bufferDepedency = rgraph.importBuffer((mem.m_drawIndexedIndirectArgsBuffer.isValid()) ? mem.m_drawIndexedIndirectArgsBuffer
																									    : mem.m_meshletGroupsInstancesBuffer,
														BufferUsageBit::kNone);
		}

		if(getRenderer().runSoftwareMeshletRendering())
		{
			// Because someone will need it later
			for(PersistentMemoryMeshletRendering& mem : m_runCtx.m_persistentMeshletRenderingMem)
			{
				mem = {};
				mem.m_meshletInstancesBuffer = allocateTransientGpuMem(maxTotalMemReq.m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));
				mem.m_bufferDepedency = rgraph.importBuffer(mem.m_meshletInstancesBuffer, BufferUsageBit::kNone);
			}
		}
	}

	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
	const MemoryRequirements& req = m_runCtx.m_totalMemRequirements[in.m_technique];
	// Pick the next buffer set from the ring (round-robin per populateRenderGraph call).
	const PersistentMemory& mem = m_runCtx.m_persistentMem[m_runCtx.m_populateRenderGraphCallCount++ % m_runCtx.m_persistentMem.getSize()];

	// Expose sub-views of the persistent buffers, sized for this technique only.
	out.m_legacy.m_drawIndexedIndirectArgsBuffer =
		(req.m_renderableInstanceCount)
			? BufferView(mem.m_drawIndexedIndirectArgsBuffer).setRange(req.m_renderableInstanceCount * sizeof(DrawIndexedIndirectArgs))
			: BufferView();
	out.m_legacy.m_renderableInstancesBuffer =
		(req.m_renderableInstanceCount)
			? BufferView(mem.m_renderableInstancesBuffer).setRange(req.m_renderableInstanceCount * sizeof(GpuSceneRenderableInstance))
			: BufferView();
	out.m_legacy.m_mdiDrawCountsBuffer = allocateTransientGpuMem(sizeof(U32) * bucketCount);
	out.m_mesh.m_meshletGroupInstancesBuffer =
		(req.m_meshletGroupInstanceCount)
			? BufferView(mem.m_meshletGroupsInstancesBuffer).setRange(req.m_meshletGroupInstanceCount * sizeof(GpuSceneMeshletGroupInstance))
			: BufferView();
	out.m_mesh.m_taskShaderIndirectArgsBuffer = allocateTransientGpuMem(bucketCount * sizeof(DispatchIndirectArgs));

	if(in.m_hashVisibles)
	{
		out.m_visiblesHashBuffer = allocateTransientGpuMem(sizeof(GpuVisibilityHash));
	}

	if(in.m_gatherAabbIndices)
	{
		// One U32 counter + one index per potentially visible object.
		out.m_visibleAaabbIndicesBuffer =
			allocateTransientGpuMem((RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) + 1) * sizeof(U32));
	}

	// Set instance sub-ranges
	out.m_legacy.m_bucketRenderableInstanceRanges = m_runCtx.m_renderableInstanceRanges[in.m_technique];
	out.m_mesh.m_bucketMeshletGroupInstanceRanges = m_runCtx.m_meshletGroupInstanceRanges[in.m_technique];

	// Zero some stuff: counters and indirect args must start at 0 before the visibility pass
	// atomically increments them.
	const BufferHandle zeroStuffDependency = rgraph.importBuffer(out.m_legacy.m_mdiDrawCountsBuffer, BufferUsageBit::kNone);
	{
		Array<Char, 128> passName;
		snprintf(passName.getBegin(), passName.getSizeInBytes(), "GPU vis zero: %s", in.m_passesName.cstr());
		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(passName.getBegin());
		pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kTransferDestination);

		pass.setWork([out](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;

			cmdb.pushDebugMarker("MDI counts", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(out.m_legacy.m_mdiDrawCountsBuffer, 0);
			cmdb.popDebugMarker();

			if(out.m_mesh.m_taskShaderIndirectArgsBuffer.isValid())
			{
				cmdb.pushDebugMarker("Task shader indirect args", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_mesh.m_taskShaderIndirectArgsBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visiblesHashBuffer.isValid())
			{
				cmdb.pushDebugMarker("Visibles hash", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(out.m_visiblesHashBuffer, 0);
				cmdb.popDebugMarker();
			}

			if(out.m_visibleAaabbIndicesBuffer.isValid())
			{
				// Only the leading counter needs zeroing, not the whole index array.
				cmdb.pushDebugMarker("Visible AABB indices", Vec3(1.0f, 1.0f, 1.0f));
				cmdb.fillBuffer(BufferView(out.m_visibleAaabbIndicesBuffer).setRange(sizeof(U32)), 0);
				cmdb.popDebugMarker();
			}
		});
	}

	// Set the out dependency. Use one of the big buffers.
	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU vis: %s", in.m_passesName.cstr()));

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
	pass.newBufferDependency(zeroStuffDependency, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(out.m_dependency, BufferUsageBit::kStorageComputeWrite);

	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
	{
		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
				  technique = in.m_technique, out](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		const Bool gatherAabbIndices = out.m_visibleAaabbIndicesBuffer.isValid();
		const Bool genHash = out.m_visiblesHashBuffer.isValid();

		// Gather type bitmask: bit0 = legacy (MDI) path, bit1 = meshlet path. Must match the
		// GATHER_TYPE mutator values used in init() (stored at index gatherType - 1).
		U32 gatherType = 0;
		if(out.m_mesh.m_meshletGroupInstancesBuffer.isValid())
		{
			gatherType |= 2u;
		}
		if(out.m_legacy.m_renderableInstancesBuffer.isValid())
		{
			gatherType |= 1u;
		}
		ANKI_ASSERT(gatherType != 0);

		if(frustumTestData)
		{
			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash][gatherType - 1u].get());
		}
		else
		{
			cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash][gatherType - 1u].get());
		}

		// Pick the bounding-volume array that matches the technique.
		BufferView aabbsBuffer;
		U32 aabbCount = 0;
		switch(technique)
		{
		case RenderingTechnique::kGBuffer:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kDepth:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
			break;
		case RenderingTechnique::kForward:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferView();
			aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
			break;
		default:
			ANKI_ASSERT(0);
		}

		// GPU-scene inputs.
		cmdb.bindStorageBuffer(ANKI_REG(t0), aabbsBuffer);
		cmdb.bindStorageBuffer(ANKI_REG(t1), GpuSceneArrays::Renderable::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(ANKI_REG(t2), GpuSceneArrays::MeshLod::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(ANKI_REG(t3), GpuSceneArrays::Transform::getSingleton().getBufferView());
		cmdb.bindStorageBuffer(ANKI_REG(t4), GpuSceneArrays::ParticleEmitter::getSingleton().getBufferViewSafe());

		// Outputs, depending on the gather type.
		if(gatherType & 1u)
		{
			cmdb.bindStorageBuffer(ANKI_REG(u0), out.m_legacy.m_renderableInstancesBuffer);
			// NOTE(review): u1 and u2 both get the draw-args buffer; presumably the shader declares it
			// under two registers — confirm against GpuVisibility.ankiprogbin.
			cmdb.bindStorageBuffer(ANKI_REG(u1), out.m_legacy.m_drawIndexedIndirectArgsBuffer);
			cmdb.bindStorageBuffer(ANKI_REG(u2), out.m_legacy.m_drawIndexedIndirectArgsBuffer);
			cmdb.bindStorageBuffer(ANKI_REG(u3), out.m_legacy.m_mdiDrawCountsBuffer);
		}

		if(gatherType & 2u)
		{
			cmdb.bindStorageBuffer(ANKI_REG(u4), out.m_mesh.m_taskShaderIndirectArgsBuffer);
			cmdb.bindStorageBuffer(ANKI_REG(u5), out.m_mesh.m_meshletGroupInstancesBuffer);
		}

		// Upload the per-bucket instance ranges. A bucket is either legacy (renderable ranges) or
		// meshlet-based (group ranges) — pack whichever applies as (first, count).
		const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
		UVec2* instanceRanges = allocateAndBindStorageBuffer<UVec2>(cmdb, ANKI_REG(t5), bucketCount);
		for(U32 i = 0; i < bucketCount; ++i)
		{
			const Bool legacyBucket = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount > 0;
			if(legacyBucket)
			{
				instanceRanges[i].x() = m_runCtx.m_renderableInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_renderableInstanceRanges[technique][i].m_instanceCount;
			}
			else
			{
				instanceRanges[i].x() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_firstInstance;
				instanceRanges[i].y() = m_runCtx.m_meshletGroupInstanceRanges[technique][i].m_instanceCount;
			}
		}

		if(frustumTestData)
		{
			// Frustum mode: constants hold the clip planes, LOD distances and view-proj matrix.
			FrustumGpuVisibilityUniforms* unis = allocateAndBindConstants<FrustumGpuVisibilityUniforms>(cmdb, ANKI_REG(b0));

			Array<Plane, 6> planes;
			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			ANKI_ASSERT(kMaxLodCount == 3);
			unis->m_maxLodDistances[0] = lodDistances[0];
			unis->m_maxLodDistances[1] = lodDistances[1];
			unis->m_maxLodDistances[2] = kMaxF32;
			unis->m_maxLodDistances[3] = kMaxF32;

			unis->m_lodReferencePoint = lodReferencePoint;
			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;
			unis->m_finalRenderTargetSize = Vec2(frustumTestData->m_finalRenderTargetSize);

			if(frustumTestData->m_hzbRt.isValid())
			{
				rpass.bindTexture(ANKI_REG(t6), frustumTestData->m_hzbRt);
				cmdb.bindSampler(ANKI_REG(s0), getRenderer().getSamplers().m_nearestNearestClamp.get());
			}
		}
		else
		{
			// Distance mode: a small push-constant block is enough.
			DistanceGpuVisibilityUniforms unis;
			unis.m_pointOfTest = distTestData->m_pointOfTest;
			unis.m_testRadius = distTestData->m_testRadius;

			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			unis.m_lodReferencePoint = lodReferencePoint;

			cmdb.setPushConstants(&unis, sizeof(unis));
		}

		if(gatherAabbIndices)
		{
			cmdb.bindStorageBuffer(ANKI_REG(u6), out.m_visibleAaabbIndicesBuffer);
		}

		if(genHash)
		{
			cmdb.bindStorageBuffer(ANKI_REG(u7), out.m_visiblesHashBuffer);
		}

		// One thread per AABB (64-wide groups).
		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
	});
}
/// Build the render-graph pass that expands surviving meshlet groups into per-meshlet instances
/// (2nd stage of meshlet culling), optionally HZB-tested, or in passthrough mode with no culling.
/// @param passthrough True to skip culling (in is a BaseGpuMeshletVisibilityInput only); false to
///                    cull (in must be a GpuMeshletVisibilityInput).
void GpuVisibility::populateRenderGraphMeshletInternal(Bool passthrough, BaseGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
{
	RenderGraphBuilder& rgraph = *in.m_rgraph;

	if(!in.m_taskShaderIndirectArgsBuffer.isValid()) [[unlikely]]
	{
		// Early exit
		return;
	}

	// Extra per-view data that only the culling (non-passthrough) mode needs. Frame-pool allocated so
	// the setWork() lambda can capture the pointer.
	class NonPassthrough
	{
	public:
		Mat4 m_viewProjectionMatrix;
		Mat3x4 m_cameraTransform;
		UVec2 m_viewportSize;
		RenderTargetHandle m_hzbRt;
	}* nonPassthroughData = nullptr;

	if(!passthrough)
	{
		GpuMeshletVisibilityInput& nonPassthroughIn = static_cast<GpuMeshletVisibilityInput&>(in);
		nonPassthroughData = newInstance<NonPassthrough>(getRenderer().getFrameMemoryPool());
		nonPassthroughData->m_viewProjectionMatrix = nonPassthroughIn.m_viewProjectionMatrix;
		nonPassthroughData->m_cameraTransform = nonPassthroughIn.m_cameraTransform;
		nonPassthroughData->m_viewportSize = nonPassthroughIn.m_viewportSize;
		nonPassthroughData->m_hzbRt = nonPassthroughIn.m_hzbRt;
	}

	// Allocate memory. The meshlet-instance buffer comes from the persistent ring that
	// populateRenderGraphInternal() allocated earlier this frame (round-robin by call count).
	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);
	const PersistentMemoryMeshletRendering& mem = m_runCtx.m_persistentMeshletRenderingMem[m_runCtx.m_populateRenderGraphMeshletRenderingCallCount++
																						   % m_runCtx.m_persistentMeshletRenderingMem.getSize()];

	out.m_drawIndirectArgsBuffer = allocateTransientGpuMem(sizeof(DrawIndirectArgs) * bucketCount);
	out.m_meshletInstancesBuffer =
		BufferView(mem.m_meshletInstancesBuffer)
			.setRange(m_runCtx.m_totalMemRequirements[in.m_technique].m_meshletInstanceCount * sizeof(GpuSceneMeshletInstance));
	out.m_bucketMeshletInstanceRanges = m_runCtx.m_meshletInstanceRanges[in.m_technique];

	// Zero some stuff: the indirect draw args are atomically built by the culling pass.
	const BufferHandle indirectArgsDep = rgraph.importBuffer(out.m_drawIndirectArgsBuffer, BufferUsageBit::kNone);
	{
		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU meshlet vis zero: %s", in.m_passesName.cstr()));
		pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kTransferDestination);
		pass.setWork([drawIndirectArgsBuffer = out.m_drawIndirectArgsBuffer](RenderPassWorkContext& rpass) {
			CommandBuffer& cmdb = *rpass.m_commandBuffer;
			cmdb.pushDebugMarker("Draw indirect args", Vec3(1.0f, 1.0f, 1.0f));
			cmdb.fillBuffer(drawIndirectArgsBuffer, 0);
			cmdb.popDebugMarker();
		});
	}

	out.m_dependency = mem.m_bufferDepedency;

	// Create the renderpass
	NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU meshlet vis: %s", in.m_passesName.cstr()));
	pass.newBufferDependency(indirectArgsDep, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kStorageComputeWrite);
	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);

	pass.setWork([this, nonPassthroughData, computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out,
				  meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		// One indirect dispatch per bucket that can produce meshlets.
		const U32 bucketCount = out.m_bucketMeshletInstanceRanges.getSize();
		for(U32 i = 0; i < bucketCount; ++i)
		{
			if(out.m_bucketMeshletInstanceRanges[i].m_instanceCount == 0)
			{
				// Legacy (non-meshlet) bucket, nothing to expand.
				continue;
			}

			// Loop-invariant, but cheap; re-evaluated/re-bound per bucket.
			const Bool hasHzb = (nonPassthroughData) ? nonPassthroughData->m_hzbRt.isValid() : false;
			const Bool isPassthrough = (nonPassthroughData == nullptr);
			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb][isPassthrough].get());

			cmdb.bindStorageBuffer(ANKI_REG(t0), meshletGroupInstancesBuffer);
			cmdb.bindStorageBuffer(ANKI_REG(t1), GpuSceneArrays::Renderable::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(ANKI_REG(t2), GpuSceneArrays::MeshLod::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(ANKI_REG(t3), GpuSceneArrays::Transform::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(ANKI_REG(t4), UnifiedGeometryBuffer::getSingleton().getBufferView());
			cmdb.bindStorageBuffer(ANKI_REG(u0), out.m_drawIndirectArgsBuffer);
			cmdb.bindStorageBuffer(ANKI_REG(u1), out.m_meshletInstancesBuffer);

			if(hasHzb)
			{
				rpass.bindTexture(ANKI_REG(t5), nonPassthroughData->m_hzbRt);
				cmdb.bindSampler(ANKI_REG(s0), getRenderer().getSamplers().m_nearestNearestClamp.get());
			}

			// Push-constant layout; must match the shader's constant block (incl. explicit padding).
			class Consts
			{
			public:
				Mat4 m_viewProjectionMatrix;
				Vec3 m_cameraPos;
				U32 m_firstDrawArg;
				Vec2 m_viewportSizef;
				U32 m_firstMeshletGroup;
				U32 m_firstMeshlet;
				U32 m_meshletCount;
				U32 m_padding1;
				U32 m_padding2;
				U32 m_padding3;
			} consts;
			// Passthrough mode doesn't cull, so camera-related values are dummies.
			consts.m_viewProjectionMatrix = (!isPassthrough) ? nonPassthroughData->m_viewProjectionMatrix : Mat4::getIdentity();
			consts.m_cameraPos = (!isPassthrough) ? nonPassthroughData->m_cameraTransform.getTranslationPart().xyz() : Vec3(0.0f);
			consts.m_firstDrawArg = i;
			consts.m_viewportSizef = (!isPassthrough) ? Vec2(nonPassthroughData->m_viewportSize) : Vec2(0.0f);
			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();
			cmdb.setPushConstants(&consts, sizeof(consts));

			// The dispatch size was produced by the 1st stage; consume args for bucket i.
			cmdb.dispatchComputeIndirect(
				BufferView(computeIndirectArgs).incrementOffset(i * sizeof(DispatchIndirectArgs)).setRange(sizeof(DispatchIndirectArgs)));
		}; // NOTE(review): stray ';' after the loop body — harmless empty statement.
	});
}
  531. Error GpuVisibilityNonRenderables::init()
  532. {
  533. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));
  534. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  535. {
  536. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  537. {
  538. for(MutatorValue cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
  539. {
  540. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin",
  541. {{"HZB_TEST", hzb}, {"OBJECT_TYPE", MutatorValue(type)}, {"CPU_FEEDBACK", cpuFeedback}}, m_prog,
  542. m_grProgs[hzb][type][cpuFeedback]));
  543. }
  544. }
  545. }
  546. return Error::kNone;
  547. }
// Builds the compute pass that culls non-renderable GPU-scene objects (lights, decals, probes, fog
// volumes) against the given view-projection frustum. Outputs a buffer of [count, index0, index1, ...]
// and optionally a CPU feedback buffer.
void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
{
	ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
	RenderGraphBuilder& rgraph = *in.m_rgraph;

	// How many objects of the requested type currently live in the GPU scene.
	U32 objCount = 0;
	switch(in.m_objectType)
	{
	case GpuSceneNonRenderableObjectType::kLight:
		objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kDecal:
		objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
		objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
		objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kReflectionProbe:
		objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	// Nothing to cull: hand consumers a CPU-written "0 visibles" buffer and skip the dispatch entirely.
	if(objCount == 0)
	{
		U32* count;
		out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), count);
		*count = 0;
		out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);
		return;
	}

	if(in.m_cpuFeedbackBuffer.isValid())
	{
		// Expected feedback layout: 1 count + 2 U32s per object. Presumably (UUID, arrayIndex) pairs
		// written by the shader -- confirm against the shader source.
		ANKI_ASSERT(in.m_cpuFeedbackBuffer.getRange() == sizeof(U32) * (objCount * 2 + 1));
	}

	// The counter buffer is persistent and sub-allocated per dispatch within a frame.
	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
	if(firstRunInFrame)
	{
		// 1st run in this frame, do some bookkeeping: rewind the sub-allocation offset and forget any
		// zeroing pass handle from a previous frame.
		m_lastFrameIdx = getRenderer().getFrameCount();
		m_counterBufferOffset = 0;
		m_counterBufferZeroingHandle = {};
	}

	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visible object count and 1 for objects with feedback
	// Each dispatch's counter group must start at a legal storage-buffer bind offset.
	const U32 counterBufferElementSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment,
														   U32(kCountersPerDispatch * sizeof(U32)));
	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
	{
		// Counter buffer not created or not big enough, create a new one (grows geometrically) and
		// schedule a transfer pass that zero-fills it before first use.
		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
		buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageComputeRead | BufferUsageBit::kTransferDestination;
		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);

		m_counterBufferZeroingHandle = rgraph.importBuffer(BufferView(m_counterBuffer.get()), buffInit.m_usage);

		NonGraphicsRenderPass& pass =
			rgraph.newNonGraphicsRenderPass(generateTempPassName("Non-renderables vis: Clear counter buff: %s", in.m_passesName.cstr()));

		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);

		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
			rgraph.m_commandBuffer->fillBuffer(BufferView(counterBuffer.get()), 0);
		});

		m_counterBufferOffset = 0;
	}
	else if(!firstRunInFrame)
	{
		// Subsequent run in the same frame: advance to the next counter slot. The shader is expected
		// to leave counters zeroed for reuse next frame -- confirm against the shader source.
		m_counterBufferOffset += counterBufferElementSize;
	}

	// Allocate memory for the result: 1 count + up to objCount indices.
	out.m_visiblesBuffer = allocateTransientGpuMem((objCount + 1) * sizeof(U32));
	out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);

	// Create the renderpass
	NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Non-renderables vis: %s", in.m_passesName.cstr()));

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
	pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kStorageComputeWrite);

	if(in.m_hzbRt)
	{
		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
	{
		// Only order against the zeroing pass when one was scheduled this frame.
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kStorageComputeRead | BufferUsageBit::kStorageComputeWrite);
	}

	// Capture everything by value: this lambda runs later, during render-graph execution.
	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
				  visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
				  objCount](RenderPassWorkContext& rgraph) {
		CommandBuffer& cmdb = *rgraph.m_commandBuffer;

		const Bool needsFeedback = feedbackBuffer.isValid();

		// NOTE(review): the HZB mutator index is hardcoded to 0 (no HZB test) even when in.m_hzbRt was
		// provided and a texture dependency was added above -- confirm this is intentional.
		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());

		// Pick the GPU-scene array that matches the object type.
		BufferView objBuffer;
		switch(objType)
		{
		case GpuSceneNonRenderableObjectType::kLight:
			objBuffer = GpuSceneArrays::Light::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kDecal:
			objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kFogDensityVolume:
			objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
			objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferView();
			break;
		case GpuSceneNonRenderableObjectType::kReflectionProbe:
			objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferView();
			break;
		default:
			ANKI_ASSERT(0);
		}
		cmdb.bindStorageBuffer(ANKI_REG(t0), objBuffer);

		// Push the 6 frustum clip planes (xyz = normal, w = offset).
		GpuVisibilityNonRenderableUniforms unis;
		Array<Plane, 6> planes;
		extractClipPlanes(viewProjectionMat, planes);
		for(U32 i = 0; i < 6; ++i)
		{
			unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
		}
		cmdb.setPushConstants(&unis, sizeof(unis));

		rgraph.bindStorageBuffer(ANKI_REG(u0), visibleIndicesBuffHandle);
		// Bind only this dispatch's slice of the persistent counter buffer.
		cmdb.bindStorageBuffer(ANKI_REG(u1), BufferView(counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch));

		if(needsFeedback)
		{
			cmdb.bindStorageBuffer(ANKI_REG(u2), feedbackBuffer);
		}

		// One thread per object, 64 threads per group.
		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
	});
}
  677. Error GpuVisibilityAccelerationStructures::init()
  678. {
  679. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", m_visibilityProg, m_visibilityGrProg));
  680. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprogbin", m_zeroRemainingInstancesProg,
  681. m_zeroRemainingInstancesGrProg));
  682. BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
  683. inf.m_size = sizeof(U32) * 2;
  684. inf.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageComputeRead | BufferUsageBit::kTransferDestination;
  685. m_counterBuffer = GrManager::getSingleton().newBuffer(inf);
  686. zeroBuffer(m_counterBuffer.get());
  687. return Error::kNone;
  688. }
  689. void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
  690. GpuVisibilityAccelerationStructuresOutput& out)
  691. {
  692. in.validate();
  693. RenderGraphBuilder& rgraph = *in.m_rgraph;
  694. #if ANKI_ASSERTIONS_ENABLED
  695. ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
  696. m_lastFrameIdx = getRenderer().getFrameCount();
  697. #endif
  698. // Allocate the transient buffers
  699. const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
  700. out.m_instancesBuffer = allocateTransientGpuMem(aabbCount * sizeof(AccelerationStructureInstance));
  701. out.m_someBufferHandle = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kStorageComputeWrite);
  702. out.m_renderableIndicesBuffer = allocateTransientGpuMem((aabbCount + 1) * sizeof(U32));
  703. const BufferView zeroInstancesDispatchArgsBuff = allocateTransientGpuMem(sizeof(DispatchIndirectArgs));
  704. // Create vis pass
  705. {
  706. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis: %s", in.m_passesName.cstr()));
  707. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
  708. pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kStorageComputeWrite);
  709. pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
  710. testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
  711. zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
  712. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  713. cmdb.bindShaderProgram(m_visibilityGrProg.get());
  714. GpuVisibilityAccelerationStructuresUniforms unis;
  715. Array<Plane, 6> planes;
  716. extractClipPlanes(viewProjMat, planes);
  717. for(U32 i = 0; i < 6; ++i)
  718. {
  719. unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
  720. }
  721. unis.m_pointOfTest = pointOfTest;
  722. unis.m_testRadius = testRadius;
  723. ANKI_ASSERT(kMaxLodCount == 3);
  724. unis.m_maxLodDistances[0] = lodDistances[0];
  725. unis.m_maxLodDistances[1] = lodDistances[1];
  726. unis.m_maxLodDistances[2] = kMaxF32;
  727. unis.m_maxLodDistances[3] = kMaxF32;
  728. cmdb.setPushConstants(&unis, sizeof(unis));
  729. cmdb.bindStorageBuffer(ANKI_REG(t0), GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferView());
  730. cmdb.bindStorageBuffer(ANKI_REG(t1), GpuSceneArrays::Renderable::getSingleton().getBufferView());
  731. cmdb.bindStorageBuffer(ANKI_REG(t2), GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  732. cmdb.bindStorageBuffer(ANKI_REG(t3), GpuSceneArrays::Transform::getSingleton().getBufferView());
  733. cmdb.bindStorageBuffer(ANKI_REG(u0), instancesBuff);
  734. cmdb.bindStorageBuffer(ANKI_REG(u1), indicesBuff);
  735. cmdb.bindStorageBuffer(ANKI_REG(u2), BufferView(m_counterBuffer.get(), 0, sizeof(U32) * 2));
  736. cmdb.bindStorageBuffer(ANKI_REG(u3), zeroInstancesDispatchArgsBuff);
  737. const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
  738. dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
  739. });
  740. }
  741. // Zero remaining instances
  742. {
  743. NonGraphicsRenderPass& pass =
  744. rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis zero remaining instances: %s", in.m_passesName.cstr()));
  745. pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kStorageComputeWrite);
  746. pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
  747. indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rgraph) {
  748. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  749. cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());
  750. cmdb.bindStorageBuffer(ANKI_REG(t0), indicesBuff);
  751. cmdb.bindStorageBuffer(ANKI_REG(U0), instancesBuff);
  752. cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff);
  753. });
  754. }
  755. }
  756. } // end namespace anki