GpuVisibility.cpp 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/Utils/GpuVisibility.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Scene/RenderStateBucket.h>
  8. #include <AnKi/Scene/GpuSceneArray.h>
  9. #include <AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h>
  10. #include <AnKi/GpuMemory/RebarTransientMemoryPool.h>
  11. #include <AnKi/GpuMemory/GpuSceneBuffer.h>
  12. #include <AnKi/Collision/Functions.h>
  13. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  14. #include <AnKi/GpuMemory/UnifiedGeometryBuffer.h>
  15. #include <AnKi/Core/StatsSet.h>
  16. #include <AnKi/Util/CVarSet.h>
  17. #include <AnKi/Util/Tracer.h>
  18. #include <AnKi/Core/App.h>
  19. namespace anki {
  20. constexpr U32 kMaxVisibleObjects = 30 * 1024;
  21. constexpr U32 kMaxVisiblePrimitives = 40'000'000;
  22. constexpr U32 kMaxVisibleMeshlets = kMaxVisiblePrimitives / kMaxPrimitivesPerMeshlet;
  23. ANKI_SVAR(GpuVisMemoryAllocated, StatCategory::kRenderer, "GPU vis mem", StatFlag::kBytes | StatFlag::kMainThreadUpdates | StatFlag::kZeroEveryFrame)
  24. ANKI_SVAR(MaxGpuVisMemoryAllocated, StatCategory::kRenderer, "GPU vis mem: max ever used/frame", StatFlag::kBytes | StatFlag::kMainThreadUpdates)
  25. class GpuVisLimits
  26. {
  27. public:
  28. U32 m_maxVisibleLegacyRenderables = 0;
  29. U32 m_totalLegacyRenderables = 0;
  30. U32 m_maxVisibleMeshlets = 0;
  31. };
  32. static GpuVisLimits computeLimits(RenderingTechnique t)
  33. {
  34. GpuVisLimits out;
  35. const RenderStateBucketContainer& buckets = RenderStateBucketContainer::getSingleton();
  36. const U32 meshletUserCount = buckets.getBucketsActiveUserCountWithMeshletSupport(t);
  37. ANKI_ASSERT(meshletUserCount == 0 || (g_cvarCoreMeshletRendering || GrManager::getSingleton().getDeviceCapabilities().m_meshShaders));
  38. out.m_totalLegacyRenderables = buckets.getBucketsActiveUserCountWithNoMeshletSupport(t);
  39. out.m_maxVisibleLegacyRenderables = min(out.m_totalLegacyRenderables, kMaxVisibleObjects);
  40. out.m_maxVisibleMeshlets = (meshletUserCount) ? min(kMaxVisibleMeshlets, buckets.getBucketsLod0MeshletCount(t)) : 0;
  41. return out;
  42. }
  43. class GpuVisMemoryStats : public RendererObject, public MakeSingletonSimple<GpuVisMemoryStats>
  44. {
  45. public:
  46. void informAboutAllocation(PtrSize size)
  47. {
  48. if(m_frameIdx != getRenderer().getFrameCount())
  49. {
  50. // First call in the frame, update the stat var
  51. m_frameIdx = getRenderer().getFrameCount();
  52. m_maxMemUsedInFrame = max(m_maxMemUsedInFrame, m_memUsedThisFrame);
  53. m_memUsedThisFrame = 0;
  54. g_svarMaxGpuVisMemoryAllocated.set(m_maxMemUsedInFrame);
  55. }
  56. m_memUsedThisFrame += size;
  57. }
  58. private:
  59. PtrSize m_memUsedThisFrame = 0;
  60. PtrSize m_maxMemUsedInFrame = 0;
  61. U64 m_frameIdx = kMaxU64;
  62. };
  63. template<typename T>
  64. static BufferView allocateStructuredBuffer(U32 count)
  65. {
  66. BufferView out = {};
  67. if(count > 0)
  68. {
  69. g_svarGpuVisMemoryAllocated.increment(sizeof(T) * count);
  70. out = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<T>(count);
  71. GpuVisMemoryStats::getSingleton().informAboutAllocation(sizeof(T) * count);
  72. }
  73. return out;
  74. }
  75. Error GpuVisibility::init()
  76. {
  77. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  78. {
  79. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  80. {
  81. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  82. {
  83. for(MutatorValue gatherMeshlets = 0; gatherMeshlets < 2; ++gatherMeshlets)
  84. {
  85. for(MutatorValue gatherLegacy = 0; gatherLegacy < 2; ++gatherLegacy)
  86. {
  87. if(gatherLegacy == 0 && gatherMeshlets == 0)
  88. {
  89. continue; // Not allowed
  90. }
  91. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityStage1.ankiprogbin",
  92. {{"HZB_TEST", hzb},
  93. {"DISTANCE_TEST", 0},
  94. {"GATHER_AABBS", gatherAabbs},
  95. {"HASH_VISIBLES", genHash},
  96. {"GATHER_MESHLETS", gatherMeshlets},
  97. {"GATHER_LEGACY", gatherLegacy}},
  98. m_1stStageProg, m_frustumGrProgs[hzb][gatherAabbs][genHash][gatherMeshlets][gatherLegacy]));
  99. }
  100. }
  101. }
  102. }
  103. }
  104. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  105. {
  106. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  107. {
  108. for(MutatorValue gatherMeshlets = 0; gatherMeshlets < 2; ++gatherMeshlets)
  109. {
  110. for(MutatorValue gatherLegacy = 0; gatherLegacy < 2; ++gatherLegacy)
  111. {
  112. if(gatherLegacy == 0 && gatherMeshlets == 0)
  113. {
  114. continue; // Not allowed
  115. }
  116. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityStage1.ankiprogbin",
  117. {{"HZB_TEST", 0},
  118. {"DISTANCE_TEST", 1},
  119. {"GATHER_AABBS", gatherAabbs},
  120. {"HASH_VISIBLES", genHash},
  121. {"GATHER_MESHLETS", gatherMeshlets},
  122. {"GATHER_LEGACY", gatherLegacy}},
  123. m_1stStageProg, m_distGrProgs[gatherAabbs][genHash][gatherMeshlets][gatherLegacy]));
  124. }
  125. }
  126. }
  127. }
  128. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityStage2And3.ankiprogbin",
  129. {{"HZB_TEST", 0}, {"PASSTHROUGH", 0}, {"MESH_SHADERS", 0}, {"STORE_MESHLETS_FAILED_HZB", 1}}, m_2ndStageProg,
  130. m_gatherGrProg, "Legacy"));
  131. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  132. {
  133. for(MutatorValue passthrough = 0; passthrough < 2; ++passthrough)
  134. {
  135. for(MutatorValue meshShaders = 0; meshShaders < 2; ++meshShaders)
  136. {
  137. for(MutatorValue storeMeshletsFailedHzb = 0; storeMeshletsFailedHzb < 2; ++storeMeshletsFailedHzb)
  138. {
  139. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityStage2And3.ankiprogbin",
  140. {{"HZB_TEST", hzb},
  141. {"PASSTHROUGH", passthrough},
  142. {"MESH_SHADERS", meshShaders},
  143. {"STORE_MESHLETS_FAILED_HZB", storeMeshletsFailedHzb}},
  144. m_2ndStageProg, m_meshletGrProgs[hzb][passthrough][meshShaders][storeMeshletsFailedHzb],
  145. "Meshlets"));
  146. }
  147. }
  148. }
  149. }
  150. return Error::kNone;
  151. }
  152. void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
  153. {
  154. ANKI_ASSERT(in.m_lodReferencePoint.x != kMaxF32);
  155. if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
  156. {
  157. // Early exit
  158. out = {};
  159. return;
  160. }
  161. RenderGraphBuilder& rgraph = *in.m_rgraph;
  162. class DistanceTestData
  163. {
  164. public:
  165. Vec3 m_pointOfTest;
  166. F32 m_testRadius;
  167. };
  168. class FrustumTestData
  169. {
  170. public:
  171. RenderTargetHandle m_hzbRt;
  172. Mat4 m_viewProjMat;
  173. UVec2 m_finalRenderTargetSize;
  174. };
  175. FrustumTestData* frustumTestData = nullptr;
  176. DistanceTestData* distTestData = nullptr;
  177. Bool bStoreMeshletsFailedHzb = false;
  178. if(distanceBased)
  179. {
  180. distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
  181. const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
  182. distTestData->m_pointOfTest = din.m_pointOfTest;
  183. distTestData->m_testRadius = din.m_testRadius;
  184. }
  185. else
  186. {
  187. frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
  188. const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
  189. frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
  190. frustumTestData->m_finalRenderTargetSize = fin.m_viewportSize;
  191. if(fin.m_hzbRt)
  192. {
  193. frustumTestData->m_hzbRt = *fin.m_hzbRt;
  194. }
  195. bStoreMeshletsFailedHzb = fin.m_twoPhaseOcclusionCulling;
  196. }
  197. const Bool firstCallInFrame = m_persistentMemory.m_frameIdx != getRenderer().getFrameCount();
  198. if(firstCallInFrame)
  199. {
  200. m_persistentMemory.m_frameIdx = getRenderer().getFrameCount();
  201. }
  202. // OoM
  203. if(firstCallInFrame)
  204. {
  205. U32 data;
  206. PtrSize dataReadSize;
  207. getRenderer().getReadbackManager().readMostRecentData(m_outOfMemoryReadback, &data, sizeof(data), dataReadSize);
  208. if(dataReadSize == sizeof(U32) && data != 0)
  209. {
  210. CString who;
  211. switch(data)
  212. {
  213. case 0b1:
  214. who = "Stage 1";
  215. break;
  216. case 0b10:
  217. who = "Stage 2";
  218. break;
  219. case 0b11:
  220. who = "Both stages";
  221. break;
  222. default:
  223. ANKI_ASSERT(0);
  224. }
  225. ANKI_RESOURCE_LOGE("GPU visibility went out of memory: %s", who.cstr());
  226. }
  227. m_outOfMemoryReadbackBuffer = getRenderer().getReadbackManager().allocateStructuredBuffer<U32>(m_outOfMemoryReadback, 1);
  228. }
  229. // Get some limits
  230. const RenderStateBucketContainer& buckets = RenderStateBucketContainer::getSingleton();
  231. const U32 bucketCount = buckets.getBucketCount(in.m_technique);
  232. const GpuVisLimits limits = computeLimits(in.m_technique);
  233. const Bool bHwMeshletRendering = getRenderer().getMeshletRenderingType() == MeshletRenderingType::kMeshShaders && limits.m_maxVisibleMeshlets > 0;
  234. const Bool bSwMeshletRendering = getRenderer().getMeshletRenderingType() == MeshletRenderingType::kSoftware && limits.m_maxVisibleMeshlets > 0;
  235. const Bool bMeshletRendering = bHwMeshletRendering || bSwMeshletRendering;
  236. const Bool bLegacyRendering = limits.m_maxVisibleLegacyRenderables > 0;
  237. if(bStoreMeshletsFailedHzb)
  238. {
  239. ANKI_ASSERT(bMeshletRendering && frustumTestData->m_hzbRt.isValid());
  240. }
  241. // Allocate persistent memory for the frame
  242. if(firstCallInFrame)
  243. {
  244. GpuVisLimits maxLimits;
  245. for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(RenderingTechniqueBit::kAllRaster))
  246. {
  247. const GpuVisLimits limits = computeLimits(t);
  248. maxLimits.m_maxVisibleLegacyRenderables = max(maxLimits.m_maxVisibleLegacyRenderables, limits.m_maxVisibleLegacyRenderables);
  249. maxLimits.m_maxVisibleMeshlets = max(maxLimits.m_maxVisibleMeshlets, limits.m_maxVisibleMeshlets);
  250. }
  251. m_persistentMemory.m_stage1.m_visibleRenderables =
  252. allocateStructuredBuffer<GpuVisibilityVisibleRenderableDesc>(maxLimits.m_maxVisibleLegacyRenderables);
  253. m_persistentMemory.m_stage1.m_visibleMeshlets = allocateStructuredBuffer<GpuVisibilityVisibleMeshletDesc>(maxLimits.m_maxVisibleMeshlets);
  254. m_persistentMemory.m_stage2Legacy.m_perDraw = allocateStructuredBuffer<GpuScenePerDraw>(maxLimits.m_maxVisibleLegacyRenderables);
  255. m_persistentMemory.m_stage2Legacy.m_drawIndexedIndirectArgs =
  256. allocateStructuredBuffer<DrawIndexedIndirectArgs>(maxLimits.m_maxVisibleLegacyRenderables);
  257. m_persistentMemory.m_stage2Meshlet.m_meshletInstances = allocateStructuredBuffer<GpuSceneMeshletInstance>(maxLimits.m_maxVisibleMeshlets);
  258. m_persistentMemory.m_stage2Meshlet.m_meshletsFailedHzb =
  259. allocateStructuredBuffer<GpuVisibilityVisibleRenderableDesc>(maxLimits.m_maxVisibleMeshlets);
  260. m_persistentMemory.m_stage3.m_meshletInstances = allocateStructuredBuffer<GpuSceneMeshletInstance>(maxLimits.m_maxVisibleMeshlets);
  261. m_persistentMemory.m_dep = rgraph.importBuffer((bMeshletRendering) ? m_persistentMemory.m_stage1.m_visibleMeshlets
  262. : m_persistentMemory.m_stage1.m_visibleRenderables,
  263. BufferUsageBit::kNone);
  264. }
  265. // Compute the MDI sub-ranges
  266. if(limits.m_maxVisibleLegacyRenderables)
  267. {
  268. newArray<InstanceRange>(getRenderer().getFrameMemoryPool(), bucketCount, out.m_legacy.m_bucketIndirectArgsRanges);
  269. U32 ibucket = 0;
  270. U32 offset = 0;
  271. buckets.iterateBuckets(in.m_technique, [&](const RenderStateInfo&, U32 userCount, U32 meshletCount) {
  272. out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_firstInstance = offset;
  273. if(meshletCount == 0 && userCount > 0)
  274. {
  275. out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_instanceCount =
  276. max(1u, U32(U64(userCount) * limits.m_maxVisibleLegacyRenderables / limits.m_totalLegacyRenderables));
  277. offset += out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_instanceCount;
  278. }
  279. ++ibucket;
  280. });
  281. // The last element should point to the limit of the buffer
  282. InstanceRange& last = out.m_legacy.m_bucketIndirectArgsRanges.getBack();
  283. ANKI_ASSERT(limits.m_maxVisibleLegacyRenderables >= last.m_firstInstance);
  284. last.m_instanceCount = limits.m_maxVisibleLegacyRenderables - last.m_firstInstance;
  285. }
  286. // Allocate memory for stage 1
  287. class Stage1Mem
  288. {
  289. public:
  290. BufferView m_counters;
  291. BufferView m_visibleRenderables;
  292. BufferView m_visibleMeshlets;
  293. BufferView m_renderablePrefixSums;
  294. BufferView m_meshletPrefixSums;
  295. BufferView m_gpuVisIndirectDispatchArgs;
  296. BufferView m_visibleAabbIndices;
  297. BufferView m_hash;
  298. } stage1Mem;
  299. stage1Mem.m_counters = allocateStructuredBuffer<U32>(U32(GpuVisibilityCounter::kCount));
  300. if(in.m_limitMemory)
  301. {
  302. PtrSize newRange = sizeof(GpuVisibilityVisibleRenderableDesc) * limits.m_maxVisibleLegacyRenderables;
  303. if(newRange)
  304. {
  305. ANKI_ASSERT(newRange <= m_persistentMemory.m_stage1.m_visibleRenderables.getRange());
  306. stage1Mem.m_visibleRenderables = BufferView(m_persistentMemory.m_stage1.m_visibleRenderables).setRange(newRange);
  307. }
  308. newRange = sizeof(GpuVisibilityVisibleMeshletDesc) * limits.m_maxVisibleMeshlets;
  309. if(newRange)
  310. {
  311. ANKI_ASSERT(newRange <= m_persistentMemory.m_stage1.m_visibleMeshlets.getRange());
  312. stage1Mem.m_visibleMeshlets = BufferView(m_persistentMemory.m_stage1.m_visibleMeshlets).setRange(newRange);
  313. }
  314. }
  315. else
  316. {
  317. stage1Mem.m_visibleRenderables = allocateStructuredBuffer<GpuVisibilityVisibleRenderableDesc>(limits.m_maxVisibleLegacyRenderables);
  318. stage1Mem.m_visibleMeshlets = allocateStructuredBuffer<GpuVisibilityVisibleMeshletDesc>(limits.m_maxVisibleMeshlets);
  319. }
  320. stage1Mem.m_renderablePrefixSums = allocateStructuredBuffer<U32>(bucketCount);
  321. stage1Mem.m_meshletPrefixSums = allocateStructuredBuffer<U32>(bucketCount);
  322. stage1Mem.m_gpuVisIndirectDispatchArgs = allocateStructuredBuffer<DispatchIndirectArgs>(U32(GpuVisibilityIndirectDispatches::kCount));
  323. if(in.m_gatherAabbIndices)
  324. {
  325. stage1Mem.m_visibleAabbIndices = allocateStructuredBuffer<U32>(buckets.getBucketsActiveUserCount(in.m_technique));
  326. }
  327. if(in.m_hashVisibles)
  328. {
  329. stage1Mem.m_hash = allocateStructuredBuffer<GpuVisibilityHash>(1);
  330. }
  331. // Allocate memory for stage 2
  332. class Stage2Mem
  333. {
  334. public:
  335. class
  336. {
  337. public:
  338. BufferView m_perDraw;
  339. BufferView m_drawIndexedIndirectArgs;
  340. BufferView m_mdiDrawCounts;
  341. } m_legacy;
  342. class
  343. {
  344. public:
  345. BufferView m_indirectDrawArgs;
  346. BufferView m_dispatchMeshIndirectArgs;
  347. BufferView m_meshletInstances;
  348. BufferView m_meshletsFailedHzb;
  349. } m_meshlet;
  350. } stage2Mem;
  351. if(bLegacyRendering)
  352. {
  353. if(in.m_limitMemory)
  354. {
  355. PtrSize newRange = sizeof(GpuScenePerDraw) * limits.m_maxVisibleLegacyRenderables;
  356. ANKI_ASSERT(newRange <= m_persistentMemory.m_stage2Legacy.m_perDraw.getRange());
  357. stage2Mem.m_legacy.m_perDraw = BufferView(m_persistentMemory.m_stage2Legacy.m_perDraw).setRange(newRange);
  358. newRange = sizeof(DrawIndexedIndirectArgs) * limits.m_maxVisibleLegacyRenderables;
  359. ANKI_ASSERT(newRange <= m_persistentMemory.m_stage2Legacy.m_drawIndexedIndirectArgs.getRange());
  360. stage2Mem.m_legacy.m_drawIndexedIndirectArgs = BufferView(m_persistentMemory.m_stage2Legacy.m_drawIndexedIndirectArgs).setRange(newRange);
  361. }
  362. else
  363. {
  364. stage2Mem.m_legacy.m_perDraw = allocateStructuredBuffer<GpuScenePerDraw>(limits.m_maxVisibleLegacyRenderables);
  365. stage2Mem.m_legacy.m_drawIndexedIndirectArgs = allocateStructuredBuffer<DrawIndexedIndirectArgs>(limits.m_maxVisibleLegacyRenderables);
  366. }
  367. stage2Mem.m_legacy.m_mdiDrawCounts = allocateStructuredBuffer<U32>(bucketCount);
  368. }
  369. if(bMeshletRendering)
  370. {
  371. if(bHwMeshletRendering)
  372. {
  373. stage2Mem.m_meshlet.m_dispatchMeshIndirectArgs = allocateStructuredBuffer<DispatchIndirectArgs>(bucketCount);
  374. }
  375. else
  376. {
  377. stage2Mem.m_meshlet.m_indirectDrawArgs = allocateStructuredBuffer<DrawIndirectArgs>(bucketCount);
  378. }
  379. const U32 newCount = limits.m_maxVisibleMeshlets;
  380. if(in.m_limitMemory)
  381. {
  382. ANKI_ASSERT(newCount * sizeof(GpuSceneMeshletInstance) <= m_persistentMemory.m_stage2Meshlet.m_meshletInstances.getRange());
  383. stage2Mem.m_meshlet.m_meshletInstances =
  384. BufferView(m_persistentMemory.m_stage2Meshlet.m_meshletInstances).setRange(newCount * sizeof(GpuSceneMeshletInstance));
  385. }
  386. else
  387. {
  388. stage2Mem.m_meshlet.m_meshletInstances = allocateStructuredBuffer<GpuSceneMeshletInstance>(newCount);
  389. }
  390. if(bStoreMeshletsFailedHzb)
  391. {
  392. const U32 newCount = limits.m_maxVisibleMeshlets;
  393. if(in.m_limitMemory)
  394. {
  395. ANKI_ASSERT(newCount * sizeof(GpuVisibilityVisibleMeshletDesc) <= m_persistentMemory.m_stage2Meshlet.m_meshletsFailedHzb.getRange());
  396. stage2Mem.m_meshlet.m_meshletsFailedHzb =
  397. BufferView(m_persistentMemory.m_stage2Meshlet.m_meshletsFailedHzb).setRange(newCount * sizeof(GpuVisibilityVisibleMeshletDesc));
  398. }
  399. else
  400. {
  401. stage2Mem.m_meshlet.m_meshletsFailedHzb = allocateStructuredBuffer<GpuVisibilityVisibleMeshletDesc>(newCount);
  402. }
  403. }
  404. }
  405. // Stage 3 memory
  406. class Stage3Mem
  407. {
  408. public:
  409. BufferView m_indirectDrawArgs;
  410. BufferView m_dispatchMeshIndirectArgs;
  411. BufferView m_meshletInstances;
  412. } stage3Mem;
  413. if(bStoreMeshletsFailedHzb)
  414. {
  415. if(bHwMeshletRendering)
  416. {
  417. stage3Mem.m_dispatchMeshIndirectArgs = allocateStructuredBuffer<DispatchIndirectArgs>(bucketCount);
  418. }
  419. else
  420. {
  421. stage3Mem.m_indirectDrawArgs = allocateStructuredBuffer<DrawIndirectArgs>(bucketCount);
  422. }
  423. const U32 newCount = limits.m_maxVisibleMeshlets;
  424. if(in.m_limitMemory)
  425. {
  426. ANKI_ASSERT(newCount * sizeof(GpuSceneMeshletInstance) <= m_persistentMemory.m_stage3.m_meshletInstances.getRange());
  427. stage3Mem.m_meshletInstances =
  428. BufferView(m_persistentMemory.m_stage3.m_meshletInstances).setRange(newCount * sizeof(GpuSceneMeshletInstance));
  429. }
  430. else
  431. {
  432. stage3Mem.m_meshletInstances = allocateStructuredBuffer<GpuSceneMeshletInstance>(newCount);
  433. }
  434. }
  435. // Setup output
  436. out.m_legacy.m_perDrawDataBuffer = stage2Mem.m_legacy.m_perDraw;
  437. out.m_legacy.m_mdiDrawCountsBuffer = stage2Mem.m_legacy.m_mdiDrawCounts;
  438. out.m_legacy.m_drawIndexedIndirectArgsBuffer = stage2Mem.m_legacy.m_drawIndexedIndirectArgs;
  439. out.m_mesh.m_dispatchMeshIndirectArgsBuffer = stage2Mem.m_meshlet.m_dispatchMeshIndirectArgs;
  440. out.m_mesh.m_drawIndirectArgs = stage2Mem.m_meshlet.m_indirectDrawArgs;
  441. out.m_mesh.m_meshletInstancesBuffer = stage2Mem.m_meshlet.m_meshletInstances;
  442. out.m_visibleAaabbIndicesBuffer = stage1Mem.m_visibleAabbIndices;
  443. out.m_visiblesHashBuffer = stage1Mem.m_hash;
  444. if(bHwMeshletRendering)
  445. {
  446. out.m_mesh.m_firstMeshletBuffer = stage1Mem.m_meshletPrefixSums;
  447. }
  448. if(bLegacyRendering)
  449. {
  450. out.m_legacy.m_firstPerDrawBuffer = stage1Mem.m_renderablePrefixSums;
  451. }
  452. if(bStoreMeshletsFailedHzb)
  453. {
  454. out.m_stage1And2Mem.m_meshletsFailedHzb = stage2Mem.m_meshlet.m_meshletsFailedHzb;
  455. out.m_stage1And2Mem.m_counters = stage1Mem.m_counters;
  456. out.m_stage1And2Mem.m_meshletPrefixSums = stage1Mem.m_meshletPrefixSums;
  457. out.m_stage1And2Mem.m_gpuVisIndirectDispatchArgs = stage1Mem.m_gpuVisIndirectDispatchArgs;
  458. out.m_stage3Mem.m_indirectDrawArgs = stage3Mem.m_indirectDrawArgs;
  459. out.m_stage3Mem.m_dispatchMeshIndirectArgs = stage3Mem.m_dispatchMeshIndirectArgs;
  460. out.m_stage3Mem.m_meshletInstances = stage3Mem.m_meshletInstances;
  461. }
  462. // Use one buffer as a depedency. Doesn't matter which
  463. out.m_dependency =
  464. (in.m_limitMemory) ? m_persistentMemory.m_dep : rgraph.importBuffer(stage1Mem.m_gpuVisIndirectDispatchArgs, BufferUsageBit::kNone);
  465. // Zero some stuff
  466. const BufferHandle zeroMemDep = rgraph.importBuffer(stage1Mem.m_counters, BufferUsageBit::kNone);
  467. {
  468. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU vis zero: %s", in.m_passesName.cstr()));
  469. pass.newBufferDependency(zeroMemDep, BufferUsageBit::kUavCompute);
  470. pass.setWork([stage1Mem, stage2Mem, stage3Mem](RenderPassWorkContext& rpass) {
  471. ANKI_TRACE_SCOPED_EVENT(GpuVisZero);
  472. CommandBuffer& cmdb = *rpass.m_commandBuffer;
  473. constexpr Bool debugZeroing = false; // For debugging purposes zero everything
  474. #define ANKI_ZERO(buff, alwaysZero) \
  475. if((alwaysZero || debugZeroing) && buff.isValid()) \
  476. { \
  477. cmdb.pushDebugMarker(#buff, Vec3(1.0f, 1.0f, 1.0f)); \
  478. fillBuffer(cmdb, buff, 0); \
  479. cmdb.popDebugMarker(); \
  480. }
  481. #define ANKI_ZERO_PART(buff, alwaysZero, sizeToZero) \
  482. if((alwaysZero || debugZeroing) && buff.isValid()) \
  483. { \
  484. cmdb.pushDebugMarker(#buff, Vec3(1.0f, 1.0f, 1.0f)); \
  485. fillBuffer(cmdb, (debugZeroing) ? buff : BufferView(buff).setRange(sizeToZero), 0); \
  486. cmdb.popDebugMarker(); \
  487. }
  488. ANKI_ZERO(stage1Mem.m_counters, true)
  489. ANKI_ZERO(stage1Mem.m_visibleRenderables, false)
  490. ANKI_ZERO(stage1Mem.m_visibleMeshlets, false)
  491. ANKI_ZERO(stage1Mem.m_renderablePrefixSums, true)
  492. ANKI_ZERO(stage1Mem.m_meshletPrefixSums, true)
  493. ANKI_ZERO(stage1Mem.m_gpuVisIndirectDispatchArgs, false)
  494. ANKI_ZERO_PART(stage1Mem.m_visibleAabbIndices, true, sizeof(U32))
  495. ANKI_ZERO(stage1Mem.m_hash, true)
  496. ANKI_ZERO(stage2Mem.m_legacy.m_perDraw, false)
  497. ANKI_ZERO(stage2Mem.m_legacy.m_drawIndexedIndirectArgs, true)
  498. ANKI_ZERO(stage2Mem.m_legacy.m_mdiDrawCounts, true)
  499. ANKI_ZERO(stage2Mem.m_meshlet.m_indirectDrawArgs, true)
  500. ANKI_ZERO(stage2Mem.m_meshlet.m_dispatchMeshIndirectArgs, true)
  501. ANKI_ZERO(stage2Mem.m_meshlet.m_meshletInstances, false)
  502. ANKI_ZERO(stage2Mem.m_meshlet.m_meshletsFailedHzb, false)
  503. ANKI_ZERO(stage3Mem.m_indirectDrawArgs, true)
  504. ANKI_ZERO(stage3Mem.m_dispatchMeshIndirectArgs, true)
  505. ANKI_ZERO(stage3Mem.m_meshletInstances, false)
  506. #undef ANKI_ZERO
  507. });
  508. }
  509. // 1st stage
  510. {
  511. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU vis 1st stage: %s", in.m_passesName.cstr()));
  512. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
  513. pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavCompute);
  514. pass.newBufferDependency(zeroMemDep, BufferUsageBit::kUavCompute);
  515. if(frustumTestData && frustumTestData->m_hzbRt.isValid())
  516. {
  517. pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSrvCompute);
  518. }
  519. pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
  520. technique = in.m_technique, stage1Mem, bLegacyRendering, bMeshletRendering](RenderPassWorkContext& rpass) {
  521. ANKI_TRACE_SCOPED_EVENT(GpuVis1stStage);
  522. CommandBuffer& cmdb = *rpass.m_commandBuffer;
  523. const Bool gatherAabbIndices = stage1Mem.m_visibleAabbIndices.isValid();
  524. const Bool genHash = stage1Mem.m_hash.isValid();
  525. if(frustumTestData)
  526. {
  527. cmdb.bindShaderProgram(
  528. m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash][bMeshletRendering][bLegacyRendering].get());
  529. }
  530. else
  531. {
  532. cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash][bMeshletRendering][bLegacyRendering].get());
  533. }
  534. BufferView aabbsBuffer;
  535. U32 aabbCount = 0;
  536. switch(technique)
  537. {
  538. case RenderingTechnique::kGBuffer:
  539. aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferView();
  540. aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
  541. break;
  542. case RenderingTechnique::kDepth:
  543. aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferView();
  544. aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
  545. break;
  546. case RenderingTechnique::kForward:
  547. aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferView();
  548. aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
  549. break;
  550. default:
  551. ANKI_ASSERT(0);
  552. }
  553. cmdb.bindSrv(0, 0, aabbsBuffer);
  554. cmdb.bindSrv(1, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
  555. cmdb.bindSrv(2, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  556. cmdb.bindSrv(3, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
  557. cmdb.bindUav(0, 0, stage1Mem.m_counters);
  558. cmdb.bindUav(1, 0, (bLegacyRendering) ? stage1Mem.m_visibleRenderables : BufferView(getDummyGpuResources().m_buffer.get()));
  559. cmdb.bindUav(2, 0, (bMeshletRendering) ? stage1Mem.m_visibleMeshlets : BufferView(getDummyGpuResources().m_buffer.get()));
  560. cmdb.bindUav(3, 0, (bLegacyRendering) ? stage1Mem.m_renderablePrefixSums : BufferView(getDummyGpuResources().m_buffer.get()));
  561. cmdb.bindUav(4, 0, (bMeshletRendering) ? stage1Mem.m_meshletPrefixSums : BufferView(getDummyGpuResources().m_buffer.get()));
  562. cmdb.bindUav(5, 0, stage1Mem.m_gpuVisIndirectDispatchArgs);
  563. cmdb.bindUav(6, 0, m_outOfMemoryReadbackBuffer);
  564. if(gatherAabbIndices)
  565. {
  566. cmdb.bindUav(7, 0, stage1Mem.m_visibleAabbIndices);
  567. }
  568. if(genHash)
  569. {
  570. cmdb.bindUav(8, 0, stage1Mem.m_hash);
  571. }
  572. if(frustumTestData)
  573. {
  574. FrustumGpuVisibilityConsts* consts = allocateAndBindConstants<FrustumGpuVisibilityConsts>(cmdb, 0, 0);
  575. Array<Plane, 6> planes;
  576. extractClipPlanes(frustumTestData->m_viewProjMat, planes);
  577. for(U32 i = 0; i < 6; ++i)
  578. {
  579. consts->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz, planes[i].getOffset());
  580. }
  581. ANKI_ASSERT(kMaxLodCount == 3);
  582. consts->m_maxLodDistances[0] = lodDistances[0];
  583. consts->m_maxLodDistances[1] = lodDistances[1];
  584. consts->m_maxLodDistances[2] = kMaxF32;
  585. consts->m_maxLodDistances[3] = kMaxF32;
  586. consts->m_lodReferencePoint = lodReferencePoint;
  587. consts->m_viewProjectionMat = frustumTestData->m_viewProjMat;
  588. consts->m_finalRenderTargetSize = Vec2(frustumTestData->m_finalRenderTargetSize);
  589. if(frustumTestData->m_hzbRt.isValid())
  590. {
  591. rpass.bindSrv(4, 0, frustumTestData->m_hzbRt);
  592. cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
  593. }
  594. }
  595. else
  596. {
  597. DistanceGpuVisibilityConstants consts;
  598. consts.m_pointOfTest = distTestData->m_pointOfTest;
  599. consts.m_testRadius = distTestData->m_testRadius;
  600. consts.m_maxLodDistances[0] = lodDistances[0];
  601. consts.m_maxLodDistances[1] = lodDistances[1];
  602. consts.m_maxLodDistances[2] = kMaxF32;
  603. consts.m_maxLodDistances[3] = kMaxF32;
  604. consts.m_lodReferencePoint = lodReferencePoint;
  605. cmdb.setFastConstants(&consts, sizeof(consts));
  606. }
  607. dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
  608. });
  609. } // end 1st stage
  610. // 2nd stage
  611. {
  612. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU vis 2nd stage: %s", in.m_passesName.cstr()));
  613. pass.newBufferDependency(out.m_dependency, BufferUsageBit::kIndirectCompute | BufferUsageBit::kUavCompute);
  614. if(frustumTestData && frustumTestData->m_hzbRt.isValid())
  615. {
  616. pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSrvCompute);
  617. }
  618. pass.setWork([this, stage1Mem, stage2Mem, bLegacyRendering, bMeshletRendering, bHwMeshletRendering, out, frustumTestData,
  619. lodReferencePoint = in.m_lodReferencePoint, bStoreMeshletsFailedHzb](RenderPassWorkContext& rpass) {
  620. ANKI_TRACE_SCOPED_EVENT(GpuVis2ndStage);
  621. CommandBuffer& cmdb = *rpass.m_commandBuffer;
  622. if(bLegacyRendering)
  623. {
  624. cmdb.bindShaderProgram(m_gatherGrProg.get());
  625. cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
  626. cmdb.bindSrv(1, 0, GpuSceneArrays::ParticleEmitter2::getSingleton().getBufferViewSafe());
  627. cmdb.bindSrv(2, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  628. cmdb.bindSrv(3, 0, stage1Mem.m_visibleRenderables);
  629. cmdb.bindSrv(4, 0, stage1Mem.m_counters);
  630. cmdb.bindSrv(5, 0, stage1Mem.m_renderablePrefixSums);
  631. WeakArray<UVec2> firstDrawIndirectArgAndCount =
  632. allocateAndBindSrvStructuredBuffer<UVec2>(cmdb, 6, 0, out.m_legacy.m_bucketIndirectArgsRanges.getSize());
  633. for(U32 ibucket = 0; ibucket < out.m_legacy.m_bucketIndirectArgsRanges.getSize(); ++ibucket)
  634. {
  635. firstDrawIndirectArgAndCount[ibucket].x = out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_firstInstance;
  636. firstDrawIndirectArgAndCount[ibucket].y = out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_instanceCount;
  637. }
  638. cmdb.bindUav(0, 0, stage2Mem.m_legacy.m_perDraw);
  639. cmdb.bindUav(1, 0, stage2Mem.m_legacy.m_drawIndexedIndirectArgs);
  640. cmdb.bindUav(2, 0, stage2Mem.m_legacy.m_mdiDrawCounts);
  641. cmdb.bindUav(3, 0, m_outOfMemoryReadbackBuffer);
  642. cmdb.dispatchComputeIndirect(
  643. BufferView(stage1Mem.m_gpuVisIndirectDispatchArgs)
  644. .incrementOffset(sizeof(DispatchIndirectArgs) * U32(GpuVisibilityIndirectDispatches::k2ndStageLegacy))
  645. .setRange(sizeof(DispatchIndirectArgs)));
  646. }
  647. if(bMeshletRendering)
  648. {
  649. const Bool hzbTex = frustumTestData && frustumTestData->m_hzbRt.isValid();
  650. const Bool passthrough = frustumTestData == nullptr;
  651. const Bool meshShaders = GrManager::getSingleton().getDeviceCapabilities().m_meshShaders;
  652. cmdb.bindShaderProgram(m_meshletGrProgs[hzbTex][passthrough][meshShaders][bStoreMeshletsFailedHzb].get());
  653. cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
  654. cmdb.bindSrv(1, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  655. cmdb.bindSrv(2, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
  656. cmdb.bindSrv(3, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
  657. if(hzbTex)
  658. {
  659. rpass.bindSrv(4, 0, frustumTestData->m_hzbRt);
  660. cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
  661. }
  662. cmdb.bindUav(0, 0, stage1Mem.m_counters);
  663. cmdb.bindSrv(5, 0, stage1Mem.m_meshletPrefixSums);
  664. cmdb.bindSrv(6, 0, stage1Mem.m_visibleMeshlets);
  665. cmdb.bindUav(1, 0, (bHwMeshletRendering) ? stage2Mem.m_meshlet.m_dispatchMeshIndirectArgs : stage2Mem.m_meshlet.m_indirectDrawArgs);
  666. cmdb.bindUav(2, 0, stage2Mem.m_meshlet.m_meshletInstances);
  667. cmdb.bindUav(3, 0, m_outOfMemoryReadbackBuffer);
  668. if(bStoreMeshletsFailedHzb)
  669. {
  670. cmdb.bindUav(4, 0, stage2Mem.m_meshlet.m_meshletsFailedHzb);
  671. cmdb.bindUav(5, 0, stage1Mem.m_gpuVisIndirectDispatchArgs);
  672. }
  673. if(!passthrough)
  674. {
  675. GpuVisibilityMeshletConstants consts;
  676. consts.m_viewProjectionMatrix = frustumTestData->m_viewProjMat;
  677. consts.m_cameraPos = lodReferencePoint;
  678. consts.m_viewportSizef = Vec2(frustumTestData->m_finalRenderTargetSize);
  679. cmdb.setFastConstants(&consts, sizeof(consts));
  680. }
  681. cmdb.dispatchComputeIndirect(
  682. BufferView(stage1Mem.m_gpuVisIndirectDispatchArgs)
  683. .incrementOffset(sizeof(DispatchIndirectArgs) * U32(GpuVisibilityIndirectDispatches::k2ndStageMeshlets))
  684. .setRange(sizeof(DispatchIndirectArgs)));
  685. }
  686. });
  687. } // end 2nd stage
  688. }
  689. void GpuVisibility::populateRenderGraphStage3(FrustumGpuVisibilityInput& in, GpuVisibilityOutput& out)
  690. {
  691. if(RenderStateBucketContainer::getSingleton().getBucketsActiveUserCount(in.m_technique) == 0) [[unlikely]]
  692. {
  693. // Early exit
  694. out = {};
  695. return;
  696. }
  697. RenderGraphBuilder& rgraph = *in.m_rgraph;
  698. const GpuVisLimits limits = computeLimits(in.m_technique);
  699. const Bool bHwMeshletRendering = getRenderer().getMeshletRenderingType() == MeshletRenderingType::kMeshShaders && limits.m_maxVisibleMeshlets > 0;
  700. const Bool bSwMeshletRendering = getRenderer().getMeshletRenderingType() == MeshletRenderingType::kSoftware && limits.m_maxVisibleMeshlets > 0;
  701. const Bool bMeshletRendering = bHwMeshletRendering || bSwMeshletRendering;
  702. if(!bMeshletRendering)
  703. {
  704. return;
  705. }
  706. // Set the output
  707. if(bHwMeshletRendering)
  708. {
  709. out.m_mesh.m_dispatchMeshIndirectArgsBuffer = out.m_stage3Mem.m_dispatchMeshIndirectArgs;
  710. }
  711. else
  712. {
  713. out.m_mesh.m_drawIndirectArgs = out.m_stage3Mem.m_indirectDrawArgs;
  714. }
  715. out.m_mesh.m_meshletInstancesBuffer = out.m_stage3Mem.m_meshletInstances;
  716. // Create the pass
  717. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU vis 3rd stage: %s", in.m_passesName.cstr()));
  718. pass.newBufferDependency(out.m_dependency, BufferUsageBit::kIndirectCompute | BufferUsageBit::kUavCompute);
  719. pass.newBufferDependency(m_persistentMemory.m_dep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kUavCompute);
  720. pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSrvCompute);
  721. pass.setWork([this, hzbRt = *in.m_hzbRt, bHwMeshletRendering, stage1And2Mem = out.m_stage1And2Mem, stage3Mem = out.m_stage3Mem,
  722. in](RenderPassWorkContext& rpass) {
  723. ANKI_TRACE_SCOPED_EVENT(GpuVis3rdStage);
  724. CommandBuffer& cmdb = *rpass.m_commandBuffer;
  725. const Bool hzbTex = true;
  726. const Bool passthrough = false;
  727. const Bool bStoreMeshletsFailedHzb = false;
  728. cmdb.bindShaderProgram(m_meshletGrProgs[hzbTex][passthrough][bHwMeshletRendering][bStoreMeshletsFailedHzb].get());
  729. cmdb.bindSrv(0, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
  730. cmdb.bindSrv(1, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  731. cmdb.bindSrv(2, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
  732. cmdb.bindSrv(3, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
  733. rpass.bindSrv(4, 0, hzbRt);
  734. cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
  735. cmdb.bindUav(0, 0, stage1And2Mem.m_counters);
  736. cmdb.bindSrv(5, 0, stage1And2Mem.m_meshletPrefixSums);
  737. cmdb.bindSrv(6, 0, stage1And2Mem.m_meshletsFailedHzb);
  738. cmdb.bindUav(1, 0, (bHwMeshletRendering) ? stage3Mem.m_dispatchMeshIndirectArgs : stage3Mem.m_indirectDrawArgs);
  739. cmdb.bindUav(2, 0, stage3Mem.m_meshletInstances);
  740. cmdb.bindUav(3, 0, m_outOfMemoryReadbackBuffer);
  741. GpuVisibilityMeshletConstants consts;
  742. consts.m_viewProjectionMatrix = in.m_viewProjectionMatrix;
  743. consts.m_cameraPos = in.m_lodReferencePoint;
  744. consts.m_viewportSizef = Vec2(in.m_viewportSize);
  745. cmdb.setFastConstants(&consts, sizeof(consts));
  746. cmdb.dispatchComputeIndirect(BufferView(stage1And2Mem.m_gpuVisIndirectDispatchArgs)
  747. .incrementOffset(sizeof(DispatchIndirectArgs) * U32(GpuVisibilityIndirectDispatches::k3rdStageMeshlets))
  748. .setRange(sizeof(DispatchIndirectArgs)));
  749. });
  750. }
  751. Error GpuVisibilityNonRenderables::init()
  752. {
  753. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));
  754. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  755. {
  756. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  757. {
  758. for(MutatorValue cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
  759. {
  760. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin",
  761. {{"HZB_TEST", hzb}, {"OBJECT_TYPE", MutatorValue(type)}, {"CPU_FEEDBACK", cpuFeedback}}, m_prog,
  762. m_grProgs[hzb][type][cpuFeedback]));
  763. }
  764. }
  765. }
  766. return Error::kNone;
  767. }
  768. void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
  769. {
  770. ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
  771. RenderGraphBuilder& rgraph = *in.m_rgraph;
  772. U32 objCount = 0;
  773. switch(in.m_objectType)
  774. {
  775. case GpuSceneNonRenderableObjectType::kLight:
  776. objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
  777. break;
  778. case GpuSceneNonRenderableObjectType::kDecal:
  779. objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
  780. break;
  781. case GpuSceneNonRenderableObjectType::kFogDensityVolume:
  782. objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
  783. break;
  784. case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
  785. objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
  786. break;
  787. case GpuSceneNonRenderableObjectType::kReflectionProbe:
  788. objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
  789. break;
  790. default:
  791. ANKI_ASSERT(0);
  792. }
  793. if(objCount == 0)
  794. {
  795. WeakArray<U32> count;
  796. out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(1, count);
  797. count[0] = 0;
  798. out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);
  799. return;
  800. }
  801. if(in.m_cpuFeedbackBuffer.isValid())
  802. {
  803. ANKI_ASSERT(in.m_cpuFeedbackBuffer.getRange() == sizeof(U32) * (objCount * 2 + 1));
  804. }
  805. const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
  806. if(firstRunInFrame)
  807. {
  808. // 1st run in this frame, do some bookkeeping
  809. m_lastFrameIdx = getRenderer().getFrameCount();
  810. m_counterBufferOffset = 0;
  811. m_counterBufferZeroingHandle = {};
  812. }
  813. U32 counterBufferElementSize;
  814. if(GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
  815. {
  816. counterBufferElementSize = sizeof(GpuVisibilityNonRenderablesCounters);
  817. }
  818. else
  819. {
  820. counterBufferElementSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment,
  821. U32(sizeof(GpuVisibilityNonRenderablesCounters)));
  822. }
  823. if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
  824. {
  825. // Counter buffer not created or not big enough, create a new one
  826. BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
  827. buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2 : counterBufferElementSize * kInitialCounterArraySize;
  828. buffInit.m_usage = BufferUsageBit::kUavCompute | BufferUsageBit::kSrvCompute | BufferUsageBit::kCopyDestination;
  829. m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
  830. m_counterBufferZeroingHandle = rgraph.importBuffer(BufferView(m_counterBuffer.get()), buffInit.m_usage);
  831. NonGraphicsRenderPass& pass =
  832. rgraph.newNonGraphicsRenderPass(generateTempPassName("Non-renderables vis: Clear counter buff: %s", in.m_passesName.cstr()));
  833. pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kUavCompute);
  834. pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
  835. ANKI_TRACE_SCOPED_EVENT(GpuVisNonRenderablesSetup);
  836. fillBuffer(*rgraph.m_commandBuffer, BufferView(counterBuffer.get()), 0);
  837. });
  838. m_counterBufferOffset = 0;
  839. }
  840. else if(!firstRunInFrame)
  841. {
  842. m_counterBufferOffset += counterBufferElementSize;
  843. }
  844. // Allocate memory for the result
  845. out.m_visiblesBuffer = allocateStructuredBuffer<U32>(objCount + 1);
  846. out.m_visiblesBufferHandle = rgraph.importBuffer(out.m_visiblesBuffer, BufferUsageBit::kNone);
  847. // Create the renderpass
  848. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Non-renderables vis: %s", in.m_passesName.cstr()));
  849. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
  850. pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kUavCompute);
  851. if(in.m_hzbRt)
  852. {
  853. pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSrvCompute);
  854. }
  855. if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
  856. {
  857. pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kSrvCompute | BufferUsageBit::kUavCompute);
  858. }
  859. pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
  860. visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
  861. objCount, hzbRt = (in.m_hzbRt) ? *in.m_hzbRt : RenderTargetHandle()](RenderPassWorkContext& rgraph) {
  862. ANKI_TRACE_SCOPED_EVENT(GpuVisNonRenderables);
  863. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  864. const Bool needsFeedback = feedbackBuffer.isValid();
  865. const Bool hasHzb = hzbRt.isValid();
  866. cmdb.bindShaderProgram(m_grProgs[hasHzb][objType][needsFeedback].get());
  867. BufferView objBuffer;
  868. switch(objType)
  869. {
  870. case GpuSceneNonRenderableObjectType::kLight:
  871. objBuffer = GpuSceneArrays::Light::getSingleton().getBufferView();
  872. break;
  873. case GpuSceneNonRenderableObjectType::kDecal:
  874. objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferView();
  875. break;
  876. case GpuSceneNonRenderableObjectType::kFogDensityVolume:
  877. objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferView();
  878. break;
  879. case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
  880. objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferView();
  881. break;
  882. case GpuSceneNonRenderableObjectType::kReflectionProbe:
  883. objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferView();
  884. break;
  885. default:
  886. ANKI_ASSERT(0);
  887. }
  888. cmdb.bindSrv(0, 0, objBuffer);
  889. GpuVisibilityNonRenderableConstants consts;
  890. Array<Plane, 6> planes;
  891. extractClipPlanes(viewProjectionMat, planes);
  892. for(U32 i = 0; i < 6; ++i)
  893. {
  894. consts.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz, planes[i].getOffset());
  895. }
  896. consts.m_viewProjectionMat = viewProjectionMat;
  897. cmdb.setFastConstants(&consts, sizeof(consts));
  898. rgraph.bindUav(0, 0, visibleIndicesBuffHandle);
  899. cmdb.bindUav(1, 0, BufferView(counterBuffer.get(), counterBufferOffset, sizeof(GpuVisibilityNonRenderablesCounters)));
  900. if(needsFeedback)
  901. {
  902. cmdb.bindUav(2, 0, feedbackBuffer);
  903. }
  904. if(hasHzb)
  905. {
  906. rgraph.bindSrv(1, 0, hzbRt);
  907. cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
  908. }
  909. dispatchPPCompute(cmdb, 64, 1, objCount, 1);
  910. });
  911. }
  912. Error GpuVisibilityAccelerationStructures::init()
  913. {
  914. ANKI_CHECK(
  915. loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg, m_visibilityGrProg, "Visibility"));
  916. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", {}, m_visibilityProg,
  917. m_zeroRemainingInstancesGrProg, "ZeroRemainingInstances"));
  918. BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
  919. inf.m_size = sizeof(U32) * 2;
  920. inf.m_usage = BufferUsageBit::kUavCompute | BufferUsageBit::kSrvCompute | BufferUsageBit::kCopyDestination;
  921. m_counterBuffer = GrManager::getSingleton().newBuffer(inf);
  922. zeroBuffer(m_counterBuffer.get());
  923. return Error::kNone;
  924. }
  925. void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
  926. GpuVisibilityAccelerationStructuresOutput& out)
  927. {
  928. in.validate();
  929. RenderGraphBuilder& rgraph = *in.m_rgraph;
  930. #if ANKI_ASSERTIONS_ENABLED
  931. ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
  932. m_lastFrameIdx = getRenderer().getFrameCount();
  933. #endif
  934. const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
  935. if(aabbCount == 0) [[unlikely]]
  936. {
  937. out.m_instancesBuffer = {};
  938. WeakArray<U32> arr2;
  939. out.m_renderablesBuffer = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer<U32>(1, arr2);
  940. arr2[0] = 0;
  941. WeakArray<DispatchIndirectArgs> arr3;
  942. out.m_buildSbtIndirectArgsBuffer = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(1, arr3);
  943. zeroMemory(arr3[0]);
  944. out.m_dependency = rgraph.importBuffer(out.m_renderablesBuffer, BufferUsageBit::kNone);
  945. return;
  946. }
  947. // Allocate the transient buffers
  948. out.m_instancesBuffer = allocateStructuredBuffer<AccelerationStructureInstance>(aabbCount);
  949. out.m_dependency = rgraph.importBuffer(out.m_instancesBuffer, BufferUsageBit::kNone);
  950. out.m_renderablesBuffer = allocateStructuredBuffer<LodAndRenderableIndex>(aabbCount + 1);
  951. const BufferView zeroInstancesAndSbtBuildDispatchArgsBuff = allocateStructuredBuffer<DispatchIndirectArgs>(2);
  952. out.m_buildSbtIndirectArgsBuffer = BufferView(zeroInstancesAndSbtBuildDispatchArgsBuff).incrementOffset(sizeof(DispatchIndirectArgs));
  953. // Create vis pass
  954. {
  955. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis: %s", in.m_passesName.cstr()));
  956. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
  957. pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavCompute);
  958. pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
  959. testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, visRenderablesBuff = out.m_renderablesBuffer,
  960. zeroInstancesAndSbtBuildDispatchArgsBuff](RenderPassWorkContext& rgraph) {
  961. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityAccelStruct);
  962. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  963. cmdb.bindShaderProgram(m_visibilityGrProg.get());
  964. GpuVisibilityAccelerationStructuresConstants consts;
  965. Array<Plane, 6> planes;
  966. extractClipPlanes(viewProjMat, planes);
  967. for(U32 i = 0; i < 6; ++i)
  968. {
  969. consts.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz, planes[i].getOffset());
  970. }
  971. consts.m_pointOfTest = pointOfTest;
  972. consts.m_testRadius = testRadius;
  973. ANKI_ASSERT(kMaxLodCount == 3);
  974. consts.m_maxLodDistances[0] = lodDistances[0];
  975. consts.m_maxLodDistances[1] = lodDistances[1];
  976. consts.m_maxLodDistances[2] = kMaxF32;
  977. consts.m_maxLodDistances[3] = kMaxF32;
  978. cmdb.setFastConstants(&consts, sizeof(consts));
  979. cmdb.bindSrv(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferView());
  980. cmdb.bindSrv(1, 0, GpuSceneArrays::Renderable::getSingleton().getBufferView());
  981. cmdb.bindSrv(2, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
  982. cmdb.bindSrv(3, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
  983. cmdb.bindUav(0, 0, instancesBuff);
  984. cmdb.bindUav(1, 0, visRenderablesBuff);
  985. cmdb.bindUav(2, 0, BufferView(m_counterBuffer.get()));
  986. cmdb.bindUav(3, 0, zeroInstancesAndSbtBuildDispatchArgsBuff);
  987. const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
  988. dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
  989. });
  990. }
  991. // Zero remaining instances
  992. {
  993. NonGraphicsRenderPass& pass =
  994. rgraph.newNonGraphicsRenderPass(generateTempPassName("Accel vis zero remaining instances: %s", in.m_passesName.cstr()));
  995. pass.newBufferDependency(out.m_dependency, BufferUsageBit::kUavCompute | BufferUsageBit::kIndirectCompute);
  996. pass.setWork([this, zeroInstancesAndSbtBuildDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
  997. visRenderablesBuff = out.m_renderablesBuffer](RenderPassWorkContext& rgraph) {
  998. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityAccelStructZero);
  999. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  1000. cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());
  1001. cmdb.bindSrv(0, 0, visRenderablesBuff);
  1002. cmdb.bindUav(0, 0, instancesBuff);
  1003. cmdb.dispatchComputeIndirect(BufferView(zeroInstancesAndSbtBuildDispatchArgsBuff).setRange(sizeof(DispatchIndirectArgs)));
  1004. });
  1005. }
  1006. }
  1007. Error GpuVisibilityLocalLights::init()
  1008. {
  1009. const CString fname = "ShaderBinaries/GpuVisibilityLocalLights.ankiprogbin";
  1010. ANKI_CHECK(loadShaderProgram(fname, {}, m_visibilityProg, m_setupGrProg, "Setup"));
  1011. ANKI_CHECK(loadShaderProgram(fname, {}, m_visibilityProg, m_countGrProg, "Count"));
  1012. ANKI_CHECK(loadShaderProgram(fname, {}, m_visibilityProg, m_prefixSumGrProg, "PrefixSum"));
  1013. ANKI_CHECK(loadShaderProgram(fname, {}, m_visibilityProg, m_fillGrProg, "Fill"));
  1014. return Error::kNone;
  1015. }
  1016. void GpuVisibilityLocalLights::populateRenderGraph(GpuVisibilityLocalLightsInput& in, GpuVisibilityLocalLightsOutput& out)
  1017. {
  1018. RenderGraphBuilder& rgraph = *in.m_rgraph;
  1019. // Compute the bounds
  1020. const Vec3 newCamPos = in.m_cameraPosition + in.m_lookDirection * kForwardBias;
  1021. const Vec3 gridSize = Vec3(in.m_cellCounts) * in.m_cellSize;
  1022. out.m_lightGridMin = newCamPos - gridSize / 2.0f;
  1023. out.m_lightGridMax = out.m_lightGridMin + gridSize;
  1024. const U32 cellCount = in.m_cellCounts.x * in.m_cellCounts.y * in.m_cellCounts.z;
  1025. const BufferView lightIndexCountsPerCellBuff = allocateStructuredBuffer<U32>(cellCount);
  1026. const BufferView lightIndexOffsetsPerCellBuff = allocateStructuredBuffer<U32>(cellCount);
  1027. const BufferView lightIndexCountBuff = allocateStructuredBuffer<U32>(1);
  1028. const BufferView lightIndexListBuff = allocateStructuredBuffer<U32>(in.m_lightIndexListSize);
  1029. const BufferView threadgroupCountBuff = allocateStructuredBuffer<U32>(1);
  1030. constexpr U32 kPrefixSumThreadCount = 1024; // Common for most GPUs
  1031. constexpr U32 kPrefixSumElementCountPerThreadgroup = kPrefixSumThreadCount * 2;
  1032. const BufferView groupWidePrefixSumsBuff =
  1033. allocateStructuredBuffer<U32>((cellCount + kPrefixSumElementCountPerThreadgroup - 1) / kPrefixSumElementCountPerThreadgroup);
  1034. const BufferHandle dep = rgraph.importBuffer(lightIndexCountBuff, BufferUsageBit::kNone);
  1035. out.m_dependency = dep;
  1036. out.m_lightIndexListBuffer = lightIndexListBuff;
  1037. out.m_lightIndexCountsPerCellBuffer = lightIndexCountsPerCellBuff;
  1038. out.m_lightIndexOffsetsPerCellBuffer = lightIndexOffsetsPerCellBuff;
  1039. GpuVisibilityLocalLightsConsts consts;
  1040. consts.m_cellSize = in.m_cellSize;
  1041. consts.m_maxLightIndices = in.m_lightIndexListSize;
  1042. consts.m_gridVolumeMin = out.m_lightGridMin;
  1043. consts.m_gridVolumeSize = gridSize;
  1044. consts.m_cellCounts = in.m_cellCounts;
  1045. consts.m_cellCount = cellCount;
  1046. // Setup
  1047. {
  1048. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU local light vis setup: %s", in.m_passesName.cstr()));
  1049. pass.newBufferDependency(dep, BufferUsageBit::kUavCompute);
  1050. pass.setWork([this, lightIndexCountsPerCellBuff, lightIndexCountBuff, cellCount, threadgroupCountBuff,
  1051. groupWidePrefixSumsBuff](RenderPassWorkContext& rgraph) {
  1052. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityLocalLightsSetup);
  1053. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  1054. cmdb.bindShaderProgram(m_setupGrProg.get());
  1055. cmdb.bindUav(0, 0, lightIndexCountsPerCellBuff);
  1056. cmdb.bindUav(1, 0, lightIndexCountBuff);
  1057. cmdb.bindUav(2, 0, groupWidePrefixSumsBuff);
  1058. cmdb.bindUav(3, 0, threadgroupCountBuff);
  1059. dispatchPPCompute(cmdb, 64, 1, cellCount, 1);
  1060. });
  1061. }
  1062. // Count
  1063. const GpuSceneArrays::Light& lights = GpuSceneArrays::Light::getSingleton();
  1064. if(lights.getElementCount())
  1065. {
  1066. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU local light vis count: %s", in.m_passesName.cstr()));
  1067. pass.newBufferDependency(dep, BufferUsageBit::kUavCompute);
  1068. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
  1069. pass.setWork([this, lightIndexCountsPerCellBuff, lightIndexCountBuff, consts, threadgroupCountBuff,
  1070. groupWidePrefixSumsBuff](RenderPassWorkContext& rgraph) {
  1071. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityLocalLightsCount);
  1072. const GpuSceneArrays::Light& lights = GpuSceneArrays::Light::getSingleton();
  1073. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  1074. cmdb.bindShaderProgram(m_countGrProg.get());
  1075. cmdb.bindSrv(0, 0, lights.getBufferView());
  1076. cmdb.bindUav(0, 0, lightIndexCountsPerCellBuff);
  1077. cmdb.bindUav(1, 0, lightIndexCountBuff);
  1078. cmdb.bindUav(2, 0, groupWidePrefixSumsBuff);
  1079. cmdb.bindUav(3, 0, threadgroupCountBuff);
  1080. cmdb.setFastConstants(&consts, sizeof(consts));
  1081. dispatchPPCompute(cmdb, 64, 1, consts.m_cellCount, 1);
  1082. });
  1083. }
  1084. // PrefixSum
  1085. {
  1086. NonGraphicsRenderPass& pass =
  1087. rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU local light vis prefix sum: %s", in.m_passesName.cstr()));
  1088. pass.newBufferDependency(dep, BufferUsageBit::kUavCompute);
  1089. pass.setWork([this, lightIndexCountsPerCellBuff, lightIndexOffsetsPerCellBuff, lightIndexCountBuff, consts,
  1090. groupWidePrefixSumsBuff](RenderPassWorkContext& rgraph) {
  1091. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityLocalLightsPrefixSum);
  1092. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  1093. cmdb.bindShaderProgram(m_prefixSumGrProg.get());
  1094. cmdb.bindSrv(0, 0, groupWidePrefixSumsBuff);
  1095. cmdb.bindUav(0, 0, lightIndexCountsPerCellBuff);
  1096. cmdb.bindUav(1, 0, lightIndexOffsetsPerCellBuff);
  1097. cmdb.bindUav(2, 0, lightIndexCountBuff);
  1098. cmdb.setFastConstants(&consts, sizeof(consts));
  1099. cmdb.dispatchCompute((consts.m_cellCount + kPrefixSumElementCountPerThreadgroup - 1) / kPrefixSumElementCountPerThreadgroup, 1, 1);
  1100. });
  1101. }
  1102. // Fill
  1103. if(lights.getElementCount())
  1104. {
  1105. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("GPU local light vis fill: %s", in.m_passesName.cstr()));
  1106. pass.newBufferDependency(dep, BufferUsageBit::kUavCompute);
  1107. pass.setWork([this, lightIndexCountsPerCellBuff, lightIndexOffsetsPerCellBuff, lightIndexCountBuff, consts,
  1108. lightIndexListBuff](RenderPassWorkContext& rgraph) {
  1109. ANKI_TRACE_SCOPED_EVENT(GpuVisibilityLocalLightsPrefixSum);
  1110. const GpuSceneArrays::Light& lights = GpuSceneArrays::Light::getSingleton();
  1111. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  1112. cmdb.bindShaderProgram(m_fillGrProg.get());
  1113. cmdb.bindSrv(0, 0, lights.getBufferView());
  1114. cmdb.bindSrv(1, 0, lightIndexOffsetsPerCellBuff);
  1115. cmdb.bindUav(0, 0, lightIndexCountBuff);
  1116. cmdb.bindUav(1, 0, lightIndexCountsPerCellBuff);
  1117. cmdb.bindUav(2, 0, lightIndexListBuff);
  1118. cmdb.setFastConstants(&consts, sizeof(consts));
  1119. dispatchPPCompute(cmdb, 64, 1, consts.m_cellCount, 1);
  1120. });
  1121. }
  1122. }
  1123. } // end namespace anki