// GpuVisibility.cpp
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/Utils/GpuVisibility.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Scene/RenderStateBucket.h>
  8. #include <AnKi/Scene/GpuSceneArray.h>
  9. #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
  10. #include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
  11. #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
  12. #include <AnKi/Collision/Functions.h>
  13. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  14. #include <AnKi/Core/StatsSet.h>
  15. namespace anki {
  16. Error GpuVisibility::init()
  17. {
  18. for(MutatorValue hzb = 0; hzb < 2; ++hzb)
  19. {
  20. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  21. {
  22. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  23. {
  24. ANKI_CHECK(loadShaderProgram(
  25. "ShaderBinaries/GpuVisibility.ankiprogbin",
  26. Array<SubMutation, 4>{{{"HZB_TEST", hzb}, {"DISTANCE_TEST", 0}, {"GATHER_AABBS", gatherAabbs}, {"HASH_VISIBLES", genHash}}},
  27. m_prog, m_frustumGrProgs[hzb][gatherAabbs][genHash]));
  28. }
  29. }
  30. }
  31. for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
  32. {
  33. for(MutatorValue genHash = 0; genHash < 2; ++genHash)
  34. {
  35. ANKI_CHECK(loadShaderProgram(
  36. "ShaderBinaries/GpuVisibility.ankiprogbin",
  37. Array<SubMutation, 4>{{{"HZB_TEST", 0}, {"DISTANCE_TEST", 1}, {"GATHER_AABBS", gatherAabbs}, {"HASH_VISIBLES", genHash}}}, m_prog,
  38. m_distGrProgs[gatherAabbs][genHash]));
  39. }
  40. }
  41. return Error::kNone;
  42. }
// Builds the GPU-driven visibility pass for renderables: culls AABBs on the GPU (either
// frustum+optional-HZB or distance based), emits per-bucket indirect draw args, and
// optionally gathers visible AABB indices and a hash of the visible set.
// `distanceBased` selects which test data is read from `in` (it must actually be a
// DistanceGpuVisibilityInput or FrustumGpuVisibilityInput respectively). Results and
// render-graph handles are written to `out`.
void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out)
{
	ANKI_ASSERT(in.m_lodReferencePoint.x() != kMaxF32);

	// Per-frame payloads captured by the pass lambda; allocated from the frame pool so
	// they outlive this function until the render graph executes.
	class DistanceTestData
	{
	public:
		Vec3 m_pointOfTest;
		F32 m_testRadius;
	};

	class FrustumTestData
	{
	public:
		RenderTargetHandle m_hzbRt;
		Mat4 m_viewProjMat;
	};

	FrustumTestData* frustumTestData = nullptr;
	DistanceTestData* distTestData = nullptr;
	if(distanceBased)
	{
		distTestData = newInstance<DistanceTestData>(getRenderer().getFrameMemoryPool());
		const DistanceGpuVisibilityInput& din = static_cast<DistanceGpuVisibilityInput&>(in);
		distTestData->m_pointOfTest = din.m_pointOfTest;
		distTestData->m_testRadius = din.m_testRadius;
	}
	else
	{
		frustumTestData = newInstance<FrustumTestData>(getRenderer().getFrameMemoryPool());
		const FrustumGpuVisibilityInput& fin = static_cast<FrustumGpuVisibilityInput&>(in);
		frustumTestData->m_viewProjMat = fin.m_viewProjectionMatrix;
	}

	// Number of bounding volumes to test, per rendering technique.
	U32 aabbCount = 0;
	switch(in.m_technique)
	{
	case RenderingTechnique::kGBuffer:
		aabbCount = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getElementCount();
		break;
	case RenderingTechnique::kDepth:
		aabbCount = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getElementCount();
		break;
	case RenderingTechnique::kForward:
		aabbCount = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	if(aabbCount == 0) [[unlikely]]
	{
		// Early exit: nothing to cull. Still hand out valid (dummy, 1-element) buffers and a
		// zeroed draw count so downstream consumers don't need a special case.
		// NOTE(review): this path sizes the buffer with sizeof(GpuSceneRenderable) while the
		// main path below uses sizeof(GpuSceneRenderableVertex) — likely harmless for a dummy
		// allocation, but confirm which element type is intended.
		out.m_instanceRateRenderablesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(1 * sizeof(GpuSceneRenderable));
		out.m_drawIndexedIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(1 * sizeof(DrawIndexedIndirectArgs));

		U32* atomics;
		out.m_mdiDrawCountsBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(1, atomics);
		atomics[0] = 0;
		out.m_someBufferHandle = in.m_rgraph->importBuffer(BufferUsageBit::kNone, out.m_mdiDrawCountsBuffer);

		if(in.m_gatherAabbIndices)
		{
			// First U32 is the visible count; zero it.
			U32* atomic;
			out.m_visibleAaabbIndicesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(1, atomic);
			atomic[0] = 0;
		}

		return;
	}

	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);

	// Allocate memory for the indirect commands. Worst case: every AABB visible.
	out.m_drawIndexedIndirectArgsBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
	out.m_instanceRateRenderablesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(GpuSceneRenderableVertex));

	// Allocate memory for AABB indices (+1 for the leading count element).
	if(in.m_gatherAabbIndices)
	{
		out.m_visibleAaabbIndicesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate((aabbCount + 1) * sizeof(U32));
	}

	// Allocate memory for counters. Layout: [optional hash][per-bucket MDI draw counts],
	// each sub-range aligned to the UAV bind-offset requirement so it can be bound separately.
	PtrSize counterMemory = 0;
	if(in.m_hashVisibles)
	{
		counterMemory += sizeof(GpuVisibilityHash);
		alignRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_uavBufferBindOffsetAlignment, counterMemory);
	}

	const PtrSize mdiBufferOffset = counterMemory;
	const PtrSize mdiBufferSize = sizeof(U32) * bucketCount;
	counterMemory += mdiBufferSize;
	alignRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_uavBufferBindOffsetAlignment, counterMemory);

	const BufferOffsetRange counterBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(counterMemory);
	const BufferHandle counterBufferHandle = in.m_rgraph->importBuffer(BufferUsageBit::kNone, counterBuffer);
	out.m_someBufferHandle = counterBufferHandle;

	if(in.m_hashVisibles)
	{
		out.m_visiblesHashBuffer = {counterBuffer.m_buffer, counterBuffer.m_offset, sizeof(GpuVisibilityHash)};
	}

	// Zero some stuff: the counters/hash, and the leading count of the AABB-index buffer.
	{
		ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass("GPU visibility: Zero stuff");
		pass.newBufferDependency(counterBufferHandle, BufferUsageBit::kTransferDestination);

		pass.setWork([counterBuffer, visibleAaabbIndicesBuffer = out.m_visibleAaabbIndicesBuffer](RenderPassWorkContext& rpass) {
			rpass.m_commandBuffer->fillBuffer(counterBuffer.m_buffer, counterBuffer.m_offset, counterBuffer.m_range, 0);

			if(visibleAaabbIndicesBuffer.m_buffer)
			{
				// Only the first U32 (the count) needs zeroing; indices are written by the shader.
				rpass.m_commandBuffer->fillBuffer(visibleAaabbIndicesBuffer.m_buffer, visibleAaabbIndicesBuffer.m_offset, sizeof(U32), 0);
			}
		});
	}

	// Set the MDI count buffer (view into the counter allocation).
	out.m_mdiDrawCountsBuffer = {counterBuffer.m_buffer, counterBuffer.m_offset + mdiBufferOffset, mdiBufferSize};

	// Create the renderpass
	ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(in.m_passesName);

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(counterBufferHandle, BufferUsageBit::kUavComputeWrite);

	if(!distanceBased && static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt)
	{
		frustumTestData->m_hzbRt = *static_cast<FrustumGpuVisibilityInput&>(in).m_hzbRt;
		pass.newTextureDependency(frustumTestData->m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	// Everything the lambda needs is captured by value (or points into the frame pool),
	// since it runs later during render-graph execution.
	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
				  technique = in.m_technique, mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer,
				  instanceRateRenderables = out.m_instanceRateRenderablesBuffer, indirectArgs = out.m_drawIndexedIndirectArgsBuffer, aabbCount,
				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer, hashBuffer = out.m_visiblesHashBuffer](RenderPassWorkContext& rpass) {
		CommandBuffer& cmdb = *rpass.m_commandBuffer;

		// Feature flags are encoded by whether the optional buffers were allocated.
		const Bool gatherAabbIndices = visibleAabbsBuffer.m_buffer != nullptr;
		const Bool genHash = hashBuffer.m_buffer != nullptr;

		if(frustumTestData)
		{
			cmdb.bindShaderProgram(m_frustumGrProgs[frustumTestData->m_hzbRt.isValid()][gatherAabbIndices][genHash].get());
		}
		else
		{
			cmdb.bindShaderProgram(m_distGrProgs[gatherAabbIndices][genHash].get());
		}

		BufferOffsetRange aabbsBuffer;
		switch(technique)
		{
		case RenderingTechnique::kGBuffer:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeGBuffer::getSingleton().getBufferOffsetRange();
			break;
		case RenderingTechnique::kDepth:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeDepth::getSingleton().getBufferOffsetRange();
			break;
		case RenderingTechnique::kForward:
			aabbsBuffer = GpuSceneArrays::RenderableBoundingVolumeForward::getSingleton().getBufferOffsetRange();
			break;
		default:
			ANKI_ASSERT(0);
		}

		cmdb.bindUavBuffer(0, 0, aabbsBuffer);
		cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
		cmdb.bindUavBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
		cmdb.bindUavBuffer(0, 3, instanceRateRenderables);
		cmdb.bindUavBuffer(0, 4, indirectArgs);

		// Build the per-bucket prefix offsets (where each bucket's draws start) in a
		// transient UAV so the shader can scatter indirect args per render-state bucket.
		U32* offsets = allocateAndBindUav<U32>(cmdb, 0, 5, RenderStateBucketContainer::getSingleton().getBucketCount(technique));
		U32 bucketCount = 0;
		U32 userCount = 0;
		RenderStateBucketContainer::getSingleton().iterateBuckets(technique, [&](const RenderStateInfo&, U32 userCount_) {
			offsets[bucketCount] = userCount;
			userCount += userCount_;
			++bucketCount;
		});
		ANKI_ASSERT(userCount == RenderStateBucketContainer::getSingleton().getBucketsItemCount(technique));

		cmdb.bindUavBuffer(0, 6, mdiDrawCountsBuffer);

		if(frustumTestData)
		{
			FrustumGpuVisibilityConstants* unis = allocateAndBindConstants<FrustumGpuVisibilityConstants>(cmdb, 0, 7);

			Array<Plane, 6> planes;
			extractClipPlanes(frustumTestData->m_viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			// Only 2 real LOD cut-offs; the rest are "infinite" sentinels.
			ANKI_ASSERT(kMaxLodCount == 3);
			unis->m_maxLodDistances[0] = lodDistances[0];
			unis->m_maxLodDistances[1] = lodDistances[1];
			unis->m_maxLodDistances[2] = kMaxF32;
			unis->m_maxLodDistances[3] = kMaxF32;

			unis->m_lodReferencePoint = lodReferencePoint;
			unis->m_viewProjectionMat = frustumTestData->m_viewProjMat;

			if(frustumTestData->m_hzbRt.isValid())
			{
				rpass.bindColorTexture(0, 8, frustumTestData->m_hzbRt);
				cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
			}
		}
		else
		{
			// Distance test uses push constants instead of a constant buffer.
			DistanceGpuVisibilityConstants unis;
			unis.m_pointOfTest = distTestData->m_pointOfTest;
			unis.m_testRadius = distTestData->m_testRadius;
			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;
			unis.m_lodReferencePoint = lodReferencePoint;
			cmdb.setPushConstants(&unis, sizeof(unis));
		}

		if(gatherAabbIndices)
		{
			cmdb.bindUavBuffer(0, 12, visibleAabbsBuffer);
		}

		if(genHash)
		{
			cmdb.bindUavBuffer(0, 13, hashBuffer);
		}

		// One thread per AABB, 64-wide groups.
		dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
	});
}
  245. Error GpuVisibilityNonRenderables::init()
  246. {
  247. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));
  248. for(U32 hzb = 0; hzb < 2; ++hzb)
  249. {
  250. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  251. {
  252. for(U32 cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
  253. {
  254. ShaderProgramResourceVariantInitInfo variantInit(m_prog);
  255. variantInit.addMutation("HZB_TEST", hzb);
  256. variantInit.addMutation("OBJECT_TYPE", U32(type));
  257. variantInit.addMutation("CPU_FEEDBACK", cpuFeedback);
  258. const ShaderProgramResourceVariant* variant;
  259. m_prog->getOrCreateVariant(variantInit, variant);
  260. if(variant)
  261. {
  262. m_grProgs[hzb][type][cpuFeedback].reset(&variant->getProgram());
  263. }
  264. else
  265. {
  266. m_grProgs[hzb][type][cpuFeedback].reset(nullptr);
  267. }
  268. }
  269. }
  270. }
  271. return Error::kNone;
  272. }
// GPU visibility for non-renderable scene objects (lights, decals, fog volumes, probes).
// Frustum-culls `in.m_objectType` objects against `in.m_viewProjectionMat` and writes
// [count, idx0, idx1, ...] to out.m_visiblesBuffer. Optionally mirrors results to a CPU
// feedback buffer. Maintains a persistent, growable counter buffer that hands out a
// fresh counter slot to each dispatch within the same frame.
void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
{
	ANKI_ASSERT(in.m_viewProjectionMat != Mat4::getZero());
	RenderGraphDescription& rgraph = *in.m_rgraph;

	// Object count depends on the requested type.
	U32 objCount = 0;
	switch(in.m_objectType)
	{
	case GpuSceneNonRenderableObjectType::kLight:
		objCount = GpuSceneArrays::Light::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kDecal:
		objCount = GpuSceneArrays::Decal::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
		objCount = GpuSceneArrays::FogDensityVolume::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
		objCount = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementCount();
		break;
	case GpuSceneNonRenderableObjectType::kReflectionProbe:
		objCount = GpuSceneArrays::ReflectionProbe::getSingleton().getElementCount();
		break;
	default:
		ANKI_ASSERT(0);
	}

	if(objCount == 0)
	{
		// No objects: emit a valid buffer holding just a zero count and skip the pass.
		U32* count;
		out.m_visiblesBuffer = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), count);
		*count = 0;
		out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);

		return;
	}

	if(in.m_cpuFeedbackBuffer.m_buffer)
	{
		// Feedback layout expected by the shader: 1 count + 2 U32s per object.
		ANKI_ASSERT(in.m_cpuFeedbackBuffer.m_range == sizeof(U32) * (objCount * 2 + 1));
	}

	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
	if(firstRunInFrame)
	{
		// 1st run in this frame, do some bookkeeping
		m_lastFrameIdx = getRenderer().getFrameCount();
		m_counterBufferOffset = 0;
		m_counterBufferZeroingHandle = {};
	}

	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visbile object count and 1 for objects with feedback

	// Each dispatch's counter slot is aligned so it can be bound at a UAV offset.
	const U32 counterBufferElementSize =
		getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_uavBufferBindOffsetAlignment, U32(kCountersPerDispatch * sizeof(U32)));
	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
	{
		// Counter buffer not created or not big enough, create a new one.
		// Growth policy: double the previous size; initial size covers kInitialCounterArraySize dispatches.
		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
		buffInit.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);

		m_counterBufferZeroingHandle = rgraph.importBuffer(m_counterBuffer.get(), buffInit.m_usage, 0, kMaxPtrSize);

		// A fresh buffer must be zeroed before its counters are used.
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GpuVisibilityNonRenderablesClearCounterBuffer");

		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);

		// Capture the buffer ref (not `this`) so the pass keeps it alive even if replaced.
		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
			rgraph.m_commandBuffer->fillBuffer(counterBuffer.get(), 0, kMaxPtrSize, 0);
		});

		m_counterBufferOffset = 0;
	}
	else if(!firstRunInFrame)
	{
		// Subsequent dispatch this frame: advance to the next pre-zeroed counter slot.
		m_counterBufferOffset += counterBufferElementSize;
	}

	// Allocate memory for the result (+1 for the leading count element).
	out.m_visiblesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate((objCount + 1) * sizeof(U32));
	out.m_visiblesBufferHandle = rgraph.importBuffer(BufferUsageBit::kNone, out.m_visiblesBuffer);

	// Create the renderpass
	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);

	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
	pass.newBufferDependency(out.m_visiblesBufferHandle, BufferUsageBit::kUavComputeWrite);

	if(in.m_hzbRt)
	{
		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
	}

	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
	{
		// Order this pass after the zeroing pass created above.
		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kUavComputeRead | BufferUsageBit::kUavComputeWrite);
	}

	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
				  visibleIndicesBuffHandle = out.m_visiblesBufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
				  objCount](RenderPassWorkContext& rgraph) {
		CommandBuffer& cmdb = *rgraph.m_commandBuffer;

		const Bool needsFeedback = feedbackBuffer.m_buffer != nullptr;

		// NOTE(review): HZB variant index is hard-coded to 0 here even though an HZB texture
		// dependency may have been declared above, and the HZB is never bound in this lambda.
		// Confirm whether HZB testing for non-renderables is intentionally not wired up yet.
		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());

		BufferOffsetRange objBuffer;
		switch(objType)
		{
		case GpuSceneNonRenderableObjectType::kLight:
			objBuffer = GpuSceneArrays::Light::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kDecal:
			objBuffer = GpuSceneArrays::Decal::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kFogDensityVolume:
			objBuffer = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
			objBuffer = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferOffsetRange();
			break;
		case GpuSceneNonRenderableObjectType::kReflectionProbe:
			objBuffer = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferOffsetRange();
			break;
		default:
			ANKI_ASSERT(0);
		}
		cmdb.bindUavBuffer(0, 0, objBuffer);

		// Clip planes derived from the captured view-projection matrix.
		GpuVisibilityNonRenderableConstants unis;
		Array<Plane, 6> planes;
		extractClipPlanes(viewProjectionMat, planes);
		for(U32 i = 0; i < 6; ++i)
		{
			unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
		}
		cmdb.setPushConstants(&unis, sizeof(unis));

		rgraph.bindUavBuffer(0, 1, visibleIndicesBuffHandle);
		// This dispatch's private counter slot.
		cmdb.bindUavBuffer(0, 2, counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch);

		if(needsFeedback)
		{
			cmdb.bindUavBuffer(0, 3, feedbackBuffer.m_buffer, feedbackBuffer.m_offset, feedbackBuffer.m_range);
		}

		// One thread per object, 64-wide groups.
		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
	});
}
  401. Error GpuVisibilityAccelerationStructures::init()
  402. {
  403. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructures.ankiprogbin", m_visibilityProg, m_visibilityGrProg));
  404. ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityAccelerationStructuresZeroRemainingInstances.ankiprogbin", m_zeroRemainingInstancesProg,
  405. m_zeroRemainingInstancesGrProg));
  406. BufferInitInfo inf("GpuVisibilityAccelerationStructuresCounters");
  407. inf.m_size = sizeof(U32) * 2;
  408. inf.m_usage = BufferUsageBit::kUavComputeWrite | BufferUsageBit::kUavComputeRead | BufferUsageBit::kTransferDestination;
  409. m_counterBuffer = GrManager::getSingleton().newBuffer(inf);
  410. zeroBuffer(m_counterBuffer.get());
  411. return Error::kNone;
  412. }
// GPU visibility for ray-tracing acceleration structures: culls RT bounding volumes
// (distance test within in.m_testRadius of in.m_pointOfTest, plus frustum planes) and
// fills the TLAS instance array and renderable index list. A second pass zeroes the
// instance slots the first pass did not write, using indirect dispatch args produced
// by the first pass. May only be called once per frame.
// NOTE(review): "pupulate" is a typo for "populate", but renaming would break callers.
void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccelerationStructuresInput& in,
															  GpuVisibilityAccelerationStructuresOutput& out)
{
	in.validate();
	RenderGraphDescription& rgraph = *in.m_rgraph;

#if ANKI_ASSERTIONS_ENABLED
	// Enforce the once-per-frame contract (the persistent counter buffer assumes it).
	ANKI_ASSERT(m_lastFrameIdx != getRenderer().getFrameCount());
	m_lastFrameIdx = getRenderer().getFrameCount();
#endif

	// Allocate the transient buffers. Worst case: every AABB visible; indices get +1 for the count.
	const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();

	out.m_instancesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(AccelerationStructureInstance));
	out.m_someBufferHandle = rgraph.importBuffer(BufferUsageBit::kUavComputeWrite, out.m_instancesBuffer);

	out.m_renderableIndicesBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocate((aabbCount + 1) * sizeof(U32));

	// Indirect args the vis pass writes for the zero-remaining-instances pass below.
	const BufferOffsetRange zeroInstancesDispatchArgsBuff = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DispatchIndirectArgs));

	// Create vis pass
	{
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);

		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kUavComputeRead);
		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, viewProjMat = in.m_viewProjectionMatrix, lodDistances = in.m_lodDistances, pointOfTest = in.m_pointOfTest,
					  testRadius = in.m_testRadius, instancesBuff = out.m_instancesBuffer, indicesBuff = out.m_renderableIndicesBuffer,
					  zeroInstancesDispatchArgsBuff](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_visibilityGrProg.get());

			GpuVisibilityAccelerationStructuresConstants unis;
			Array<Plane, 6> planes;
			extractClipPlanes(viewProjMat, planes);
			for(U32 i = 0; i < 6; ++i)
			{
				unis.m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
			}

			unis.m_pointOfTest = pointOfTest;
			unis.m_testRadius = testRadius;

			// Only 2 real LOD cut-offs; the rest are "infinite" sentinels.
			ANKI_ASSERT(kMaxLodCount == 3);
			unis.m_maxLodDistances[0] = lodDistances[0];
			unis.m_maxLodDistances[1] = lodDistances[1];
			unis.m_maxLodDistances[2] = kMaxF32;
			unis.m_maxLodDistances[3] = kMaxF32;

			cmdb.setPushConstants(&unis, sizeof(unis));

			cmdb.bindUavBuffer(0, 0, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
			cmdb.bindUavBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
			cmdb.bindUavBuffer(0, 3, instancesBuff);
			cmdb.bindUavBuffer(0, 4, indicesBuff);
			cmdb.bindUavBuffer(0, 5, m_counterBuffer.get(), 0, sizeof(U32) * 2);
			cmdb.bindUavBuffer(0, 6, zeroInstancesDispatchArgsBuff);

			// One thread per RT AABB, 64-wide groups.
			const U32 aabbCount = GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount();
			dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
		});
	}

	// Zero remaining instances
	{
		Array<Char, 64> passName;
		snprintf(passName.getBegin(), sizeof(passName), "%s: Zero remaining instances", in.m_passesName.cstr());
		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName.getBegin());

		// Depends on the same handle as the vis pass, so it is ordered after it.
		pass.newBufferDependency(out.m_someBufferHandle, BufferUsageBit::kUavComputeWrite);

		pass.setWork([this, zeroInstancesDispatchArgsBuff, instancesBuff = out.m_instancesBuffer,
					  indicesBuff = out.m_renderableIndicesBuffer](RenderPassWorkContext& rgraph) {
			CommandBuffer& cmdb = *rgraph.m_commandBuffer;

			cmdb.bindShaderProgram(m_zeroRemainingInstancesGrProg.get());

			cmdb.bindUavBuffer(0, 0, indicesBuff);
			cmdb.bindUavBuffer(0, 1, instancesBuff);

			// Group count was computed on-GPU by the vis pass.
			cmdb.dispatchComputeIndirect(zeroInstancesDispatchArgsBuff.m_buffer, zeroInstancesDispatchArgsBuff.m_offset);
		});
	}
}
  480. } // end namespace anki