GpuVisibility.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/Utils/GpuVisibility.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Scene/RenderStateBucket.h>
  8. #include <AnKi/Scene/ContiguousArrayAllocator.h>
  9. #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
  10. #include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
  11. #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
  12. #include <AnKi/Collision/Functions.h>
  13. #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
  14. #include <AnKi/Core/StatsSet.h>
  15. namespace anki {
  16. static StatCounter g_visibleObjects(StatCategory::kMisc, "Visible objects", StatFlag::kZeroEveryFrame);
  17. static StatCounter g_testedObjects(StatCategory::kMisc, "Visbility tested objects", StatFlag::kZeroEveryFrame);
  18. static GpuSceneContiguousArrayType techniqueToArrayType(RenderingTechnique technique)
  19. {
  20. GpuSceneContiguousArrayType arrayType;
  21. switch(technique)
  22. {
  23. case RenderingTechnique::kGBuffer:
  24. arrayType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer;
  25. break;
  26. case RenderingTechnique::kDepth:
  27. arrayType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesDepth;
  28. break;
  29. default:
  30. ANKI_ASSERT(0);
  31. arrayType = GpuSceneContiguousArrayType::kCount;
  32. }
  33. return arrayType;
  34. }
  35. Error GpuVisibility::init()
  36. {
  37. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog));
  38. for(U32 i = 0; i < 2; ++i)
  39. {
  40. ShaderProgramResourceVariantInitInfo variantInit(m_prog);
  41. variantInit.addMutation("HZB_TEST", i);
  42. variantInit.addMutation("STATS", ANKI_STATS_ENABLED);
  43. const ShaderProgramResourceVariant* variant;
  44. m_prog->getOrCreateVariant(variantInit, variant);
  45. m_grProgs[i].reset(&variant->getProgram());
  46. }
  47. #if ANKI_STATS_ENABLED
  48. for(GpuReadbackMemoryAllocation& alloc : m_readbackMemory)
  49. {
  50. alloc = GpuReadbackMemoryPool::getSingleton().allocate(sizeof(U32));
  51. }
  52. #endif
  53. return Error::kNone;
  54. }
  55. void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
  56. const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt,
  57. RenderGraphDescription& rgraph, GpuVisibilityOutput& out)
  58. {
  59. const U32 aabbCount = GpuSceneContiguousArrays::getSingleton().getElementCount(techniqueToArrayType(technique));
  60. const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
  61. #if ANKI_STATS_ENABLED
  62. Bool firstCallInTheFrame = false;
  63. if(m_lastFrameIdx != getRenderer().getFrameCount())
  64. {
  65. firstCallInTheFrame = true;
  66. m_lastFrameIdx = getRenderer().getFrameCount();
  67. }
  68. const GpuReadbackMemoryAllocation& readAlloc = m_readbackMemory[(m_lastFrameIdx + 1) % m_readbackMemory.getSize()];
  69. const GpuReadbackMemoryAllocation& writeAlloc = m_readbackMemory[m_lastFrameIdx % m_readbackMemory.getSize()];
  70. Buffer* clearStatsBuffer = &readAlloc.getBuffer();
  71. const PtrSize clearStatsBufferOffset = readAlloc.getOffset();
  72. Buffer* writeStatsBuffer = &writeAlloc.getBuffer();
  73. const PtrSize writeStatsBufferOffset = writeAlloc.getOffset();
  74. if(firstCallInTheFrame)
  75. {
  76. U32 visibleCount;
  77. memcpy(&visibleCount, readAlloc.getMappedMemory(), sizeof(visibleCount));
  78. g_visibleObjects.set(visibleCount);
  79. }
  80. g_testedObjects.increment(aabbCount);
  81. #endif
  82. // Allocate memory for the indirect commands
  83. const GpuVisibleTransientMemoryAllocation indirectArgs =
  84. GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
  85. out.m_drawIndexedIndirectArgsBuffer = indirectArgs.m_buffer;
  86. out.m_drawIndexedIndirectArgsBufferOffset = indirectArgs.m_offset;
  87. out.m_drawIndexedIndirectArgsBufferRange = indirectArgs.m_size;
  88. const GpuVisibleTransientMemoryAllocation instanceRateRenderables =
  89. GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(GpuSceneRenderable));
  90. out.m_instanceRateRenderablesBuffer = instanceRateRenderables.m_buffer;
  91. out.m_instanceRateRenderablesBufferOffset = instanceRateRenderables.m_offset;
  92. out.m_instanceRateRenderablesBufferRange = instanceRateRenderables.m_size;
  93. // Allocate and zero the MDI counts
  94. RebarAllocation mdiDrawCounts;
  95. U32* atomics = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(bucketCount, mdiDrawCounts);
  96. memset(atomics, 0, mdiDrawCounts.m_range);
  97. out.m_mdiDrawCountsBuffer = &RebarTransientMemoryPool::getSingleton().getBuffer();
  98. out.m_mdiDrawCountsBufferOffset = mdiDrawCounts.m_offset;
  99. out.m_mdiDrawCountsBufferRange = mdiDrawCounts.m_range;
  100. // Import buffers
  101. out.m_mdiDrawCountsHandle = rgraph.importBuffer(&RebarTransientMemoryPool::getSingleton().getBuffer(), BufferUsageBit::kNone,
  102. mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
  103. // Create the renderpass
  104. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passesName);
  105. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
  106. pass.newBufferDependency(out.m_mdiDrawCountsHandle, BufferUsageBit::kStorageComputeWrite);
  107. if(hzbRt)
  108. {
  109. pass.newTextureDependency(*hzbRt, TextureUsageBit::kSampledCompute);
  110. }
  111. const RenderTargetHandle hzbRtCopy =
  112. (hzbRt) ? *hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
  113. pass.setWork([this, viewProjectionMat, lodReferencePoint, lodDistances, technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle,
  114. instanceRateRenderables, indirectArgs
  115. #if ANKI_STATS_ENABLED
  116. ,
  117. clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset
  118. #endif
  119. ](RenderPassWorkContext& rpass) {
  120. CommandBuffer& cmdb = *rpass.m_commandBuffer;
  121. cmdb.bindShaderProgram(m_grProgs[hzbRtCopy.isValid()].get());
  122. const GpuSceneContiguousArrayType type = techniqueToArrayType(technique);
  123. cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(), GpuSceneContiguousArrays::getSingleton().getArrayBaseOffset(type),
  124. GpuSceneContiguousArrays::getSingleton().getElementCount(type)
  125. * GpuSceneContiguousArrays::getSingleton().getElementSize(type));
  126. cmdb.bindStorageBuffer(0, 1, &GpuSceneBuffer::getSingleton().getBuffer(),
  127. GpuSceneContiguousArrays::getSingleton().getArrayBaseOffset(GpuSceneContiguousArrayType::kRenderables),
  128. GpuSceneContiguousArrays::getSingleton().getElementCount(GpuSceneContiguousArrayType::kRenderables)
  129. * GpuSceneContiguousArrays::getSingleton().getElementSize(GpuSceneContiguousArrayType::kRenderables));
  130. cmdb.bindStorageBuffer(0, 2, &GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
  131. cmdb.bindStorageBuffer(0, 3, instanceRateRenderables.m_buffer, instanceRateRenderables.m_offset, instanceRateRenderables.m_size);
  132. cmdb.bindStorageBuffer(0, 4, indirectArgs.m_buffer, indirectArgs.m_offset, indirectArgs.m_size);
  133. U32* offsets = allocateAndBindStorage<U32*>(sizeof(U32) * RenderStateBucketContainer::getSingleton().getBucketCount(technique), cmdb, 0, 5);
  134. U32 bucketCount = 0;
  135. U32 userCount = 0;
  136. RenderStateBucketContainer::getSingleton().iterateBuckets(technique, [&](const RenderStateInfo&, U32 userCount_) {
  137. offsets[bucketCount] = userCount;
  138. userCount += userCount_;
  139. ++bucketCount;
  140. });
  141. ANKI_ASSERT(userCount == RenderStateBucketContainer::getSingleton().getBucketsItemCount(technique));
  142. rpass.bindStorageBuffer(0, 6, mdiDrawCountsHandle);
  143. GpuVisibilityUniforms* unis = allocateAndBindUniforms<GpuVisibilityUniforms*>(sizeof(GpuVisibilityUniforms), cmdb, 0, 7);
  144. Array<Plane, 6> planes;
  145. extractClipPlanes(viewProjectionMat, planes);
  146. for(U32 i = 0; i < 6; ++i)
  147. {
  148. unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
  149. }
  150. const U32 aabbCount = GpuSceneContiguousArrays::getSingleton().getElementCount(type);
  151. unis->m_aabbCount = aabbCount;
  152. ANKI_ASSERT(kMaxLodCount == 3);
  153. unis->m_maxLodDistances[0] = lodDistances[0];
  154. unis->m_maxLodDistances[1] = lodDistances[1];
  155. unis->m_maxLodDistances[2] = kMaxF32;
  156. unis->m_maxLodDistances[3] = kMaxF32;
  157. unis->m_lodReferencePoint = lodReferencePoint;
  158. unis->m_viewProjectionMat = viewProjectionMat;
  159. if(hzbRtCopy.isValid())
  160. {
  161. rpass.bindColorTexture(0, 8, hzbRtCopy);
  162. cmdb.bindSampler(0, 9, getRenderer().getSamplers().m_nearestNearestClamp.get());
  163. }
  164. #if ANKI_STATS_ENABLED
  165. cmdb.bindStorageBuffer(0, 10, writeStatsBuffer, writeStatsBufferOffset, sizeof(U32));
  166. cmdb.bindStorageBuffer(0, 11, clearStatsBuffer, clearStatsBufferOffset, sizeof(U32));
  167. #endif
  168. dispatchPPCompute(cmdb, 64, 1, aabbCount, 1);
  169. });
  170. }
  171. Error GpuVisibilityNonRenderables::init()
  172. {
  173. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibilityNonRenderables.ankiprogbin", m_prog));
  174. for(U32 hzb = 0; hzb < 2; ++hzb)
  175. {
  176. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  177. {
  178. for(U32 cpuFeedback = 0; cpuFeedback < 2; ++cpuFeedback)
  179. {
  180. ShaderProgramResourceVariantInitInfo variantInit(m_prog);
  181. variantInit.addMutation("HZB_TEST", hzb);
  182. variantInit.addMutation("OBJECT_TYPE", U32(type));
  183. variantInit.addMutation("CPU_FEEDBACK", cpuFeedback);
  184. const ShaderProgramResourceVariant* variant;
  185. m_prog->getOrCreateVariant(variantInit, variant);
  186. if(variant)
  187. {
  188. m_grProgs[hzb][type][cpuFeedback].reset(&variant->getProgram());
  189. }
  190. else
  191. {
  192. m_grProgs[hzb][type][cpuFeedback].reset(nullptr);
  193. }
  194. }
  195. }
  196. }
  197. {
  198. CommandBufferInitInfo cmdbInit("TmpClear");
  199. cmdbInit.m_flags |= CommandBufferFlag::kSmallBatch;
  200. CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
  201. for(U32 i = 0; i < kMaxFeedbackRequestsPerFrame; ++i)
  202. {
  203. BufferInitInfo buffInit("GpuVisibilityNonRenderablesFeedbackCounters");
  204. buffInit.m_size = 2 * sizeof(U32);
  205. buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
  206. m_counterBuffers[i] = GrManager::getSingleton().newBuffer(buffInit);
  207. cmdb->fillBuffer(m_counterBuffers[i].get(), 0, kMaxPtrSize, 0);
  208. }
  209. cmdb->flush();
  210. GrManager::getSingleton().finish();
  211. }
  212. return Error::kNone;
  213. }
  214. void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
  215. {
  216. const GpuSceneContiguousArrayType arrayType = gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(in.m_objectType);
  217. const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
  218. if(objCount == 0)
  219. {
  220. return;
  221. }
  222. if(in.m_cpuFeedback.m_buffer)
  223. {
  224. ANKI_ASSERT(in.m_cpuFeedback.m_bufferRange == sizeof(U32) * (objCount + 1));
  225. }
  226. // Find the counter buffer required for feedback
  227. U32 counterBufferIdx = kMaxU32;
  228. if(in.m_cpuFeedback.m_buffer)
  229. {
  230. if(m_lastFrameIdx != getRenderer().getFrameCount())
  231. {
  232. m_lastFrameIdx = getRenderer().getFrameCount();
  233. m_feedbackRequestCountThisFrame = 0;
  234. }
  235. counterBufferIdx = m_feedbackRequestCountThisFrame++;
  236. m_counterIdx[counterBufferIdx] = (m_counterIdx[counterBufferIdx] + 1) & 1;
  237. }
  238. // Allocate memory for the result
  239. RebarAllocation visibleIndicesAlloc;
  240. U32* indices = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(objCount + 1, visibleIndicesAlloc);
  241. indices[0] = 0;
  242. out.m_visibleIndicesBuffer = &RebarTransientMemoryPool::getSingleton().getBuffer();
  243. out.m_visibleIndicesBufferOffset = visibleIndicesAlloc.m_offset;
  244. out.m_visibleIndicesBufferRange = visibleIndicesAlloc.m_range;
  245. // Import buffers
  246. RenderGraphDescription& rgraph = *in.m_rgraph;
  247. out.m_bufferHandle =
  248. rgraph.importBuffer(out.m_visibleIndicesBuffer, BufferUsageBit::kNone, out.m_visibleIndicesBufferOffset, out.m_visibleIndicesBufferRange);
  249. // Create the renderpass
  250. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(in.m_passesName);
  251. pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
  252. pass.newBufferDependency(out.m_bufferHandle, BufferUsageBit::kStorageComputeWrite);
  253. if(in.m_hzbRt)
  254. {
  255. pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
  256. }
  257. pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedback.m_buffer, feedbackBufferOffset = in.m_cpuFeedback.m_bufferOffset,
  258. feedbackBufferRange = in.m_cpuFeedback.m_bufferRange, viewProjectionMat = in.m_viewProjectionMat,
  259. visibleIndicesBuffHandle = out.m_bufferHandle, counterBufferIdx,
  260. counterIdx = m_counterIdx[counterBufferIdx]](RenderPassWorkContext& rgraph) {
  261. CommandBuffer& cmdb = *rgraph.m_commandBuffer;
  262. const GpuSceneContiguousArrayType arrayType = gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(objType);
  263. const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
  264. const GpuSceneContiguousArrays& cArrays = GpuSceneContiguousArrays::getSingleton();
  265. const Bool needsFeedback = feedbackBuffer != nullptr;
  266. cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());
  267. cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(), cArrays.getArrayBaseOffset(arrayType),
  268. cArrays.getElementSize(arrayType) * cArrays.getElementCount(arrayType), 0);
  269. GpuVisibilityNonRenderableUniforms* unis =
  270. allocateAndBindUniforms<GpuVisibilityNonRenderableUniforms*>(sizeof(GpuVisibilityNonRenderableUniforms), cmdb, 0, 1);
  271. Array<Plane, 6> planes;
  272. extractClipPlanes(viewProjectionMat, planes);
  273. for(U32 i = 0; i < 6; ++i)
  274. {
  275. unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
  276. }
  277. unis->m_feedbackCounterIdx = counterIdx;
  278. rgraph.bindStorageBuffer(0, 2, visibleIndicesBuffHandle);
  279. if(needsFeedback)
  280. {
  281. cmdb.bindStorageBuffer(0, 3, feedbackBuffer, feedbackBufferOffset, feedbackBufferRange);
  282. cmdb.bindStorageBuffer(0, 4, m_counterBuffers[counterBufferIdx].get(), 0, kMaxPtrSize);
  283. }
  284. dispatchPPCompute(cmdb, 64, 1, objCount, 1);
  285. });
  286. }
  287. } // end namespace anki