ClusterBinning.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/ClusterBinning.h>
  6. #include <AnKi/Renderer/PrimaryNonRenderableVisibility.h>
  7. #include <AnKi/Renderer/Renderer.h>
  8. #include <AnKi/Renderer/ProbeReflections.h>
  9. #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
  10. #include <AnKi/GpuMemory/GpuVisibleTransientMemoryPool.h>
  11. #include <AnKi/Scene/Components/CameraComponent.h>
  12. #include <AnKi/Scene/Components/LightComponent.h>
  13. #include <AnKi/Collision/Functions.h>
  14. #include <AnKi/Util/Tracer.h>
  15. #include <AnKi/Util/HighRezTimer.h>
  16. namespace anki {
  17. ClusterBinning::ClusterBinning()
  18. {
  19. }
  20. ClusterBinning::~ClusterBinning()
  21. {
  22. }
  23. Error ClusterBinning::init()
  24. {
  25. ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", 0}}, m_prog, m_jobSetupGrProg, "Setup"));
  26. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  27. {
  28. ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_prog,
  29. m_binningGrProgs[type], "Binning"));
  30. ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_prog,
  31. m_packingGrProgs[type], "PackVisibles"));
  32. }
  33. return Error::kNone;
  34. }
  35. void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
  36. {
  37. ANKI_TRACE_SCOPED_EVENT(ClusterBinning);
  38. RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
  39. // Allocate the clusters buffer
  40. {
  41. const U32 clusterCount = getRenderer().getTileCounts().x() * getRenderer().getTileCounts().y() + getRenderer().getZSplitCount();
  42. m_runCtx.m_clustersBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<Cluster>(clusterCount);
  43. m_runCtx.m_dep = rgraph.importBuffer(m_runCtx.m_clustersBuffer, BufferUsageBit::kNone);
  44. }
  45. // Setup the indirect dispatches and zero the clusters buffer
  46. BufferView indirectArgsBuff;
  47. {
  48. // Allocate memory for the indirect args
  49. constexpr U32 dispatchCount = U32(GpuSceneNonRenderableObjectType::kCount) * 2;
  50. indirectArgsBuff = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(dispatchCount);
  51. // Create the pass
  52. NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster binning setup");
  53. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  54. {
  55. rpass.newBufferDependency(getRenderer().getPrimaryNonRenderableVisibility().getVisibleIndicesBufferHandle(type),
  56. BufferUsageBit::kSrvCompute);
  57. }
  58. rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kUavCompute);
  59. rpass.setWork([this, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
  60. ANKI_TRACE_SCOPED_EVENT(ClusterBinningSetup);
  61. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  62. cmdb.bindShaderProgram(m_jobSetupGrProg.get());
  63. const UVec4 consts(getRenderer().getTileCounts().x() * getRenderer().getTileCounts().y());
  64. cmdb.setFastConstants(&consts, sizeof(consts));
  65. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  66. {
  67. const BufferView& buff = getRenderer().getPrimaryNonRenderableVisibility().getVisibleIndicesBuffer(type);
  68. cmdb.bindSrv(U32(type), 0, buff);
  69. }
  70. cmdb.bindUav(0, 0, indirectArgsBuff);
  71. cmdb.dispatchCompute(1, 1, 1);
  72. // Now zero the clusters buffer
  73. fillBuffer(cmdb, m_runCtx.m_clustersBuffer, 0);
  74. });
  75. }
  76. // Cluster binning
  77. {
  78. // Create the pass
  79. NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster binning");
  80. rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kUavCompute | BufferUsageBit::kIndirectCompute);
  81. rpass.setWork([this, &ctx, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
  82. ANKI_TRACE_SCOPED_EVENT(ClusterBinning);
  83. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  84. PtrSize indirectArgsBuffOffset = indirectArgsBuff.getOffset();
  85. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  86. {
  87. cmdb.bindShaderProgram(m_binningGrProgs[type].get());
  88. const BufferView& idsBuff = getRenderer().getPrimaryNonRenderableVisibility().getVisibleIndicesBuffer(type);
  89. cmdb.bindSrv(0, 0, idsBuff);
  90. PtrSize objBufferOffset = 0;
  91. PtrSize objBufferRange = 0;
  92. U32 elementSize = 0;
  93. switch(type)
  94. {
  95. case GpuSceneNonRenderableObjectType::kLight:
  96. objBufferOffset = GpuSceneArrays::Light::getSingleton().getGpuSceneOffsetOfArrayBase();
  97. objBufferRange = GpuSceneArrays::Light::getSingleton().getBufferRange();
  98. elementSize = GpuSceneArrays::Light::getElementSize();
  99. break;
  100. case GpuSceneNonRenderableObjectType::kDecal:
  101. objBufferOffset = GpuSceneArrays::Decal::getSingleton().getGpuSceneOffsetOfArrayBase();
  102. objBufferRange = GpuSceneArrays::Decal::getSingleton().getBufferRange();
  103. elementSize = GpuSceneArrays::Decal::getElementSize();
  104. break;
  105. case GpuSceneNonRenderableObjectType::kFogDensityVolume:
  106. objBufferOffset = GpuSceneArrays::FogDensityVolume::getSingleton().getGpuSceneOffsetOfArrayBase();
  107. objBufferRange = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferRange();
  108. elementSize = GpuSceneArrays::FogDensityVolume::getElementSize();
  109. break;
  110. case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
  111. objBufferOffset = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
  112. objBufferRange = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferRange();
  113. elementSize = GpuSceneArrays::GlobalIlluminationProbe::getElementSize();
  114. break;
  115. case GpuSceneNonRenderableObjectType::kReflectionProbe:
  116. objBufferOffset = GpuSceneArrays::ReflectionProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
  117. objBufferRange = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferRange();
  118. elementSize = GpuSceneArrays::ReflectionProbe::getElementSize();
  119. break;
  120. default:
  121. ANKI_ASSERT(0);
  122. }
  123. if(objBufferRange == 0)
  124. {
  125. objBufferOffset = 0;
  126. objBufferRange = getAlignedRoundDown(elementSize, GpuSceneBuffer::getSingleton().getBufferView().getRange());
  127. }
  128. cmdb.bindSrv(1, 0, BufferView(&GpuSceneBuffer::getSingleton().getBuffer(), objBufferOffset, objBufferRange));
  129. cmdb.bindUav(0, 0, m_runCtx.m_clustersBuffer);
  130. struct ClusterBinningConstants
  131. {
  132. Vec3 m_cameraOrigin;
  133. F32 m_zSplitCountOverFrustumLength;
  134. Vec2 m_renderingSize;
  135. U32 m_tileCountX;
  136. U32 m_tileCount;
  137. Vec4 m_nearPlaneWorld;
  138. I32 m_zSplitCountMinusOne;
  139. I32 m_padding0;
  140. I32 m_padding1;
  141. I32 m_padding2;
  142. Mat4 m_invertedViewProjMat;
  143. } consts;
  144. consts.m_cameraOrigin = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
  145. consts.m_zSplitCountOverFrustumLength = F32(getRenderer().getZSplitCount()) / (ctx.m_cameraFar - ctx.m_cameraNear);
  146. consts.m_renderingSize = Vec2(getRenderer().getInternalResolution());
  147. consts.m_tileCountX = getRenderer().getTileCounts().x();
  148. consts.m_tileCount = getRenderer().getTileCounts().x() * getRenderer().getTileCounts().y();
  149. Plane nearPlane;
  150. extractClipPlane(ctx.m_matrices.m_viewProjection, FrustumPlaneType::kNear, nearPlane);
  151. consts.m_nearPlaneWorld = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
  152. consts.m_zSplitCountMinusOne = getRenderer().getZSplitCount() - 1;
  153. consts.m_invertedViewProjMat = ctx.m_matrices.m_invertedViewProjectionJitter;
  154. cmdb.setFastConstants(&consts, sizeof(consts));
  155. cmdb.dispatchComputeIndirect(BufferView(indirectArgsBuff).setOffset(indirectArgsBuffOffset).setRange(sizeof(DispatchIndirectArgs)));
  156. indirectArgsBuffOffset += sizeof(DispatchIndirectArgs);
  157. }
  158. });
  159. }
  160. // Object packing
  161. {
  162. // Allocations
  163. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  164. {
  165. m_runCtx.m_packedObjectsBuffers[type] = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer(
  166. kMaxVisibleClusteredObjects[type], kClusteredObjectSizes[type]);
  167. }
  168. // Create the pass
  169. NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster object packing");
  170. rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kUavCompute);
  171. rpass.setWork([this, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
  172. ANKI_TRACE_SCOPED_EVENT(ClusterBinningObjectPacking);
  173. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  174. PtrSize indirectArgsBuffOffset =
  175. indirectArgsBuff.getOffset() + sizeof(DispatchIndirectArgs) * U32(GpuSceneNonRenderableObjectType::kCount);
  176. for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
  177. {
  178. cmdb.bindShaderProgram(m_packingGrProgs[type].get());
  179. PtrSize objBufferOffset = 0;
  180. PtrSize objBufferRange = 0;
  181. U32 objSize = 0;
  182. switch(type)
  183. {
  184. case GpuSceneNonRenderableObjectType::kLight:
  185. objBufferOffset = GpuSceneArrays::Light::getSingleton().getGpuSceneOffsetOfArrayBase();
  186. objBufferRange = GpuSceneArrays::Light::getSingleton().getBufferRange();
  187. objSize = GpuSceneArrays::Light::getSingleton().getElementSize();
  188. break;
  189. case GpuSceneNonRenderableObjectType::kDecal:
  190. objBufferOffset = GpuSceneArrays::Decal::getSingleton().getGpuSceneOffsetOfArrayBase();
  191. objBufferRange = GpuSceneArrays::Decal::getSingleton().getBufferRange();
  192. objSize = GpuSceneArrays::Decal::getSingleton().getElementSize();
  193. break;
  194. case GpuSceneNonRenderableObjectType::kFogDensityVolume:
  195. objBufferOffset = GpuSceneArrays::FogDensityVolume::getSingleton().getGpuSceneOffsetOfArrayBase();
  196. objBufferRange = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferRange();
  197. objSize = GpuSceneArrays::FogDensityVolume::getSingleton().getElementSize();
  198. break;
  199. case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
  200. objBufferOffset = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
  201. objBufferRange = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferRange();
  202. objSize = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementSize();
  203. break;
  204. case GpuSceneNonRenderableObjectType::kReflectionProbe:
  205. objBufferOffset = GpuSceneArrays::ReflectionProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
  206. objBufferRange = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferRange();
  207. objSize = GpuSceneArrays::ReflectionProbe::getSingleton().getElementSize();
  208. break;
  209. default:
  210. ANKI_ASSERT(0);
  211. }
  212. if(objBufferRange == 0)
  213. {
  214. objBufferOffset = 0;
  215. objBufferRange = getAlignedRoundDown(objSize, GpuSceneBuffer::getSingleton().getBufferView().getRange());
  216. }
  217. cmdb.bindSrv(0, 0, BufferView(&GpuSceneBuffer::getSingleton().getBuffer(), objBufferOffset, objBufferRange));
  218. cmdb.bindUav(0, 0, m_runCtx.m_packedObjectsBuffers[type]);
  219. const BufferView& idsBuff = getRenderer().getPrimaryNonRenderableVisibility().getVisibleIndicesBuffer(type);
  220. cmdb.bindSrv(1, 0, idsBuff);
  221. cmdb.dispatchComputeIndirect(BufferView(indirectArgsBuff).setOffset(indirectArgsBuffOffset).setRange(sizeof(DispatchIndirectArgs)));
  222. indirectArgsBuffOffset += sizeof(DispatchIndirectArgs);
  223. }
  224. });
  225. }
  226. }
  227. } // end namespace anki