HzbGenerator.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/Utils/HzbGenerator.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #if ANKI_COMPILER_GCC_COMPATIBLE
  8. # pragma GCC diagnostic push
  9. # pragma GCC diagnostic ignored "-Wunused-function"
  10. # pragma GCC diagnostic ignored "-Wignored-qualifiers"
  11. #elif ANKI_COMPILER_MSVC
  12. # pragma warning(push)
  13. # pragma warning(disable : 4505)
  14. #endif
  15. #define A_CPU
  16. #include <ThirdParty/FidelityFX/ffx_a.h>
  17. #include <ThirdParty/FidelityFX/ffx_spd.h>
  18. #if ANKI_COMPILER_GCC_COMPATIBLE
  19. # pragma GCC diagnostic pop
  20. #elif ANKI_COMPILER_MSVC
  21. # pragma warning(pop)
  22. #endif
  23. namespace anki {
  24. // 7 +----+ 6
  25. // /| /|
  26. // 3 +----+2|
  27. // | *--| + 5
  28. // |/4 |/
  29. // 0 +----+ 1
  30. static constexpr U16 kBoxIndices[] = {1, 2, 5, 2, 6, 5, 0, 4, 3, 4, 7, 3, 3, 7, 2, 7, 6, 2, 0, 1, 4, 1, 5, 4, 0, 3, 1, 3, 2, 1, 4, 5, 7, 5, 6, 7};
  31. Error HzbGenerator::init()
  32. {
  33. if(GrManager::getSingleton().getDeviceCapabilities().m_samplingFilterMinMax)
  34. {
  35. SamplerInitInfo sinit("HzbReductionMax");
  36. sinit.m_addressing = SamplingAddressing::kClamp;
  37. sinit.m_mipmapFilter = SamplingFilter::kMax;
  38. sinit.m_minMagFilter = SamplingFilter::kMax;
  39. m_maxSampler = GrManager::getSingleton().newSampler(sinit);
  40. }
  41. ANKI_CHECK(loadShaderProgram("ShaderBinaries/HzbGenPyramid.ankiprogbin", {{"REDUCTION_TYPE", 1}, {"MIN_MAX_SAMPLER", m_maxSampler.isCreated()}},
  42. m_genPyramidProg, m_genPyramidGrProg));
  43. ANKI_CHECK(loadShaderProgram("ShaderBinaries/HzbMaxDepth.ankiprogbin", m_maxDepthProg, m_maxDepthGrProg));
  44. ANKI_CHECK(loadShaderProgram("ShaderBinaries/HzbMaxDepthProject.ankiprogbin", m_maxBoxProg, m_maxBoxGrProg));
  45. m_counterBufferElementSize = max<U32>(sizeof(U32), GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
  46. BufferInitInfo buffInit("HzbCounterBuffer");
  47. buffInit.m_size = m_counterBufferElementSize * kCounterBufferElementCount;
  48. buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
  49. m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
  50. // Zero counter buffer
  51. {
  52. CommandBufferInitInfo cmdbInit;
  53. cmdbInit.m_flags |= CommandBufferFlag::kSmallBatch;
  54. CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
  55. cmdb->fillBuffer(BufferView(m_counterBuffer.get()), 0);
  56. FencePtr fence;
  57. cmdb->endRecording();
  58. GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
  59. fence->clientWait(6.0_sec);
  60. }
  61. buffInit = BufferInitInfo("HzbBoxIndices");
  62. buffInit.m_size = sizeof(kBoxIndices);
  63. buffInit.m_usage = BufferUsageBit::kIndex;
  64. buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
  65. m_boxIndexBuffer = GrManager::getSingleton().newBuffer(buffInit);
  66. void* mappedMem = m_boxIndexBuffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
  67. memcpy(mappedMem, kBoxIndices, sizeof(kBoxIndices));
  68. m_boxIndexBuffer->unmap();
  69. return Error::kNone;
  70. }
  71. void HzbGenerator::populateRenderGraphInternal(ConstWeakArray<DispatchInput> dispatchInputs, U32 firstCounterBufferElement, CString customName,
  72. RenderGraphBuilder& rgraph) const
  73. {
  74. const U32 dispatchCount = dispatchInputs.getSize();
  75. #if ANKI_ASSERTIONS_ENABLED
  76. if(m_crntFrame != getRenderer().getFrameCount())
  77. {
  78. m_crntFrame = getRenderer().getFrameCount();
  79. m_counterBufferElementUseMask = 0;
  80. }
  81. for(U32 i = 0; i < dispatchCount; ++i)
  82. {
  83. ANKI_ASSERT(!(m_counterBufferElementUseMask & (1 << (firstCounterBufferElement + i))));
  84. m_counterBufferElementUseMask |= (1 << (firstCounterBufferElement + i));
  85. }
  86. #endif
  87. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass((customName.isEmpty()) ? "HZB generation" : customName);
  88. Array<DispatchInput, kMaxShadowCascades> dispatchInputsCopy;
  89. for(U32 i = 0; i < dispatchCount; ++i)
  90. {
  91. const TextureSubresourceDesc firstMipSubresource = TextureSubresourceDesc::firstSurface(DepthStencilAspectBit::kDepth);
  92. pass.newTextureDependency(dispatchInputs[i].m_srcDepthRt, TextureUsageBit::kSampledCompute, firstMipSubresource);
  93. pass.newTextureDependency(dispatchInputs[i].m_dstHzbRt, TextureUsageBit::kStorageComputeWrite);
  94. dispatchInputsCopy[i] = dispatchInputs[i];
  95. }
  96. pass.setWork([this, dispatchInputsCopy, dispatchCount, firstCounterBufferElement](RenderPassWorkContext& rgraphCtx) {
  97. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  98. cmdb.bindShaderProgram(m_genPyramidGrProg.get());
  99. cmdb.bindSampler(ANKI_REG(s0), m_maxSampler.isCreated() ? m_maxSampler.get() : getRenderer().getSamplers().m_trilinearClamp.get());
  100. for(U32 dispatch = 0; dispatch < dispatchCount; ++dispatch)
  101. {
  102. const DispatchInput& in = dispatchInputsCopy[dispatch];
  103. const U32 hzbMipCount =
  104. min(kMaxMipsSinglePassDownsamplerCanProduce, computeMaxMipmapCount2d(in.m_dstHzbRtSize.x(), in.m_dstHzbRtSize.y()));
  105. const U32 mipsToCompute = hzbMipCount;
  106. varAU2(dispatchThreadGroupCountXY);
  107. varAU2(workGroupOffset); // needed if Left and Top are not 0,0
  108. varAU2(numWorkGroupsAndMips);
  109. varAU4(rectInfo) = initAU4(0, 0, in.m_dstHzbRtSize.x() * 2, in.m_dstHzbRtSize.y() * 2);
  110. SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, mipsToCompute);
  111. struct Uniforms
  112. {
  113. Vec2 m_invSrcTexSize;
  114. U32 m_threadGroupCount;
  115. U32 m_mipmapCount;
  116. } pc;
  117. pc.m_invSrcTexSize = 1.0f / Vec2(in.m_dstHzbRtSize * 2);
  118. pc.m_threadGroupCount = numWorkGroupsAndMips[0];
  119. pc.m_mipmapCount = numWorkGroupsAndMips[1];
  120. cmdb.setPushConstants(&pc, sizeof(pc));
  121. Register mipsReg(ANKI_REG(u1));
  122. for(U32 mip = 0; mip < kMaxMipsSinglePassDownsamplerCanProduce; ++mip)
  123. {
  124. TextureSubresourceDesc subresource = TextureSubresourceDesc::firstSurface();
  125. if(mip < mipsToCompute)
  126. {
  127. subresource.m_mipmap = mip;
  128. }
  129. else
  130. {
  131. subresource.m_mipmap = 0; // Put something random
  132. }
  133. rgraphCtx.bindTexture(mipsReg, in.m_dstHzbRt, subresource);
  134. ++mipsReg.m_bindPoint;
  135. }
  136. cmdb.bindStorageBuffer(
  137. ANKI_REG(u0), BufferView(m_counterBuffer.get(), (firstCounterBufferElement + dispatch) * m_counterBufferElementSize, sizeof(U32)));
  138. rgraphCtx.bindTexture(ANKI_REG(t0), in.m_srcDepthRt, TextureSubresourceDesc::firstSurface(DepthStencilAspectBit::kDepth));
  139. cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
  140. }
  141. });
  142. }
  143. void HzbGenerator::populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
  144. RenderGraphBuilder& rgraph, CString customName) const
  145. {
  146. DispatchInput in;
  147. in.m_dstHzbRt = dstHzbRt;
  148. in.m_dstHzbRtSize = dstHzbRtSize;
  149. in.m_srcDepthRt = srcDepthRt;
  150. in.m_srcDepthRtSize = srcDepthRtSize;
  151. populateRenderGraphInternal({&in, 1}, 0, customName, rgraph);
  152. }
  153. void HzbGenerator::populateRenderGraphDirectionalLight(const HzbDirectionalLightInput& in, RenderGraphBuilder& rgraph) const
  154. {
  155. const U32 cascadeCount = in.m_cascadeCount;
  156. ANKI_ASSERT(cascadeCount > 0);
  157. // Generate a temp RT with the max depth of each 64x64 tile of the depth buffer
  158. RenderTargetHandle maxDepthRt;
  159. constexpr U32 kTileSize = 64;
  160. const UVec2 maxDepthRtSize = (in.m_depthBufferRtSize + kTileSize - 1) / kTileSize;
  161. {
  162. RenderTargetDesc maxDepthRtDescr("HZB max tile depth");
  163. maxDepthRtDescr.m_width = maxDepthRtSize.x();
  164. maxDepthRtDescr.m_height = maxDepthRtSize.y();
  165. maxDepthRtDescr.m_format = Format::kR32_Sfloat;
  166. maxDepthRtDescr.bake();
  167. maxDepthRt = rgraph.newRenderTarget(maxDepthRtDescr);
  168. NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("HZB max tile depth");
  169. pass.newTextureDependency(in.m_depthBufferRt, TextureUsageBit::kSampledCompute, DepthStencilAspectBit::kDepth);
  170. pass.newTextureDependency(maxDepthRt, TextureUsageBit::kStorageComputeWrite);
  171. pass.setWork([this, depthBufferRt = in.m_depthBufferRt, maxDepthRt, maxDepthRtSize](RenderPassWorkContext& rgraphCtx) {
  172. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  173. rgraphCtx.bindTexture(ANKI_REG(t0), depthBufferRt, TextureSubresourceDesc::firstSurface(DepthStencilAspectBit::kDepth));
  174. cmdb.bindSampler(ANKI_REG(s0), getRenderer().getSamplers().m_trilinearClamp.get());
  175. rgraphCtx.bindTexture(ANKI_REG(u0), maxDepthRt);
  176. cmdb.bindShaderProgram(m_maxDepthGrProg.get());
  177. cmdb.dispatchCompute(maxDepthRtSize.x(), maxDepthRtSize.y(), 1);
  178. });
  179. }
  180. // Project a box for each tile on each cascade's HZB
  181. Array<RenderTargetHandle, kMaxShadowCascades> depthRts;
  182. for(U32 i = 0; i < cascadeCount; ++i)
  183. {
  184. const HzbDirectionalLightInput::Cascade& cascade = in.m_cascades[i];
  185. // Compute the cascade's min and max depth as seen by the camera
  186. F32 cascadeMinDepth, cascadeMaxDepth;
  187. {
  188. if(i > 0)
  189. {
  190. // Do the reverse of computeShadowCascadeIndex2 to find the actual distance of this cascade. computeShadowCascadeIndex2 makes the min
  191. // distance of a cascade to become even less. See https://www.desmos.com/calculator/g1ibye6ebg
  192. // F = ((x-m)/(M-m))^16 and solving for x we have the new minDist
  193. const F32 m = (i >= 2) ? in.m_cascades[i - 2].m_cascadeMaxDistance : 0.0f; // Prev cascade min dist
  194. const F32 M = in.m_cascades[i - 1].m_cascadeMaxDistance; // Prev cascade max dist
  195. constexpr F32 F = 0.01f; // Desired factor
  196. const F32 minDist = pow(F, 1.0f / 16.0f) * (M - m) + m;
  197. ANKI_ASSERT(minDist < M);
  198. Vec4 v4 = in.m_cameraProjectionMatrix * Vec4(0.0f, 0.0f, -minDist, 1.0f);
  199. cascadeMinDepth = saturate(v4.z() / v4.w());
  200. }
  201. else
  202. {
  203. cascadeMinDepth = 0.0f;
  204. }
  205. const F32 maxDist = cascade.m_cascadeMaxDistance;
  206. const Vec4 v4 = in.m_cameraProjectionMatrix * Vec4(0.0f, 0.0f, -maxDist, 1.0f);
  207. cascadeMaxDepth = saturate(v4.z() / v4.w());
  208. ANKI_ASSERT(cascadeMinDepth <= cascadeMaxDepth);
  209. }
  210. RenderTargetDesc depthRtDescr(generateTempPassName("HZB boxes depth cascade:%u", i));
  211. depthRtDescr.m_width = cascade.m_hzbRtSize.x() * 2;
  212. depthRtDescr.m_height = cascade.m_hzbRtSize.y() * 2;
  213. depthRtDescr.m_format = Format::kD16_Unorm;
  214. depthRtDescr.bake();
  215. depthRts[i] = rgraph.newRenderTarget(depthRtDescr);
  216. GraphicsRenderPass& pass = rgraph.newGraphicsRenderPass("HZB boxes");
  217. GraphicsRenderPassTargetDesc depthRt(depthRts[i]);
  218. depthRt.m_subresource.m_depthStencilAspect = DepthStencilAspectBit::kDepth;
  219. depthRt.m_clearValue.m_depthStencil.m_depth = 0.0f;
  220. depthRt.m_loadOperation = RenderTargetLoadOperation::kClear;
  221. pass.setRenderpassInfo({}, &depthRt);
  222. pass.newTextureDependency(maxDepthRt, TextureUsageBit::kSampledFragment);
  223. pass.newTextureDependency(depthRts[i], TextureUsageBit::kFramebufferWrite, DepthStencilAspectBit::kDepth);
  224. pass.setWork([this, maxDepthRt, invViewProjMat = in.m_cameraInverseViewProjectionMatrix,
  225. lightViewProjMat = cascade.m_projectionMatrix * Mat4(cascade.m_viewMatrix, Vec4(0.0f, 0.0f, 0.0f, 1.0f)),
  226. viewport = cascade.m_hzbRtSize * 2, maxDepthRtSize, cascadeMinDepth, cascadeMaxDepth](RenderPassWorkContext& rgraphCtx) {
  227. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  228. cmdb.setDepthCompareOperation(CompareOperation::kGreater);
  229. cmdb.setViewport(0, 0, viewport.x(), viewport.y());
  230. cmdb.bindShaderProgram(m_maxBoxGrProg.get());
  231. rgraphCtx.bindTexture(ANKI_REG(t0), maxDepthRt);
  232. struct Uniforms
  233. {
  234. Mat4 m_reprojectionMat;
  235. F32 m_cascadeMinDepth;
  236. F32 m_cascadeMaxDepth;
  237. F32 m_padding0;
  238. F32 m_padding1;
  239. } unis;
  240. unis.m_reprojectionMat = lightViewProjMat * invViewProjMat;
  241. unis.m_cascadeMinDepth = cascadeMinDepth;
  242. unis.m_cascadeMaxDepth = cascadeMaxDepth;
  243. cmdb.setPushConstants(&unis, sizeof(unis));
  244. cmdb.bindIndexBuffer(BufferView(m_boxIndexBuffer.get()), IndexType::kU16);
  245. cmdb.drawIndexed(PrimitiveTopology::kTriangles, sizeof(kBoxIndices) / sizeof(kBoxIndices[0]), maxDepthRtSize.x() * maxDepthRtSize.y());
  246. // Restore state
  247. cmdb.setDepthCompareOperation(CompareOperation::kLess);
  248. });
  249. }
  250. // Generate the HZBs
  251. Array<DispatchInput, kMaxShadowCascades> inputs;
  252. for(U32 i = 0; i < cascadeCount; ++i)
  253. {
  254. const HzbDirectionalLightInput::Cascade& cascade = in.m_cascades[i];
  255. inputs[i].m_dstHzbRt = cascade.m_hzbRt;
  256. inputs[i].m_dstHzbRtSize = cascade.m_hzbRtSize;
  257. inputs[i].m_srcDepthRt = depthRts[i];
  258. inputs[i].m_srcDepthRtSize = cascade.m_hzbRtSize * 2;
  259. }
  260. populateRenderGraphInternal({&inputs[0], cascadeCount}, 1, "HZB generation shadow cascades", rgraph);
  261. }
  262. } // end namespace anki