DepthDownscale.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/DepthDownscale.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Renderer/GBuffer.h>
  8. #include <AnKi/Core/ConfigSet.h>
  9. #if ANKI_COMPILER_GCC_COMPATIBLE
  10. # pragma GCC diagnostic push
  11. # pragma GCC diagnostic ignored "-Wunused-function"
  12. # pragma GCC diagnostic ignored "-Wignored-qualifiers"
  13. #elif ANKI_COMPILER_MSVC
  14. # pragma warning(push)
  15. # pragma warning(disable : 4505)
  16. #endif
  17. #define A_CPU
  18. #include <ThirdParty/FidelityFX/ffx_a.h>
  19. #include <ThirdParty/FidelityFX/ffx_spd.h>
  20. #if ANKI_COMPILER_GCC_COMPATIBLE
  21. # pragma GCC diagnostic pop
  22. #elif ANKI_COMPILER_MSVC
  23. # pragma warning(pop)
  24. #endif
  25. namespace anki {
  26. DepthDownscale::~DepthDownscale()
  27. {
  28. if(m_clientBufferAddr)
  29. {
  30. m_clientBuffer->unmap();
  31. }
  32. }
  33. Error DepthDownscale::initInternal()
  34. {
  35. const U32 width = getRenderer().getInternalResolution().x() >> 1;
  36. const U32 height = getRenderer().getInternalResolution().y() >> 1;
  37. m_mipCount = computeMaxMipmapCount2d(width, height, hHierachicalZMinHeight);
  38. m_lastMipSize.x() = width >> (m_mipCount - 1);
  39. m_lastMipSize.y() = height >> (m_mipCount - 1);
  40. ANKI_R_LOGV("Initializing HiZ. Mip count %u, last mip size %ux%u", m_mipCount, m_lastMipSize.x(), m_lastMipSize.y());
  41. const Bool preferCompute = g_preferComputeCVar.get();
  42. const Bool supportsReductionSampler = GrManager::getSingleton().getDeviceCapabilities().m_samplingFilterMinMax;
  43. // Create RT descr
  44. {
  45. TextureUsageBit usage = TextureUsageBit::kAllSampled;
  46. if(preferCompute)
  47. {
  48. usage |= TextureUsageBit::kImageComputeWrite;
  49. }
  50. else
  51. {
  52. usage |= TextureUsageBit::kFramebufferWrite;
  53. }
  54. TextureInitInfo texInit = getRenderer().create2DRenderTargetInitInfo(width, height, Format::kR32_Sfloat, usage, "HiZ");
  55. texInit.m_mipmapCount = U8(m_mipCount);
  56. m_hizTex = getRenderer().createAndClearRenderTarget(texInit, TextureUsageBit::kSampledFragment);
  57. }
  58. // Progs
  59. if(preferCompute)
  60. {
  61. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/DepthDownscaleCompute.ankiprogbin", m_prog));
  62. ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
  63. variantInitInfo.addMutation("WAVE_OPERATIONS", 0);
  64. const ShaderProgramResourceVariant* variant;
  65. m_prog->getOrCreateVariant(variantInitInfo, variant);
  66. m_grProg.reset(&variant->getProgram());
  67. }
  68. else
  69. {
  70. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/DepthDownscaleRaster.ankiprogbin", m_prog));
  71. ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
  72. variantInitInfo.addMutation("REDUCTION_SAMPLER", supportsReductionSampler);
  73. const ShaderProgramResourceVariant* variant;
  74. m_prog->getOrCreateVariant(variantInitInfo, variant);
  75. m_grProg.reset(&variant->getProgram());
  76. // 1st mip prog
  77. variantInitInfo.addMutation("REDUCTION_SAMPLER", 1);
  78. m_prog->getOrCreateVariant(variantInitInfo, variant);
  79. m_firstMipGrProg.reset(&variant->getProgram());
  80. }
  81. // Counter buffer
  82. if(preferCompute)
  83. {
  84. BufferInitInfo buffInit("HiZCounterBuffer");
  85. buffInit.m_size = sizeof(U32);
  86. buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
  87. m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
  88. }
  89. // Client buffer
  90. {
  91. // Create buffer
  92. BufferInitInfo buffInit("HiZ Client");
  93. buffInit.m_mapAccess = BufferMapAccessBit::kRead;
  94. buffInit.m_size = PtrSize(m_lastMipSize.y()) * PtrSize(m_lastMipSize.x()) * sizeof(F32);
  95. buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageFragmentWrite;
  96. m_clientBuffer = GrManager::getSingleton().newBuffer(buffInit);
  97. m_clientBufferAddr = m_clientBuffer->map(0, buffInit.m_size, BufferMapAccessBit::kRead);
  98. // Fill the buffer with 1.0f
  99. for(U32 i = 0; i < m_lastMipSize.x() * m_lastMipSize.y(); ++i)
  100. {
  101. static_cast<F32*>(m_clientBufferAddr)[i] = 1.0f;
  102. }
  103. }
  104. // Reduction sampler
  105. if(!preferCompute && supportsReductionSampler)
  106. {
  107. SamplerInitInfo sinit("HiZReductionMax");
  108. sinit.m_addressing = SamplingAddressing::kClamp;
  109. sinit.m_mipmapFilter = SamplingFilter::kMax;
  110. sinit.m_minMagFilter = SamplingFilter::kMax;
  111. m_reductionSampler = GrManager::getSingleton().newSampler(sinit);
  112. }
  113. if(!preferCompute)
  114. {
  115. m_fbDescrs.resize(m_mipCount);
  116. for(U32 mip = 0; mip < m_mipCount; ++mip)
  117. {
  118. FramebufferDescription& fbDescr = m_fbDescrs[mip];
  119. fbDescr.m_colorAttachmentCount = 1;
  120. fbDescr.m_colorAttachments[0].m_surface.m_level = mip;
  121. fbDescr.bake();
  122. }
  123. }
  124. return Error::kNone;
  125. }
  126. Error DepthDownscale::init()
  127. {
  128. const Error err = initInternal();
  129. if(err)
  130. {
  131. ANKI_R_LOGE("Failed to initialize depth downscale passes");
  132. }
  133. return err;
  134. }
  135. void DepthDownscale::importRenderTargets(RenderingContext& ctx)
  136. {
  137. RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
  138. // Import RT
  139. if(m_hizTexImportedOnce)
  140. {
  141. m_runCtx.m_hizRt = rgraph.importRenderTarget(m_hizTex.get());
  142. }
  143. else
  144. {
  145. m_runCtx.m_hizRt = rgraph.importRenderTarget(m_hizTex.get(), TextureUsageBit::kSampledFragment);
  146. m_hizTexImportedOnce = true;
  147. }
  148. }
  149. void DepthDownscale::populateRenderGraph(RenderingContext& ctx)
  150. {
  151. RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
  152. if(g_preferComputeCVar.get())
  153. {
  154. // Do it with compute
  155. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("HiZ");
  156. pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSampledCompute,
  157. TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
  158. for(U32 mip = 0; mip < m_mipCount; ++mip)
  159. {
  160. TextureSubresourceInfo subresource;
  161. subresource.m_firstMipmap = mip;
  162. pass.newTextureDependency(m_runCtx.m_hizRt, TextureUsageBit::kImageComputeWrite, subresource);
  163. }
  164. pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
  165. runCompute(rgraphCtx);
  166. });
  167. }
  168. else
  169. {
  170. // Do it with raster
  171. for(U32 mip = 0; mip < m_mipCount; ++mip)
  172. {
  173. static constexpr Array<CString, 8> passNames = {"HiZ #1", "HiZ #2", "HiZ #3", "HiZ #4", "HiZ #5", "HiZ #6", "HiZ #7", "HiZ #8"};
  174. GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[mip]);
  175. pass.setFramebufferInfo(m_fbDescrs[mip], {m_runCtx.m_hizRt});
  176. if(mip == 0)
  177. {
  178. pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSampledFragment,
  179. TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
  180. }
  181. else
  182. {
  183. TextureSurfaceInfo subresource;
  184. subresource.m_level = mip - 1;
  185. pass.newTextureDependency(m_runCtx.m_hizRt, TextureUsageBit::kSampledFragment, subresource);
  186. }
  187. TextureSurfaceInfo subresource;
  188. subresource.m_level = mip;
  189. pass.newTextureDependency(m_runCtx.m_hizRt, TextureUsageBit::kFramebufferWrite, subresource);
  190. pass.setWork([this, mip](RenderPassWorkContext& rgraphCtx) {
  191. runGraphics(mip, rgraphCtx);
  192. });
  193. }
  194. }
  195. }
  196. void DepthDownscale::runCompute(RenderPassWorkContext& rgraphCtx)
  197. {
  198. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  199. // Zero the counter buffer before everything else
  200. if(!m_counterBufferZeroed) [[unlikely]]
  201. {
  202. m_counterBufferZeroed = true;
  203. cmdb.fillBuffer(m_counterBuffer.get(), 0, kMaxPtrSize, 0);
  204. const BufferBarrierInfo barrier = {m_counterBuffer.get(), BufferUsageBit::kTransferDestination, BufferUsageBit::kStorageComputeWrite, 0,
  205. kMaxPtrSize};
  206. cmdb.setPipelineBarrier({}, {&barrier, 1}, {});
  207. }
  208. cmdb.bindShaderProgram(m_grProg.get());
  209. varAU2(dispatchThreadGroupCountXY);
  210. varAU2(workGroupOffset); // needed if Left and Top are not 0,0
  211. varAU2(numWorkGroupsAndMips);
  212. varAU4(rectInfo) = initAU4(0, 0, getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y());
  213. SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
  214. SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, m_mipCount);
  215. DepthDownscaleUniforms pc;
  216. pc.m_workgroupCount = numWorkGroupsAndMips[0];
  217. pc.m_mipmapCount = numWorkGroupsAndMips[1];
  218. pc.m_srcTexSizeOverOne = 1.0f / Vec2(getRenderer().getInternalResolution());
  219. pc.m_lastMipWidth = m_lastMipSize.x();
  220. cmdb.setPushConstants(&pc, sizeof(pc));
  221. constexpr U32 maxMipsSpdCanProduce = 12;
  222. for(U32 mip = 0; mip < maxMipsSpdCanProduce; ++mip)
  223. {
  224. TextureSubresourceInfo subresource;
  225. if(mip < m_mipCount)
  226. {
  227. subresource.m_firstMipmap = mip;
  228. }
  229. else
  230. {
  231. subresource.m_firstMipmap = 0;
  232. }
  233. rgraphCtx.bindImage(0, 0, m_runCtx.m_hizRt, subresource, mip);
  234. }
  235. if(m_mipCount >= 5)
  236. {
  237. TextureSubresourceInfo subresource;
  238. subresource.m_firstMipmap = 4;
  239. rgraphCtx.bindImage(0, 1, m_runCtx.m_hizRt, subresource);
  240. }
  241. else
  242. {
  243. // Bind something random
  244. TextureSubresourceInfo subresource;
  245. subresource.m_firstMipmap = 0;
  246. rgraphCtx.bindImage(0, 1, m_runCtx.m_hizRt, subresource);
  247. }
  248. cmdb.bindStorageBuffer(0, 2, m_counterBuffer.get(), 0, kMaxPtrSize);
  249. cmdb.bindStorageBuffer(0, 3, m_clientBuffer.get(), 0, kMaxPtrSize);
  250. cmdb.bindSampler(0, 4, getRenderer().getSamplers().m_trilinearClamp.get());
  251. rgraphCtx.bindTexture(0, 5, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
  252. cmdb.dispatchCompute(dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1);
  253. }
  254. void DepthDownscale::runGraphics(U32 mip, RenderPassWorkContext& rgraphCtx)
  255. {
  256. CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
  257. if(mip == 0)
  258. {
  259. rgraphCtx.bindTexture(0, 0, getRenderer().getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
  260. cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
  261. cmdb.bindShaderProgram(m_firstMipGrProg.get());
  262. }
  263. else
  264. {
  265. TextureSubresourceInfo subresource;
  266. subresource.m_firstMipmap = mip - 1;
  267. rgraphCtx.bindTexture(0, 0, m_runCtx.m_hizRt, subresource);
  268. if(m_reductionSampler.isCreated())
  269. {
  270. cmdb.bindSampler(0, 1, m_reductionSampler.get());
  271. }
  272. else
  273. {
  274. cmdb.bindSampler(0, 1, getRenderer().getSamplers().m_trilinearClamp.get());
  275. }
  276. cmdb.bindShaderProgram(m_grProg.get());
  277. }
  278. cmdb.bindStorageBuffer(0, 2, m_clientBuffer.get(), 0, kMaxPtrSize);
  279. const UVec4 pc((mip != m_mipCount - 1) ? 0 : m_lastMipSize.x());
  280. cmdb.setPushConstants(&pc, sizeof(pc));
  281. const UVec2 size = (getRenderer().getInternalResolution() / 2) >> mip;
  282. cmdb.setViewport(0, 0, size.x(), size.y());
  283. cmdb.draw(PrimitiveTopology::kTriangles, 3);
  284. }
  285. } // end namespace anki