IndirectDiffuse.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. // Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/IndirectDiffuse.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Renderer/DepthDownscale.h>
  8. #include <AnKi/Renderer/GBuffer.h>
  9. #include <AnKi/Renderer/DownscaleBlur.h>
  10. #include <AnKi/Renderer/MotionVectors.h>
  11. #include <AnKi/Renderer/IndirectDiffuseProbes.h>
  12. #include <AnKi/Core/ConfigSet.h>
  13. namespace anki {
  14. IndirectDiffuse::~IndirectDiffuse()
  15. {
  16. }
  17. Error IndirectDiffuse::init()
  18. {
  19. const Error err = initInternal();
  20. if(err)
  21. {
  22. ANKI_R_LOGE("Failed to initialize indirect diffuse pass");
  23. }
  24. return err;
  25. }
  26. Error IndirectDiffuse::initInternal()
  27. {
  28. const UVec2 size = m_r->getInternalResolution() / 2;
  29. ANKI_ASSERT((m_r->getInternalResolution() % 2) == UVec2(0u) && "Needs to be dividable for proper upscaling");
  30. ANKI_R_LOGV("Initializing indirect diffuse. Resolution %ux%u", size.x(), size.y());
  31. const Bool preferCompute = getConfig().getRPreferCompute();
  32. // Init textures
  33. TextureUsageBit usage = TextureUsageBit::ALL_SAMPLED;
  34. usage |= (preferCompute) ? TextureUsageBit::IMAGE_COMPUTE_WRITE : TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
  35. TextureInitInfo texInit =
  36. m_r->create2DRenderTargetInitInfo(size.x(), size.y(), m_r->getHdrFormat(), usage, "IndirectDiffuse #1");
  37. m_rts[0] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
  38. texInit.setName("IndirectDiffuse #2");
  39. m_rts[1] = m_r->createAndClearRenderTarget(texInit, TextureUsageBit::ALL_SAMPLED);
  40. if(!preferCompute)
  41. {
  42. m_main.m_fbDescr.m_colorAttachmentCount = 1;
  43. m_main.m_fbDescr.bake();
  44. }
  45. // Init VRS SRI generation
  46. const Bool enableVrs = getGrManager().getDeviceCapabilities().m_vrs && getConfig().getRVrs() && !preferCompute;
  47. if(enableVrs)
  48. {
  49. m_vrs.m_sriTexelDimension = getGrManager().getDeviceCapabilities().m_minShadingRateImageTexelSize;
  50. ANKI_ASSERT(m_vrs.m_sriTexelDimension == 8 || m_vrs.m_sriTexelDimension == 16);
  51. const UVec2 rez = (size + m_vrs.m_sriTexelDimension - 1) / m_vrs.m_sriTexelDimension;
  52. m_vrs.m_rtHandle =
  53. m_r->create2DRenderTargetDescription(rez.x(), rez.y(), Format::R8_UINT, "IndirectDiffuseVrsSri");
  54. m_vrs.m_rtHandle.bake();
  55. ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/IndirectDiffuseVrsSriGeneration.ankiprogbin",
  56. m_vrs.m_prog));
  57. ShaderProgramResourceVariantInitInfo variantInit(m_vrs.m_prog);
  58. variantInit.addMutation("SRI_TEXEL_DIMENSION", m_vrs.m_sriTexelDimension);
  59. if(m_vrs.m_sriTexelDimension == 16 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 32)
  60. {
  61. // Algorithm's workgroup size is 32, GPU's subgroup size is min 32 -> each workgroup has 1 subgroup -> No
  62. // need for shared mem
  63. variantInit.addMutation("SHARED_MEMORY", 0);
  64. }
  65. else if(m_vrs.m_sriTexelDimension == 8 && getGrManager().getDeviceCapabilities().m_minSubgroupSize >= 16)
  66. {
  67. // Algorithm's workgroup size is 16, GPU's subgroup size is min 16 -> each workgroup has 1 subgroup -> No
  68. // need for shared mem
  69. variantInit.addMutation("SHARED_MEMORY", 0);
  70. }
  71. else
  72. {
  73. variantInit.addMutation("SHARED_MEMORY", 1);
  74. }
  75. variantInit.addMutation("LIMIT_RATE_TO_2X2", getConfig().getRVrsLimitTo2x2());
  76. const ShaderProgramResourceVariant* variant;
  77. m_vrs.m_prog->getOrCreateVariant(variantInit, variant);
  78. m_vrs.m_grProg = variant->getProgram();
  79. ANKI_CHECK(getResourceManager().loadResource("ShaderBinaries/VrsSriVisualizeRenderTarget.ankiprogbin",
  80. m_vrs.m_visualizeProg));
  81. m_vrs.m_visualizeProg->getOrCreateVariant(variant);
  82. m_vrs.m_visualizeGrProg = variant->getProgram();
  83. }
  84. // Init SSGI+probes pass
  85. {
  86. ANKI_CHECK(getResourceManager().loadResource((preferCompute)
  87. ? "ShaderBinaries/IndirectDiffuseCompute.ankiprogbin"
  88. : "ShaderBinaries/IndirectDiffuseRaster.ankiprogbin",
  89. m_main.m_prog));
  90. const ShaderProgramResourceVariant* variant;
  91. m_main.m_prog->getOrCreateVariant(variant);
  92. m_main.m_grProg = variant->getProgram();
  93. }
  94. // Init denoise
  95. {
  96. m_denoise.m_fbDescr.m_colorAttachmentCount = 1;
  97. m_denoise.m_fbDescr.bake();
  98. ANKI_CHECK(getResourceManager().loadResource((preferCompute)
  99. ? "ShaderBinaries/IndirectDiffuseDenoiseCompute.ankiprogbin"
  100. : "ShaderBinaries/IndirectDiffuseDenoiseRaster.ankiprogbin",
  101. m_denoise.m_prog));
  102. ShaderProgramResourceVariantInitInfo variantInit(m_denoise.m_prog);
  103. variantInit.addMutation("BLUR_ORIENTATION", 0);
  104. const ShaderProgramResourceVariant* variant;
  105. m_denoise.m_prog->getOrCreateVariant(variantInit, variant);
  106. m_denoise.m_grProgs[0] = variant->getProgram();
  107. variantInit.addMutation("BLUR_ORIENTATION", 1);
  108. m_denoise.m_prog->getOrCreateVariant(variantInit, variant);
  109. m_denoise.m_grProgs[1] = variant->getProgram();
  110. }
  111. return Error::NONE;
  112. }
  113. void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
  114. {
  115. RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
  116. const Bool preferCompute = getConfig().getRPreferCompute();
  117. const Bool enableVrs = getGrManager().getDeviceCapabilities().m_vrs && getConfig().getRVrs() && !preferCompute;
  118. const Bool fbDescrHasVrs = m_main.m_fbDescr.m_shadingRateAttachmentTexelWidth > 0;
  119. if(!preferCompute && enableVrs != fbDescrHasVrs)
  120. {
  121. // Re-bake the FB descriptor if the VRS state has changed
  122. if(enableVrs)
  123. {
  124. m_main.m_fbDescr.m_shadingRateAttachmentTexelWidth = m_vrs.m_sriTexelDimension;
  125. m_main.m_fbDescr.m_shadingRateAttachmentTexelHeight = m_vrs.m_sriTexelDimension;
  126. }
  127. else
  128. {
  129. m_main.m_fbDescr.m_shadingRateAttachmentTexelWidth = 0;
  130. m_main.m_fbDescr.m_shadingRateAttachmentTexelHeight = 0;
  131. }
  132. m_main.m_fbDescr.bake();
  133. }
  134. // VRS SRI
  135. if(enableVrs)
  136. {
  137. m_runCtx.m_sriRt = rgraph.newRenderTarget(m_vrs.m_rtHandle);
  138. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("IndirectDiffuse VRS SRI gen");
  139. pass.newDependency(RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
  140. pass.newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE,
  141. HIZ_HALF_DEPTH));
  142. pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
  143. const UVec2 viewport = m_r->getInternalResolution() / 2u;
  144. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  145. cmdb->bindShaderProgram(m_vrs.m_grProg);
  146. rgraphCtx.bindTexture(0, 0, m_r->getDepthDownscale().getHiZRt(), HIZ_HALF_DEPTH);
  147. cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
  148. rgraphCtx.bindImage(0, 2, m_runCtx.m_sriRt);
  149. class
  150. {
  151. public:
  152. Vec4 m_v4;
  153. Mat4 m_invertedProjectionJitter;
  154. } pc;
  155. pc.m_v4 = Vec4(1.0f / Vec2(viewport), getConfig().getRIndirectDiffuseVrsDistanceThreshold(), 0.0f);
  156. pc.m_invertedProjectionJitter = ctx.m_matrices.m_invertedProjectionJitter;
  157. cmdb->setPushConstants(&pc, sizeof(pc));
  158. dispatchPPCompute(cmdb, m_vrs.m_sriTexelDimension, m_vrs.m_sriTexelDimension, viewport.x(), viewport.y());
  159. });
  160. }
  161. // SSGI+probes
  162. {
  163. // Create RTs
  164. const U32 readRtIdx = m_r->getFrameCount() & 1;
  165. const U32 writeRtIdx = !readRtIdx;
  166. if(ANKI_LIKELY(m_rtsImportedOnce))
  167. {
  168. m_runCtx.m_mainRtHandles[0] = rgraph.importRenderTarget(m_rts[readRtIdx]);
  169. m_runCtx.m_mainRtHandles[1] = rgraph.importRenderTarget(m_rts[writeRtIdx]);
  170. }
  171. else
  172. {
  173. m_runCtx.m_mainRtHandles[0] = rgraph.importRenderTarget(m_rts[readRtIdx], TextureUsageBit::ALL_SAMPLED);
  174. m_runCtx.m_mainRtHandles[1] = rgraph.importRenderTarget(m_rts[writeRtIdx], TextureUsageBit::ALL_SAMPLED);
  175. m_rtsImportedOnce = true;
  176. }
  177. // Create main pass
  178. TextureUsageBit readUsage;
  179. TextureUsageBit writeUsage;
  180. RenderPassDescriptionBase* prpass;
  181. if(preferCompute)
  182. {
  183. ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("IndirectDiffuse");
  184. readUsage = TextureUsageBit::SAMPLED_COMPUTE;
  185. writeUsage = TextureUsageBit::IMAGE_COMPUTE_WRITE;
  186. prpass = &rpass;
  187. }
  188. else
  189. {
  190. GraphicsRenderPassDescription& rpass = rgraph.newGraphicsRenderPass("IndirectDiffuse");
  191. rpass.setFramebufferInfo(m_main.m_fbDescr, {m_runCtx.m_mainRtHandles[WRITE]}, {},
  192. (enableVrs) ? m_runCtx.m_sriRt : RenderTargetHandle());
  193. readUsage = TextureUsageBit::SAMPLED_FRAGMENT;
  194. writeUsage = TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
  195. prpass = &rpass;
  196. if(enableVrs)
  197. {
  198. prpass->newDependency(
  199. RenderPassDependency(m_runCtx.m_sriRt, TextureUsageBit::FRAMEBUFFER_SHADING_RATE));
  200. }
  201. }
  202. prpass->newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[WRITE], writeUsage));
  203. m_r->getIndirectDiffuseProbes().setRenderGraphDependencies(ctx, *prpass, readUsage);
  204. prpass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), readUsage));
  205. TextureSubresourceInfo hizSubresource;
  206. hizSubresource.m_mipmapCount = 1;
  207. prpass->newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
  208. prpass->newDependency(RenderPassDependency(m_r->getDownscaleBlur().getRt(), readUsage));
  209. prpass->newDependency(RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), readUsage));
  210. prpass->newDependency(RenderPassDependency(m_r->getMotionVectors().getHistoryLengthRt(), readUsage));
  211. prpass->newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[READ], readUsage));
  212. prpass->setWork([this, &ctx, enableVrs](RenderPassWorkContext& rgraphCtx) {
  213. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  214. cmdb->bindShaderProgram(m_main.m_grProg);
  215. const ClusteredShadingContext& binning = ctx.m_clusteredShading;
  216. bindUniforms(cmdb, 0, 0, binning.m_clusteredShadingUniformsToken);
  217. m_r->getIndirectDiffuseProbes().bindVolumeTextures(ctx, rgraphCtx, 0, 1);
  218. bindUniforms(cmdb, 0, 2, binning.m_globalIlluminationProbesToken);
  219. bindStorage(cmdb, 0, 3, binning.m_clustersToken);
  220. cmdb->bindSampler(0, 4, m_r->getSamplers().m_trilinearClamp);
  221. rgraphCtx.bindColorTexture(0, 5, m_r->getGBuffer().getColorRt(2));
  222. TextureSubresourceInfo hizSubresource;
  223. hizSubresource.m_mipmapCount = 1;
  224. rgraphCtx.bindTexture(0, 6, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
  225. rgraphCtx.bindColorTexture(0, 7, m_r->getDownscaleBlur().getRt());
  226. rgraphCtx.bindColorTexture(0, 8, m_runCtx.m_mainRtHandles[READ]);
  227. rgraphCtx.bindColorTexture(0, 9, m_r->getMotionVectors().getMotionVectorsRt());
  228. rgraphCtx.bindColorTexture(0, 10, m_r->getMotionVectors().getHistoryLengthRt());
  229. if(getConfig().getRPreferCompute())
  230. {
  231. rgraphCtx.bindImage(0, 11, m_runCtx.m_mainRtHandles[WRITE]);
  232. }
  233. // Bind uniforms
  234. IndirectDiffuseUniforms unis;
  235. unis.m_viewportSize = m_r->getInternalResolution() / 2u;
  236. unis.m_viewportSizef = Vec2(unis.m_viewportSize);
  237. const Mat4& pmat = ctx.m_matrices.m_projection;
  238. unis.m_projectionMat = Vec4(pmat(0, 0), pmat(1, 1), pmat(2, 2), pmat(2, 3));
  239. unis.m_radius = getConfig().getRIndirectDiffuseSsgiRadius();
  240. unis.m_sampleCount = getConfig().getRIndirectDiffuseSsgiSampleCount();
  241. unis.m_sampleCountf = F32(unis.m_sampleCount);
  242. unis.m_ssaoBias = getConfig().getRIndirectDiffuseSsaoBias();
  243. unis.m_ssaoStrength = getConfig().getRIndirectDiffuseSsaoStrength();
  244. cmdb->setPushConstants(&unis, sizeof(unis));
  245. if(getConfig().getRPreferCompute())
  246. {
  247. dispatchPPCompute(cmdb, 8, 8, unis.m_viewportSize.x(), unis.m_viewportSize.y());
  248. }
  249. else
  250. {
  251. cmdb->setViewport(0, 0, unis.m_viewportSize.x(), unis.m_viewportSize.y());
  252. if(enableVrs)
  253. {
  254. cmdb->setVrsRate(VrsRate::_1x1);
  255. }
  256. cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3);
  257. }
  258. });
  259. }
  260. // Denoise
  261. for(U32 dir = 0; dir < 2; ++dir)
  262. {
  263. const U32 readIdx = (dir == 0) ? WRITE : READ;
  264. TextureUsageBit readUsage;
  265. TextureUsageBit writeUsage;
  266. RenderPassDescriptionBase* prpass;
  267. if(preferCompute)
  268. {
  269. ComputeRenderPassDescription& rpass =
  270. rgraph.newComputeRenderPass((dir == 0) ? "IndirectDiffuseDenoiseH" : "IndirectDiffuseDenoiseV");
  271. readUsage = TextureUsageBit::SAMPLED_COMPUTE;
  272. writeUsage = TextureUsageBit::IMAGE_COMPUTE_WRITE;
  273. prpass = &rpass;
  274. }
  275. else
  276. {
  277. GraphicsRenderPassDescription& rpass =
  278. rgraph.newGraphicsRenderPass((dir == 0) ? "IndirectDiffuseDenoiseH" : "IndirectDiffuseDenoiseV");
  279. rpass.setFramebufferInfo(m_denoise.m_fbDescr, {m_runCtx.m_mainRtHandles[!readIdx]});
  280. readUsage = TextureUsageBit::SAMPLED_FRAGMENT;
  281. writeUsage = TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
  282. prpass = &rpass;
  283. }
  284. prpass->newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[readIdx], readUsage));
  285. TextureSubresourceInfo hizSubresource;
  286. hizSubresource.m_mipmapCount = 1;
  287. prpass->newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
  288. prpass->newDependency(RenderPassDependency(m_runCtx.m_mainRtHandles[!readIdx], writeUsage));
  289. prpass->setWork([this, &ctx, dir, readIdx](RenderPassWorkContext& rgraphCtx) {
  290. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  291. cmdb->bindShaderProgram(m_denoise.m_grProgs[dir]);
  292. cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
  293. rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_mainRtHandles[readIdx]);
  294. TextureSubresourceInfo hizSubresource;
  295. hizSubresource.m_mipmapCount = 1;
  296. rgraphCtx.bindTexture(0, 2, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
  297. if(getConfig().getRPreferCompute())
  298. {
  299. rgraphCtx.bindImage(0, 3, m_runCtx.m_mainRtHandles[!readIdx]);
  300. }
  301. IndirectDiffuseDenoiseUniforms unis;
  302. unis.m_invertedViewProjectionJitterMat = ctx.m_matrices.m_invertedViewProjectionJitter;
  303. unis.m_viewportSize = m_r->getInternalResolution() / 2u;
  304. unis.m_viewportSizef = Vec2(unis.m_viewportSize);
  305. unis.m_sampleCountDiv2 = F32(getConfig().getRIndirectDiffuseDenoiseSampleCount());
  306. unis.m_sampleCountDiv2 = max(1.0f, std::round(unis.m_sampleCountDiv2 / 2.0f));
  307. cmdb->setPushConstants(&unis, sizeof(unis));
  308. if(getConfig().getRPreferCompute())
  309. {
  310. dispatchPPCompute(cmdb, 8, 8, unis.m_viewportSize.x(), unis.m_viewportSize.y());
  311. }
  312. else
  313. {
  314. cmdb->setViewport(0, 0, unis.m_viewportSize.x(), unis.m_viewportSize.y());
  315. cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3);
  316. }
  317. });
  318. }
  319. }
  320. void IndirectDiffuse::getDebugRenderTarget(CString rtName, Array<RenderTargetHandle, kMaxDebugRenderTargets>& handles,
  321. ShaderProgramPtr& optionalShaderProgram) const
  322. {
  323. if(rtName == "IndirectDiffuse")
  324. {
  325. handles[0] = m_runCtx.m_mainRtHandles[WRITE];
  326. }
  327. else
  328. {
  329. ANKI_ASSERT(rtName == "IndirectDiffuseVrsSri");
  330. handles[0] = m_runCtx.m_sriRt;
  331. optionalShaderProgram = m_vrs.m_visualizeGrProg;
  332. }
  333. }
  334. } // end namespace anki