// ShadowMapping.cpp
  1. // Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/ShadowMapping.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Renderer/RenderQueue.h>
  8. #include <AnKi/Core/ConfigSet.h>
  9. #include <AnKi/Util/ThreadHive.h>
  10. #include <AnKi/Util/Tracer.h>
  11. namespace anki {
  12. class ShadowMapping::Scratch::WorkItem
  13. {
  14. public:
  15. UVec4 m_viewport;
  16. RenderQueue* m_renderQueue;
  17. U32 m_firstRenderableElement;
  18. U32 m_renderableElementCount;
  19. U32 m_threadPoolTaskIdx;
  20. U32 m_renderQueueElementsLod;
  21. };
  22. class ShadowMapping::Scratch::LightToRenderToScratchInfo
  23. {
  24. public:
  25. UVec4 m_viewport;
  26. RenderQueue* m_renderQueue;
  27. U32 m_drawcallCount;
  28. U32 m_renderQueueElementsLod;
  29. };
  30. class ShadowMapping::Atlas::ResolveWorkItem
  31. {
  32. public:
  33. Vec4 m_uvInBounds; ///< Bounds used to avoid blurring neighbour tiles.
  34. Vec4 m_uvIn; ///< UV + size that point to the scratch buffer.
  35. UVec4 m_viewportOut; ///< Viewport in the atlas RT.
  36. Bool m_blur;
  37. };
  38. ShadowMapping::~ShadowMapping()
  39. {
  40. }
  41. Error ShadowMapping::init()
  42. {
  43. ANKI_R_LOGV("Initializing shadowmapping")
  44. const Error err = initInternal();
  45. if(err)
  46. {
  47. ANKI_R_LOGE("Failed to initialize shadowmapping");
  48. }
  49. else
  50. {
  51. ANKI_R_LOGV("Shadowmapping initialized. Scratch size %ux%u, atlas size %ux%u",
  52. m_scratch.m_tileCountX * m_scratch.m_tileResolution,
  53. m_scratch.m_tileCountY * m_scratch.m_tileResolution,
  54. m_atlas.m_tileCountBothAxis * m_atlas.m_tileResolution,
  55. m_atlas.m_tileCountBothAxis * m_atlas.m_tileResolution);
  56. }
  57. return err;
  58. }
  59. Error ShadowMapping::initScratch()
  60. {
  61. // Init the shadowmaps and FBs
  62. {
  63. m_scratch.m_tileCountX = getConfig().getRShadowMappingScratchTileCountX();
  64. m_scratch.m_tileCountY = getConfig().getRShadowMappingScratchTileCountY();
  65. m_scratch.m_tileResolution = getConfig().getRShadowMappingTileResolution();
  66. // RT
  67. m_scratch.m_rtDescr = m_r->create2DRenderTargetDescription(m_scratch.m_tileResolution * m_scratch.m_tileCountX,
  68. m_scratch.m_tileResolution * m_scratch.m_tileCountY,
  69. m_r->getDepthNoStencilFormat(), "SM scratch");
  70. m_scratch.m_rtDescr.bake();
  71. // FB
  72. m_scratch.m_fbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::CLEAR;
  73. m_scratch.m_fbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0f;
  74. m_scratch.m_fbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
  75. m_scratch.m_fbDescr.bake();
  76. }
  77. m_scratch.m_tileAlloc.init(getAllocator(), m_scratch.m_tileCountX, m_scratch.m_tileCountY, MAX_LOD_COUNT, false);
  78. return Error::NONE;
  79. }
  80. Error ShadowMapping::initAtlas()
  81. {
  82. const Bool preferCompute = getConfig().getRPreferCompute();
  83. // Init RT
  84. {
  85. m_atlas.m_tileResolution = getConfig().getRShadowMappingTileResolution();
  86. m_atlas.m_tileCountBothAxis = getConfig().getRShadowMappingTileCountPerRowOrColumn();
  87. // RT
  88. const Format texFormat = (ANKI_EVSM4) ? Format::R32G32B32A32_SFLOAT : Format::R32G32_SFLOAT;
  89. TextureUsageBit usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::SAMPLED_COMPUTE;
  90. usage |= (preferCompute) ? TextureUsageBit::IMAGE_COMPUTE_WRITE : TextureUsageBit::ALL_FRAMEBUFFER_ATTACHMENT;
  91. TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(
  92. m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis,
  93. m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis, texFormat, usage, "SM atlas");
  94. ClearValue clearVal;
  95. clearVal.m_colorf[0] = 1.0f;
  96. m_atlas.m_tex = m_r->createAndClearRenderTarget(texinit, TextureUsageBit::SAMPLED_FRAGMENT, clearVal);
  97. }
  98. // Tiles
  99. m_atlas.m_tileAlloc.init(getAllocator(), m_atlas.m_tileCountBothAxis, m_atlas.m_tileCountBothAxis, MAX_LOD_COUNT,
  100. true);
  101. // Programs and shaders
  102. {
  103. ANKI_CHECK(getResourceManager().loadResource((preferCompute) ? "ShaderBinaries/EvsmCompute.ankiprogbin"
  104. : "ShaderBinaries/EvsmRaster.ankiprogbin",
  105. m_atlas.m_resolveProg));
  106. ShaderProgramResourceVariantInitInfo variantInitInfo(m_atlas.m_resolveProg);
  107. variantInitInfo.addConstant("INPUT_TEXTURE_SIZE", UVec2(m_scratch.m_tileCountX * m_scratch.m_tileResolution,
  108. m_scratch.m_tileCountY * m_scratch.m_tileResolution));
  109. if(!preferCompute)
  110. {
  111. variantInitInfo.addConstant("FB_SIZE", UVec2(m_atlas.m_tileCountBothAxis * m_atlas.m_tileResolution));
  112. }
  113. const ShaderProgramResourceVariant* variant;
  114. m_atlas.m_resolveProg->getOrCreateVariant(variantInitInfo, variant);
  115. m_atlas.m_resolveGrProg = variant->getProgram();
  116. }
  117. m_atlas.m_fbDescr.m_colorAttachmentCount = 1;
  118. m_atlas.m_fbDescr.m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::LOAD;
  119. m_atlas.m_fbDescr.bake();
  120. return Error::NONE;
  121. }
  122. Error ShadowMapping::initInternal()
  123. {
  124. ANKI_CHECK(initScratch());
  125. ANKI_CHECK(initAtlas());
  126. return Error::NONE;
  127. }
  128. void ShadowMapping::runAtlas(RenderPassWorkContext& rgraphCtx)
  129. {
  130. ANKI_ASSERT(m_atlas.m_resolveWorkItems.getSize());
  131. ANKI_TRACE_SCOPED_EVENT(R_SM);
  132. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  133. // Allocate and populate uniforms
  134. EvsmResolveUniforms* uniforms = allocateAndBindStorage<EvsmResolveUniforms*>(
  135. m_atlas.m_resolveWorkItems.getSize() * sizeof(EvsmResolveUniforms), cmdb, 0, 0);
  136. for(U32 i = 0; i < m_atlas.m_resolveWorkItems.getSize(); ++i)
  137. {
  138. EvsmResolveUniforms& uni = uniforms[i];
  139. const Atlas::ResolveWorkItem& workItem = m_atlas.m_resolveWorkItems[i];
  140. uni.m_viewportXY = IVec2(workItem.m_viewportOut.xy());
  141. uni.m_viewportZW = Vec2(workItem.m_viewportOut.zw());
  142. uni.m_uvScale = workItem.m_uvIn.zw();
  143. uni.m_uvTranslation = workItem.m_uvIn.xy();
  144. uni.m_uvMin = workItem.m_uvInBounds.xy();
  145. uni.m_uvMax = workItem.m_uvInBounds.xy() + workItem.m_uvInBounds.zw();
  146. uni.m_blur = workItem.m_blur;
  147. }
  148. cmdb->bindShaderProgram(m_atlas.m_resolveGrProg);
  149. // Continue
  150. cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
  151. rgraphCtx.bindTexture(0, 2, m_scratch.m_rt, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
  152. if(getConfig().getRPreferCompute())
  153. {
  154. rgraphCtx.bindImage(0, 3, m_atlas.m_rt);
  155. constexpr U32 workgroupSize = 8;
  156. ANKI_ASSERT(m_atlas.m_tileResolution >= workgroupSize && (m_atlas.m_tileResolution % workgroupSize) == 0);
  157. cmdb->dispatchCompute(m_atlas.m_tileResolution / workgroupSize, m_atlas.m_tileResolution / workgroupSize,
  158. m_atlas.m_resolveWorkItems.getSize());
  159. }
  160. else
  161. {
  162. cmdb->setViewport(0, 0, m_atlas.m_tex->getWidth(), m_atlas.m_tex->getHeight());
  163. cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 6, m_atlas.m_resolveWorkItems.getSize());
  164. }
  165. }
  166. void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
  167. {
  168. ANKI_ASSERT(m_scratch.m_workItems.getSize());
  169. ANKI_TRACE_SCOPED_EVENT(R_SM);
  170. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  171. const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
  172. for(Scratch::WorkItem& work : m_scratch.m_workItems)
  173. {
  174. if(work.m_threadPoolTaskIdx != threadIdx)
  175. {
  176. continue;
  177. }
  178. // Set state
  179. cmdb->setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
  180. cmdb->setScissor(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
  181. RenderableDrawerArguments args;
  182. args.m_viewMatrix = work.m_renderQueue->m_viewMatrix;
  183. args.m_cameraTransform = Mat3x4::getIdentity(); // Don't care
  184. args.m_viewProjectionMatrix = work.m_renderQueue->m_viewProjectionMatrix;
  185. args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
  186. args.m_sampler = m_r->getSamplers().m_trilinearRepeatAniso;
  187. args.m_minLod = args.m_maxLod = work.m_renderQueueElementsLod;
  188. m_r->getSceneDrawer().drawRange(RenderingTechnique::SHADOW, args,
  189. work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement,
  190. work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement
  191. + work.m_renderableElementCount,
  192. cmdb);
  193. }
  194. }
  195. void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
  196. {
  197. ANKI_TRACE_SCOPED_EVENT(R_SM);
  198. // First process the lights
  199. U32 threadCountForScratchPass = 0;
  200. processLights(ctx, threadCountForScratchPass);
  201. // Build the render graph
  202. RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
  203. if(m_scratch.m_workItems.getSize())
  204. {
  205. // Will have to create render passes
  206. // Scratch pass
  207. {
  208. // Compute render area
  209. const U32 minx = 0, miny = 0;
  210. const U32 height = m_scratch.m_maxViewportHeight;
  211. const U32 width = m_scratch.m_maxViewportWidth;
  212. GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SM scratch");
  213. m_scratch.m_rt = rgraph.newRenderTarget(m_scratch.m_rtDescr);
  214. pass.setFramebufferInfo(m_scratch.m_fbDescr, {}, m_scratch.m_rt, {}, minx, miny, width, height);
  215. ANKI_ASSERT(threadCountForScratchPass
  216. && threadCountForScratchPass <= m_r->getThreadHive().getThreadCount());
  217. pass.setWork(threadCountForScratchPass, [this](RenderPassWorkContext& rgraphCtx) {
  218. runShadowMapping(rgraphCtx);
  219. });
  220. TextureSubresourceInfo subresource = TextureSubresourceInfo(DepthStencilAspectBit::DEPTH);
  221. pass.newDependency({m_scratch.m_rt, TextureUsageBit::ALL_FRAMEBUFFER_ATTACHMENT, subresource});
  222. }
  223. // Atlas pass
  224. {
  225. if(ANKI_LIKELY(m_atlas.m_rtImportedOnce))
  226. {
  227. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex);
  228. }
  229. else
  230. {
  231. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex, TextureUsageBit::SAMPLED_FRAGMENT);
  232. m_atlas.m_rtImportedOnce = true;
  233. }
  234. if(getConfig().getRPreferCompute())
  235. {
  236. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("EVSM resolve");
  237. pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
  238. runAtlas(rgraphCtx);
  239. });
  240. pass.newDependency(RenderPassDependency(m_scratch.m_rt, TextureUsageBit::SAMPLED_COMPUTE,
  241. TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)));
  242. pass.newDependency(RenderPassDependency(m_atlas.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
  243. }
  244. else
  245. {
  246. GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("EVSM resolve");
  247. pass.setFramebufferInfo(m_atlas.m_fbDescr, {m_atlas.m_rt});
  248. pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
  249. runAtlas(rgraphCtx);
  250. });
  251. pass.newDependency(RenderPassDependency(m_scratch.m_rt, TextureUsageBit::SAMPLED_FRAGMENT,
  252. TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)));
  253. pass.newDependency(
  254. RenderPassDependency(m_atlas.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ
  255. | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE));
  256. }
  257. }
  258. }
  259. else
  260. {
  261. // No need for shadowmapping passes, just import the atlas
  262. if(ANKI_LIKELY(m_atlas.m_rtImportedOnce))
  263. {
  264. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex);
  265. }
  266. else
  267. {
  268. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex, TextureUsageBit::SAMPLED_FRAGMENT);
  269. m_atlas.m_rtImportedOnce = true;
  270. }
  271. }
  272. }
  273. Mat4 ShadowMapping::createSpotLightTextureMatrix(const UVec4& viewport) const
  274. {
  275. const F32 atlasSize = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
  276. #if ANKI_COMPILER_GCC_COMPATIBLE
  277. # pragma GCC diagnostic push
  278. # pragma GCC diagnostic ignored "-Wpedantic" // Because GCC and clang throw an incorrect warning
  279. #endif
  280. const Vec2 uv(F32(viewport[0]) / atlasSize, F32(viewport[1]) / atlasSize);
  281. #if ANKI_COMPILER_GCC_COMPATIBLE
  282. # pragma GCC diagnostic pop
  283. #endif
  284. ANKI_ASSERT(uv >= Vec2(0.0f) && uv <= Vec2(1.0f));
  285. ANKI_ASSERT(viewport[2] == viewport[3]);
  286. const F32 sizeTextureSpace = F32(viewport[2]) / atlasSize;
  287. return Mat4(sizeTextureSpace, 0.0f, 0.0f, uv.x(), 0.0f, sizeTextureSpace, 0.0f, uv.y(), 0.0f, 0.0f, 1.0f, 0.0f,
  288. 0.0f, 0.0f, 0.0f, 1.0f);
  289. }
  290. void ShadowMapping::chooseLod(const Vec4& cameraOrigin, const PointLightQueueElement& light, Bool& blurAtlas,
  291. U32& tileBufferLod, U32& renderQueueElementsLod) const
  292. {
  293. const F32 distFromTheCamera = (cameraOrigin - light.m_worldPosition.xyz0()).getLength() - light.m_radius;
  294. if(distFromTheCamera < getConfig().getLod0MaxDistance())
  295. {
  296. ANKI_ASSERT(m_pointLightsMaxLod == 1);
  297. blurAtlas = true;
  298. tileBufferLod = 1;
  299. renderQueueElementsLod = 0;
  300. }
  301. else
  302. {
  303. blurAtlas = false;
  304. tileBufferLod = 0;
  305. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  306. }
  307. }
  308. void ShadowMapping::chooseLod(const Vec4& cameraOrigin, const SpotLightQueueElement& light, Bool& blurAtlas,
  309. U32& tileBufferLod, U32& renderQueueElementsLod) const
  310. {
  311. // Get some data
  312. const Vec4 coneOrigin = light.m_worldTransform.getTranslationPart().xyz0();
  313. const Vec4 coneDir = -light.m_worldTransform.getZAxis().xyz0();
  314. const F32 coneAngle = light.m_outerAngle;
  315. // Compute the distance from the camera to the light cone
  316. const Vec4 V = cameraOrigin - coneOrigin;
  317. const F32 VlenSq = V.dot(V);
  318. const F32 V1len = V.dot(coneDir);
  319. const F32 distFromTheCamera = cos(coneAngle) * sqrt(VlenSq - V1len * V1len) - V1len * sin(coneAngle);
  320. if(distFromTheCamera < getConfig().getLod0MaxDistance())
  321. {
  322. blurAtlas = true;
  323. tileBufferLod = 2;
  324. renderQueueElementsLod = 0;
  325. }
  326. else if(distFromTheCamera < getConfig().getLod1MaxDistance())
  327. {
  328. blurAtlas = false;
  329. tileBufferLod = 1;
  330. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  331. }
  332. else
  333. {
  334. blurAtlas = false;
  335. tileBufferLod = 0;
  336. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  337. }
  338. }
  339. TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
  340. const U32* faceIndices, const U32* drawcallsCount,
  341. const U32* lods, UVec4* atlasTileViewports,
  342. UVec4* scratchTileViewports,
  343. TileAllocatorResult* subResults)
  344. {
  345. ANKI_ASSERT(lightUuid > 0);
  346. ANKI_ASSERT(faceCount > 0);
  347. ANKI_ASSERT(faceTimestamps);
  348. ANKI_ASSERT(faceIndices);
  349. ANKI_ASSERT(drawcallsCount);
  350. ANKI_ASSERT(lods);
  351. TileAllocatorResult res = TileAllocatorResult::ALLOCATION_FAILED;
  352. // Allocate atlas tiles first. They may be cached and that will affect how many scratch tiles we'll need
  353. for(U i = 0; i < faceCount; ++i)
  354. {
  355. Array<U32, 4> tileRanges;
  356. res = m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
  357. drawcallsCount[i], lods[i], tileRanges);
  358. if(res == TileAllocatorResult::ALLOCATION_FAILED)
  359. {
  360. ANKI_R_LOGW("There is not enough space in the shadow atlas for more shadow maps. "
  361. "Increase the r_shadowMappingTileCountPerRowOrColumn or decrease the scene's shadow casters");
  362. // Invalidate cache entries for what we already allocated
  363. for(U j = 0; j < i; ++j)
  364. {
  365. m_atlas.m_tileAlloc.invalidateCache(lightUuid, faceIndices[j]);
  366. }
  367. return res;
  368. }
  369. subResults[i] = res;
  370. // Set viewport
  371. atlasTileViewports[i] = UVec4(tileRanges) * m_atlas.m_tileResolution;
  372. }
  373. // Allocate scratch tiles
  374. for(U i = 0; i < faceCount; ++i)
  375. {
  376. if(subResults[i] == TileAllocatorResult::CACHED)
  377. {
  378. continue;
  379. }
  380. ANKI_ASSERT(subResults[i] == TileAllocatorResult::ALLOCATION_SUCCEEDED);
  381. Array<U32, 4> tileRanges;
  382. res = m_scratch.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
  383. drawcallsCount[i], lods[i], tileRanges);
  384. if(res == TileAllocatorResult::ALLOCATION_FAILED)
  385. {
  386. ANKI_R_LOGW("Don't have enough space in the scratch shadow mapping buffer. "
  387. "If you see this message too often increase r_shadowMappingScratchTileCountX/Y");
  388. // Invalidate atlas tiles
  389. for(U j = 0; j < faceCount; ++j)
  390. {
  391. m_atlas.m_tileAlloc.invalidateCache(lightUuid, faceIndices[j]);
  392. }
  393. return res;
  394. }
  395. // Fix viewport
  396. scratchTileViewports[i] = UVec4(tileRanges) * m_scratch.m_tileResolution;
  397. // Update the max view width
  398. m_scratch.m_maxViewportWidth =
  399. max(m_scratch.m_maxViewportWidth, scratchTileViewports[i][0] + scratchTileViewports[i][2]);
  400. m_scratch.m_maxViewportHeight =
  401. max(m_scratch.m_maxViewportHeight, scratchTileViewports[i][1] + scratchTileViewports[i][3]);
  402. }
  403. return res;
  404. }
  405. void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScratchPass)
  406. {
  407. // Reset the scratch viewport width
  408. m_scratch.m_maxViewportWidth = 0;
  409. m_scratch.m_maxViewportHeight = 0;
  410. // Vars
  411. const Vec4 cameraOrigin = ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz0();
  412. DynamicArrayAuto<Scratch::LightToRenderToScratchInfo> lightsToRender(ctx.m_tempAllocator);
  413. U32 drawcallCount = 0;
  414. DynamicArrayAuto<Atlas::ResolveWorkItem> atlasWorkItems(ctx.m_tempAllocator);
  415. // First thing, allocate an empty tile for empty faces of point lights
  416. UVec4 emptyTileViewport;
  417. {
  418. Array<U32, 4> tileRange;
  419. [[maybe_unused]] const TileAllocatorResult res =
  420. m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), 1, MAX_U64, 0, 1, m_pointLightsMaxLod, tileRange);
  421. emptyTileViewport = UVec4(tileRange);
  422. #if ANKI_ENABLE_ASSERTIONS
  423. static Bool firstRun = true;
  424. if(firstRun)
  425. {
  426. ANKI_ASSERT(res == TileAllocatorResult::ALLOCATION_SUCCEEDED);
  427. firstRun = false;
  428. }
  429. else
  430. {
  431. ANKI_ASSERT(res == TileAllocatorResult::CACHED);
  432. }
  433. #endif
  434. }
  435. // Process the directional light first.
  436. if(ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount > 0)
  437. {
  438. DirectionalLightQueueElement& light = ctx.m_renderQueue->m_directionalLight;
  439. Array<U64, MAX_SHADOW_CASCADES2> timestamps;
  440. Array<U32, MAX_SHADOW_CASCADES2> cascadeIndices;
  441. Array<U32, MAX_SHADOW_CASCADES2> drawcallCounts;
  442. Array<UVec4, MAX_SHADOW_CASCADES2> atlasViewports;
  443. Array<UVec4, MAX_SHADOW_CASCADES2> scratchViewports;
  444. Array<TileAllocatorResult, MAX_SHADOW_CASCADES2> subResults;
  445. Array<U32, MAX_SHADOW_CASCADES2> lods;
  446. Array<U32, MAX_SHADOW_CASCADES2> renderQueueElementsLods;
  447. Array<Bool, MAX_SHADOW_CASCADES2> blurAtlass;
  448. U32 activeCascades = 0;
  449. for(U32 cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
  450. {
  451. ANKI_ASSERT(light.m_shadowRenderQueues[cascade]);
  452. if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
  453. {
  454. // Cascade with drawcalls, will need tiles
  455. timestamps[activeCascades] = m_r->getGlobalTimestamp(); // This light is always updated
  456. cascadeIndices[activeCascades] = cascade;
  457. drawcallCounts[activeCascades] = 1; // Doesn't matter
  458. // Change the quality per cascade
  459. blurAtlass[activeCascades] = (cascade <= 1);
  460. lods[activeCascades] = (cascade <= 1) ? (MAX_LOD_COUNT - 1) : (lods[0] - 1);
  461. renderQueueElementsLods[activeCascades] = (cascade == 0) ? 0 : (MAX_LOD_COUNT - 1);
  462. ++activeCascades;
  463. }
  464. }
  465. const Bool allocationFailed =
  466. activeCascades == 0
  467. || allocateTilesAndScratchTiles(light.m_uuid, activeCascades, &timestamps[0], &cascadeIndices[0],
  468. &drawcallCounts[0], &lods[0], &atlasViewports[0], &scratchViewports[0],
  469. &subResults[0])
  470. == TileAllocatorResult::ALLOCATION_FAILED;
  471. if(!allocationFailed)
  472. {
  473. activeCascades = 0;
  474. for(U cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
  475. {
  476. if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
  477. {
  478. // Cascade with drawcalls, push some work for it
  479. // Update the texture matrix to point to the correct region in the atlas
  480. light.m_textureMatrices[cascade] =
  481. createSpotLightTextureMatrix(atlasViewports[activeCascades]) * light.m_textureMatrices[cascade];
  482. // Push work
  483. newScratchAndAtlasResloveRenderWorkItems(
  484. atlasViewports[activeCascades], scratchViewports[activeCascades], blurAtlass[activeCascades],
  485. light.m_shadowRenderQueues[cascade], renderQueueElementsLods[activeCascades], lightsToRender,
  486. atlasWorkItems, drawcallCount);
  487. ++activeCascades;
  488. }
  489. else
  490. {
  491. // Empty cascade, point it to the empty tile
  492. light.m_textureMatrices[cascade] =
  493. createSpotLightTextureMatrix(emptyTileViewport) * light.m_textureMatrices[cascade];
  494. }
  495. }
  496. }
  497. else
  498. {
  499. // Light can't be a caster this frame
  500. light.m_shadowCascadeCount = 0;
  501. zeroMemory(light.m_shadowRenderQueues);
  502. }
  503. }
  504. // Process the point lights.
  505. for(PointLightQueueElement& light : ctx.m_renderQueue->m_pointLights)
  506. {
  507. if(!light.hasShadow())
  508. {
  509. continue;
  510. }
  511. // Prepare data to allocate tiles and allocate
  512. Array<U64, 6> timestamps;
  513. Array<U32, 6> faceIndices;
  514. Array<U32, 6> drawcallCounts;
  515. Array<UVec4, 6> atlasViewports;
  516. Array<UVec4, 6> scratchViewports;
  517. Array<TileAllocatorResult, 6> subResults;
  518. Array<U32, 6> lods;
  519. U32 numOfFacesThatHaveDrawcalls = 0;
  520. Bool blurAtlas;
  521. U32 lod, renderQueueElementsLod;
  522. chooseLod(cameraOrigin, light, blurAtlas, lod, renderQueueElementsLod);
  523. for(U32 face = 0; face < 6; ++face)
  524. {
  525. ANKI_ASSERT(light.m_shadowRenderQueues[face]);
  526. if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
  527. {
  528. // Has renderables, need to allocate tiles for it so add it to the arrays
  529. faceIndices[numOfFacesThatHaveDrawcalls] = face;
  530. timestamps[numOfFacesThatHaveDrawcalls] =
  531. light.m_shadowRenderQueues[face]->m_shadowRenderablesLastUpdateTimestamp;
  532. drawcallCounts[numOfFacesThatHaveDrawcalls] = light.m_shadowRenderQueues[face]->m_renderables.getSize();
  533. lods[numOfFacesThatHaveDrawcalls] = lod;
  534. ++numOfFacesThatHaveDrawcalls;
  535. }
  536. }
  537. const Bool allocationFailed =
  538. numOfFacesThatHaveDrawcalls == 0
  539. || allocateTilesAndScratchTiles(light.m_uuid, numOfFacesThatHaveDrawcalls, &timestamps[0], &faceIndices[0],
  540. &drawcallCounts[0], &lods[0], &atlasViewports[0], &scratchViewports[0],
  541. &subResults[0])
  542. == TileAllocatorResult::ALLOCATION_FAILED;
  543. if(!allocationFailed)
  544. {
  545. // All good, update the lights
  546. const F32 atlasResolution = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
  547. F32 superTileSize = F32(atlasViewports[0][2]); // Should be the same for all tiles and faces
  548. superTileSize -= 1.0f; // Remove 2 half texels to avoid bilinear filtering bleeding
  549. light.m_shadowAtlasTileSize = superTileSize / atlasResolution;
  550. numOfFacesThatHaveDrawcalls = 0;
  551. for(U face = 0; face < 6; ++face)
  552. {
  553. if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
  554. {
  555. // Has drawcalls, asigned it to a tile
  556. const UVec4& atlasViewport = atlasViewports[numOfFacesThatHaveDrawcalls];
  557. const UVec4& scratchViewport = scratchViewports[numOfFacesThatHaveDrawcalls];
  558. // Add a half texel to the viewport's start to avoid bilinear filtering bleeding
  559. light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
  560. light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + 0.5f) / atlasResolution;
  561. if(subResults[numOfFacesThatHaveDrawcalls] != TileAllocatorResult::CACHED)
  562. {
  563. newScratchAndAtlasResloveRenderWorkItems(
  564. atlasViewport, scratchViewport, blurAtlas, light.m_shadowRenderQueues[face],
  565. renderQueueElementsLod, lightsToRender, atlasWorkItems, drawcallCount);
  566. }
  567. ++numOfFacesThatHaveDrawcalls;
  568. }
  569. else
  570. {
  571. // Doesn't have renderables, point the face to the empty tile
  572. UVec4 atlasViewport = emptyTileViewport;
  573. ANKI_ASSERT(F32(atlasViewport[2]) <= superTileSize && F32(atlasViewport[3]) <= superTileSize);
  574. atlasViewport[2] = U32(superTileSize);
  575. atlasViewport[3] = U32(superTileSize);
  576. light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
  577. light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + 0.5f) / atlasResolution;
  578. }
  579. }
  580. }
  581. else
  582. {
  583. // Light can't be a caster this frame
  584. zeroMemory(light.m_shadowRenderQueues);
  585. }
  586. }
  587. // Process the spot lights
  588. for(SpotLightQueueElement& light : ctx.m_renderQueue->m_spotLights)
  589. {
  590. if(!light.hasShadow())
  591. {
  592. continue;
  593. }
  594. // Allocate tiles
  595. U32 faceIdx = 0;
  596. TileAllocatorResult subResult = TileAllocatorResult::ALLOCATION_FAILED;
  597. UVec4 atlasViewport;
  598. UVec4 scratchViewport;
  599. const U32 localDrawcallCount = light.m_shadowRenderQueue->m_renderables.getSize();
  600. Bool blurAtlas;
  601. U32 lod, renderQueueElementsLod;
  602. chooseLod(cameraOrigin, light, blurAtlas, lod, renderQueueElementsLod);
  603. const Bool allocationFailed =
  604. localDrawcallCount == 0
  605. || allocateTilesAndScratchTiles(
  606. light.m_uuid, 1, &light.m_shadowRenderQueue->m_shadowRenderablesLastUpdateTimestamp, &faceIdx,
  607. &localDrawcallCount, &lod, &atlasViewport, &scratchViewport, &subResult)
  608. == TileAllocatorResult::ALLOCATION_FAILED;
  609. if(!allocationFailed)
  610. {
  611. // All good, update the light
  612. // Update the texture matrix to point to the correct region in the atlas
  613. light.m_textureMatrix = createSpotLightTextureMatrix(atlasViewport) * light.m_textureMatrix;
  614. if(subResult != TileAllocatorResult::CACHED)
  615. {
  616. newScratchAndAtlasResloveRenderWorkItems(atlasViewport, scratchViewport, blurAtlas,
  617. light.m_shadowRenderQueue, renderQueueElementsLod,
  618. lightsToRender, atlasWorkItems, drawcallCount);
  619. }
  620. }
  621. else
  622. {
  623. // Doesn't have renderables or the allocation failed, won't be a shadow caster
  624. light.m_shadowRenderQueue = nullptr;
  625. }
  626. }
  627. // Split the work that will happen in the scratch buffer
  628. if(lightsToRender.getSize())
  629. {
  630. DynamicArrayAuto<Scratch::WorkItem> workItems(ctx.m_tempAllocator);
  631. Scratch::LightToRenderToScratchInfo* lightToRender = lightsToRender.getBegin();
  632. U32 lightToRenderDrawcallCount = lightToRender->m_drawcallCount;
  633. const Scratch::LightToRenderToScratchInfo* lightToRenderEnd = lightsToRender.getEnd();
  634. const U32 threadCount = computeNumberOfSecondLevelCommandBuffers(drawcallCount);
  635. threadCountForScratchPass = threadCount;
  636. for(U32 taskId = 0; taskId < threadCount; ++taskId)
  637. {
  638. U32 start, end;
  639. splitThreadedProblem(taskId, threadCount, drawcallCount, start, end);
  640. // While there are drawcalls in this task emit new work items
  641. U32 taskDrawcallCount = end - start;
  642. ANKI_ASSERT(taskDrawcallCount > 0 && "Because we used computeNumberOfSecondLevelCommandBuffers()");
  643. while(taskDrawcallCount)
  644. {
  645. ANKI_ASSERT(lightToRender != lightToRenderEnd);
  646. const U32 workItemDrawcallCount = min(lightToRenderDrawcallCount, taskDrawcallCount);
  647. Scratch::WorkItem workItem;
  648. workItem.m_viewport = lightToRender->m_viewport;
  649. workItem.m_renderQueue = lightToRender->m_renderQueue;
  650. workItem.m_firstRenderableElement = lightToRender->m_drawcallCount - lightToRenderDrawcallCount;
  651. workItem.m_renderableElementCount = workItemDrawcallCount;
  652. workItem.m_threadPoolTaskIdx = taskId;
  653. workItem.m_renderQueueElementsLod = lightToRender->m_renderQueueElementsLod;
  654. workItems.emplaceBack(workItem);
  655. // Decrease the drawcall counts for the task and the light
  656. ANKI_ASSERT(taskDrawcallCount >= workItemDrawcallCount);
  657. taskDrawcallCount -= workItemDrawcallCount;
  658. ANKI_ASSERT(lightToRenderDrawcallCount >= workItemDrawcallCount);
  659. lightToRenderDrawcallCount -= workItemDrawcallCount;
  660. // Move to the next light
  661. if(lightToRenderDrawcallCount == 0)
  662. {
  663. ++lightToRender;
  664. lightToRenderDrawcallCount =
  665. (lightToRender != lightToRenderEnd) ? lightToRender->m_drawcallCount : 0;
  666. }
  667. }
  668. }
  669. ANKI_ASSERT(lightToRender == lightToRenderEnd);
  670. ANKI_ASSERT(lightsToRender.getSize() <= workItems.getSize());
  671. // All good, store the work items for the threads to pick up
  672. {
  673. Scratch::WorkItem* items;
  674. U32 itemSize;
  675. U32 itemStorageSize;
  676. workItems.moveAndReset(items, itemSize, itemStorageSize);
  677. ANKI_ASSERT(items && itemSize && itemStorageSize);
  678. m_scratch.m_workItems = WeakArray<Scratch::WorkItem>(items, itemSize);
  679. Atlas::ResolveWorkItem* atlasItems;
  680. atlasWorkItems.moveAndReset(atlasItems, itemSize, itemStorageSize);
  681. ANKI_ASSERT(atlasItems && itemSize && itemStorageSize);
  682. m_atlas.m_resolveWorkItems = WeakArray<Atlas::ResolveWorkItem>(atlasItems, itemSize);
  683. }
  684. }
  685. else
  686. {
  687. m_scratch.m_workItems = WeakArray<Scratch::WorkItem>();
  688. m_atlas.m_resolveWorkItems = WeakArray<Atlas::ResolveWorkItem>();
  689. }
  690. }
  691. void ShadowMapping::newScratchAndAtlasResloveRenderWorkItems(
  692. const UVec4& atlasViewport, const UVec4& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
  693. U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
  694. DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const
  695. {
  696. // Scratch work item
  697. {
  698. Scratch::LightToRenderToScratchInfo toRender;
  699. toRender.m_renderQueue = lightRenderQueue;
  700. toRender.m_viewport = scratchVewport;
  701. toRender.m_drawcallCount = lightRenderQueue->m_renderables.getSize();
  702. toRender.m_renderQueueElementsLod = renderQueueElementsLod;
  703. scratchWorkItem.emplaceBack(toRender);
  704. drawcallCount += lightRenderQueue->m_renderables.getSize();
  705. }
  706. // Atlas resolve work items
  707. const U32 tilesX = scratchVewport[2] / m_scratch.m_tileResolution;
  708. const U32 tilesY = scratchVewport[3] / m_scratch.m_tileResolution;
  709. for(U32 x = 0; x < tilesX; ++x)
  710. {
  711. for(U32 y = 0; y < tilesY; ++y)
  712. {
  713. const F32 scratchAtlasWidth = F32(m_scratch.m_tileCountX * m_scratch.m_tileResolution);
  714. const F32 scratchAtlasHeight = F32(m_scratch.m_tileCountY * m_scratch.m_tileResolution);
  715. Atlas::ResolveWorkItem atlasItem;
  716. atlasItem.m_uvInBounds[0] = F32(scratchVewport[0]) / scratchAtlasWidth;
  717. atlasItem.m_uvInBounds[1] = F32(scratchVewport[1]) / scratchAtlasHeight;
  718. atlasItem.m_uvInBounds[2] = F32(scratchVewport[2]) / scratchAtlasWidth;
  719. atlasItem.m_uvInBounds[3] = F32(scratchVewport[3]) / scratchAtlasHeight;
  720. atlasItem.m_uvIn[0] = F32(scratchVewport[0] + scratchVewport[2] / tilesX * x) / scratchAtlasWidth;
  721. atlasItem.m_uvIn[1] = F32(scratchVewport[1] + scratchVewport[3] / tilesY * y) / scratchAtlasHeight;
  722. atlasItem.m_uvIn[2] = F32(scratchVewport[2] / tilesX) / scratchAtlasWidth;
  723. atlasItem.m_uvIn[3] = F32(scratchVewport[3] / tilesY) / scratchAtlasHeight;
  724. atlasItem.m_viewportOut[0] = atlasViewport[0] + atlasViewport[2] / tilesX * x;
  725. atlasItem.m_viewportOut[1] = atlasViewport[1] + atlasViewport[3] / tilesY * y;
  726. atlasItem.m_viewportOut[2] = atlasViewport[2] / tilesX;
  727. atlasItem.m_viewportOut[3] = atlasViewport[3] / tilesY;
  728. atlasItem.m_blur = blurAtlas;
  729. atlasResolveWorkItem.emplaceBack(atlasItem);
  730. }
  731. }
  732. }
  733. } // end namespace anki