ShadowMapping.cpp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Renderer/ShadowMapping.h>
  6. #include <AnKi/Renderer/Renderer.h>
  7. #include <AnKi/Renderer/RenderQueue.h>
  8. #include <AnKi/Core/ConfigSet.h>
  9. #include <AnKi/Util/ThreadHive.h>
  10. #include <AnKi/Util/Tracer.h>
  11. #include <AnKi/Shaders/Include/ShadowMappingTypes.h>
  12. namespace anki {
  13. class ShadowMapping::Scratch::WorkItem
  14. {
  15. public:
  16. UVec4 m_viewport;
  17. RenderQueue* m_renderQueue;
  18. U32 m_firstRenderableElement;
  19. U32 m_renderableElementCount;
  20. U32 m_threadPoolTaskIdx;
  21. U32 m_renderQueueElementsLod;
  22. };
  23. class ShadowMapping::Scratch::LightToRenderToScratchInfo
  24. {
  25. public:
  26. UVec4 m_viewport;
  27. RenderQueue* m_renderQueue;
  28. U32 m_drawcallCount;
  29. U32 m_renderQueueElementsLod;
  30. };
  31. class ShadowMapping::Atlas::ResolveWorkItem
  32. {
  33. public:
  34. Vec4 m_uvInBounds; ///< Bounds used to avoid blurring neighbour tiles.
  35. Vec4 m_uvIn; ///< UV + size that point to the scratch buffer.
  36. UVec4 m_viewportOut; ///< Viewport in the atlas RT.
  37. Bool m_blur;
  38. };
  39. ShadowMapping::~ShadowMapping()
  40. {
  41. }
  42. Error ShadowMapping::init()
  43. {
  44. const Error err = initInternal();
  45. if(err)
  46. {
  47. ANKI_R_LOGE("Failed to initialize shadowmapping");
  48. }
  49. else
  50. {
  51. ANKI_R_LOGI(
  52. "Shadowmapping scratch size %ux%u. atlas size %ux%u", m_scratch.m_tileCountX * m_scratch.m_tileResolution,
  53. m_scratch.m_tileCountY * m_scratch.m_tileResolution, m_atlas.m_tileCountBothAxis * m_atlas.m_tileResolution,
  54. m_atlas.m_tileCountBothAxis * m_atlas.m_tileResolution);
  55. }
  56. return err;
  57. }
  58. Error ShadowMapping::initScratch()
  59. {
  60. // Init the shadowmaps and FBs
  61. {
  62. m_scratch.m_tileCountX = getConfig().getRShadowMappingScratchTileCountX();
  63. m_scratch.m_tileCountY = getConfig().getRShadowMappingScratchTileCountY();
  64. m_scratch.m_tileResolution = getConfig().getRShadowMappingTileResolution();
  65. // RT
  66. m_scratch.m_rtDescr = m_r->create2DRenderTargetDescription(m_scratch.m_tileResolution * m_scratch.m_tileCountX,
  67. m_scratch.m_tileResolution * m_scratch.m_tileCountY,
  68. SHADOW_DEPTH_PIXEL_FORMAT, "SM scratch");
  69. m_scratch.m_rtDescr.bake();
  70. // FB
  71. m_scratch.m_fbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::CLEAR;
  72. m_scratch.m_fbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0f;
  73. m_scratch.m_fbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
  74. m_scratch.m_fbDescr.bake();
  75. }
  76. m_scratch.m_tileAlloc.init(getAllocator(), m_scratch.m_tileCountX, m_scratch.m_tileCountY, MAX_LOD_COUNT, false);
  77. return Error::NONE;
  78. }
  79. Error ShadowMapping::initAtlas()
  80. {
  81. // Init RT
  82. {
  83. m_atlas.m_tileResolution = getConfig().getRShadowMappingTileResolution();
  84. m_atlas.m_tileCountBothAxis = getConfig().getRShadowMappingTileCountPerRowOrColumn();
  85. // RT
  86. TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(
  87. m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis,
  88. m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis, SHADOW_COLOR_PIXEL_FORMAT,
  89. TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::IMAGE_COMPUTE_WRITE | TextureUsageBit::SAMPLED_COMPUTE,
  90. "SM atlas");
  91. texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
  92. ClearValue clearVal;
  93. clearVal.m_colorf[0] = 1.0f;
  94. m_atlas.m_tex = m_r->createAndClearRenderTarget(texinit, clearVal);
  95. }
  96. // Tiles
  97. m_atlas.m_tileAlloc.init(getAllocator(), m_atlas.m_tileCountBothAxis, m_atlas.m_tileCountBothAxis, MAX_LOD_COUNT,
  98. true);
  99. // Programs and shaders
  100. {
  101. ANKI_CHECK(getResourceManager().loadResource("Shaders/ExponentialShadowmappingResolve.ankiprog",
  102. m_atlas.m_resolveProg));
  103. ShaderProgramResourceVariantInitInfo variantInitInfo(m_atlas.m_resolveProg);
  104. variantInitInfo.addConstant("INPUT_TEXTURE_SIZE", UVec2(m_scratch.m_tileCountX * m_scratch.m_tileResolution,
  105. m_scratch.m_tileCountY * m_scratch.m_tileResolution));
  106. const ShaderProgramResourceVariant* variant;
  107. m_atlas.m_resolveProg->getOrCreateVariant(variantInitInfo, variant);
  108. m_atlas.m_resolveGrProg = variant->getProgram();
  109. }
  110. return Error::NONE;
  111. }
  112. Error ShadowMapping::initInternal()
  113. {
  114. ANKI_CHECK(initScratch());
  115. ANKI_CHECK(initAtlas());
  116. return Error::NONE;
  117. }
  118. void ShadowMapping::runAtlas(RenderPassWorkContext& rgraphCtx)
  119. {
  120. ANKI_ASSERT(m_atlas.m_resolveWorkItems.getSize());
  121. ANKI_TRACE_SCOPED_EVENT(R_SM);
  122. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  123. // Allocate and populate uniforms
  124. ShadowMappingUniforms* uniforms = allocateAndBindStorage<ShadowMappingUniforms*>(
  125. m_atlas.m_resolveWorkItems.getSize() * sizeof(ShadowMappingUniforms), cmdb, 0, 0);
  126. for(U32 i = 0; i < m_atlas.m_resolveWorkItems.getSize(); ++i)
  127. {
  128. ShadowMappingUniforms& uni = uniforms[i];
  129. const Atlas::ResolveWorkItem& workItem = m_atlas.m_resolveWorkItems[i];
  130. uni.m_viewportXY = IVec2(workItem.m_viewportOut.xy());
  131. uni.m_viewportZW = Vec2(workItem.m_viewportOut.zw());
  132. uni.m_uvScale = workItem.m_uvIn.zw();
  133. uni.m_uvTranslation = workItem.m_uvIn.xy();
  134. uni.m_uvMin = workItem.m_uvInBounds.xy();
  135. uni.m_uvMax = workItem.m_uvInBounds.xy() + workItem.m_uvInBounds.zw();
  136. uni.m_blur = workItem.m_blur;
  137. }
  138. cmdb->bindShaderProgram(m_atlas.m_resolveGrProg);
  139. // Continue
  140. cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
  141. rgraphCtx.bindTexture(0, 2, m_scratch.m_rt, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
  142. rgraphCtx.bindImage(0, 3, m_atlas.m_rt);
  143. constexpr U32 workgroupSize = 8;
  144. ANKI_ASSERT(m_atlas.m_tileResolution >= workgroupSize && (m_atlas.m_tileResolution % workgroupSize) == 0);
  145. cmdb->dispatchCompute(m_atlas.m_tileResolution / workgroupSize, m_atlas.m_tileResolution / workgroupSize,
  146. m_atlas.m_resolveWorkItems.getSize());
  147. }
  148. void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
  149. {
  150. ANKI_ASSERT(m_scratch.m_workItems.getSize());
  151. ANKI_TRACE_SCOPED_EVENT(R_SM);
  152. CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
  153. const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
  154. for(Scratch::WorkItem& work : m_scratch.m_workItems)
  155. {
  156. if(work.m_threadPoolTaskIdx != threadIdx)
  157. {
  158. continue;
  159. }
  160. // Set state
  161. cmdb->setViewport(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
  162. cmdb->setScissor(work.m_viewport[0], work.m_viewport[1], work.m_viewport[2], work.m_viewport[3]);
  163. m_r->getSceneDrawer().drawRange(Pass::SM, work.m_renderQueue->m_viewMatrix,
  164. work.m_renderQueue->m_viewProjectionMatrix,
  165. Mat4::getIdentity(), // Don't care about prev matrices here
  166. cmdb, m_r->getSamplers().m_trilinearRepeatAniso,
  167. work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement,
  168. work.m_renderQueue->m_renderables.getBegin() + work.m_firstRenderableElement
  169. + work.m_renderableElementCount,
  170. work.m_renderQueueElementsLod, work.m_renderQueueElementsLod);
  171. }
  172. }
  173. void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
  174. {
  175. ANKI_TRACE_SCOPED_EVENT(R_SM);
  176. // First process the lights
  177. U32 threadCountForScratchPass = 0;
  178. processLights(ctx, threadCountForScratchPass);
  179. // Build the render graph
  180. RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
  181. if(m_scratch.m_workItems.getSize())
  182. {
  183. // Will have to create render passes
  184. // Scratch pass
  185. {
  186. // Compute render area
  187. const U32 minx = 0, miny = 0;
  188. const U32 height = m_scratch.m_maxViewportHeight;
  189. const U32 width = m_scratch.m_maxViewportWidth;
  190. GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SM scratch");
  191. m_scratch.m_rt = rgraph.newRenderTarget(m_scratch.m_rtDescr);
  192. pass.setFramebufferInfo(m_scratch.m_fbDescr, {}, m_scratch.m_rt, minx, miny, width, height);
  193. ANKI_ASSERT(threadCountForScratchPass
  194. && threadCountForScratchPass <= m_r->getThreadHive().getThreadCount());
  195. pass.setWork(threadCountForScratchPass, [this](RenderPassWorkContext& rgraphCtx) {
  196. runShadowMapping(rgraphCtx);
  197. });
  198. TextureSubresourceInfo subresource = TextureSubresourceInfo(DepthStencilAspectBit::DEPTH);
  199. pass.newDependency({m_scratch.m_rt, TextureUsageBit::ALL_FRAMEBUFFER_ATTACHMENT, subresource});
  200. }
  201. // Atlas pass
  202. {
  203. ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SM atlas");
  204. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex, TextureUsageBit::SAMPLED_FRAGMENT);
  205. pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
  206. runAtlas(rgraphCtx);
  207. });
  208. pass.newDependency({m_scratch.m_rt, TextureUsageBit::SAMPLED_COMPUTE,
  209. TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)});
  210. pass.newDependency({m_atlas.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE});
  211. }
  212. }
  213. else
  214. {
  215. // No need for shadowmapping passes, just import the atlas
  216. m_atlas.m_rt = rgraph.importRenderTarget(m_atlas.m_tex, TextureUsageBit::SAMPLED_FRAGMENT);
  217. }
  218. }
  219. Mat4 ShadowMapping::createSpotLightTextureMatrix(const UVec4& viewport) const
  220. {
  221. const F32 atlasSize = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
  222. #if ANKI_COMPILER_GCC_COMPATIBLE
  223. # pragma GCC diagnostic push
  224. # pragma GCC diagnostic ignored "-Wpedantic" // Because GCC and clang throw an incorrect warning
  225. #endif
  226. const Vec2 uv(F32(viewport[0]) / atlasSize, F32(viewport[1]) / atlasSize);
  227. #if ANKI_COMPILER_GCC_COMPATIBLE
  228. # pragma GCC diagnostic pop
  229. #endif
  230. ANKI_ASSERT(uv >= Vec2(0.0f) && uv <= Vec2(1.0f));
  231. ANKI_ASSERT(viewport[2] == viewport[3]);
  232. const F32 sizeTextureSpace = F32(viewport[2]) / atlasSize;
  233. return Mat4(sizeTextureSpace, 0.0f, 0.0f, uv.x(), 0.0f, sizeTextureSpace, 0.0f, uv.y(), 0.0f, 0.0f, 1.0f, 0.0f,
  234. 0.0f, 0.0f, 0.0f, 1.0f);
  235. }
  236. void ShadowMapping::chooseLod(const Vec4& cameraOrigin, const PointLightQueueElement& light, Bool& blurAtlas,
  237. U32& tileBufferLod, U32& renderQueueElementsLod) const
  238. {
  239. const F32 distFromTheCamera = (cameraOrigin - light.m_worldPosition.xyz0()).getLength() - light.m_radius;
  240. if(distFromTheCamera < getConfig().getLod0MaxDistance())
  241. {
  242. ANKI_ASSERT(m_pointLightsMaxLod == 1);
  243. blurAtlas = true;
  244. tileBufferLod = 1;
  245. renderQueueElementsLod = 0;
  246. }
  247. else
  248. {
  249. blurAtlas = false;
  250. tileBufferLod = 0;
  251. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  252. }
  253. }
  254. void ShadowMapping::chooseLod(const Vec4& cameraOrigin, const SpotLightQueueElement& light, Bool& blurAtlas,
  255. U32& tileBufferLod, U32& renderQueueElementsLod) const
  256. {
  257. // Get some data
  258. const Vec4 coneOrigin = light.m_worldTransform.getTranslationPart().xyz0();
  259. const Vec4 coneDir = -light.m_worldTransform.getZAxis().xyz0();
  260. const F32 coneAngle = light.m_outerAngle;
  261. // Compute the distance from the camera to the light cone
  262. const Vec4 V = cameraOrigin - coneOrigin;
  263. const F32 VlenSq = V.dot(V);
  264. const F32 V1len = V.dot(coneDir);
  265. const F32 distFromTheCamera = cos(coneAngle) * sqrt(VlenSq - V1len * V1len) - V1len * sin(coneAngle);
  266. if(distFromTheCamera < getConfig().getLod0MaxDistance())
  267. {
  268. blurAtlas = true;
  269. tileBufferLod = 2;
  270. renderQueueElementsLod = 0;
  271. }
  272. else if(distFromTheCamera < getConfig().getLod1MaxDistance())
  273. {
  274. blurAtlas = false;
  275. tileBufferLod = 1;
  276. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  277. }
  278. else
  279. {
  280. blurAtlas = false;
  281. tileBufferLod = 0;
  282. renderQueueElementsLod = MAX_LOD_COUNT - 1;
  283. }
  284. }
  285. TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
  286. const U32* faceIndices, const U32* drawcallsCount,
  287. const U32* lods, UVec4* atlasTileViewports,
  288. UVec4* scratchTileViewports,
  289. TileAllocatorResult* subResults)
  290. {
  291. ANKI_ASSERT(lightUuid > 0);
  292. ANKI_ASSERT(faceCount > 0);
  293. ANKI_ASSERT(faceTimestamps);
  294. ANKI_ASSERT(faceIndices);
  295. ANKI_ASSERT(drawcallsCount);
  296. ANKI_ASSERT(lods);
  297. TileAllocatorResult res = TileAllocatorResult::ALLOCATION_FAILED;
  298. // Allocate atlas tiles first. They may be cached and that will affect how many scratch tiles we'll need
  299. for(U i = 0; i < faceCount; ++i)
  300. {
  301. Array<U32, 4> tileRanges;
  302. res = m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
  303. drawcallsCount[i], lods[i], tileRanges);
  304. if(res == TileAllocatorResult::ALLOCATION_FAILED)
  305. {
  306. ANKI_R_LOGW("There is not enough space in the shadow atlas for more shadow maps. "
  307. "Increase the r_shadowMappingTileCountPerRowOrColumn or decrease the scene's shadow casters");
  308. // Invalidate cache entries for what we already allocated
  309. for(U j = 0; j < i; ++j)
  310. {
  311. m_atlas.m_tileAlloc.invalidateCache(lightUuid, faceIndices[j]);
  312. }
  313. return res;
  314. }
  315. subResults[i] = res;
  316. // Set viewport
  317. atlasTileViewports[i] = UVec4(tileRanges) * m_atlas.m_tileResolution;
  318. }
  319. // Allocate scratch tiles
  320. for(U i = 0; i < faceCount; ++i)
  321. {
  322. if(subResults[i] == TileAllocatorResult::CACHED)
  323. {
  324. continue;
  325. }
  326. ANKI_ASSERT(subResults[i] == TileAllocatorResult::ALLOCATION_SUCCEEDED);
  327. Array<U32, 4> tileRanges;
  328. res = m_scratch.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
  329. drawcallsCount[i], lods[i], tileRanges);
  330. if(res == TileAllocatorResult::ALLOCATION_FAILED)
  331. {
  332. ANKI_R_LOGW("Don't have enough space in the scratch shadow mapping buffer. "
  333. "If you see this message too often increase r_shadowMappingScratchTileCountX/Y");
  334. // Invalidate atlas tiles
  335. for(U j = 0; j < faceCount; ++j)
  336. {
  337. m_atlas.m_tileAlloc.invalidateCache(lightUuid, faceIndices[j]);
  338. }
  339. return res;
  340. }
  341. // Fix viewport
  342. scratchTileViewports[i] = UVec4(tileRanges) * m_scratch.m_tileResolution;
  343. // Update the max view width
  344. m_scratch.m_maxViewportWidth =
  345. max(m_scratch.m_maxViewportWidth, scratchTileViewports[i][0] + scratchTileViewports[i][2]);
  346. m_scratch.m_maxViewportHeight =
  347. max(m_scratch.m_maxViewportHeight, scratchTileViewports[i][1] + scratchTileViewports[i][3]);
  348. }
  349. return res;
  350. }
  351. void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScratchPass)
  352. {
  353. // Reset the scratch viewport width
  354. m_scratch.m_maxViewportWidth = 0;
  355. m_scratch.m_maxViewportHeight = 0;
  356. // Vars
  357. const Vec4 cameraOrigin = ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz0();
  358. DynamicArrayAuto<Scratch::LightToRenderToScratchInfo> lightsToRender(ctx.m_tempAllocator);
  359. U32 drawcallCount = 0;
  360. DynamicArrayAuto<Atlas::ResolveWorkItem> atlasWorkItems(ctx.m_tempAllocator);
  361. // First thing, allocate an empty tile for empty faces of point lights
  362. UVec4 emptyTileViewport;
  363. {
  364. Array<U32, 4> tileRange;
  365. const TileAllocatorResult res =
  366. m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), 1, MAX_U64, 0, 1, m_pointLightsMaxLod, tileRange);
  367. emptyTileViewport = UVec4(tileRange);
  368. (void)res;
  369. #if ANKI_ENABLE_ASSERTIONS
  370. static Bool firstRun = true;
  371. if(firstRun)
  372. {
  373. ANKI_ASSERT(res == TileAllocatorResult::ALLOCATION_SUCCEEDED);
  374. firstRun = false;
  375. }
  376. else
  377. {
  378. ANKI_ASSERT(res == TileAllocatorResult::CACHED);
  379. }
  380. #endif
  381. }
  382. // Process the directional light first.
  383. if(ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount > 0)
  384. {
  385. DirectionalLightQueueElement& light = ctx.m_renderQueue->m_directionalLight;
  386. Array<U64, MAX_SHADOW_CASCADES2> timestamps;
  387. Array<U32, MAX_SHADOW_CASCADES2> cascadeIndices;
  388. Array<U32, MAX_SHADOW_CASCADES2> drawcallCounts;
  389. Array<UVec4, MAX_SHADOW_CASCADES2> atlasViewports;
  390. Array<UVec4, MAX_SHADOW_CASCADES2> scratchViewports;
  391. Array<TileAllocatorResult, MAX_SHADOW_CASCADES2> subResults;
  392. Array<U32, MAX_SHADOW_CASCADES2> lods;
  393. Array<U32, MAX_SHADOW_CASCADES2> renderQueueElementsLods;
  394. Array<Bool, MAX_SHADOW_CASCADES2> blurAtlass;
  395. U32 activeCascades = 0;
  396. for(U32 cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
  397. {
  398. ANKI_ASSERT(light.m_shadowRenderQueues[cascade]);
  399. if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
  400. {
  401. // Cascade with drawcalls, will need tiles
  402. timestamps[activeCascades] = m_r->getGlobalTimestamp(); // This light is always updated
  403. cascadeIndices[activeCascades] = cascade;
  404. drawcallCounts[activeCascades] = 1; // Doesn't matter
  405. // Change the quality per cascade
  406. blurAtlass[activeCascades] = (cascade <= 1);
  407. lods[activeCascades] = (cascade <= 1) ? (MAX_LOD_COUNT - 1) : (lods[0] - 1);
  408. renderQueueElementsLods[activeCascades] = (cascade == 0) ? 0 : (MAX_LOD_COUNT - 1);
  409. ++activeCascades;
  410. }
  411. }
  412. const Bool allocationFailed =
  413. activeCascades == 0
  414. || allocateTilesAndScratchTiles(light.m_uuid, activeCascades, &timestamps[0], &cascadeIndices[0],
  415. &drawcallCounts[0], &lods[0], &atlasViewports[0], &scratchViewports[0],
  416. &subResults[0])
  417. == TileAllocatorResult::ALLOCATION_FAILED;
  418. if(!allocationFailed)
  419. {
  420. activeCascades = 0;
  421. for(U cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
  422. {
  423. if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
  424. {
  425. // Cascade with drawcalls, push some work for it
  426. // Update the texture matrix to point to the correct region in the atlas
  427. light.m_textureMatrices[cascade] =
  428. createSpotLightTextureMatrix(atlasViewports[activeCascades]) * light.m_textureMatrices[cascade];
  429. // Push work
  430. newScratchAndAtlasResloveRenderWorkItems(
  431. atlasViewports[activeCascades], scratchViewports[activeCascades], blurAtlass[activeCascades],
  432. light.m_shadowRenderQueues[cascade], renderQueueElementsLods[activeCascades], lightsToRender,
  433. atlasWorkItems, drawcallCount);
  434. ++activeCascades;
  435. }
  436. else
  437. {
  438. // Empty cascade, point it to the empty tile
  439. light.m_textureMatrices[cascade] =
  440. createSpotLightTextureMatrix(emptyTileViewport) * light.m_textureMatrices[cascade];
  441. }
  442. }
  443. }
  444. else
  445. {
  446. // Light can't be a caster this frame
  447. light.m_shadowCascadeCount = 0;
  448. zeroMemory(light.m_shadowRenderQueues);
  449. }
  450. }
  451. // Process the point lights.
  452. for(PointLightQueueElement& light : ctx.m_renderQueue->m_pointLights)
  453. {
  454. if(!light.hasShadow())
  455. {
  456. continue;
  457. }
  458. // Prepare data to allocate tiles and allocate
  459. Array<U64, 6> timestamps;
  460. Array<U32, 6> faceIndices;
  461. Array<U32, 6> drawcallCounts;
  462. Array<UVec4, 6> atlasViewports;
  463. Array<UVec4, 6> scratchViewports;
  464. Array<TileAllocatorResult, 6> subResults;
  465. Array<U32, 6> lods;
  466. U32 numOfFacesThatHaveDrawcalls = 0;
  467. Bool blurAtlas;
  468. U32 lod, renderQueueElementsLod;
  469. chooseLod(cameraOrigin, light, blurAtlas, lod, renderQueueElementsLod);
  470. for(U32 face = 0; face < 6; ++face)
  471. {
  472. ANKI_ASSERT(light.m_shadowRenderQueues[face]);
  473. if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
  474. {
  475. // Has renderables, need to allocate tiles for it so add it to the arrays
  476. faceIndices[numOfFacesThatHaveDrawcalls] = face;
  477. timestamps[numOfFacesThatHaveDrawcalls] =
  478. light.m_shadowRenderQueues[face]->m_shadowRenderablesLastUpdateTimestamp;
  479. drawcallCounts[numOfFacesThatHaveDrawcalls] = light.m_shadowRenderQueues[face]->m_renderables.getSize();
  480. lods[numOfFacesThatHaveDrawcalls] = lod;
  481. ++numOfFacesThatHaveDrawcalls;
  482. }
  483. }
  484. const Bool allocationFailed =
  485. numOfFacesThatHaveDrawcalls == 0
  486. || allocateTilesAndScratchTiles(light.m_uuid, numOfFacesThatHaveDrawcalls, &timestamps[0], &faceIndices[0],
  487. &drawcallCounts[0], &lods[0], &atlasViewports[0], &scratchViewports[0],
  488. &subResults[0])
  489. == TileAllocatorResult::ALLOCATION_FAILED;
  490. if(!allocationFailed)
  491. {
  492. // All good, update the lights
  493. const F32 atlasResolution = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
  494. F32 superTileSize = F32(atlasViewports[0][2]); // Should be the same for all tiles and faces
  495. superTileSize -= 1.0f; // Remove 2 half texels to avoid bilinear filtering bleeding
  496. light.m_shadowAtlasTileSize = superTileSize / atlasResolution;
  497. numOfFacesThatHaveDrawcalls = 0;
  498. for(U face = 0; face < 6; ++face)
  499. {
  500. if(light.m_shadowRenderQueues[face]->m_renderables.getSize())
  501. {
  502. // Has drawcalls, asigned it to a tile
  503. const UVec4& atlasViewport = atlasViewports[numOfFacesThatHaveDrawcalls];
  504. const UVec4& scratchViewport = scratchViewports[numOfFacesThatHaveDrawcalls];
  505. // Add a half texel to the viewport's start to avoid bilinear filtering bleeding
  506. light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
  507. light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + 0.5f) / atlasResolution;
  508. if(subResults[numOfFacesThatHaveDrawcalls] != TileAllocatorResult::CACHED)
  509. {
  510. newScratchAndAtlasResloveRenderWorkItems(
  511. atlasViewport, scratchViewport, blurAtlas, light.m_shadowRenderQueues[face],
  512. renderQueueElementsLod, lightsToRender, atlasWorkItems, drawcallCount);
  513. }
  514. ++numOfFacesThatHaveDrawcalls;
  515. }
  516. else
  517. {
  518. // Doesn't have renderables, point the face to the empty tile
  519. UVec4 atlasViewport = emptyTileViewport;
  520. ANKI_ASSERT(F32(atlasViewport[2]) <= superTileSize && F32(atlasViewport[3]) <= superTileSize);
  521. atlasViewport[2] = U32(superTileSize);
  522. atlasViewport[3] = U32(superTileSize);
  523. light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
  524. light.m_shadowAtlasTileOffsets[face].y() = (F32(atlasViewport[1]) + 0.5f) / atlasResolution;
  525. }
  526. }
  527. }
  528. else
  529. {
  530. // Light can't be a caster this frame
  531. zeroMemory(light.m_shadowRenderQueues);
  532. }
  533. }
  534. // Process the spot lights
  535. for(SpotLightQueueElement& light : ctx.m_renderQueue->m_spotLights)
  536. {
  537. if(!light.hasShadow())
  538. {
  539. continue;
  540. }
  541. // Allocate tiles
  542. U32 faceIdx = 0;
  543. TileAllocatorResult subResult;
  544. UVec4 atlasViewport;
  545. UVec4 scratchViewport;
  546. const U32 localDrawcallCount = light.m_shadowRenderQueue->m_renderables.getSize();
  547. Bool blurAtlas;
  548. U32 lod, renderQueueElementsLod;
  549. chooseLod(cameraOrigin, light, blurAtlas, lod, renderQueueElementsLod);
  550. const Bool allocationFailed =
  551. localDrawcallCount == 0
  552. || allocateTilesAndScratchTiles(
  553. light.m_uuid, 1, &light.m_shadowRenderQueue->m_shadowRenderablesLastUpdateTimestamp, &faceIdx,
  554. &localDrawcallCount, &lod, &atlasViewport, &scratchViewport, &subResult)
  555. == TileAllocatorResult::ALLOCATION_FAILED;
  556. if(!allocationFailed)
  557. {
  558. // All good, update the light
  559. // Update the texture matrix to point to the correct region in the atlas
  560. light.m_textureMatrix = createSpotLightTextureMatrix(atlasViewport) * light.m_textureMatrix;
  561. if(subResult != TileAllocatorResult::CACHED)
  562. {
  563. newScratchAndAtlasResloveRenderWorkItems(atlasViewport, scratchViewport, blurAtlas,
  564. light.m_shadowRenderQueue, renderQueueElementsLod,
  565. lightsToRender, atlasWorkItems, drawcallCount);
  566. }
  567. }
  568. else
  569. {
  570. // Doesn't have renderables or the allocation failed, won't be a shadow caster
  571. light.m_shadowRenderQueue = nullptr;
  572. }
  573. }
  574. // Split the work that will happen in the scratch buffer
  575. if(lightsToRender.getSize())
  576. {
  577. DynamicArrayAuto<Scratch::WorkItem> workItems(ctx.m_tempAllocator);
  578. Scratch::LightToRenderToScratchInfo* lightToRender = lightsToRender.getBegin();
  579. U32 lightToRenderDrawcallCount = lightToRender->m_drawcallCount;
  580. const Scratch::LightToRenderToScratchInfo* lightToRenderEnd = lightsToRender.getEnd();
  581. const U32 threadCount = computeNumberOfSecondLevelCommandBuffers(drawcallCount);
  582. threadCountForScratchPass = threadCount;
  583. for(U32 taskId = 0; taskId < threadCount; ++taskId)
  584. {
  585. U32 start, end;
  586. splitThreadedProblem(taskId, threadCount, drawcallCount, start, end);
  587. // While there are drawcalls in this task emit new work items
  588. U32 taskDrawcallCount = end - start;
  589. ANKI_ASSERT(taskDrawcallCount > 0 && "Because we used computeNumberOfSecondLevelCommandBuffers()");
  590. while(taskDrawcallCount)
  591. {
  592. ANKI_ASSERT(lightToRender != lightToRenderEnd);
  593. const U32 workItemDrawcallCount = min(lightToRenderDrawcallCount, taskDrawcallCount);
  594. Scratch::WorkItem workItem;
  595. workItem.m_viewport = lightToRender->m_viewport;
  596. workItem.m_renderQueue = lightToRender->m_renderQueue;
  597. workItem.m_firstRenderableElement = lightToRender->m_drawcallCount - lightToRenderDrawcallCount;
  598. workItem.m_renderableElementCount = workItemDrawcallCount;
  599. workItem.m_threadPoolTaskIdx = taskId;
  600. workItem.m_renderQueueElementsLod = lightToRender->m_renderQueueElementsLod;
  601. workItems.emplaceBack(workItem);
  602. // Decrease the drawcall counts for the task and the light
  603. ANKI_ASSERT(taskDrawcallCount >= workItemDrawcallCount);
  604. taskDrawcallCount -= workItemDrawcallCount;
  605. ANKI_ASSERT(lightToRenderDrawcallCount >= workItemDrawcallCount);
  606. lightToRenderDrawcallCount -= workItemDrawcallCount;
  607. // Move to the next light
  608. if(lightToRenderDrawcallCount == 0)
  609. {
  610. ++lightToRender;
  611. lightToRenderDrawcallCount =
  612. (lightToRender != lightToRenderEnd) ? lightToRender->m_drawcallCount : 0;
  613. }
  614. }
  615. }
  616. ANKI_ASSERT(lightToRender == lightToRenderEnd);
  617. ANKI_ASSERT(lightsToRender.getSize() <= workItems.getSize());
  618. // All good, store the work items for the threads to pick up
  619. {
  620. Scratch::WorkItem* items;
  621. U32 itemSize;
  622. U32 itemStorageSize;
  623. workItems.moveAndReset(items, itemSize, itemStorageSize);
  624. ANKI_ASSERT(items && itemSize && itemStorageSize);
  625. m_scratch.m_workItems = WeakArray<Scratch::WorkItem>(items, itemSize);
  626. Atlas::ResolveWorkItem* atlasItems;
  627. atlasWorkItems.moveAndReset(atlasItems, itemSize, itemStorageSize);
  628. ANKI_ASSERT(atlasItems && itemSize && itemStorageSize);
  629. m_atlas.m_resolveWorkItems = WeakArray<Atlas::ResolveWorkItem>(atlasItems, itemSize);
  630. }
  631. }
  632. else
  633. {
  634. m_scratch.m_workItems = WeakArray<Scratch::WorkItem>();
  635. m_atlas.m_resolveWorkItems = WeakArray<Atlas::ResolveWorkItem>();
  636. }
  637. }
  638. void ShadowMapping::newScratchAndAtlasResloveRenderWorkItems(
  639. const UVec4& atlasViewport, const UVec4& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
  640. U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
  641. DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const
  642. {
  643. // Scratch work item
  644. {
  645. Scratch::LightToRenderToScratchInfo toRender;
  646. toRender.m_renderQueue = lightRenderQueue;
  647. toRender.m_viewport = scratchVewport;
  648. toRender.m_drawcallCount = lightRenderQueue->m_renderables.getSize();
  649. toRender.m_renderQueueElementsLod = renderQueueElementsLod;
  650. scratchWorkItem.emplaceBack(toRender);
  651. drawcallCount += lightRenderQueue->m_renderables.getSize();
  652. }
  653. // Atlas resolve work items
  654. const U32 tilesX = scratchVewport[2] / m_scratch.m_tileResolution;
  655. const U32 tilesY = scratchVewport[3] / m_scratch.m_tileResolution;
  656. for(U32 x = 0; x < tilesX; ++x)
  657. {
  658. for(U32 y = 0; y < tilesY; ++y)
  659. {
  660. const F32 scratchAtlasWidth = F32(m_scratch.m_tileCountX * m_scratch.m_tileResolution);
  661. const F32 scratchAtlasHeight = F32(m_scratch.m_tileCountY * m_scratch.m_tileResolution);
  662. Atlas::ResolveWorkItem atlasItem;
  663. atlasItem.m_uvInBounds[0] = F32(scratchVewport[0]) / scratchAtlasWidth;
  664. atlasItem.m_uvInBounds[1] = F32(scratchVewport[1]) / scratchAtlasHeight;
  665. atlasItem.m_uvInBounds[2] = F32(scratchVewport[2]) / scratchAtlasWidth;
  666. atlasItem.m_uvInBounds[3] = F32(scratchVewport[3]) / scratchAtlasHeight;
  667. atlasItem.m_uvIn[0] = F32(scratchVewport[0] + scratchVewport[2] / tilesX * x) / scratchAtlasWidth;
  668. atlasItem.m_uvIn[1] = F32(scratchVewport[1] + scratchVewport[3] / tilesY * y) / scratchAtlasHeight;
  669. atlasItem.m_uvIn[2] = F32(scratchVewport[2] / tilesX) / scratchAtlasWidth;
  670. atlasItem.m_uvIn[3] = F32(scratchVewport[3] / tilesY) / scratchAtlasHeight;
  671. atlasItem.m_viewportOut[0] = atlasViewport[0] + atlasViewport[2] / tilesX * x;
  672. atlasItem.m_viewportOut[1] = atlasViewport[1] + atlasViewport[3] / tilesY * y;
  673. atlasItem.m_viewportOut[2] = atlasViewport[2] / tilesX;
  674. atlasItem.m_viewportOut[3] = atlasViewport[3] / tilesY;
  675. atlasItem.m_blur = blurAtlas;
  676. atlasResolveWorkItem.emplaceBack(atlasItem);
  677. }
  678. }
  679. }
  680. } // end namespace anki