// FrameGraphCompiler.cpp
/*
 * Copyright (c) Contributors to the Open 3D Engine Project.
 * For complete copyright and license terms please see the LICENSE at the root of this distribution.
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 */
  8. #include <Atom/RHI/FrameGraphCompiler.h>
  9. #include <Atom/RHI/BufferFrameAttachment.h>
  10. #include <Atom/RHI/BufferScopeAttachment.h>
  11. #include <Atom/RHI/Factory.h>
  12. #include <Atom/RHI/FrameGraph.h>
  13. #include <Atom/RHI/ImageFrameAttachment.h>
  14. #include <Atom/RHI/ImageScopeAttachment.h>
  15. #include <Atom/RHI/RHIUtils.h>
  16. #include <Atom/RHI/Scope.h>
  17. #include <Atom/RHI/SwapChainFrameAttachment.h>
  18. #include <Atom/RHI/TransientAttachmentPool.h>
  19. #include <AzCore/IO/SystemFile.h>
  20. #include <AzCore/std/sort.h>
  21. #include <AzCore/std/optional.h>
  22. namespace AZ::RHI
  23. {
  24. ResultCode FrameGraphCompiler::Init()
  25. {
  26. const ResultCode resultCode = InitInternal();
  27. if (resultCode == ResultCode::Success)
  28. {
  29. // These are immutable for now. Could be configured per-frame using the compile request.
  30. const uint32_t BufferViewCapacity = 128;
  31. m_bufferViewCache.SetCapacity(BufferViewCapacity);
  32. const uint32_t ImageViewCapacity = 128;
  33. m_imageViewCache.SetCapacity(ImageViewCapacity);
  34. }
  35. return resultCode;
  36. }
//! Shuts the compiler down. The view caches and their reverse-lookup tables are
//! cleared before the platform-specific shutdown runs; keep this ordering —
//! presumably the cached views reference platform resources torn down by
//! ShutdownInternal() (NOTE(review): confirm against the platform implementations).
void FrameGraphCompiler::Shutdown()
{
    m_imageViewCache.Clear();
    m_bufferViewCache.Clear();
    m_imageReverseLookupHash.clear();
    m_bufferReverseLookupHash.clear();
    // Platform-specific teardown happens last.
    ShutdownInternal();
}
  45. MessageOutcome FrameGraphCompiler::ValidateCompileRequest(const FrameGraphCompileRequest& request) const
  46. {
  47. if (Validation::IsEnabled())
  48. {
  49. if (request.m_frameGraph == nullptr)
  50. {
  51. return AZ::Failure(AZStd::string("FrameGraph is null. Skipping compilation..."));
  52. }
  53. if (request.m_frameGraph->IsCompiled())
  54. {
  55. return AZ::Failure(AZStd::string("FrameGraph already compiled. Skipping compilation..."));
  56. }
  57. const FrameGraphAttachmentDatabase& attachmentDatabase = request.m_frameGraph->GetAttachmentDatabase();
  58. const bool hasTransientAttachments = attachmentDatabase.GetTransientBufferAttachments().size() || attachmentDatabase.GetTransientImageAttachments().size();
  59. if (request.m_transientAttachmentPool == nullptr && hasTransientAttachments)
  60. {
  61. return AZ::Failure(AZStd::string("DeviceTransientAttachmentPool is null, but transient attachments are in the graph. Skipping compilation..."));
  62. }
  63. }
  64. (void)request;
  65. return AZ::Success();
  66. }
  67. // The entry point for FrameGraph compilation. Frame Graph compilation is broken into several phases:
  68. //
  69. // 1) Queue-Centric Scope Graph Compilation:
  70. //
  71. // This phase takes the scope graph and compiles a queue-centric scope graph. The former is a simple
  72. // producer / consumer graph where certain scopes can produce resources for consumer scopes. The queue-centric
  73. // graph is split into tracks according to each hardware queue. Scopes are serialized onto each track according
  74. // to the topological sort, and cross-track dependencies are generated.
  75. //
  76. // 2) Transient Attachment Compilation:
  77. //
  78. // This phase takes the transient attachment set and acquires physical resources from the Transient
  79. // Attachment Pool. The resources are assigned to the attachments.
  80. //
  81. // 3) Resource View Compilation:
  82. //
  83. // After acquiring all transient resources, the compiler creates and assigns resource views
  84. // to each scope attachment. View ownership is managed by an internal cache.
  85. //
  86. // 4) Platform-specific Compilation:
  87. //
  88. // The final phase is to compile the platform specific scopes and hand-off compilation to the platform-specific
  89. // implementation, which may introduce more phases specific to the platform API.
  90. MessageOutcome FrameGraphCompiler::Compile(const FrameGraphCompileRequest& request)
  91. {
  92. AZ_PROFILE_SCOPE(RHI, "FrameGraphCompiler: Compile");
  93. MessageOutcome outcome = ValidateCompileRequest(request);
  94. if (!outcome)
  95. {
  96. return outcome;
  97. }
  98. FrameGraph& frameGraph = *request.m_frameGraph;
  99. /// [Phase 1] Compiles the cross-queue scope graph.
  100. CompileQueueCentricScopeGraph(frameGraph, request.m_compileFlags);
  101. /// [Phase 2] Compile transient attachments across all scopes.
  102. CompileTransientAttachments(
  103. frameGraph,
  104. *request.m_transientAttachmentPool,
  105. request.m_compileFlags,
  106. request.m_statisticsFlags);
  107. /// [Phase 3] Compiles buffer / image views and assigns them to scope attachments.
  108. CompileResourceViews(frameGraph.GetAttachmentDatabase());
  109. /// [Phase 4] Compile platform-specific scope data after all attachments and views have been compiled.
  110. {
  111. AZ_PROFILE_SCOPE(RHI, "FrameGraphCompiler: Scope Compile");
  112. for (Scope* scope : frameGraph.GetScopes())
  113. {
  114. if (scope->GetDeviceIndex() == MultiDevice::InvalidDeviceIndex)
  115. {
  116. scope->SetDeviceIndex(MultiDevice::DefaultDeviceIndex);
  117. }
  118. scope->Compile();
  119. }
  120. }
  121. /// Perform platform-specific compilation.
  122. return CompileInternal(request);
  123. }
//! [Phase 1] Builds the queue-centric scope graph: serializes scopes onto one
//! "track" per hardware queue, then (unless async queues are disabled) adds
//! cross-queue producer/consumer edges while culling edges made redundant by
//! same-queue serialization.
//! @param frameGraph The topologically sorted frame graph to link.
//! @param compileFlags Honors FrameSchedulerCompileFlags::DisableAsyncQueues.
void FrameGraphCompiler::CompileQueueCentricScopeGraph(
    FrameGraph& frameGraph,
    FrameSchedulerCompileFlags compileFlags)
{
    AZ_PROFILE_SCOPE(RHI, "FrameGraphCompiler: CompileQueueCentricScopeGraph");
    const bool disableAsyncQueues = CheckBitsAll(compileFlags, FrameSchedulerCompileFlags::DisableAsyncQueues);
    if (disableAsyncQueues)
    {
        // Collapse everything onto the graphics queue so only one track exists.
        for (Scope* scope : frameGraph.GetScopes())
        {
            scope->m_hardwareQueueClass = HardwareQueueClass::Graphics;
        }
    }
    // Build the per-queue graph by first linking scopes on the same queue
    // with their neighbors. This is because the queue is going to execute serially.
    {
        // Tracks the most recent scope seen per queue class (and implicitly per device,
        // since the link is only made when device indices match).
        Scope* producer[HardwareQueueClassCount] = {};
        for (Scope* consumer : frameGraph.GetScopes())
        {
            const uint32_t hardwareQueueClassIdx = static_cast<uint32_t>(consumer->GetHardwareQueueClass());
            if (producer[hardwareQueueClassIdx])
            {
                // Only link neighbors that execute on the same device.
                if (producer[hardwareQueueClassIdx]->GetDeviceIndex() == consumer->GetDeviceIndex())
                {
                    Scope::LinkProducerConsumerByQueues(producer[hardwareQueueClassIdx], consumer);
                }
            }
            producer[hardwareQueueClassIdx] = consumer;
        }
    }
    /// If async queues are disabled, just return.
    if (disableAsyncQueues)
    {
        return;
    }
    // Build cross-queue edges. This is more complicated because each queue forms a "track" of serialized scopes,
    // but each track is able to mark dependencies on nodes in other tracks. In the final graph, each scope is able to have
    // a single producer / consumer from each queue. We also want to cull out edges that are superfluous.
    //
    // The algorithm first iterates the list of scopes from beginning to end. For consumers of the current scope,
    // we can pick the earliest one for each queue, since all later ones are unnecessary (due to same-queue serialization).
    //
    // When we find the first consumer (for each queue), we need to check that we are the last producer feeding into that consumer on the queue. Otherwise,
    // we are fencing too early. For instance, a later scope on the same queue as us could fence the consumer (or an earlier consumer), which satisfies the constraint
    // making the current edge unnecessary. Once we find the last producer and the first consumer for the current node, we search for a later
    // producer (on the producer's queue) which feeds an earlier consumer (on the consumer's queue). If this test fails, we have found the optimal fencing point.
    for (Scope* currentScope : frameGraph.GetScopes())
    {
        // Grab the last producer on a specific queue that feeds into this scope. Then search to see if a later producer
        // on the producer queue feeds an earlier consumer on the consumer queue. If not, then we have a valid edge.
        for (uint32_t producerHardwareQueueIdx = 0; producerHardwareQueueIdx < HardwareQueueClassCount; ++producerHardwareQueueIdx)
        {
            if (Scope* producerScopeLast = currentScope->m_producersByQueueLast[producerHardwareQueueIdx])
            {
                bool foundEarlierConsumerOnSameQueue = false;
                // Walk forward along the producer's own track, looking for a later scope
                // whose cross-queue consumer precedes currentScope on currentScope's queue.
                const Scope* nextProducerScope = producerScopeLast->GetConsumerOnSameQueue();
                while (nextProducerScope)
                {
                    if (const Scope* sameQueueConsumerScope = nextProducerScope->GetConsumerByQueue(currentScope->GetHardwareQueueClass()))
                    {
                        if (sameQueueConsumerScope->GetIndex() < currentScope->GetIndex())
                        {
                            // An earlier consumer already fences this dependency; our edge would be redundant.
                            foundEarlierConsumerOnSameQueue = true;
                        }
                    }
                    nextProducerScope = nextProducerScope->GetConsumerOnSameQueue();
                }
                if (foundEarlierConsumerOnSameQueue == false)
                {
                    // Cross-queue links are only valid between scopes on the same device.
                    if (producerScopeLast->GetDeviceIndex() == currentScope->GetDeviceIndex())
                    {
                        Scope::LinkProducerConsumerByQueues(producerScopeLast, currentScope);
                    }
                }
            }
        }
        Scope* consumersByQueueFirst[HardwareQueueClassCount] = {};
        // Compute the first consumer for each queue.
        for (Scope* consumer : frameGraph.GetConsumers(*currentScope))
        {
            const bool crossQueueEdge = currentScope->GetHardwareQueueClass() != consumer->GetHardwareQueueClass();
            if (crossQueueEdge)
            {
                Scope*& consumerScopeFirst = consumersByQueueFirst[static_cast<uint32_t>(consumer->GetHardwareQueueClass())];
                if (consumerScopeFirst == nullptr || consumerScopeFirst->GetIndex() > consumer->GetIndex())
                {
                    consumerScopeFirst = consumer;
                }
            }
        }
        // For each valid first consumer (one per queue), check if we (the producer) are the last (so far) producer to feed into
        // that consumer on our queue. If so, make us the new producer on our queue.
        for (uint32_t consumerHardwareQueueClassIdx = 0; consumerHardwareQueueClassIdx < HardwareQueueClassCount; ++consumerHardwareQueueClassIdx)
        {
            if (Scope* consumerScopeFirst = consumersByQueueFirst[consumerHardwareQueueClassIdx])
            {
                Scope*& producerScopeLast = consumerScopeFirst->m_producersByQueueLast[consumerHardwareQueueClassIdx];
                if (producerScopeLast == nullptr || producerScopeLast->GetIndex() < currentScope->GetIndex())
                {
                    producerScopeLast = currentScope;
                }
            }
        }
    }
}
//! Adjusts transient attachment lifetimes for async (multi-queue) execution:
//! (1) re-homes a transient image's first scope to a producer on the most
//! capable queue it supports, and (2) extends attachment lifetimes to cover
//! whole async intervals so memory aliasing never spans concurrent queues.
//! @param frameGraph The frame graph whose attachments are adjusted.
//! @param compileFlags Honors DisableAsyncQueues (no-op) and DisableAttachmentAliasingAsyncQueue.
void FrameGraphCompiler::ExtendTransientAttachmentAsyncQueueLifetimes(
    FrameGraph& frameGraph,
    FrameSchedulerCompileFlags compileFlags)
{
    /// No need to do this if we have disabled async queues entirely.
    if (CheckBitsAny(compileFlags, FrameSchedulerCompileFlags::DisableAsyncQueues))
    {
        return;
    }
    AZ_PROFILE_FUNCTION(RHI);
    // Each attachment declares which queue classes it can be used on. We require that the first scope be on the most
    // capable queue. This is because we know that we are always able to service transition barrier requests for all
    // frames. NOTE: This only applies to images which have certain restrictions around layout transitions.
    const FrameGraphAttachmentDatabase& attachmentDatabase = frameGraph.GetAttachmentDatabase();
    for (FrameAttachment* transientImage : attachmentDatabase.GetTransientImageAttachments())
    {
        Scope* firstScope = transientImage->GetFirstScope();
        if (firstScope == nullptr)
        {
            // If the attachment is owned by a pass that isn't a scope-producer (e.g. Parent-Pass), and is not connected to
            // anything, the first and last scope will be empty. We will get a warning its unused in ValidateEnd(), but we don't want to
            // crash here
            continue;
        }
        const HardwareQueueClass mostCapableQueueUsage = GetMostCapableHardwareQueue(transientImage->GetSupportedQueueMask());
        if (firstScope->GetHardwareQueueClass() != mostCapableQueueUsage)
        {
            // Prefer an earlier producer on the capable queue; falling through emits a
            // warning because no suitable scope exists to begin aliasing from.
            if (Scope* foundScope = firstScope->FindCapableCrossQueueProducer(mostCapableQueueUsage))
            {
                transientImage->m_firstScope = foundScope;
                continue;
            }
            AZ_Warning("FrameGraphCompiler", false,
                "Could not find a %s queue producer scope to begin aliasing attachment '%s'. This can be remedied by "
                "having a %s scope earlier in the frame (or as the root of the frame graph).",
                GetHardwareQueueClassName(mostCapableQueueUsage),
                transientImage->GetId().GetCStr(),
                GetHardwareQueueClassName(mostCapableQueueUsage));
        }
    }
    const auto& scopes = frameGraph.GetScopes();
    // Adjust asynchronous attachment lifetimes. If scopes executing in parallel are utilizing transient
    // attachments, we must extend their lifetimes so that memory is aliased properly. To do this, we first
    // compute the intervals in the sorted scope array where asynchronous activity is occurring. This is
    // done by traversing cross-queue fork / join events.
    struct AsyncInterval
    {
        // First and last scope indices (inclusive) of the async interval.
        uint32_t m_indexFirst = 0;
        uint32_t m_indexLast = 0;
        // Number of transient scope attachments seen per queue inside the interval;
        // used to pick which queue gets to keep aliasing.
        uint32_t m_attachmentCountsByQueue[HardwareQueueClassCount] = {};
        /// This the hardware queue that is allowed to alias memory.
        HardwareQueueClass m_aliasingQueueClass = HardwareQueueClass::Graphics;
    };
    AZStd::vector<AsyncInterval> asyncIntervals;
    const uint32_t ScopeCount = static_cast<uint32_t>(scopes.size());
    for (uint32_t scopeIdx = 0; scopeIdx < ScopeCount; ++scopeIdx)
    {
        Scope* scope = scopes[scopeIdx];
        bool foundInterval = false;
        AsyncInterval interval;
        interval.m_indexFirst = scope->GetIndex();
        for (uint32_t hardwareQueueClassIdx = 0; hardwareQueueClassIdx < HardwareQueueClassCount; ++hardwareQueueClassIdx)
        {
            HardwareQueueClass hardwareQueueClass = static_cast<HardwareQueueClass>(hardwareQueueClassIdx);
            // Skip the queue class matching this scope, we only want cross-queue fork events.
            if (hardwareQueueClass == scope->GetHardwareQueueClass())
            {
                continue;
            }
            // If this succeeds, we have reached a cross-queue fork. This is the beginning of the async
            // interval. To find the end, we search along the newly forked path (on the other queue) until
            // we join back to the original queue. The interval ends just before the join scope.
            if (const Scope* otherQueueScope = scope->GetConsumerByQueue(hardwareQueueClass))
            {
                // If the search fails, we fall back to the end of the scope list.
                uint32_t indexLast = ScopeCount - 1;
                // Search for a join event.
                do
                {
                    if (const Scope* joinScope = otherQueueScope->GetConsumerByQueue(scope->GetHardwareQueueClass()))
                    {
                        // End the interval just before the join scope.
                        indexLast = joinScope->GetIndex() - 1;
                        foundInterval = true;
                        break;
                    }
                    otherQueueScope = otherQueueScope->GetConsumerOnSameQueue();
                } while (otherQueueScope);
                // Keep track of the last index. Since we search across all the queues, we may have multiple.
                interval.m_indexLast = AZStd::max(interval.m_indexLast, indexLast);
            }
        }
        if (foundInterval)
        {
            // Accumulate scope attachments for all scopes in the interval. This will be used to find the best queue to
            // allow aliasing.
            for (uint32_t asyncScopeIdx = interval.m_indexFirst; asyncScopeIdx <= interval.m_indexLast; ++asyncScopeIdx)
            {
                const Scope* asyncScope = scopes[asyncScopeIdx];
                interval.m_attachmentCountsByQueue[static_cast<uint32_t>(asyncScope->GetHardwareQueueClass())] += static_cast<uint32_t>(asyncScope->GetTransientAttachments().size());
            }
            asyncIntervals.push_back(interval);
            // Resume scanning after the interval we just recorded.
            scopeIdx = interval.m_indexLast;
        }
    }
    const bool disableAsyncQueueAliasing = CheckBitsAny(compileFlags, FrameSchedulerCompileFlags::DisableAttachmentAliasingAsyncQueue);
    // Find the maximum number of transient scope attachments per queue. The one with the most gets to alias memory.
    if (disableAsyncQueueAliasing == false)
    {
        for (AsyncInterval& interval : asyncIntervals)
        {
            uint32_t scopeAttachmentCountMax = 0;
            for (uint32_t i = 0; i < HardwareQueueClassCount; ++i)
            {
                if (scopeAttachmentCountMax < interval.m_attachmentCountsByQueue[i])
                {
                    scopeAttachmentCountMax = interval.m_attachmentCountsByQueue[i];
                    interval.m_aliasingQueueClass = (HardwareQueueClass)i;
                }
            }
        }
    }
    // Finally, for each scope that is within an async interval, we must extend
    // the lifetimes to fill the whole interval. This is because we cannot alias
    // memory between queues on the GPU, as the aliasing system assumes serialized
    // lifetimes. However, we can still allow one queue to alias memory with itself.
    for (uint32_t scopeIdx = 0; scopeIdx < uint32_t(scopes.size()); ++scopeIdx)
    {
        Scope* scope = scopes[scopeIdx];
        for (AsyncInterval interval : asyncIntervals)
        {
            // Only one queue is allowed to alias in async scenarios. In order to alias properly,
            // attachments must have well-defined lifetimes, which is not possible with async execution.
            // However, this is true of a single queue with itself, so one queue is chosen to allow aliasing
            // and the rest will extend lifetimes.
            const bool isAliasingAllowed = !disableAsyncQueueAliasing && interval.m_aliasingQueueClass == scope->GetHardwareQueueClass();
            if (interval.m_indexFirst <= scopeIdx && scopeIdx <= interval.m_indexLast)
            {
                for (ScopeAttachment* scopeAttachment : scope->GetTransientAttachments())
                {
                    FrameAttachment& frameAttachment = scopeAttachment->GetFrameAttachment();
                    // If we aren't allowed to alias or we're a cross queue attachment, then extend lifetimes to
                    // the beginning and end of the async interval.
                    if (!isAliasingAllowed)
                    {
                        if (frameAttachment.m_firstScope->GetIndex() > interval.m_indexFirst)
                        {
                            frameAttachment.m_firstScope = scopes[interval.m_indexFirst];
                        }
                        if (frameAttachment.m_lastScope->GetIndex() < interval.m_indexLast)
                        {
                            frameAttachment.m_lastScope = scopes[interval.m_indexLast];
                        }
                    }
                }
            }
        }
    }
}
//! [Phase 2] Compiles transient attachments: builds a sorted stream of
//! activate/deactivate commands (packed into a single 32-bit sortable key) and
//! replays it against the transient attachment pool to acquire/release physical
//! resources per scope. May run the command stream twice when the pool uses a
//! MemoryHint heap strategy (first pass sizes, second pass allocates).
//! @param frameGraph Graph providing scopes and the attachment database.
//! @param transientAttachmentPool Pool that services activations/deactivations.
//! @param compileFlags Honors DisableAttachmentAliasing (lifetimes span the whole frame).
//! @param statisticsFlags Honors GatherTransientAttachmentStatistics.
void FrameGraphCompiler::CompileTransientAttachments(
    FrameGraph& frameGraph,
    TransientAttachmentPool& transientAttachmentPool,
    FrameSchedulerCompileFlags compileFlags,
    FrameSchedulerStatisticsFlags statisticsFlags)
{
    const FrameGraphAttachmentDatabase& attachmentDatabase = frameGraph.GetAttachmentDatabase();
    // Nothing to do when the frame has no transient attachments at all.
    if (attachmentDatabase.GetTransientBufferAttachments().empty() && attachmentDatabase.GetTransientImageAttachments().empty())
    {
        return;
    }
    AZ_PROFILE_SCOPE(RHI, "FrameGraphCompiler: CompileTransientAttachments");
    ExtendTransientAttachmentAsyncQueueLifetimes(frameGraph, compileFlags);
    // Builds a sortable key. It iterates each scope and performs deactivations
    // followed by activations on each attachment.
    const uint32_t ATTACHMENT_BIT_COUNT = 16;
    const uint32_t SCOPE_BIT_COUNT = 14;
    // Ordering of enumerators matters: deactivations sort after activations at the
    // same scope via the 2-bit m_action field (see Command::Bits below).
    enum class Action
    {
        ActivateImage = 0,
        ActivateBuffer,
        DeactivateImage,
        DeactivateBuffer,
    };
    // A single activation/deactivation event, bit-packed so that sorting the raw
    // 32-bit value yields the desired (scope, action, attachment) replay order.
    struct Command
    {
        Command(uint32_t scopeIndex, Action action, uint32_t attachmentIndex)
        {
            m_bits.m_scopeIndex = scopeIndex;
            m_bits.m_action = (uint32_t)action;
            m_bits.m_attachmentIndex = attachmentIndex;
        }
        bool operator < (Command rhs) const
        {
            return m_command < rhs.m_command;
        }
        struct Bits
        {
            /// Sort by attachment index last
            uint32_t m_attachmentIndex : ATTACHMENT_BIT_COUNT;
            /// Sort by the action after the scope. First by deactivations, then by activations.
            uint32_t m_action : 2;
            /// Sort by scope index first.
            uint32_t m_scopeIndex : SCOPE_BIT_COUNT;
        };
        union
        {
            Bits m_bits;
            uint32_t m_command = 0;
        };
    };
    const auto& scopes = frameGraph.GetScopes();
    const auto& transientBufferGraphAttachments = attachmentDatabase.GetTransientBufferAttachments();
    const auto& transientImageGraphAttachments = attachmentDatabase.GetTransientImageAttachments();
    // The bit-packed key limits how many scopes / attachments can be addressed.
    AZ_Assert(scopes.size() < AZ_BIT(SCOPE_BIT_COUNT),
        "Exceeded maximum number of allowed scopes");
    AZ_Assert(transientBufferGraphAttachments.size() + transientImageGraphAttachments.size() < AZ_BIT(ATTACHMENT_BIT_COUNT),
        "Exceeded maximum number of allowed attachments");
    // Currently-active resources, indexed by attachment index; null while inactive.
    AZStd::vector<Buffer*> transientBuffers(transientBufferGraphAttachments.size());
    AZStd::vector<Image*> transientImages(transientImageGraphAttachments.size());
    AZStd::vector<Command> commands;
    // One activation + one deactivation per attachment.
    commands.reserve((transientBufferGraphAttachments.size() + transientImageGraphAttachments.size()) * 2);
    if (CheckBitsAny(compileFlags, FrameSchedulerCompileFlags::DisableAttachmentAliasing))
    {
        // Aliasing disabled: give every attachment a whole-frame lifetime.
        const uint32_t ScopeIndexFirst = 0;
        const uint32_t ScopeIndexLast = static_cast<uint32_t>(scopes.size() - 1);
        // Generate commands for each transient buffer: one for activation, and one for deactivation.
        for (uint32_t attachmentIndex = 0; attachmentIndex < (uint32_t)transientBufferGraphAttachments.size(); ++attachmentIndex)
        {
            commands.emplace_back(ScopeIndexFirst, Action::ActivateBuffer, attachmentIndex);
            commands.emplace_back(ScopeIndexLast, Action::DeactivateBuffer, attachmentIndex);
        }
        // Generate commands for each transient image: one for activation, and one for deactivation.
        for (uint32_t attachmentIndex = 0; attachmentIndex < (uint32_t)transientImageGraphAttachments.size(); ++attachmentIndex)
        {
            commands.emplace_back(ScopeIndexFirst, Action::ActivateImage, attachmentIndex);
            commands.emplace_back(ScopeIndexLast, Action::DeactivateImage, attachmentIndex);
        }
    }
    else
    {
        // Generate commands for each transient buffer: one for activation, and one for deactivation.
        for (uint32_t attachmentIndex = 0; attachmentIndex < (uint32_t)transientBufferGraphAttachments.size(); ++attachmentIndex)
        {
            BufferFrameAttachment* transientBuffer = transientBufferGraphAttachments[attachmentIndex];
            const auto* firstScope = transientBuffer->GetFirstScope();
            const auto* lastScope = transientBuffer->GetLastScope();
            if (firstScope == nullptr || lastScope == nullptr)
            {
                // If the attachment is owned by a pass that isn't a scope-producer (e.g. Parent-Pass), and is not connected to
                // anything, the first and last scope will be empty. We will get a warning its unused in ValidateEnd(), but we don't
                // want to crash here
                continue;
            }
            const uint32_t scopeIndexFirst = firstScope->GetIndex();
            const uint32_t scopeIndexLast = lastScope->GetIndex();
            commands.emplace_back(scopeIndexFirst, Action::ActivateBuffer, attachmentIndex);
            commands.emplace_back(scopeIndexLast, Action::DeactivateBuffer, attachmentIndex);
        }
        // Generate commands for each transient image: one for activation, and one for deactivation.
        for (uint32_t attachmentIndex = 0; attachmentIndex < (uint32_t)transientImageGraphAttachments.size(); ++attachmentIndex)
        {
            ImageFrameAttachment* transientImage = transientImageGraphAttachments[attachmentIndex];
            const auto* firstScope = transientImage->GetFirstScope();
            const auto* lastScope = transientImage->GetLastScope();
            if (firstScope == nullptr || lastScope == nullptr)
            {
                // If the attachment is owned by a pass that isn't a scope-producer (e.g. Parent-Pass), and is not connected to
                // anything, the first and last scope will be empty. We will get a warning its unused in ValidateEnd(), but we don't
                // want to crash here
                continue;
            }
            const uint32_t scopeIndexFirst = firstScope->GetIndex();
            const uint32_t scopeIndexLast = lastScope->GetIndex();
            commands.emplace_back(scopeIndexFirst, Action::ActivateImage, attachmentIndex);
            commands.emplace_back(scopeIndexLast, Action::DeactivateImage, attachmentIndex);
        }
    }
    // Sorting the packed keys yields per-scope replay order: deactivations before
    // activations within each scope (see the Action enumerator ordering).
    AZStd::sort(commands.begin(), commands.end());
    // Replays the command stream against the pool.
    // NOTE(review): the `compileFlags` parameter here (TransientAttachmentPoolCompileFlags)
    // shadows the function parameter of the same name (FrameSchedulerCompileFlags) —
    // looks intentional, but confirm.
    auto processCommands = [&](TransientAttachmentPoolCompileFlags compileFlags, TransientAttachmentStatistics::MemoryUsage* memoryHint = nullptr)
    {
        transientAttachmentPool.Begin(compileFlags, memoryHint);
        uint32_t currentScopeIndex = static_cast<uint32_t>(-1);
        bool allocateResources = !CheckBitsAny(compileFlags, TransientAttachmentPoolCompileFlags::DontAllocateResources);
        for (Command command : commands)
        {
            const uint32_t scopeIndex = command.m_bits.m_scopeIndex;
            const uint32_t attachmentIndex = command.m_bits.m_attachmentIndex;
            const Action action = (Action)command.m_bits.m_action;
            // Make sure to walk the full set of scopes, even if a transient resource doesn't
            // exist in it. This is necessary for proper statistics tracking.
            while (currentScopeIndex != scopeIndex)
            {
                const uint32_t nextScope = ++currentScopeIndex;
                // End the previous scope (if there is one).
                if (nextScope > 0)
                {
                    transientAttachmentPool.EndScope();
                }
                transientAttachmentPool.BeginScope(*scopes[nextScope]);
            }
            switch (action)
            {
            case Action::DeactivateBuffer:
            {
                AZ_Assert(!allocateResources || transientBuffers[attachmentIndex] || IsNullRHI(), "DeviceBuffer is not active: %s", transientBufferGraphAttachments[attachmentIndex]->GetId().GetCStr());
                BufferFrameAttachment* bufferFrameAttachment = transientBufferGraphAttachments[attachmentIndex];
                transientAttachmentPool.DeactivateBuffer(bufferFrameAttachment->GetId());
                transientBuffers[attachmentIndex] = nullptr;
                break;
            }
            case Action::DeactivateImage:
            {
                AZ_Assert(!allocateResources || transientImages[attachmentIndex] || IsNullRHI(), "DeviceImage is not active: %s", transientImageGraphAttachments[attachmentIndex]->GetId().GetCStr());
                ImageFrameAttachment* imageFrameAttachment = transientImageGraphAttachments[attachmentIndex];
                transientAttachmentPool.DeactivateImage(imageFrameAttachment->GetId());
                transientImages[attachmentIndex] = nullptr;
                break;
            }
            case Action::ActivateBuffer:
            {
                BufferFrameAttachment* bufferFrameAttachment = transientBufferGraphAttachments[attachmentIndex];
                AZ_Assert(transientBuffers[attachmentIndex] == nullptr, "DeviceBuffer has been activated already. %s", bufferFrameAttachment->GetId().GetCStr());
                TransientBufferDescriptor descriptor;
                descriptor.m_attachmentId = bufferFrameAttachment->GetId();
                descriptor.m_bufferDescriptor = bufferFrameAttachment->GetBufferDescriptor();
                auto buffer = transientAttachmentPool.ActivateBuffer(descriptor);
                if (allocateResources && buffer)
                {
                    bufferFrameAttachment->SetResource(buffer);
                    transientBuffers[attachmentIndex] = buffer;
                }
                break;
            }
            case Action::ActivateImage:
            {
                ImageFrameAttachment* imageFrameAttachment = transientImageGraphAttachments[attachmentIndex];
                AZ_Assert(transientImages[attachmentIndex] == nullptr, "DeviceImage has been activated already. %s", imageFrameAttachment->GetId().GetCStr());
                ClearValue optimizedClearValue;
                TransientImageDescriptor descriptor;
                descriptor.m_attachmentId = imageFrameAttachment->GetId();
                descriptor.m_imageDescriptor = imageFrameAttachment->GetImageDescriptor();
                descriptor.m_supportedQueueMask = imageFrameAttachment->GetSupportedQueueMask();
                // Only render-target / depth-stencil images carry an optimized clear value.
                const bool isOutputMerger = RHI::CheckBitsAny(descriptor.m_imageDescriptor.m_bindFlags, RHI::ImageBindFlags::Color | RHI::ImageBindFlags::DepthStencil);
                if (isOutputMerger)
                {
                    optimizedClearValue = imageFrameAttachment->GetOptimizedClearValue();
                    descriptor.m_optimizedClearValue = &optimizedClearValue;
                }
                auto image = transientAttachmentPool.ActivateImage(descriptor);
                if (allocateResources && image)
                {
                    imageFrameAttachment->SetResource(image);
                    transientImages[attachmentIndex] = image;
                }
                break;
            }
            }
        }
        // Close the final scope and the pool session.
        transientAttachmentPool.EndScope();
        transientAttachmentPool.End();
    };
    AZStd::optional<TransientAttachmentStatistics::MemoryUsage> memoryUsage;
    for (auto& [deviceIndex, descriptor] : transientAttachmentPool.GetDescriptor())
    {
        // Check if we need to do two passes (one for calculating the size and the second one for allocating the resources)
        if (descriptor.m_heapParameters.m_type == HeapAllocationStrategy::MemoryHint)
        {
            // First pass to calculate size needed.
            processCommands(TransientAttachmentPoolCompileFlags::GatherStatistics | TransientAttachmentPoolCompileFlags::DontAllocateResources);
            auto statistics = transientAttachmentPool.GetDeviceTransientAttachmentPool(deviceIndex)->GetStatistics();
            memoryUsage = statistics.m_reservedMemory;
        }
    }
    // Second pass uses the information about memory usage
    TransientAttachmentPoolCompileFlags poolCompileFlags = TransientAttachmentPoolCompileFlags::None;
    if (CheckBitsAny(statisticsFlags, FrameSchedulerStatisticsFlags::GatherTransientAttachmentStatistics))
    {
        poolCompileFlags |= TransientAttachmentPoolCompileFlags::GatherStatistics;
    }
    processCommands(poolCompileFlags, memoryUsage ? &memoryUsage.value() : nullptr);
}
  610. ImageView* FrameGraphCompiler::GetImageViewFromLocalCache(Image* image, const ImageViewDescriptor& imageViewDescriptor)
  611. {
  612. const size_t baseHash = AZStd::hash<Image*>()(image);
  613. // [GFX TODO][ATOM-6289] This should be looked into, combining cityhash with AZStd::hash
  614. const HashValue64 hash = imageViewDescriptor.GetHash(static_cast<HashValue64>(baseHash));
  615. // Attempt to find the image view in the cache.
  616. ImageView* imageView = m_imageViewCache.Find(static_cast<uint64_t>(hash));
  617. if (!imageView)
  618. {
  619. // This is one way of clearing view entries within the cache if we are creating a new view to replace the old one.
  620. // Normally this can happen for transient resources if their pointer within the heap changes for the current frame
  621. const ImageResourceViewData imageResourceViewData = ImageResourceViewData {image->GetName(), imageViewDescriptor};
  622. RemoveFromCache(imageResourceViewData, m_imageReverseLookupHash, m_imageViewCache);
  623. // Create a new image view instance and insert it into the cache.
  624. Ptr<ImageView> imageViewPtr = image->BuildImageView(imageViewDescriptor);
  625. imageView = imageViewPtr.get();
  626. m_imageViewCache.Insert(static_cast<uint64_t>(hash), AZStd::move(imageViewPtr));
  627. if (!image->GetName().IsEmpty())
  628. {
  629. m_imageReverseLookupHash.emplace(imageResourceViewData, hash);
  630. }
  631. }
  632. return imageView;
  633. }
  634. BufferView* FrameGraphCompiler::GetBufferViewFromLocalCache(Buffer* buffer, const BufferViewDescriptor& bufferViewDescriptor)
  635. {
  636. const size_t baseHash = AZStd::hash<Buffer*>()(buffer);
  637. // [GFX TODO][ATOM-6289] This should be looked into, combining cityhash with AZStd::hash
  638. const HashValue64 hash = bufferViewDescriptor.GetHash(static_cast<HashValue64>(baseHash));
  639. // Attempt to find the buffer view in the cache.
  640. BufferView* bufferView = m_bufferViewCache.Find(static_cast<uint64_t>(hash));
  641. if (!bufferView)
  642. {
  643. // This is one way of clearing view entries within the cache if we are creating a new view to replace the old one.
  644. // Normally this can happen for transient resources if their pointer within the heap changes for the current frame
  645. const BufferResourceViewData bufferResourceViewData = BufferResourceViewData {buffer->GetName(), bufferViewDescriptor};
  646. RemoveFromCache(bufferResourceViewData, m_bufferReverseLookupHash, m_bufferViewCache);
  647. // Create a new buffer view instance and insert it into the cache.
  648. Ptr<BufferView> bufferViewPtr = buffer->BuildBufferView(bufferViewDescriptor);
  649. bufferView = bufferViewPtr.get();
  650. m_bufferViewCache.Insert(static_cast<uint64_t>(hash), AZStd::move(bufferViewPtr));
  651. if (!buffer->GetName().IsEmpty())
  652. {
  653. m_bufferReverseLookupHash.emplace(bufferResourceViewData, hash);
  654. }
  655. }
  656. return bufferView;
  657. }
  658. void FrameGraphCompiler::CompileResourceViews(const FrameGraphAttachmentDatabase& attachmentDatabase)
  659. {
  660. AZ_PROFILE_SCOPE(RHI, "FrameGraphCompiler: CompileResourceViews");
  661. for (ImageFrameAttachment* imageAttachment : attachmentDatabase.GetImageAttachments())
  662. {
  663. Image* image = imageAttachment->GetImage();
  664. if (!image)
  665. {
  666. continue;
  667. }
  668. // Iterates through every usage of the image, pulls image views
  669. // from image's cache or local cache, and assigns them to the scope attachments.
  670. for (ImageScopeAttachment* node = imageAttachment->GetFirstScopeAttachment(); node != nullptr; node = node->GetNext())
  671. {
  672. const ImageViewDescriptor& imageViewDescriptor = node->GetDescriptor().m_imageViewDescriptor;
  673. // Multi device image views don't have a global cache, so we always cache them
  674. ImageView* imageView = GetImageViewFromLocalCache(image, imageViewDescriptor);
  675. node->SetImageView(imageView);
  676. }
  677. }
  678. for (BufferFrameAttachment* bufferAttachment : attachmentDatabase.GetBufferAttachments())
  679. {
  680. Buffer* buffer = bufferAttachment->GetBuffer();
  681. if (!buffer)
  682. {
  683. continue;
  684. }
  685. // Iterates through every usage of the buffer attachment, pulls buffer views
  686. // from the cache within the buffer, and assigns them to the scope attachments.
  687. for (BufferScopeAttachment* node = bufferAttachment->GetFirstScopeAttachment(); node != nullptr; node = node->GetNext())
  688. {
  689. const BufferViewDescriptor& bufferViewDescriptor = node->GetDescriptor().m_bufferViewDescriptor;
  690. // Multi device buffer views don't have a global cache, so we always cache them
  691. BufferView* bufferView = GetBufferViewFromLocalCache(buffer, bufferViewDescriptor);
  692. node->SetBufferView(bufferView);
  693. }
  694. }
  695. }
  696. template<typename ReverseLookupObjectType, typename ObjectCacheType>
  697. void FrameGraphCompiler::RemoveFromCache(ReverseLookupObjectType objectToRemove,
  698. AZStd::unordered_map<ReverseLookupObjectType, HashValue64>& reverseHashLookupMap,
  699. ObjectCache<ObjectCacheType>& objectCache)
  700. {
  701. if (objectToRemove.m_name.IsEmpty())
  702. {
  703. return;
  704. }
  705. bool isResourceRegistered = reverseHashLookupMap.contains(objectToRemove);
  706. if (isResourceRegistered)
  707. {
  708. HashValue64 originalHash = reverseHashLookupMap.find(objectToRemove)->second;
  709. objectCache.EraseItem(aznumeric_cast<uint64_t>(originalHash));
  710. reverseHashLookupMap.erase(objectToRemove);
  711. }
  712. }
  713. }