// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/CommandBufferImpl.h>
#include <AnKi/Gr/GrManager.h>
#include <AnKi/Gr/Vulkan/GrManagerImpl.h>
#include <AnKi/Gr/Framebuffer.h>
#include <AnKi/Gr/Vulkan/GrUpscalerImpl.h>
#include <AnKi/Gr/Vulkan/AccelerationStructureImpl.h>
#include <AnKi/Gr/Vulkan/FramebufferImpl.h>

#if ANKI_DLSS
#	include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx.h>
#	include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_helpers.h>
#	include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_vk.h>
#	include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_helpers_vk.h>
#endif

#include <algorithm>

namespace anki {
CommandBufferImpl::~CommandBufferImpl()
{
	if(m_empty)
	{
		ANKI_VK_LOGW("Command buffer was empty");
	}

	if(!m_finalized)
	{
		ANKI_VK_LOGW("Command buffer was not flushed");
	}

	m_imgBarriers.destroy(m_alloc);
	m_buffBarriers.destroy(m_alloc);
	m_memBarriers.destroy(m_alloc);
	m_queryResetAtoms.destroy(m_alloc);
	m_writeQueryAtoms.destroy(m_alloc);
	m_secondLevelAtoms.destroy(m_alloc);
}
Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
{
	m_tid = Thread::getCurrentThreadId();
	m_flags = init.m_flags;

	ANKI_CHECK(getGrManagerImpl().getCommandBufferFactory().newCommandBuffer(m_tid, m_flags, m_microCmdb));
	m_handle = m_microCmdb->getHandle();
	m_alloc = m_microCmdb->getFastAllocator();

	// Store some of the init info for later
	if(!!(m_flags & CommandBufferFlag::SECOND_LEVEL))
	{
		m_activeFb = init.m_framebuffer;
		m_colorAttachmentUsages = init.m_colorAttachmentUsages;
		m_depthStencilAttachmentUsage = init.m_depthStencilAttachmentUsage;
		m_state.beginRenderPass(static_cast<FramebufferImpl*>(m_activeFb.get()));
		m_microCmdb->pushObjectRef(m_activeFb);
	}

	for(DescriptorSetState& state : m_dsetState)
	{
		state.init(m_alloc);
	}

	return Error::NONE;
}
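// beginRecording(): second-level (secondary) command buffers are recorded with
// VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, so the inheritance info needs the render pass and framebuffer
// handles captured in init(). The attachment layouts are derived from the usages the caller provided.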
void CommandBufferImpl::beginRecording()
{
	// Do the begin
	VkCommandBufferInheritanceInfo inheritance = {};
	inheritance.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;

	VkCommandBufferBeginInfo begin = {};
	begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	begin.pInheritanceInfo = &inheritance;

	if(!!(m_flags & CommandBufferFlag::SECOND_LEVEL))
	{
		FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);

		// Calc the layouts
		Array<VkImageLayout, MAX_COLOR_ATTACHMENTS> colAttLayouts;
		for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
		{
			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
			colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
		}

		VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
		if(impl.hasDepthStencil())
		{
			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
			dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
		}

		VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
		if(impl.hasSri())
		{
			// Technically it's possible for the SRI to be in another layout. Don't bother though
			sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
		}

		inheritance.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
		inheritance.subpass = 0;
		inheritance.framebuffer = impl.getFramebufferHandle();

		begin.flags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
	}

	vkBeginCommandBuffer(m_handle, &begin);

	// Stats
	if(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::KHR_PIPELINE_EXECUTABLE_PROPERTIES))
	{
		m_state.setEnablePipelineStatistics(true);
	}
}
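// beginRenderPassInternal(fb, ...): only records the render area, the attachment usages and a reference to the
// framebuffer. The actual vkCmdBeginRenderPass2KHR is issued later by the parameterless overload, once the subpass
// contents (inline commands vs. secondary command buffers) are known; see endRenderPassInternal() for the empty-pass
// case.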
void CommandBufferImpl::beginRenderPassInternal(
	const FramebufferPtr& fb, const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
	TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width, U32 height)
{
	commandCommon();

	ANKI_ASSERT(!insideRenderPass());

	m_rpCommandCount = 0;
	m_activeFb = fb;

	FramebufferImpl& fbimpl = static_cast<FramebufferImpl&>(*fb);

	U32 fbWidth, fbHeight;
	fbimpl.getAttachmentsSize(fbWidth, fbHeight);
	m_fbSize[0] = fbWidth;
	m_fbSize[1] = fbHeight;

	ANKI_ASSERT(minx < fbWidth && miny < fbHeight);

	const U32 maxx = min<U32>(minx + width, fbWidth);
	const U32 maxy = min<U32>(miny + height, fbHeight);
	width = maxx - minx;
	height = maxy - miny;
	ANKI_ASSERT(minx + width <= fbWidth && miny + height <= fbHeight);

	m_renderArea[0] = minx;
	m_renderArea[1] = miny;
	m_renderArea[2] = width;
	m_renderArea[3] = height;

	m_colorAttachmentUsages = colorAttachmentUsages;
	m_depthStencilAttachmentUsage = depthStencilAttachmentUsage;

	m_microCmdb->pushObjectRef(fb);

	m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;

	// Re-set the viewport and scissor because they are sometimes set clamped
	m_viewportDirty = true;
	m_scissorDirty = true;
}
void CommandBufferImpl::beginRenderPassInternal()
{
	FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);

	flushBatches(CommandBufferCommandType::ANY_OTHER_COMMAND); // Flush before the marker

	m_state.beginRenderPass(&impl);

	VkRenderPassBeginInfo bi = {};
	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	bi.clearValueCount = impl.getAttachmentCount();
	bi.pClearValues = impl.getClearValues();
	bi.framebuffer = impl.getFramebufferHandle();

	// Calc the layouts
	Array<VkImageLayout, MAX_COLOR_ATTACHMENTS> colAttLayouts;
	for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
	{
		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
		colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
	}

	VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
	if(impl.hasDepthStencil())
	{
		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
		dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
	}

	VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
	if(impl.hasSri())
	{
		// Technically it's possible for the SRI to be in another layout. Don't bother though
		sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
	}

	bi.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);

	const Bool flipvp = flipViewport();
	bi.renderArea.offset.x = m_renderArea[0];
	if(flipvp)
	{
		ANKI_ASSERT(m_renderArea[3] <= m_fbSize[1]);
	}
	bi.renderArea.offset.y = (flipvp) ? m_fbSize[1] - (m_renderArea[1] + m_renderArea[3]) : m_renderArea[1];
	bi.renderArea.extent.width = m_renderArea[2];
	bi.renderArea.extent.height = m_renderArea[3];

	getGrManagerImpl().beginMarker(m_handle, impl.getName(), Vec3(0.0f, 1.0f, 0.0f));

#if !ANKI_PLATFORM_MOBILE
	// nVidia SRI cache workaround
	if(impl.hasSri())
	{
		VkMemoryBarrier memBarrier = {};
		memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		memBarrier.dstAccessMask = VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;

		const VkPipelineStageFlags srcStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
		const VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;

		vkCmdPipelineBarrier(m_handle, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr);
	}
#endif

	VkSubpassBeginInfo subpassBeginInfo = {};
	subpassBeginInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO;
	subpassBeginInfo.contents = m_subpassContents;

	vkCmdBeginRenderPass2KHR(m_handle, &bi, &subpassBeginInfo);

	m_renderedToDefaultFb = m_renderedToDefaultFb || impl.hasPresentableTexture();
}
void CommandBufferImpl::endRenderPassInternal()
{
	commandCommon();
	ANKI_ASSERT(insideRenderPass());

	if(m_rpCommandCount == 0)
	{
		// Empty pass
		m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
		beginRenderPassInternal();
	}

	VkSubpassEndInfo subpassEndInfo = {};
	subpassEndInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO;

	ANKI_CMD(vkCmdEndRenderPass2KHR(m_handle, &subpassEndInfo), ANY_OTHER_COMMAND);
	getGrManagerImpl().endMarker(m_handle);

	m_activeFb.reset(nullptr);
	m_state.endRenderPass();

	// After pushing second level command buffers the state is undefined. Reset the tracker and rebind the dynamic
	// state
	if(m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS)
	{
		m_state.reset();
		rebindDynamicState();
	}
}
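// endRecording(): finalizes the VkCommandBuffer. With ANKI_EXTRA_CHECKS enabled it also emits a rate-limited warning
// when a SMALL_BATCH command buffer ended up with too many commands, or a regular one with too few, which presumably
// hints that the caller picked the wrong flag.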
void CommandBufferImpl::endRecording()
{
	commandCommon();

	ANKI_ASSERT(!m_finalized);
	ANKI_ASSERT(!m_empty);

	ANKI_CMD(ANKI_VK_CHECKF(vkEndCommandBuffer(m_handle)), ANY_OTHER_COMMAND);
	m_finalized = true;

#if ANKI_EXTRA_CHECKS
	static Atomic<U32> messagePrintCount(0);
	constexpr U32 MAX_PRINT_COUNT = 10;

	CString message;
	if(!!(m_flags & CommandBufferFlag::SMALL_BATCH))
	{
		if(m_commandCount > COMMAND_BUFFER_SMALL_BATCH_MAX_COMMANDS * 4)
		{
			message = "Command buffer has too many commands%s: %u";
		}
	}
	else
	{
		if(m_commandCount <= COMMAND_BUFFER_SMALL_BATCH_MAX_COMMANDS / 4)
		{
			message = "Command buffer has too few commands%s: %u";
		}
	}

	if(!message.isEmpty())
	{
		const U32 count = messagePrintCount.fetchAdd(1) + 1;
		if(count < MAX_PRINT_COUNT)
		{
			ANKI_VK_LOGW(message.cstr(), "", m_commandCount);
		}
		else if(count == MAX_PRINT_COUNT)
		{
			ANKI_VK_LOGW(message.cstr(), " (will ignore further warnings)", m_commandCount);
		}
	}
#endif
}
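// generateMipmaps2dInternal(): generates the mip chain with a series of vkCmdBlitImage calls. For every level other
// than the base, the source level is transitioned TRANSFER_DST -> TRANSFER_SRC (it was just written by the previous
// blit) and each destination level is transitioned UNDEFINED -> TRANSFER_DST. Depth/stencil aspects are blitted with
// nearest filtering, color with linear.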
void CommandBufferImpl::generateMipmaps2dInternal(const TextureViewPtr& texView)
{
	commandCommon();

	const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
	const TextureImpl& tex = view.getTextureImpl();
	ANKI_ASSERT(tex.getTextureType() != TextureType::_3D && "Not for 3D");
	ANKI_ASSERT(tex.isSubresourceGoodForMipmapGeneration(view.getSubresource()));

	const U32 blitCount = tex.getMipmapCount() - 1u;
	if(blitCount == 0)
	{
		// Nothing to be done, flush the previous commands though because you may batch (and sort) things you
		// shouldn't
		flushBatches(CommandBufferCommandType::ANY_OTHER_COMMAND);
		return;
	}

	const DepthStencilAspectBit aspect = view.getSubresource().m_depthStencilAspect;
	const U32 face = view.getSubresource().m_firstFace;
	const U32 layer = view.getSubresource().m_firstLayer;

	for(U32 i = 0; i < blitCount; ++i)
	{
		// Transition source
		if(i > 0)
		{
			VkImageSubresourceRange range;
			tex.computeVkImageSubresourceRange(TextureSubresourceInfo(TextureSurfaceInfo(i, 0, face, layer), aspect),
				range);

			setImageBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle,
				range);
		}

		// Transition destination
		{
			VkImageSubresourceRange range;
			tex.computeVkImageSubresourceRange(
				TextureSubresourceInfo(TextureSurfaceInfo(i + 1, 0, face, layer), aspect), range);

			setImageBarrier(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, VK_IMAGE_LAYOUT_UNDEFINED,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, tex.m_imageHandle, range);
		}

		// Setup the blit struct
		I32 srcWidth = tex.getWidth() >> i;
		I32 srcHeight = tex.getHeight() >> i;

		I32 dstWidth = tex.getWidth() >> (i + 1);
		I32 dstHeight = tex.getHeight() >> (i + 1);

		ANKI_ASSERT(srcWidth > 0 && srcHeight > 0 && dstWidth > 0 && dstHeight > 0);

		U32 vkLayer = 0;
		switch(tex.getTextureType())
		{
		case TextureType::_2D:
		case TextureType::_2D_ARRAY:
			break;
		case TextureType::CUBE:
			vkLayer = face;
			break;
		case TextureType::CUBE_ARRAY:
			vkLayer = layer * 6 + face;
			break;
		default:
			ANKI_ASSERT(0);
			break;
		}

		VkImageBlit blit;
		blit.srcSubresource.aspectMask = convertImageAspect(aspect);
		blit.srcSubresource.baseArrayLayer = vkLayer;
		blit.srcSubresource.layerCount = 1;
		blit.srcSubresource.mipLevel = i;
		blit.srcOffsets[0] = {0, 0, 0};
		blit.srcOffsets[1] = {srcWidth, srcHeight, 1};

		blit.dstSubresource.aspectMask = convertImageAspect(aspect);
		blit.dstSubresource.baseArrayLayer = vkLayer;
		blit.dstSubresource.layerCount = 1;
		blit.dstSubresource.mipLevel = i + 1;
		blit.dstOffsets[0] = {0, 0, 0};
		blit.dstOffsets[1] = {dstWidth, dstHeight, 1};

		ANKI_CMD(vkCmdBlitImage(m_handle, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit,
				(!!aspect) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR),
			ANY_OTHER_COMMAND);
	}

	// Hold the reference
	m_microCmdb->pushObjectRef(texView);
}
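// flushBarriers(): the image barriers accumulated by earlier calls are sorted by image, aspect, layouts and
// subresource, then merged in two passes: first barriers covering consecutive mip ranges are squashed, then barriers
// covering consecutive layer ranges. Everything (memory, buffer and the merged image barriers) is submitted with a
// single vkCmdPipelineBarrier.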
void CommandBufferImpl::flushBarriers()
{
	if(m_imgBarrierCount == 0 && m_buffBarrierCount == 0 && m_memBarrierCount == 0)
	{
		return;
	}

	// Sort
	//
	if(m_imgBarrierCount > 0)
	{
		std::sort(&m_imgBarriers[0], &m_imgBarriers[0] + m_imgBarrierCount,
			[](const VkImageMemoryBarrier& a, const VkImageMemoryBarrier& b) -> Bool {
				if(a.image != b.image)
				{
					return a.image < b.image;
				}

				if(a.subresourceRange.aspectMask != b.subresourceRange.aspectMask)
				{
					return a.subresourceRange.aspectMask < b.subresourceRange.aspectMask;
				}

				if(a.oldLayout != b.oldLayout)
				{
					return a.oldLayout < b.oldLayout;
				}

				if(a.newLayout != b.newLayout)
				{
					return a.newLayout < b.newLayout;
				}

				if(a.subresourceRange.baseArrayLayer != b.subresourceRange.baseArrayLayer)
				{
					return a.subresourceRange.baseArrayLayer < b.subresourceRange.baseArrayLayer;
				}

				if(a.subresourceRange.baseMipLevel != b.subresourceRange.baseMipLevel)
				{
					return a.subresourceRange.baseMipLevel < b.subresourceRange.baseMipLevel;
				}

				return false;
			});
	}

	// Batch
	//
	DynamicArrayAuto<VkImageMemoryBarrier> finalImgBarriers(m_alloc);
	U32 finalImgBarrierCount = 0;
	if(m_imgBarrierCount > 0)
	{
		DynamicArrayAuto<VkImageMemoryBarrier> squashedBarriers(m_alloc);
		U32 squashedBarrierCount = 0;

		squashedBarriers.create(m_imgBarrierCount);

		// Squash the mips by reducing the barriers
		for(U32 i = 0; i < m_imgBarrierCount; ++i)
		{
			const VkImageMemoryBarrier* prev = (i > 0) ? &m_imgBarriers[i - 1] : nullptr;
			const VkImageMemoryBarrier& crnt = m_imgBarriers[i];

			if(prev && prev->image == crnt.image
				&& prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
				&& prev->oldLayout == crnt.oldLayout && prev->newLayout == crnt.newLayout
				&& prev->srcAccessMask == crnt.srcAccessMask && prev->dstAccessMask == crnt.dstAccessMask
				&& prev->subresourceRange.baseMipLevel + prev->subresourceRange.levelCount
					== crnt.subresourceRange.baseMipLevel
				&& prev->subresourceRange.baseArrayLayer == crnt.subresourceRange.baseArrayLayer
				&& prev->subresourceRange.layerCount == crnt.subresourceRange.layerCount)
			{
				// Can batch
				squashedBarriers[squashedBarrierCount - 1].subresourceRange.levelCount +=
					crnt.subresourceRange.levelCount;
			}
			else
			{
				// Can't batch, create new barrier
				squashedBarriers[squashedBarrierCount++] = crnt;
			}
		}

		ANKI_ASSERT(squashedBarrierCount);

		// Squash the layers
		finalImgBarriers.create(squashedBarrierCount);
		for(U32 i = 0; i < squashedBarrierCount; ++i)
		{
			const VkImageMemoryBarrier* prev = (i > 0) ? &squashedBarriers[i - 1] : nullptr;
			const VkImageMemoryBarrier& crnt = squashedBarriers[i];

			if(prev && prev->image == crnt.image
				&& prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
				&& prev->oldLayout == crnt.oldLayout && prev->newLayout == crnt.newLayout
				&& prev->srcAccessMask == crnt.srcAccessMask && prev->dstAccessMask == crnt.dstAccessMask
				&& prev->subresourceRange.baseMipLevel == crnt.subresourceRange.baseMipLevel
				&& prev->subresourceRange.levelCount == crnt.subresourceRange.levelCount
				&& prev->subresourceRange.baseArrayLayer + prev->subresourceRange.layerCount
					== crnt.subresourceRange.baseArrayLayer)
			{
				// Can batch
				finalImgBarriers[finalImgBarrierCount - 1].subresourceRange.layerCount +=
					crnt.subresourceRange.layerCount;
			}
			else
			{
				// Can't batch, create new barrier
				finalImgBarriers[finalImgBarrierCount++] = crnt;
			}
		}

		ANKI_ASSERT(finalImgBarrierCount);
	}

	// Finish the job
	//
	vkCmdPipelineBarrier(m_handle, m_srcStageMask, m_dstStageMask, 0, m_memBarrierCount,
		(m_memBarrierCount) ? &m_memBarriers[0] : nullptr, m_buffBarrierCount,
		(m_buffBarrierCount) ? &m_buffBarriers[0] : nullptr, finalImgBarrierCount,
		(finalImgBarrierCount) ? &finalImgBarriers[0] : nullptr);

	ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);

	m_imgBarrierCount = 0;
	m_buffBarrierCount = 0;
	m_memBarrierCount = 0;
	m_srcStageMask = 0;
	m_dstStageMask = 0;
}
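// flushQueryResets(): the pending reset atoms are sorted by pool and query index, and runs of consecutive indices in
// the same pool are collapsed into ranged vkCmdResetQueryPool calls.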
void CommandBufferImpl::flushQueryResets()
{
	if(m_queryResetAtoms.getSize() == 0)
	{
		return;
	}

	std::sort(m_queryResetAtoms.getBegin(), m_queryResetAtoms.getEnd(),
		[](const QueryResetAtom& a, const QueryResetAtom& b) -> Bool {
			if(a.m_pool != b.m_pool)
			{
				return a.m_pool < b.m_pool;
			}

			ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to reset the same query more than once");
			return a.m_queryIdx < b.m_queryIdx;
		});

	U32 firstQuery = m_queryResetAtoms[0].m_queryIdx;
	U32 queryCount = 1;
	VkQueryPool pool = m_queryResetAtoms[0].m_pool;
	for(U32 i = 1; i < m_queryResetAtoms.getSize(); ++i)
	{
		const QueryResetAtom& crnt = m_queryResetAtoms[i];
		const QueryResetAtom& prev = m_queryResetAtoms[i - 1];

		if(crnt.m_pool == prev.m_pool && crnt.m_queryIdx == prev.m_queryIdx + 1)
		{
			// Can batch
			++queryCount;
		}
		else
		{
			// Flush batch
			vkCmdResetQueryPool(m_handle, pool, firstQuery, queryCount);

			// New batch
			firstQuery = crnt.m_queryIdx;
			queryCount = 1;
			pool = crnt.m_pool;
		}
	}

	vkCmdResetQueryPool(m_handle, pool, firstQuery, queryCount);

	m_queryResetAtoms.destroy(m_alloc);
}
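// flushWriteQueryResults(): same batching idea as flushQueryResets() but for vkCmdCopyQueryPoolResults. Atoms are
// mergeable when they target the same pool and buffer with consecutive query indices and consecutive 4-byte
// destination offsets.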
void CommandBufferImpl::flushWriteQueryResults()
{
	if(m_writeQueryAtoms.getSize() == 0)
	{
		return;
	}

	std::sort(&m_writeQueryAtoms[0], &m_writeQueryAtoms[0] + m_writeQueryAtoms.getSize(),
		[](const WriteQueryAtom& a, const WriteQueryAtom& b) -> Bool {
			if(a.m_pool != b.m_pool)
			{
				return a.m_pool < b.m_pool;
			}

			if(a.m_buffer != b.m_buffer)
			{
				return a.m_buffer < b.m_buffer;
			}

			if(a.m_offset != b.m_offset)
			{
				return a.m_offset < b.m_offset;
			}

			ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to write the same query more than once");
			return a.m_queryIdx < b.m_queryIdx;
		});

	U32 firstQuery = m_writeQueryAtoms[0].m_queryIdx;
	U32 queryCount = 1;
	VkQueryPool pool = m_writeQueryAtoms[0].m_pool;
	PtrSize offset = m_writeQueryAtoms[0].m_offset;
	VkBuffer buff = m_writeQueryAtoms[0].m_buffer;
	for(U32 i = 1; i < m_writeQueryAtoms.getSize(); ++i)
	{
		const WriteQueryAtom& crnt = m_writeQueryAtoms[i];
		const WriteQueryAtom& prev = m_writeQueryAtoms[i - 1];

		if(crnt.m_pool == prev.m_pool && crnt.m_buffer == prev.m_buffer && prev.m_queryIdx + 1 == crnt.m_queryIdx
			&& prev.m_offset + sizeof(U32) == crnt.m_offset)
		{
			// Can batch
			++queryCount;
		}
		else
		{
			// Flush batch
			vkCmdCopyQueryPoolResults(m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32),
				VK_QUERY_RESULT_PARTIAL_BIT);

			// New batch. Also reset the destination offset, otherwise the next copy writes to the stale offset
			firstQuery = crnt.m_queryIdx;
			queryCount = 1;
			pool = crnt.m_pool;
			offset = crnt.m_offset;
			buff = crnt.m_buffer;
		}
	}

	vkCmdCopyQueryPoolResults(m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32),
		VK_QUERY_RESULT_PARTIAL_BIT);

	m_writeQueryAtoms.resize(m_alloc, 0);
}
void CommandBufferImpl::copyBufferToTextureViewInternal(const BufferPtr& buff, PtrSize offset,
	[[maybe_unused]] PtrSize range, const TextureViewPtr& texView)
{
	commandCommon();

	const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
	const TextureImpl& tex = view.getTextureImpl();
	ANKI_ASSERT(tex.usageValid(TextureUsageBit::TRANSFER_DESTINATION));
	ANKI_ASSERT(tex.isSubresourceGoodForCopyFromBuffer(view.getSubresource()));
	const VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

	const Bool is3D = tex.getTextureType() == TextureType::_3D;
	const VkImageAspectFlags aspect = convertImageAspect(view.getSubresource().m_depthStencilAspect);

	const TextureSurfaceInfo surf(view.getSubresource().m_firstMipmap, view.getSubresource().m_firstFace, 0,
		view.getSubresource().m_firstLayer);
	const TextureVolumeInfo vol(view.getSubresource().m_firstMipmap);

	// Compute the sizes of the mip
	const U32 width = tex.getWidth() >> surf.m_level;
	const U32 height = tex.getHeight() >> surf.m_level;
	ANKI_ASSERT(width && height);
	const U32 depth = (is3D) ? (tex.getDepth() >> surf.m_level) : 1u;

	if(!is3D)
	{
		ANKI_ASSERT(range == computeSurfaceSize(width, height, tex.getFormat()));
	}
	else
	{
		ANKI_ASSERT(range == computeVolumeSize(width, height, depth, tex.getFormat()));
	}

	// Copy
	VkBufferImageCopy region;
	region.imageSubresource.aspectMask = aspect;
	region.imageSubresource.baseArrayLayer = (is3D) ? tex.computeVkArrayLayer(vol) : tex.computeVkArrayLayer(surf);
	region.imageSubresource.layerCount = 1;
	region.imageSubresource.mipLevel = surf.m_level;
	region.imageOffset = {0, 0, 0};
	region.imageExtent.width = width;
	region.imageExtent.height = height;
	region.imageExtent.depth = depth;
	region.bufferOffset = offset;
	region.bufferImageHeight = 0;
	region.bufferRowLength = 0;

	ANKI_CMD(vkCmdCopyBufferToImage(m_handle, static_cast<const BufferImpl&>(*buff).getHandle(), tex.m_imageHandle,
			layout, 1, &region),
		ANY_OTHER_COMMAND);

	m_microCmdb->pushObjectRef(texView);
	m_microCmdb->pushObjectRef(buff);
}
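// rebindDynamicState(): called when the dynamic state already recorded in the VkCommandBuffer can no longer be
// trusted (e.g. after executing secondary command buffers). It marks the viewport, scissor and VRS rate as dirty so
// the next draw re-records them, and re-issues the stencil compare/write/reference masks immediately.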
void CommandBufferImpl::rebindDynamicState()
{
	m_viewportDirty = true;
	m_lastViewport = {};
	m_scissorDirty = true;
	m_lastScissor = {};
	m_vrsRateDirty = true;
	m_vrsRate = VrsRate::_1x1;

	// Rebind the stencil compare mask
	if(m_stencilCompareMasks[0] == m_stencilCompareMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
				m_stencilCompareMasks[0]),
			ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilCompareMasks[0]),
			ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[1]),
			ANY_OTHER_COMMAND);
	}

	// Rebind the stencil write mask
	if(m_stencilWriteMasks[0] == m_stencilWriteMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
				m_stencilWriteMasks[0]),
			ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilWriteMasks[0]),
			ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[1]),
			ANY_OTHER_COMMAND);
	}

	// Rebind the stencil reference
	if(m_stencilReferenceMasks[0] == m_stencilReferenceMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
				m_stencilReferenceMasks[0]),
			ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilReferenceMasks[0]),
			ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[1]),
			ANY_OTHER_COMMAND);
	}
}
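// buildAccelerationStructureInternal(): allocates a transient scratch buffer sized by the acceleration structure
// itself and records a single vkCmdBuildAccelerationStructuresKHR. The scratch buffer and the AS are ref-held by the
// micro command buffer so they stay alive until the work has been executed.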
void CommandBufferImpl::buildAccelerationStructureInternal(const AccelerationStructurePtr& as)
{
	commandCommon();

	// Get objects
	const AccelerationStructureImpl& asImpl = static_cast<AccelerationStructureImpl&>(*as);

	// Create the scratch buffer
	BufferInitInfo bufferInit;
	bufferInit.m_usage = PrivateBufferUsageBit::ACCELERATION_STRUCTURE_BUILD_SCRATCH;
	bufferInit.m_size = asImpl.getBuildScratchBufferSize();
	BufferPtr scratchBuff = getManager().newBuffer(bufferInit);

	// Create the build info
	VkAccelerationStructureBuildGeometryInfoKHR buildInfo;
	VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
	asImpl.generateBuildInfo(scratchBuff->getGpuAddress(), buildInfo, rangeInfo);

	// Do the command
	Array<const VkAccelerationStructureBuildRangeInfoKHR*, 1> pRangeInfos = {&rangeInfo};
	ANKI_CMD(vkCmdBuildAccelerationStructuresKHR(m_handle, 1, &buildInfo, &pRangeInfos[0]), ANY_OTHER_COMMAND);

	// Push refs
	m_microCmdb->pushObjectRef(as);
	m_microCmdb->pushObjectRef(scratchBuff);
}
#if ANKI_DLSS
/// Utility function to get the NGX resource structure for a texture.
/// @param[in] view The texture view to generate the NVSDK_NGX_Resource_VK from.
static NVSDK_NGX_Resource_VK getNGXResourceFromAnkiTexture(const TextureViewImpl& view)
{
	const TextureImpl& tex = view.getTextureImpl();
	const VkImageView imageView = view.getHandle();
	const VkFormat format = tex.m_vkFormat;
	const VkImage image = tex.m_imageHandle;
	const VkImageSubresourceRange subresourceRange = view.getVkImageSubresourceRange();
	const Bool isUAV = !!(tex.m_vkUsageFlags & VK_IMAGE_USAGE_STORAGE_BIT);

	// TODO Not sure if I should pass the width,height of the image or the view
	return NVSDK_NGX_Create_ImageView_Resource_VK(imageView, image, subresourceRange, format, tex.getWidth(),
		tex.getHeight(), isUAV);
}
#endif
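// upscaleInternal(): DLSS evaluation. Each input view is wrapped into an NVSDK_NGX_Resource_VK, the evaluation
// parameters (jitter, motion-vector scale, reset flag, render subrect) are filled from the arguments, and
// NGX_VULKAN_EVALUATE_DLSS_EXT records the work into this command buffer. Only available when ANKI_DLSS is enabled;
// otherwise the call asserts.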
void CommandBufferImpl::upscaleInternal(const GrUpscalerPtr& upscaler, const TextureViewPtr& inColor,
	const TextureViewPtr& outUpscaledColor, const TextureViewPtr& motionVectors,
	const TextureViewPtr& depth, const TextureViewPtr& exposure,
	const Bool resetAccumulation, const Vec2& jitterOffset,
	const Vec2& motionVectorsScale)
{
#if ANKI_DLSS
	ANKI_ASSERT(getGrManagerImpl().getDeviceCapabilities().m_dlss);
	ANKI_ASSERT(upscaler->getUpscalerType() == GrUpscalerType::DLSS_2);

	commandCommon();

	flushBatches(CommandBufferCommandType::ANY_OTHER_COMMAND);

	const GrUpscalerImpl& upscalerImpl = static_cast<const GrUpscalerImpl&>(*upscaler);
	const TextureViewImpl& srcViewImpl = static_cast<const TextureViewImpl&>(*inColor);
	const TextureViewImpl& dstViewImpl = static_cast<const TextureViewImpl&>(*outUpscaledColor);
	const TextureViewImpl& mvViewImpl = static_cast<const TextureViewImpl&>(*motionVectors);
	const TextureViewImpl& depthViewImpl = static_cast<const TextureViewImpl&>(*depth);
	const TextureViewImpl& exposureViewImpl = static_cast<const TextureViewImpl&>(*exposure);

	NVSDK_NGX_Resource_VK srcResVk = getNGXResourceFromAnkiTexture(srcViewImpl);
	NVSDK_NGX_Resource_VK dstResVk = getNGXResourceFromAnkiTexture(dstViewImpl);
	NVSDK_NGX_Resource_VK mvResVk = getNGXResourceFromAnkiTexture(mvViewImpl);
	NVSDK_NGX_Resource_VK depthResVk = getNGXResourceFromAnkiTexture(depthViewImpl);
	NVSDK_NGX_Resource_VK exposureResVk = getNGXResourceFromAnkiTexture(exposureViewImpl);

	const U32 mipLevel = srcViewImpl.getSubresource().m_firstMipmap;
	const NVSDK_NGX_Coordinates renderingOffset = {0, 0};
	const NVSDK_NGX_Dimensions renderingSize = {srcViewImpl.getTextureImpl().getWidth() >> mipLevel,
		srcViewImpl.getTextureImpl().getHeight() >> mipLevel};

	NVSDK_NGX_VK_DLSS_Eval_Params vkDlssEvalParams;
	memset(&vkDlssEvalParams, 0, sizeof(vkDlssEvalParams));
	vkDlssEvalParams.Feature.pInColor = &srcResVk;
	vkDlssEvalParams.Feature.pInOutput = &dstResVk;
	vkDlssEvalParams.pInDepth = &depthResVk;
	vkDlssEvalParams.pInMotionVectors = &mvResVk;
	vkDlssEvalParams.pInExposureTexture = &exposureResVk;
	vkDlssEvalParams.InJitterOffsetX = jitterOffset.x();
	vkDlssEvalParams.InJitterOffsetY = jitterOffset.y();
	vkDlssEvalParams.InReset = resetAccumulation;
	vkDlssEvalParams.InMVScaleX = motionVectorsScale.x();
	vkDlssEvalParams.InMVScaleY = motionVectorsScale.y();
	vkDlssEvalParams.InColorSubrectBase = renderingOffset;
	vkDlssEvalParams.InDepthSubrectBase = renderingOffset;
	vkDlssEvalParams.InTranslucencySubrectBase = renderingOffset;
	vkDlssEvalParams.InMVSubrectBase = renderingOffset;
	vkDlssEvalParams.InRenderSubrectDimensions = renderingSize;

	getGrManagerImpl().beginMarker(m_handle, "DLSS");
	NVSDK_NGX_Parameter* dlssParameters = &upscalerImpl.getParameters();
	NVSDK_NGX_Handle* dlssFeature = &upscalerImpl.getFeature();
	const NVSDK_NGX_Result result =
		NGX_VULKAN_EVALUATE_DLSS_EXT(m_handle, dlssFeature, dlssParameters, &vkDlssEvalParams);
	getGrManagerImpl().endMarker(m_handle);

	if(NVSDK_NGX_FAILED(result))
	{
		ANKI_VK_LOGF("Failed to NVSDK_NGX_VULKAN_EvaluateFeature for DLSS, code = 0x%08x, info: %ls", result,
			GetNGXResultAsString(result));
	}
#else
	ANKI_ASSERT(0 && "Not supported");
	(void)upscaler;
	(void)inColor;
	(void)outUpscaledColor;
	(void)motionVectors;
	(void)depth;
	(void)exposure;
	(void)resetAccumulation;
	(void)jitterOffset;
	(void)motionVectorsScale;
#endif
}

} // end namespace anki