CommandBufferImpl.cpp

// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/CommandBufferImpl.h>
#include <AnKi/Gr/GrManager.h>
#include <AnKi/Gr/Vulkan/GrManagerImpl.h>
#include <AnKi/Gr/Framebuffer.h>
#include <AnKi/Gr/Vulkan/FramebufferImpl.h>
#include <AnKi/Gr/Vulkan/AccelerationStructureImpl.h>

#include <algorithm>

namespace anki
{

CommandBufferImpl::~CommandBufferImpl()
{
	if(m_empty)
	{
		ANKI_VK_LOGW("Command buffer was empty");
	}

	if(!m_finalized)
	{
		ANKI_VK_LOGW("Command buffer was not flushed");
	}

	m_imgBarriers.destroy(m_alloc);
	m_buffBarriers.destroy(m_alloc);
	m_memBarriers.destroy(m_alloc);
	m_queryResetAtoms.destroy(m_alloc);
	m_writeQueryAtoms.destroy(m_alloc);
	m_secondLevelAtoms.destroy(m_alloc);
}
Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
{
	m_tid = Thread::getCurrentThreadId();
	m_flags = init.m_flags;

	ANKI_CHECK(getGrManagerImpl().getCommandBufferFactory().newCommandBuffer(m_tid, m_flags, m_microCmdb));
	m_handle = m_microCmdb->getHandle();
	m_alloc = m_microCmdb->getFastAllocator();

	// Store some of the init info for later
	if(!!(m_flags & CommandBufferFlag::SECOND_LEVEL))
	{
		m_activeFb = init.m_framebuffer;
		m_colorAttachmentUsages = init.m_colorAttachmentUsages;
		m_depthStencilAttachmentUsage = init.m_depthStencilAttachmentUsage;
		m_state.beginRenderPass(static_cast<FramebufferImpl*>(m_activeFb.get()));
		m_microCmdb->pushObjectRef(m_activeFb);
	}

	for(DescriptorSetState& state : m_dsetState)
	{
		state.init(m_alloc);
	}

	return Error::NONE;
}
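// Usage sketch (illustrative assumption, not code from this file): a second-level command buffer is initialized
// with the framebuffer and attachment usages it will record into, so init() above can capture them and
// beginRecording() below can fill the VkCommandBufferInheritanceInfo. "fb", "usages" and "gr" are hypothetical:
//
//   CommandBufferInitInfo cinit;
//   cinit.m_flags = CommandBufferFlag::SECOND_LEVEL;
//   cinit.m_framebuffer = fb;
//   cinit.m_colorAttachmentUsages = usages;
//   CommandBufferPtr cmdb = gr.newCommandBuffer(cinit); // "gr" would be a GrManager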
void CommandBufferImpl::beginRecording()
{
	// Do the begin
	VkCommandBufferInheritanceInfo inheritance = {};
	inheritance.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;

	VkCommandBufferBeginInfo begin = {};
	begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	begin.pInheritanceInfo = &inheritance;

	if(!!(m_flags & CommandBufferFlag::SECOND_LEVEL))
	{
		FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);

		// Calc the layouts
		Array<VkImageLayout, MAX_COLOR_ATTACHMENTS> colAttLayouts;
		for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
		{
			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
			colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
		}

		VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
		if(impl.hasDepthStencil())
		{
			const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
			dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
		}

		inheritance.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout);
		inheritance.subpass = 0;
		inheritance.framebuffer = impl.getFramebufferHandle();

		begin.flags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
	}

	vkBeginCommandBuffer(m_handle, &begin);
}
void CommandBufferImpl::beginRenderPass(FramebufferPtr fb,
										const Array<TextureUsageBit, MAX_COLOR_ATTACHMENTS>& colorAttachmentUsages,
										TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width,
										U32 height)
{
	commandCommon();
	ANKI_ASSERT(!insideRenderPass());

	m_rpCommandCount = 0;
	m_activeFb = fb;

	FramebufferImpl& fbimpl = static_cast<FramebufferImpl&>(*fb);

	U32 fbWidth, fbHeight;
	fbimpl.getAttachmentsSize(fbWidth, fbHeight);
	m_fbSize[0] = fbWidth;
	m_fbSize[1] = fbHeight;

	ANKI_ASSERT(minx < fbWidth && miny < fbHeight);

	// Clamp the render area to the framebuffer dimensions
	const U32 maxx = min<U32>(minx + width, fbWidth);
	const U32 maxy = min<U32>(miny + height, fbHeight);
	width = maxx - minx;
	height = maxy - miny;
	ANKI_ASSERT(minx + width <= fbWidth && miny + height <= fbHeight);

	m_renderArea[0] = minx;
	m_renderArea[1] = miny;
	m_renderArea[2] = width;
	m_renderArea[3] = height;

	m_colorAttachmentUsages = colorAttachmentUsages;
	m_depthStencilAttachmentUsage = depthStencilAttachmentUsage;

	m_microCmdb->pushObjectRef(fb);

	m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;

	// Re-set the viewport and scissor because they may have been clamped against a previous framebuffer's size
	m_viewportDirty = true;
	m_scissorDirty = true;
}
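// Note (added for clarity, based on how the functions in this file interact): beginRenderPass() above only
// records state; the actual vkCmdBeginRenderPass is deferred to beginRenderPassInternal() below, which runs when
// the first in-pass command arrives (or from endRenderPass() for empty passes). Deferring lets m_subpassContents
// be chosen based on whether inline commands or second-level command buffers show up first.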
void CommandBufferImpl::beginRenderPassInternal()
{
	FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);

	m_state.beginRenderPass(&impl);

	VkRenderPassBeginInfo bi = {};
	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	bi.clearValueCount = impl.getAttachmentCount();
	bi.pClearValues = impl.getClearValues();
	bi.framebuffer = impl.getFramebufferHandle();

	// Calc the layouts
	Array<VkImageLayout, MAX_COLOR_ATTACHMENTS> colAttLayouts;
	for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
	{
		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
		colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
	}

	VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
	if(impl.hasDepthStencil())
	{
		const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
		dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
	}

	bi.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout);

	const Bool flipvp = flipViewport();
	bi.renderArea.offset.x = m_renderArea[0];
	if(flipvp)
	{
		ANKI_ASSERT(m_renderArea[3] <= m_fbSize[1]);
	}
	bi.renderArea.offset.y = (flipvp) ? m_fbSize[1] - (m_renderArea[1] + m_renderArea[3]) : m_renderArea[1];
	bi.renderArea.extent.width = m_renderArea[2];
	bi.renderArea.extent.height = m_renderArea[3];

	getGrManagerImpl().beginMarker(m_handle, impl.getName());

	ANKI_CMD(vkCmdBeginRenderPass(m_handle, &bi, m_subpassContents), ANY_OTHER_COMMAND);

	if(impl.hasPresentableTexture())
	{
		m_renderedToDefaultFb = true;
	}
}
void CommandBufferImpl::endRenderPass()
{
	commandCommon();
	ANKI_ASSERT(insideRenderPass());
	if(m_rpCommandCount == 0)
	{
		m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
		beginRenderPassInternal();
	}

	ANKI_CMD(vkCmdEndRenderPass(m_handle), ANY_OTHER_COMMAND);
	getGrManagerImpl().endMarker(m_handle);

	m_activeFb.reset(nullptr);
	m_state.endRenderPass();

	// After executing second-level command buffers the dynamic state is undefined. Reset the tracker and rebind it
	if(m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS)
	{
		m_state.reset();
		rebindDynamicState();
	}
}
void CommandBufferImpl::endRecording()
{
	commandCommon();

	ANKI_ASSERT(!m_finalized);
	ANKI_ASSERT(!m_empty);

	ANKI_CMD(ANKI_VK_CHECKF(vkEndCommandBuffer(m_handle)), ANY_OTHER_COMMAND);
	m_finalized = true;

#if ANKI_EXTRA_CHECKS
	static Atomic<U32> messagePrintCount(0);
	constexpr U32 MAX_PRINT_COUNT = 10;

	CString message;
	if(!!(m_flags & CommandBufferFlag::SMALL_BATCH))
	{
		if(m_commandCount > COMMAND_BUFFER_SMALL_BATCH_MAX_COMMANDS * 4)
		{
			message = "Command buffer has too many commands%s: %u";
		}
	}
	else
	{
		if(m_commandCount <= COMMAND_BUFFER_SMALL_BATCH_MAX_COMMANDS / 4)
		{
			message = "Command buffer has too few commands%s: %u";
		}
	}

	if(!message.isEmpty())
	{
		const U32 count = messagePrintCount.fetchAdd(1) + 1;
		if(count < MAX_PRINT_COUNT)
		{
			ANKI_VK_LOGW(message.cstr(), "", m_commandCount);
		}
		else if(count == MAX_PRINT_COUNT)
		{
			ANKI_VK_LOGW(message.cstr(), " (will ignore further warnings)", m_commandCount);
		}
	}
#endif
}
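// Overview (added for clarity): generateMipmaps2d() below walks the mip chain with vkCmdBlitImage. Each
// iteration expects mip i to be in TRANSFER_SRC_OPTIMAL (the previous iteration's destination is transitioned
// in-place for i > 0), transitions mip i + 1 from UNDEFINED to TRANSFER_DST_OPTIMAL, then blits with the extent
// halved. Depth/stencil aspects use nearest filtering since Vulkan forbids linear filtering when blitting them.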
void CommandBufferImpl::generateMipmaps2d(TextureViewPtr texView)
{
	commandCommon();

	const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
	const TextureImpl& tex = view.getTextureImpl();
	ANKI_ASSERT(tex.getTextureType() != TextureType::_3D && "Not for 3D");
	ANKI_ASSERT(tex.isSubresourceGoodForMipmapGeneration(view.getSubresource()));

	const U32 blitCount = tex.getMipmapCount() - 1u;
	if(blitCount == 0)
	{
		// Nothing to be done. Flush the previous commands though, because otherwise batching (and sorting) may
		// reorder things it shouldn't
		flushBatches(CommandBufferCommandType::ANY_OTHER_COMMAND);
		return;
	}

	const DepthStencilAspectBit aspect = view.getSubresource().m_depthStencilAspect;
	const U32 face = view.getSubresource().m_firstFace;
	const U32 layer = view.getSubresource().m_firstLayer;

	for(U32 i = 0; i < blitCount; ++i)
	{
		// Transition source
		if(i > 0)
		{
			VkImageSubresourceRange range;
			tex.computeVkImageSubresourceRange(TextureSubresourceInfo(TextureSurfaceInfo(i, 0, face, layer), aspect),
											   range);

			setImageBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
							VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
							VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle,
							range);
		}

		// Transition destination
		{
			VkImageSubresourceRange range;
			tex.computeVkImageSubresourceRange(
				TextureSubresourceInfo(TextureSurfaceInfo(i + 1, 0, face, layer), aspect), range);

			setImageBarrier(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, VK_IMAGE_LAYOUT_UNDEFINED,
							VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
							VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, tex.m_imageHandle, range);
		}

		// Setup the blit struct
		I32 srcWidth = tex.getWidth() >> i;
		I32 srcHeight = tex.getHeight() >> i;
		I32 dstWidth = tex.getWidth() >> (i + 1);
		I32 dstHeight = tex.getHeight() >> (i + 1);
		ANKI_ASSERT(srcWidth > 0 && srcHeight > 0 && dstWidth > 0 && dstHeight > 0);

		U32 vkLayer = 0;
		switch(tex.getTextureType())
		{
		case TextureType::_2D:
		case TextureType::_2D_ARRAY:
			break;
		case TextureType::CUBE:
			vkLayer = face;
			break;
		case TextureType::CUBE_ARRAY:
			vkLayer = layer * 6 + face;
			break;
		default:
			ANKI_ASSERT(0);
			break;
		}

		VkImageBlit blit;
		blit.srcSubresource.aspectMask = convertImageAspect(aspect);
		blit.srcSubresource.baseArrayLayer = vkLayer;
		blit.srcSubresource.layerCount = 1;
		blit.srcSubresource.mipLevel = i;
		blit.srcOffsets[0] = {0, 0, 0};
		blit.srcOffsets[1] = {srcWidth, srcHeight, 1};

		blit.dstSubresource.aspectMask = convertImageAspect(aspect);
		blit.dstSubresource.baseArrayLayer = vkLayer;
		blit.dstSubresource.layerCount = 1;
		blit.dstSubresource.mipLevel = i + 1;
		blit.dstOffsets[0] = {0, 0, 0};
		blit.dstOffsets[1] = {dstWidth, dstHeight, 1};

		ANKI_CMD(vkCmdBlitImage(m_handle, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle,
								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit,
								(!!aspect) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR),
				 ANY_OTHER_COMMAND);
	}

	// Hold the reference
	m_microCmdb->pushObjectRef(texView);
}
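// Worked example (added for clarity): the two squashing passes in flushBarriers() merge barriers that differ
// only in contiguous mips or layers. E.g. two pending barriers on the same image with identical access masks and
// layouts, one covering baseMipLevel 0 / levelCount 1 and one covering baseMipLevel 1 / levelCount 1, collapse
// into a single barrier with levelCount 2, so vkCmdPipelineBarrier receives fewer VkImageMemoryBarrier entries.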
void CommandBufferImpl::flushBarriers()
{
	if(m_imgBarrierCount == 0 && m_buffBarrierCount == 0 && m_memBarrierCount == 0)
	{
		return;
	}

	// Sort
	//
	if(m_imgBarrierCount > 0)
	{
		std::sort(&m_imgBarriers[0], &m_imgBarriers[0] + m_imgBarrierCount,
				  [](const VkImageMemoryBarrier& a, const VkImageMemoryBarrier& b) -> Bool {
					  if(a.image != b.image)
					  {
						  return a.image < b.image;
					  }

					  if(a.subresourceRange.aspectMask != b.subresourceRange.aspectMask)
					  {
						  return a.subresourceRange.aspectMask < b.subresourceRange.aspectMask;
					  }

					  if(a.oldLayout != b.oldLayout)
					  {
						  return a.oldLayout < b.oldLayout;
					  }

					  if(a.newLayout != b.newLayout)
					  {
						  return a.newLayout < b.newLayout;
					  }

					  if(a.subresourceRange.baseArrayLayer != b.subresourceRange.baseArrayLayer)
					  {
						  return a.subresourceRange.baseArrayLayer < b.subresourceRange.baseArrayLayer;
					  }

					  if(a.subresourceRange.baseMipLevel != b.subresourceRange.baseMipLevel)
					  {
						  return a.subresourceRange.baseMipLevel < b.subresourceRange.baseMipLevel;
					  }

					  return false;
				  });
	}

	// Batch
	//
	DynamicArrayAuto<VkImageMemoryBarrier> finalImgBarriers(m_alloc);
	U32 finalImgBarrierCount = 0;
	if(m_imgBarrierCount > 0)
	{
		DynamicArrayAuto<VkImageMemoryBarrier> squashedBarriers(m_alloc);
		U32 squashedBarrierCount = 0;

		squashedBarriers.create(m_imgBarrierCount);

		// Squash the mips by reducing the barriers
		for(U32 i = 0; i < m_imgBarrierCount; ++i)
		{
			const VkImageMemoryBarrier* prev = (i > 0) ? &m_imgBarriers[i - 1] : nullptr;
			const VkImageMemoryBarrier& crnt = m_imgBarriers[i];

			if(prev && prev->image == crnt.image
			   && prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
			   && prev->oldLayout == crnt.oldLayout && prev->newLayout == crnt.newLayout
			   && prev->srcAccessMask == crnt.srcAccessMask && prev->dstAccessMask == crnt.dstAccessMask
			   && prev->subresourceRange.baseMipLevel + prev->subresourceRange.levelCount
				  == crnt.subresourceRange.baseMipLevel
			   && prev->subresourceRange.baseArrayLayer == crnt.subresourceRange.baseArrayLayer
			   && prev->subresourceRange.layerCount == crnt.subresourceRange.layerCount)
			{
				// Can batch
				squashedBarriers[squashedBarrierCount - 1].subresourceRange.levelCount +=
					crnt.subresourceRange.levelCount;
			}
			else
			{
				// Can't batch, create new barrier
				squashedBarriers[squashedBarrierCount++] = crnt;
			}
		}

		ANKI_ASSERT(squashedBarrierCount);

		// Squash the layers
		finalImgBarriers.create(squashedBarrierCount);
		for(U32 i = 0; i < squashedBarrierCount; ++i)
		{
			const VkImageMemoryBarrier* prev = (i > 0) ? &squashedBarriers[i - 1] : nullptr;
			const VkImageMemoryBarrier& crnt = squashedBarriers[i];

			if(prev && prev->image == crnt.image
			   && prev->subresourceRange.aspectMask == crnt.subresourceRange.aspectMask
			   && prev->oldLayout == crnt.oldLayout && prev->newLayout == crnt.newLayout
			   && prev->srcAccessMask == crnt.srcAccessMask && prev->dstAccessMask == crnt.dstAccessMask
			   && prev->subresourceRange.baseMipLevel == crnt.subresourceRange.baseMipLevel
			   && prev->subresourceRange.levelCount == crnt.subresourceRange.levelCount
			   && prev->subresourceRange.baseArrayLayer + prev->subresourceRange.layerCount
				  == crnt.subresourceRange.baseArrayLayer)
			{
				// Can batch
				finalImgBarriers[finalImgBarrierCount - 1].subresourceRange.layerCount +=
					crnt.subresourceRange.layerCount;
			}
			else
			{
				// Can't batch, create new barrier
				finalImgBarriers[finalImgBarrierCount++] = crnt;
			}
		}

		ANKI_ASSERT(finalImgBarrierCount);
	}

	// Finish the job
	//
	vkCmdPipelineBarrier(m_handle, m_srcStageMask, m_dstStageMask, 0, m_memBarrierCount,
						 (m_memBarrierCount) ? &m_memBarriers[0] : nullptr, m_buffBarrierCount,
						 (m_buffBarrierCount) ? &m_buffBarriers[0] : nullptr, finalImgBarrierCount,
						 (finalImgBarrierCount) ? &finalImgBarriers[0] : nullptr);

	ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);

	m_imgBarrierCount = 0;
	m_buffBarrierCount = 0;
	m_memBarrierCount = 0;
	m_srcStageMask = 0;
	m_dstStageMask = 0;
}
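// Worked example (added for clarity): flushQueryResets() below sorts the pending reset atoms by pool and query
// index, then emits one vkCmdResetQueryPool per contiguous run. E.g. resets for queries {3, 4, 5, 9} of the same
// pool become vkCmdResetQueryPool(m_handle, pool, 3, 3) followed by vkCmdResetQueryPool(m_handle, pool, 9, 1).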
void CommandBufferImpl::flushQueryResets()
{
	if(m_queryResetAtoms.getSize() == 0)
	{
		return;
	}

	std::sort(m_queryResetAtoms.getBegin(), m_queryResetAtoms.getEnd(),
			  [](const QueryResetAtom& a, const QueryResetAtom& b) -> Bool {
				  if(a.m_pool != b.m_pool)
				  {
					  return a.m_pool < b.m_pool;
				  }

				  ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to reset the same query more than once");
				  return a.m_queryIdx < b.m_queryIdx;
			  });

	U32 firstQuery = m_queryResetAtoms[0].m_queryIdx;
	U32 queryCount = 1;
	VkQueryPool pool = m_queryResetAtoms[0].m_pool;
	for(U32 i = 1; i < m_queryResetAtoms.getSize(); ++i)
	{
		const QueryResetAtom& crnt = m_queryResetAtoms[i];
		const QueryResetAtom& prev = m_queryResetAtoms[i - 1];

		if(crnt.m_pool == prev.m_pool && crnt.m_queryIdx == prev.m_queryIdx + 1)
		{
			// Can batch
			++queryCount;
		}
		else
		{
			// Flush batch
			vkCmdResetQueryPool(m_handle, pool, firstQuery, queryCount);

			// New batch
			firstQuery = crnt.m_queryIdx;
			queryCount = 1;
			pool = crnt.m_pool;
		}
	}

	// Flush the last batch
	vkCmdResetQueryPool(m_handle, pool, firstQuery, queryCount);

	m_queryResetAtoms.destroy(m_alloc);
}
void CommandBufferImpl::flushWriteQueryResults()
{
	if(m_writeQueryAtoms.getSize() == 0)
	{
		return;
	}

	std::sort(&m_writeQueryAtoms[0], &m_writeQueryAtoms[0] + m_writeQueryAtoms.getSize(),
			  [](const WriteQueryAtom& a, const WriteQueryAtom& b) -> Bool {
				  if(a.m_pool != b.m_pool)
				  {
					  return a.m_pool < b.m_pool;
				  }

				  if(a.m_buffer != b.m_buffer)
				  {
					  return a.m_buffer < b.m_buffer;
				  }

				  if(a.m_offset != b.m_offset)
				  {
					  return a.m_offset < b.m_offset;
				  }

				  ANKI_ASSERT(a.m_queryIdx != b.m_queryIdx && "Tried to write the same query more than once");
				  return a.m_queryIdx < b.m_queryIdx;
			  });

	U32 firstQuery = m_writeQueryAtoms[0].m_queryIdx;
	U32 queryCount = 1;
	VkQueryPool pool = m_writeQueryAtoms[0].m_pool;
	PtrSize offset = m_writeQueryAtoms[0].m_offset;
	VkBuffer buff = m_writeQueryAtoms[0].m_buffer;
	for(U32 i = 1; i < m_writeQueryAtoms.getSize(); ++i)
	{
		const WriteQueryAtom& crnt = m_writeQueryAtoms[i];
		const WriteQueryAtom& prev = m_writeQueryAtoms[i - 1];

		if(crnt.m_pool == prev.m_pool && crnt.m_buffer == prev.m_buffer && prev.m_queryIdx + 1 == crnt.m_queryIdx
		   && prev.m_offset + sizeof(U32) == crnt.m_offset)
		{
			// Can batch
			++queryCount;
		}
		else
		{
			// Flush batch
			vkCmdCopyQueryPoolResults(m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32),
									  VK_QUERY_RESULT_PARTIAL_BIT);

			// New batch
			firstQuery = crnt.m_queryIdx;
			queryCount = 1;
			pool = crnt.m_pool;
			buff = crnt.m_buffer;
			offset = crnt.m_offset; // Also reset the offset, otherwise the next batch reuses the previous one's
		}
	}

	// Flush the last batch
	vkCmdCopyQueryPoolResults(m_handle, pool, firstQuery, queryCount, buff, offset, sizeof(U32),
							  VK_QUERY_RESULT_PARTIAL_BIT);

	m_writeQueryAtoms.resize(m_alloc, 0);
}
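// Note on the workaround path below (added for clarity): when a texture was internally promoted from R8G8B8 to
// R8G8B8A8 (TextureImplWorkaround::R8G8B8_TO_R8G8B8A8), the upload stages the data through a shadow buffer with
// one 3-byte VkBufferCopy per texel: srcOffset = texelIndex * 3 + offset, dstOffset = texelIndex * 4, leaving
// the alpha byte unwritten. A single vkCmdCopyBufferToImage then uploads the shadow buffer to the image.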
void CommandBufferImpl::copyBufferToTextureViewInternal(BufferPtr buff, PtrSize offset, PtrSize range,
														TextureViewPtr texView)
{
	commandCommon();

	const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
	const TextureImpl& tex = view.getTextureImpl();
	ANKI_ASSERT(tex.usageValid(TextureUsageBit::TRANSFER_DESTINATION));
	ANKI_ASSERT(tex.isSubresourceGoodForCopyFromBuffer(view.getSubresource()));
	const VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

	const Bool is3D = tex.getTextureType() == TextureType::_3D;
	const VkImageAspectFlags aspect = convertImageAspect(view.getSubresource().m_depthStencilAspect);

	const TextureSurfaceInfo surf(view.getSubresource().m_firstMipmap, view.getSubresource().m_firstFace, 0,
								  view.getSubresource().m_firstLayer);
	const TextureVolumeInfo vol(view.getSubresource().m_firstMipmap);

	// Compute the sizes of the mip
	const U32 width = tex.getWidth() >> surf.m_level;
	const U32 height = tex.getHeight() >> surf.m_level;
	ANKI_ASSERT(width && height);
	const U32 depth = (is3D) ? (tex.getDepth() >> surf.m_level) : 1u;

	if(!tex.m_workarounds)
	{
		if(!is3D)
		{
			ANKI_ASSERT(range == computeSurfaceSize(width, height, tex.getFormat()));
		}
		else
		{
			ANKI_ASSERT(range == computeVolumeSize(width, height, depth, tex.getFormat()));
		}

		// Copy
		VkBufferImageCopy region;
		region.imageSubresource.aspectMask = aspect;
		region.imageSubresource.baseArrayLayer = (is3D) ? tex.computeVkArrayLayer(vol) : tex.computeVkArrayLayer(surf);
		region.imageSubresource.layerCount = 1;
		region.imageSubresource.mipLevel = surf.m_level;
		region.imageOffset = {0, 0, 0};
		region.imageExtent.width = width;
		region.imageExtent.height = height;
		region.imageExtent.depth = depth;
		region.bufferOffset = offset;
		region.bufferImageHeight = 0;
		region.bufferRowLength = 0;

		ANKI_CMD(vkCmdCopyBufferToImage(m_handle, static_cast<const BufferImpl&>(*buff).getHandle(), tex.m_imageHandle,
										layout, 1, &region),
				 ANY_OTHER_COMMAND);
	}
	else if(!!(tex.m_workarounds & TextureImplWorkaround::R8G8B8_TO_R8G8B8A8))
	{
		// Create a new shadow buffer
		const PtrSize shadowSize = (is3D) ? computeVolumeSize(width, height, depth, Format::R8G8B8A8_UNORM)
										  : computeSurfaceSize(width, height, Format::R8G8B8A8_UNORM);
		BufferPtr shadow = getManager().newBuffer(
			BufferInitInfo(shadowSize, BufferUsageBit::ALL_TRANSFER, BufferMapAccessBit::NONE, "Workaround"));
		const VkBuffer shadowHandle = static_cast<const BufferImpl&>(*shadow).getHandle();
		m_microCmdb->pushObjectRef(shadow);

		// Copy to the shadow buffer in batches. If the number of pixels is high and we do a single vkCmdCopyBuffer
		// we will need many regions. That allocation would be huge, so do the copies in batches
		const U32 regionCount = width * height * depth;
		const U32 REGIONS_PER_CMD_COPY_BUFFER = 32;
		const U32 cmdCopyBufferCount = (regionCount + REGIONS_PER_CMD_COPY_BUFFER - 1) / REGIONS_PER_CMD_COPY_BUFFER;

		for(U32 cmdCopyBuffer = 0; cmdCopyBuffer < cmdCopyBufferCount; ++cmdCopyBuffer)
		{
			const U32 beginRegion = cmdCopyBuffer * REGIONS_PER_CMD_COPY_BUFFER;
			const U32 endRegion = min(regionCount, (cmdCopyBuffer + 1) * REGIONS_PER_CMD_COPY_BUFFER);
			ANKI_ASSERT(beginRegion < regionCount);
			ANKI_ASSERT(endRegion <= regionCount);

			const U32 crntRegionCount = endRegion - beginRegion;
			DynamicArrayAuto<VkBufferCopy> regions(m_alloc);
			regions.create(crntRegionCount);

			// Populate regions
			U32 count = 0;
			for(U32 regionIdx = beginRegion; regionIdx < endRegion; ++regionIdx)
			{
				U32 x, y, d;
				unflatten3dArrayIndex(width, height, depth, regionIdx, x, y, d);

				VkBufferCopy& c = regions[count++];
				if(is3D)
				{
					c.srcOffset = (d * height * width + y * width + x) * 3 + offset;
					c.dstOffset = (d * height * width + y * width + x) * 4 + 0;
				}
				else
				{
					c.srcOffset = (y * width + x) * 3 + offset;
					c.dstOffset = (y * width + x) * 4 + 0;
				}
				c.size = 3;
			}

			// Do the copy to the shadow buffer
			ANKI_CMD(vkCmdCopyBuffer(m_handle, static_cast<const BufferImpl&>(*buff).getHandle(), shadowHandle,
									 regions.getSize(), &regions[0]),
					 ANY_OTHER_COMMAND);
		}

		// Set barrier
		setBufferBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
						 VK_ACCESS_TRANSFER_READ_BIT, 0, shadowSize, shadowHandle);

		// Do the copy to the image
		VkBufferImageCopy region;
		region.imageSubresource.aspectMask = aspect;
		region.imageSubresource.baseArrayLayer = (is3D) ? tex.computeVkArrayLayer(vol) : tex.computeVkArrayLayer(surf);
		region.imageSubresource.layerCount = 1;
		region.imageSubresource.mipLevel = surf.m_level;
		region.imageOffset = {0, 0, 0};
		region.imageExtent.width = width;
		region.imageExtent.height = height;
		region.imageExtent.depth = depth;
		region.bufferOffset = 0;
		region.bufferImageHeight = 0;
		region.bufferRowLength = 0;

		ANKI_CMD(vkCmdCopyBufferToImage(m_handle, shadowHandle, tex.m_imageHandle, layout, 1, &region),
				 ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_ASSERT(0);
	}

	m_microCmdb->pushObjectRef(texView);
	m_microCmdb->pushObjectRef(buff);
}
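// Note (added for clarity): rebindDynamicState() re-pushes every piece of Vulkan dynamic state this backend
// tracks (viewport, scissor, stencil compare/write/reference masks). It is called after executing second-level
// command buffers, where the bound dynamic state is considered undefined, so the cached values cannot be trusted
// to still be bound.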
void CommandBufferImpl::rebindDynamicState()
{
	m_viewportDirty = true;
	m_lastViewport = {};
	m_scissorDirty = true;
	m_lastScissor = {};

	// Rebind the stencil compare mask
	if(m_stencilCompareMasks[0] == m_stencilCompareMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
											m_stencilCompareMasks[0]),
				 ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilCompareMasks[0]),
				 ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[1]),
				 ANY_OTHER_COMMAND);
	}

	// Rebind the stencil write mask
	if(m_stencilWriteMasks[0] == m_stencilWriteMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
										  m_stencilWriteMasks[0]),
				 ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilWriteMasks[0]),
				 ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[1]),
				 ANY_OTHER_COMMAND);
	}

	// Rebind the stencil reference
	if(m_stencilReferenceMasks[0] == m_stencilReferenceMasks[1])
	{
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT,
										  m_stencilReferenceMasks[0]),
				 ANY_OTHER_COMMAND);
	}
	else
	{
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilReferenceMasks[0]),
				 ANY_OTHER_COMMAND);
		ANKI_CMD(vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[1]),
				 ANY_OTHER_COMMAND);
	}
}
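// Note (added for clarity): the scratch buffer allocated below only needs to live until the GPU finishes the
// build. Pushing it onto m_microCmdb keeps a reference for exactly that long, the same mechanism used for
// framebuffers and texture views elsewhere in this file.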
void CommandBufferImpl::buildAccelerationStructureInternal(AccelerationStructurePtr& as)
{
	commandCommon();

	// Get objects
	const AccelerationStructureImpl& asImpl = static_cast<AccelerationStructureImpl&>(*as);

	// Create the scratch buffer
	BufferInitInfo bufferInit;
	bufferInit.m_usage = PrivateBufferUsageBit::ACCELERATION_STRUCTURE_BUILD_SCRATCH;
	bufferInit.m_size = asImpl.getBuildScratchBufferSize();
	BufferPtr scratchBuff = getManager().newBuffer(bufferInit);

	// Create the build info
	VkAccelerationStructureBuildGeometryInfoKHR buildInfo;
	VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
	asImpl.generateBuildInfo(scratchBuff->getGpuAddress(), buildInfo, rangeInfo);

	// Do the command
	Array<const VkAccelerationStructureBuildRangeInfoKHR*, 1> pRangeInfos = {&rangeInfo};
	ANKI_CMD(vkCmdBuildAccelerationStructuresKHR(m_handle, 1, &buildInfo, &pRangeInfos[0]), ANY_OTHER_COMMAND);

	// Push refs
	m_microCmdb->pushObjectRef(as);
	m_microCmdb->pushObjectRef(scratchBuff);
}

} // end namespace anki