CommandBufferImpl.cpp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Gr/Vulkan/CommandBufferImpl.h>
  6. #include <AnKi/Gr/GrManager.h>
  7. #include <AnKi/Gr/Vulkan/GrManagerImpl.h>
  8. #include <AnKi/Gr/Framebuffer.h>
  9. #include <AnKi/Gr/Vulkan/GrUpscalerImpl.h>
  10. #include <AnKi/Gr/Vulkan/AccelerationStructureImpl.h>
  11. #include <AnKi/Gr/Vulkan/FramebufferImpl.h>
  12. #if ANKI_DLSS
  13. # include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx.h>
  14. # include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_helpers.h>
  15. # include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_vk.h>
  16. # include <ThirdParty/DlssSdk/sdk/include/nvsdk_ngx_helpers_vk.h>
  17. #endif
  18. #include <algorithm>
  19. namespace anki {
  20. CommandBufferImpl::~CommandBufferImpl()
  21. {
  22. if(m_empty)
  23. {
  24. ANKI_VK_LOGW("Command buffer was empty");
  25. }
  26. if(!m_finalized)
  27. {
  28. ANKI_VK_LOGW("Command buffer was not flushed");
  29. }
  30. }
  31. Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
  32. {
  33. m_tid = Thread::getCurrentThreadId();
  34. m_flags = init.m_flags;
  35. ANKI_CHECK(getGrManagerImpl().getCommandBufferFactory().newCommandBuffer(m_tid, m_flags, m_microCmdb));
  36. m_handle = m_microCmdb->getHandle();
  37. m_pool = &m_microCmdb->getFastMemoryPool();
  38. // Store some of the init info for later
  39. if(!!(m_flags & CommandBufferFlag::kSecondLevel))
  40. {
  41. m_activeFb = init.m_framebuffer;
  42. m_colorAttachmentUsages = init.m_colorAttachmentUsages;
  43. m_depthStencilAttachmentUsage = init.m_depthStencilAttachmentUsage;
  44. m_state.beginRenderPass(static_cast<FramebufferImpl*>(m_activeFb));
  45. m_microCmdb->pushObjectRef(m_activeFb);
  46. }
  47. for(DescriptorSetState& state : m_dsetState)
  48. {
  49. state.init(m_pool);
  50. }
  51. m_state.setVrsCapable(getGrManagerImpl().getDeviceCapabilities().m_vrs);
  52. return Error::kNone;
  53. }
  54. void CommandBufferImpl::beginRecording()
  55. {
  56. // Do the begin
  57. VkCommandBufferInheritanceInfo inheritance = {};
  58. inheritance.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
  59. VkCommandBufferBeginInfo begin = {};
  60. begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  61. begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  62. begin.pInheritanceInfo = &inheritance;
  63. if(!!(m_flags & CommandBufferFlag::kSecondLevel))
  64. {
  65. FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);
  66. // Calc the layouts
  67. Array<VkImageLayout, kMaxColorRenderTargets> colAttLayouts;
  68. for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
  69. {
  70. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
  71. colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
  72. }
  73. VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
  74. if(impl.hasDepthStencil())
  75. {
  76. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
  77. dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
  78. }
  79. VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
  80. if(impl.hasSri())
  81. {
  82. // Technically it's possible for SRI to be in other layout. Don't bother though
  83. sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
  84. }
  85. inheritance.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
  86. inheritance.subpass = 0;
  87. inheritance.framebuffer = impl.getFramebufferHandle();
  88. begin.flags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
  89. }
  90. vkBeginCommandBuffer(m_handle, &begin);
  91. // Stats
  92. if(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::kKHR_pipeline_executable_properties))
  93. {
  94. m_state.setEnablePipelineStatistics(true);
  95. }
  96. }
  97. void CommandBufferImpl::beginRenderPassInternal(Framebuffer* fb, const Array<TextureUsageBit, kMaxColorRenderTargets>& colorAttachmentUsages,
  98. TextureUsageBit depthStencilAttachmentUsage, U32 minx, U32 miny, U32 width, U32 height)
  99. {
  100. commandCommon();
  101. ANKI_ASSERT(!insideRenderPass());
  102. m_rpCommandCount = 0;
  103. m_activeFb = fb;
  104. FramebufferImpl& fbimpl = static_cast<FramebufferImpl&>(*fb);
  105. U32 fbWidth, fbHeight;
  106. fbimpl.getAttachmentsSize(fbWidth, fbHeight);
  107. m_fbSize[0] = fbWidth;
  108. m_fbSize[1] = fbHeight;
  109. ANKI_ASSERT(minx < fbWidth && miny < fbHeight);
  110. const U32 maxx = min<U32>(minx + width, fbWidth);
  111. const U32 maxy = min<U32>(miny + height, fbHeight);
  112. width = maxx - minx;
  113. height = maxy - miny;
  114. ANKI_ASSERT(minx + width <= fbWidth && miny + height <= fbHeight);
  115. m_renderArea[0] = minx;
  116. m_renderArea[1] = miny;
  117. m_renderArea[2] = width;
  118. m_renderArea[3] = height;
  119. m_colorAttachmentUsages = colorAttachmentUsages;
  120. m_depthStencilAttachmentUsage = depthStencilAttachmentUsage;
  121. m_microCmdb->pushObjectRef(fb);
  122. m_subpassContents = VK_SUBPASS_CONTENTS_MAX_ENUM;
  123. // Re-set the viewport and scissor because sometimes they are set clamped
  124. m_viewportDirty = true;
  125. m_scissorDirty = true;
  126. }
  127. void CommandBufferImpl::beginRenderPassInternal()
  128. {
  129. FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);
  130. m_state.beginRenderPass(&impl);
  131. VkRenderPassBeginInfo bi = {};
  132. bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
  133. bi.clearValueCount = impl.getAttachmentCount();
  134. bi.pClearValues = impl.getClearValues();
  135. bi.framebuffer = impl.getFramebufferHandle();
  136. // Calc the layouts
  137. Array<VkImageLayout, kMaxColorRenderTargets> colAttLayouts;
  138. for(U i = 0; i < impl.getColorAttachmentCount(); ++i)
  139. {
  140. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getColorAttachment(i));
  141. colAttLayouts[i] = view.getTextureImpl().computeLayout(m_colorAttachmentUsages[i], 0);
  142. }
  143. VkImageLayout dsAttLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
  144. if(impl.hasDepthStencil())
  145. {
  146. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*impl.getDepthStencilAttachment());
  147. dsAttLayout = view.getTextureImpl().computeLayout(m_depthStencilAttachmentUsage, 0);
  148. }
  149. VkImageLayout sriAttachmentLayout = VK_IMAGE_LAYOUT_MAX_ENUM;
  150. if(impl.hasSri())
  151. {
  152. // Technically it's possible for SRI to be in other layout. Don't bother though
  153. sriAttachmentLayout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
  154. }
  155. bi.renderPass = impl.getRenderPassHandle(colAttLayouts, dsAttLayout, sriAttachmentLayout);
  156. const Bool flipvp = flipViewport();
  157. bi.renderArea.offset.x = m_renderArea[0];
  158. if(flipvp)
  159. {
  160. ANKI_ASSERT(m_renderArea[3] <= m_fbSize[1]);
  161. }
  162. bi.renderArea.offset.y = (flipvp) ? m_fbSize[1] - (m_renderArea[1] + m_renderArea[3]) : m_renderArea[1];
  163. bi.renderArea.extent.width = m_renderArea[2];
  164. bi.renderArea.extent.height = m_renderArea[3];
  165. getGrManagerImpl().beginMarker(m_handle, impl.getName(), Vec3(0.0f, 1.0f, 0.0f));
  166. #if !ANKI_PLATFORM_MOBILE
  167. // nVidia SRI cache workaround
  168. if(impl.hasSri())
  169. {
  170. VkMemoryBarrier memBarrier = {};
  171. memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
  172. memBarrier.dstAccessMask = VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;
  173. const VkPipelineStageFlags srcStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
  174. const VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
  175. vkCmdPipelineBarrier(m_handle, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr);
  176. }
  177. #endif
  178. VkSubpassBeginInfo subpassBeginInfo = {};
  179. subpassBeginInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO;
  180. subpassBeginInfo.contents = m_subpassContents;
  181. vkCmdBeginRenderPass2KHR(m_handle, &bi, &subpassBeginInfo);
  182. m_renderedToDefaultFb = m_renderedToDefaultFb || impl.hasPresentableTexture();
  183. }
  184. void CommandBufferImpl::endRenderPassInternal()
  185. {
  186. commandCommon();
  187. ANKI_ASSERT(insideRenderPass());
  188. if(m_rpCommandCount == 0)
  189. {
  190. // Empty pass
  191. m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
  192. beginRenderPassInternal();
  193. }
  194. VkSubpassEndInfo subpassEndInfo = {};
  195. subpassEndInfo.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO;
  196. vkCmdEndRenderPass2KHR(m_handle, &subpassEndInfo);
  197. getGrManagerImpl().endMarker(m_handle);
  198. m_activeFb = nullptr;
  199. m_state.endRenderPass();
  200. // After pushing second level command buffers the state is undefined. Reset the tracker and rebind the dynamic state
  201. if(m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS)
  202. {
  203. m_state.reset();
  204. rebindDynamicState();
  205. }
  206. }
  207. void CommandBufferImpl::endRecording()
  208. {
  209. commandCommon();
  210. ANKI_ASSERT(!m_finalized);
  211. ANKI_ASSERT(!m_empty);
  212. ANKI_VK_CHECKF(vkEndCommandBuffer(m_handle));
  213. m_finalized = true;
  214. #if ANKI_EXTRA_CHECKS
  215. static Atomic<U32> messagePrintCount(0);
  216. constexpr U32 MAX_PRINT_COUNT = 10;
  217. CString message;
  218. if(!!(m_flags & CommandBufferFlag::kSmallBatch))
  219. {
  220. if(m_commandCount > kCommandBufferSmallBatchMaxCommands * 4)
  221. {
  222. message = "Command buffer has too many commands%s: %u";
  223. }
  224. }
  225. else
  226. {
  227. if(m_commandCount <= kCommandBufferSmallBatchMaxCommands / 4)
  228. {
  229. message = "Command buffer has too few commands%s: %u";
  230. }
  231. }
  232. if(!message.isEmpty())
  233. {
  234. const U32 count = messagePrintCount.fetchAdd(1) + 1;
  235. if(count < MAX_PRINT_COUNT)
  236. {
  237. ANKI_VK_LOGW(message.cstr(), "", m_commandCount);
  238. }
  239. else if(count == MAX_PRINT_COUNT)
  240. {
  241. ANKI_VK_LOGW(message.cstr(), " (will ignore further warnings)", m_commandCount);
  242. }
  243. }
  244. #endif
  245. }
  246. void CommandBufferImpl::generateMipmaps2dInternal(TextureView* texView)
  247. {
  248. commandCommon();
  249. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
  250. const TextureImpl& tex = view.getTextureImpl();
  251. ANKI_ASSERT(tex.getTextureType() != TextureType::k3D && "Not for 3D");
  252. ANKI_ASSERT(tex.isSubresourceGoodForMipmapGeneration(view.getSubresource()));
  253. const U32 blitCount = tex.getMipmapCount() - 1u;
  254. if(blitCount == 0)
  255. {
  256. // Nothing to be done, flush the previous commands though because you may batch (and sort) things you shouldn't
  257. return;
  258. }
  259. const DepthStencilAspectBit aspect = view.getSubresource().m_depthStencilAspect;
  260. const U32 face = view.getSubresource().m_firstFace;
  261. const U32 layer = view.getSubresource().m_firstLayer;
  262. for(U32 i = 0; i < blitCount; ++i)
  263. {
  264. // Transition source
  265. // OPT: Combine the 2 barriers
  266. if(i > 0)
  267. {
  268. VkImageSubresourceRange range;
  269. tex.computeVkImageSubresourceRange(TextureSubresourceInfo(TextureSurfaceInfo(i, 0, face, layer), aspect), range);
  270. setImageBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  271. VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle,
  272. range);
  273. }
  274. // Transition destination
  275. {
  276. VkImageSubresourceRange range;
  277. tex.computeVkImageSubresourceRange(TextureSubresourceInfo(TextureSurfaceInfo(i + 1, 0, face, layer), aspect), range);
  278. setImageBarrier(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_TRANSFER_BIT,
  279. VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, tex.m_imageHandle, range);
  280. }
  281. // Setup the blit struct
  282. I32 srcWidth = tex.getWidth() >> i;
  283. I32 srcHeight = tex.getHeight() >> i;
  284. I32 dstWidth = tex.getWidth() >> (i + 1);
  285. I32 dstHeight = tex.getHeight() >> (i + 1);
  286. ANKI_ASSERT(srcWidth > 0 && srcHeight > 0 && dstWidth > 0 && dstHeight > 0);
  287. U32 vkLayer = 0;
  288. switch(tex.getTextureType())
  289. {
  290. case TextureType::k2D:
  291. case TextureType::k2DArray:
  292. break;
  293. case TextureType::kCube:
  294. vkLayer = face;
  295. break;
  296. case TextureType::kCubeArray:
  297. vkLayer = layer * 6 + face;
  298. break;
  299. default:
  300. ANKI_ASSERT(0);
  301. break;
  302. }
  303. VkImageBlit blit;
  304. blit.srcSubresource.aspectMask = convertImageAspect(aspect);
  305. blit.srcSubresource.baseArrayLayer = vkLayer;
  306. blit.srcSubresource.layerCount = 1;
  307. blit.srcSubresource.mipLevel = i;
  308. blit.srcOffsets[0] = {0, 0, 0};
  309. blit.srcOffsets[1] = {srcWidth, srcHeight, 1};
  310. blit.dstSubresource.aspectMask = convertImageAspect(aspect);
  311. blit.dstSubresource.baseArrayLayer = vkLayer;
  312. blit.dstSubresource.layerCount = 1;
  313. blit.dstSubresource.mipLevel = i + 1;
  314. blit.dstOffsets[0] = {0, 0, 0};
  315. blit.dstOffsets[1] = {dstWidth, dstHeight, 1};
  316. vkCmdBlitImage(m_handle, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
  317. &blit, (!!aspect) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR);
  318. }
  319. }
  320. void CommandBufferImpl::copyBufferToTextureViewInternal(Buffer* buff, PtrSize offset, [[maybe_unused]] PtrSize range, TextureView* texView)
  321. {
  322. commandCommon();
  323. const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
  324. const TextureImpl& tex = view.getTextureImpl();
  325. ANKI_ASSERT(tex.usageValid(TextureUsageBit::kTransferDestination));
  326. ANKI_ASSERT(tex.isSubresourceGoodForCopyFromBuffer(view.getSubresource()));
  327. const VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  328. const Bool is3D = tex.getTextureType() == TextureType::k3D;
  329. const VkImageAspectFlags aspect = convertImageAspect(view.getSubresource().m_depthStencilAspect);
  330. const TextureSurfaceInfo surf(view.getSubresource().m_firstMipmap, view.getSubresource().m_firstFace, 0, view.getSubresource().m_firstLayer);
  331. const TextureVolumeInfo vol(view.getSubresource().m_firstMipmap);
  332. // Compute the sizes of the mip
  333. const U32 width = tex.getWidth() >> surf.m_level;
  334. const U32 height = tex.getHeight() >> surf.m_level;
  335. ANKI_ASSERT(width && height);
  336. const U32 depth = (is3D) ? (tex.getDepth() >> surf.m_level) : 1u;
  337. if(!is3D)
  338. {
  339. ANKI_ASSERT(range == computeSurfaceSize(width, height, tex.getFormat()));
  340. }
  341. else
  342. {
  343. ANKI_ASSERT(range == computeVolumeSize(width, height, depth, tex.getFormat()));
  344. }
  345. // Copy
  346. VkBufferImageCopy region;
  347. region.imageSubresource.aspectMask = aspect;
  348. region.imageSubresource.baseArrayLayer = (is3D) ? tex.computeVkArrayLayer(vol) : tex.computeVkArrayLayer(surf);
  349. region.imageSubresource.layerCount = 1;
  350. region.imageSubresource.mipLevel = surf.m_level;
  351. region.imageOffset = {0, 0, 0};
  352. region.imageExtent.width = width;
  353. region.imageExtent.height = height;
  354. region.imageExtent.depth = depth;
  355. region.bufferOffset = offset;
  356. region.bufferImageHeight = 0;
  357. region.bufferRowLength = 0;
  358. vkCmdCopyBufferToImage(m_handle, static_cast<const BufferImpl&>(*buff).getHandle(), tex.m_imageHandle, layout, 1, &region);
  359. }
  360. void CommandBufferImpl::rebindDynamicState()
  361. {
  362. m_viewportDirty = true;
  363. m_lastViewport = {};
  364. m_scissorDirty = true;
  365. m_lastScissor = {};
  366. m_vrsRateDirty = true;
  367. m_vrsRate = VrsRate::k1x1;
  368. // Rebind the stencil compare mask
  369. if(m_stencilCompareMasks[0] == m_stencilCompareMasks[1])
  370. {
  371. vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[0]);
  372. }
  373. else
  374. {
  375. vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilCompareMasks[0]);
  376. vkCmdSetStencilCompareMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilCompareMasks[1]);
  377. }
  378. // Rebind the stencil write mask
  379. if(m_stencilWriteMasks[0] == m_stencilWriteMasks[1])
  380. {
  381. vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[0]);
  382. }
  383. else
  384. {
  385. vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilWriteMasks[0]);
  386. vkCmdSetStencilWriteMask(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilWriteMasks[1]);
  387. }
  388. // Rebind the stencil reference
  389. if(m_stencilReferenceMasks[0] == m_stencilReferenceMasks[1])
  390. {
  391. vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[0]);
  392. }
  393. else
  394. {
  395. vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_FRONT_BIT, m_stencilReferenceMasks[0]);
  396. vkCmdSetStencilReference(m_handle, VK_STENCIL_FACE_BACK_BIT, m_stencilReferenceMasks[1]);
  397. }
  398. }
  399. void CommandBufferImpl::buildAccelerationStructureInternal(AccelerationStructure* as)
  400. {
  401. commandCommon();
  402. // Get objects
  403. const AccelerationStructureImpl& asImpl = static_cast<AccelerationStructureImpl&>(*as);
  404. // Create the scrach buffer
  405. BufferInitInfo bufferInit;
  406. bufferInit.m_usage = PrivateBufferUsageBit::kAccelerationStructureBuildScratch;
  407. bufferInit.m_size = asImpl.getBuildScratchBufferSize();
  408. BufferPtr scratchBuff = getGrManagerImpl().newBuffer(bufferInit);
  409. // Create the build info
  410. VkAccelerationStructureBuildGeometryInfoKHR buildInfo;
  411. VkAccelerationStructureBuildRangeInfoKHR rangeInfo;
  412. asImpl.generateBuildInfo(scratchBuff->getGpuAddress(), buildInfo, rangeInfo);
  413. // Do the command
  414. Array<const VkAccelerationStructureBuildRangeInfoKHR*, 1> pRangeInfos = {&rangeInfo};
  415. vkCmdBuildAccelerationStructuresKHR(m_handle, 1, &buildInfo, &pRangeInfos[0]);
  416. // Push refs
  417. m_microCmdb->pushObjectRef(as);
  418. }
  #if ANKI_DLSS
  /// Utility function that wraps a texture view into the resource structure that NGX works with.
  /// @param[in] view The texture view to generate the NVSDK_NGX_Resource_VK from.
  /// @return The NGX resource. It is flagged as UAV when the texture was created with VK_IMAGE_USAGE_STORAGE_BIT.
  static NVSDK_NGX_Resource_VK getNGXResourceFromAnkiTexture(const TextureViewImpl& view)
  {
    const TextureImpl& tex = view.getTextureImpl();

    const Bool isUAV = !!(tex.m_vkUsageFlags & VK_IMAGE_USAGE_STORAGE_BIT);

    // TODO Not sure if I should pass the width,height of the image or the view
    return NVSDK_NGX_Create_ImageView_Resource_VK(view.getHandle(), tex.m_imageHandle, view.getVkImageSubresourceRange(), tex.m_vkFormat,
                                                  tex.getWidth(), tex.getHeight(), isUAV);
  }
  #endif
  434. void CommandBufferImpl::upscaleInternal(GrUpscaler* upscaler, TextureView* inColor, TextureView* outUpscaledColor, TextureView* motionVectors,
  435. TextureView* depth, TextureView* exposure, const Bool resetAccumulation, const Vec2& jitterOffset,
  436. const Vec2& motionVectorsScale)
  437. {
  438. #if ANKI_DLSS
  439. ANKI_ASSERT(getGrManagerImpl().getDeviceCapabilities().m_dlss);
  440. ANKI_ASSERT(upscaler->getUpscalerType() == GrUpscalerType::kDlss2);
  441. commandCommon();
  442. const GrUpscalerImpl& upscalerImpl = static_cast<const GrUpscalerImpl&>(*upscaler);
  443. const TextureViewImpl& srcViewImpl = static_cast<const TextureViewImpl&>(*inColor);
  444. const TextureViewImpl& dstViewImpl = static_cast<const TextureViewImpl&>(*outUpscaledColor);
  445. const TextureViewImpl& mvViewImpl = static_cast<const TextureViewImpl&>(*motionVectors);
  446. const TextureViewImpl& depthViewImpl = static_cast<const TextureViewImpl&>(*depth);
  447. const TextureViewImpl& exposureViewImpl = static_cast<const TextureViewImpl&>(*exposure);
  448. NVSDK_NGX_Resource_VK srcResVk = getNGXResourceFromAnkiTexture(srcViewImpl);
  449. NVSDK_NGX_Resource_VK dstResVk = getNGXResourceFromAnkiTexture(dstViewImpl);
  450. NVSDK_NGX_Resource_VK mvResVk = getNGXResourceFromAnkiTexture(mvViewImpl);
  451. NVSDK_NGX_Resource_VK depthResVk = getNGXResourceFromAnkiTexture(depthViewImpl);
  452. NVSDK_NGX_Resource_VK exposureResVk = getNGXResourceFromAnkiTexture(exposureViewImpl);
  453. const U32 mipLevel = srcViewImpl.getSubresource().m_firstMipmap;
  454. const NVSDK_NGX_Coordinates renderingOffset = {0, 0};
  455. const NVSDK_NGX_Dimensions renderingSize = {srcViewImpl.getTextureImpl().getWidth() >> mipLevel,
  456. srcViewImpl.getTextureImpl().getHeight() >> mipLevel};
  457. NVSDK_NGX_VK_DLSS_Eval_Params vkDlssEvalParams;
  458. memset(&vkDlssEvalParams, 0, sizeof(vkDlssEvalParams));
  459. vkDlssEvalParams.Feature.pInColor = &srcResVk;
  460. vkDlssEvalParams.Feature.pInOutput = &dstResVk;
  461. vkDlssEvalParams.pInDepth = &depthResVk;
  462. vkDlssEvalParams.pInMotionVectors = &mvResVk;
  463. vkDlssEvalParams.pInExposureTexture = &exposureResVk;
  464. vkDlssEvalParams.InJitterOffsetX = jitterOffset.x();
  465. vkDlssEvalParams.InJitterOffsetY = jitterOffset.y();
  466. vkDlssEvalParams.InReset = resetAccumulation;
  467. vkDlssEvalParams.InMVScaleX = motionVectorsScale.x();
  468. vkDlssEvalParams.InMVScaleY = motionVectorsScale.y();
  469. vkDlssEvalParams.InColorSubrectBase = renderingOffset;
  470. vkDlssEvalParams.InDepthSubrectBase = renderingOffset;
  471. vkDlssEvalParams.InTranslucencySubrectBase = renderingOffset;
  472. vkDlssEvalParams.InMVSubrectBase = renderingOffset;
  473. vkDlssEvalParams.InRenderSubrectDimensions = renderingSize;
  474. getGrManagerImpl().beginMarker(m_handle, "DLSS");
  475. NVSDK_NGX_Parameter* dlssParameters = &upscalerImpl.getParameters();
  476. NVSDK_NGX_Handle* dlssFeature = &upscalerImpl.getFeature();
  477. const NVSDK_NGX_Result result = NGX_VULKAN_EVALUATE_DLSS_EXT(m_handle, dlssFeature, dlssParameters, &vkDlssEvalParams);
  478. getGrManagerImpl().endMarker(m_handle);
  479. if(NVSDK_NGX_FAILED(result))
  480. {
  481. ANKI_VK_LOGF("Failed to NVSDK_NGX_VULKAN_EvaluateFeature for DLSS, code = 0x%08x, info: %ls", result, GetNGXResultAsString(result));
  482. }
  483. #else
  484. ANKI_ASSERT(0 && "Not supported");
  485. (void)upscaler;
  486. (void)inColor;
  487. (void)outUpscaledColor;
  488. (void)motionVectors;
  489. (void)depth;
  490. (void)exposure;
  491. (void)resetAccumulation;
  492. (void)jitterOffset;
  493. (void)motionVectorsScale;
  494. #endif
  495. }
  496. void CommandBufferImpl::setPipelineBarrierInternal(ConstWeakArray<TextureBarrierInfo> textures, ConstWeakArray<BufferBarrierInfo> buffers,
  497. ConstWeakArray<AccelerationStructureBarrierInfo> accelerationStructures)
  498. {
  499. commandCommon();
  500. DynamicArray<VkImageMemoryBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> imageBarriers(m_pool);
  501. DynamicArray<VkBufferMemoryBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> bufferBarriers(m_pool);
  502. DynamicArray<VkMemoryBarrier, MemoryPoolPtrWrapper<StackMemoryPool>> genericBarriers(m_pool);
  503. VkPipelineStageFlags srcStageMask = 0;
  504. VkPipelineStageFlags dstStageMask = 0;
  505. for(const TextureBarrierInfo& barrier : textures)
  506. {
  507. ANKI_ASSERT(barrier.m_texture);
  508. const TextureImpl& impl = static_cast<const TextureImpl&>(*barrier.m_texture);
  509. const TextureUsageBit nextUsage = barrier.m_nextUsage;
  510. const TextureUsageBit prevUsage = barrier.m_previousUsage;
  511. TextureSubresourceInfo subresource = barrier.m_subresource;
  512. ANKI_ASSERT(impl.usageValid(prevUsage));
  513. ANKI_ASSERT(impl.usageValid(nextUsage));
  514. ANKI_ASSERT(((nextUsage & TextureUsageBit::kGenerateMipmaps) == TextureUsageBit::kGenerateMipmaps
  515. || (nextUsage & TextureUsageBit::kGenerateMipmaps) == TextureUsageBit::kNone)
  516. && "GENERATE_MIPMAPS should be alone");
  517. ANKI_ASSERT(impl.isSubresourceValid(subresource));
  518. if(subresource.m_firstMipmap > 0 && nextUsage == TextureUsageBit::kGenerateMipmaps) [[unlikely]]
  519. {
  520. // This transition happens inside CommandBufferImpl::generateMipmapsX. No need to do something
  521. continue;
  522. }
  523. if(nextUsage == TextureUsageBit::kGenerateMipmaps) [[unlikely]]
  524. {
  525. // The transition of the non zero mip levels happens inside CommandBufferImpl::generateMipmapsX so limit the
  526. // subresource
  527. ANKI_ASSERT(subresource.m_firstMipmap == 0 && subresource.m_mipmapCount == 1);
  528. }
  529. VkImageMemoryBarrier& inf = *imageBarriers.emplaceBack();
  530. inf = {};
  531. inf.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  532. inf.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  533. inf.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  534. inf.image = impl.m_imageHandle;
  535. impl.computeVkImageSubresourceRange(subresource, inf.subresourceRange);
  536. VkPipelineStageFlags srcStage;
  537. VkPipelineStageFlags dstStage;
  538. impl.computeBarrierInfo(prevUsage, nextUsage, inf.subresourceRange.baseMipLevel, srcStage, inf.srcAccessMask, dstStage, inf.dstAccessMask);
  539. inf.oldLayout = impl.computeLayout(prevUsage, inf.subresourceRange.baseMipLevel);
  540. inf.newLayout = impl.computeLayout(nextUsage, inf.subresourceRange.baseMipLevel);
  541. srcStageMask |= srcStage;
  542. dstStageMask |= dstStage;
  543. }
  544. for(const BufferBarrierInfo& barrier : buffers)
  545. {
  546. ANKI_ASSERT(barrier.m_buffer);
  547. const BufferImpl& impl = static_cast<const BufferImpl&>(*barrier.m_buffer);
  548. const BufferUsageBit prevUsage = barrier.m_previousUsage;
  549. const BufferUsageBit nextUsage = barrier.m_nextUsage;
  550. VkBufferMemoryBarrier& inf = *bufferBarriers.emplaceBack();
  551. inf = {};
  552. inf.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
  553. inf.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  554. inf.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  555. inf.buffer = impl.getHandle();
  556. ANKI_ASSERT(barrier.m_offset < impl.getSize());
  557. inf.offset = barrier.m_offset;
  558. if(barrier.m_size == kMaxPtrSize)
  559. {
  560. inf.size = VK_WHOLE_SIZE;
  561. }
  562. else
  563. {
  564. ANKI_ASSERT(barrier.m_size > 0);
  565. ANKI_ASSERT(barrier.m_offset + barrier.m_size <= impl.getSize());
  566. inf.size = barrier.m_size;
  567. }
  568. VkPipelineStageFlags srcStage;
  569. VkPipelineStageFlags dstStage;
  570. impl.computeBarrierInfo(prevUsage, nextUsage, srcStage, inf.srcAccessMask, dstStage, inf.dstAccessMask);
  571. srcStageMask |= srcStage;
  572. dstStageMask |= dstStage;
  573. }
  574. for(const AccelerationStructureBarrierInfo& barrier : accelerationStructures)
  575. {
  576. ANKI_ASSERT(barrier.m_as);
  577. VkMemoryBarrier& inf = *genericBarriers.emplaceBack();
  578. inf = {};
  579. inf.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
  580. VkPipelineStageFlags srcStage;
  581. VkPipelineStageFlags dstStage;
  582. AccelerationStructureImpl::computeBarrierInfo(barrier.m_previousUsage, barrier.m_nextUsage, srcStage, inf.srcAccessMask, dstStage,
  583. inf.dstAccessMask);
  584. srcStageMask |= srcStage;
  585. dstStageMask |= dstStage;
  586. m_microCmdb->pushObjectRef(barrier.m_as);
  587. }
  588. vkCmdPipelineBarrier(m_handle, srcStageMask, dstStageMask, 0, genericBarriers.getSize(),
  589. (genericBarriers.getSize()) ? &genericBarriers[0] : nullptr, bufferBarriers.getSize(),
  590. (bufferBarriers.getSize()) ? &bufferBarriers[0] : nullptr, imageBarriers.getSize(),
  591. (imageBarriers.getSize()) ? &imageBarriers[0] : nullptr);
  592. ANKI_TRACE_INC_COUNTER(VkBarrier, 1);
  593. }
  594. } // end namespace anki