CommandBufferImpl.inl.h

// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/CommandBufferImpl.h>
#include <AnKi/Gr/Vulkan/TextureImpl.h>
#include <AnKi/Gr/OcclusionQuery.h>
#include <AnKi/Gr/Vulkan/OcclusionQueryImpl.h>
#include <AnKi/Gr/TimestampQuery.h>
#include <AnKi/Gr/Vulkan/TimestampQueryImpl.h>
#include <AnKi/Util/Tracer.h>

namespace anki
{
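
// The three setters below mirror Vulkan's dynamic stencil state. Each caches the last value per face and only
// records the vkCmdSet* command when something actually changed, keeping the command stream small. A typical
// (hypothetical) call site would look like: setStencilCompareMask(FaceSelectionBit::FRONT, 0xFF).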
inline void CommandBufferImpl::setStencilCompareMask(FaceSelectionBit face, U32 mask)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;

    if(!!(face & FaceSelectionBit::FRONT) && m_stencilCompareMasks[0] != mask)
    {
        m_stencilCompareMasks[0] = mask;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilCompareMasks[1] != mask)
    {
        m_stencilCompareMasks[1] = mask;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, flags, mask), ANY_OTHER_COMMAND);
    }
}

inline void CommandBufferImpl::setStencilWriteMask(FaceSelectionBit face, U32 mask)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;

    if(!!(face & FaceSelectionBit::FRONT) && m_stencilWriteMasks[0] != mask)
    {
        m_stencilWriteMasks[0] = mask;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilWriteMasks[1] != mask)
    {
        m_stencilWriteMasks[1] = mask;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, flags, mask), ANY_OTHER_COMMAND);
    }
}
inline void CommandBufferImpl::setStencilReference(FaceSelectionBit face, U32 ref)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;

    if(!!(face & FaceSelectionBit::FRONT) && m_stencilReferenceMasks[0] != ref)
    {
        m_stencilReferenceMasks[0] = ref;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilReferenceMasks[1] != ref)
    {
        m_stencilReferenceMasks[1] = ref;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilReference(m_handle, flags, ref), ANY_OTHER_COMMAND);
    }
}
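
// Records a raw VkImageMemoryBarrier. With ANKI_BATCH_COMMANDS the barrier is appended to a growing array and
// flushed later as part of a single vkCmdPipelineBarrier; otherwise it is recorded immediately.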
inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
                                               VkImageLayout prevLayout, VkPipelineStageFlags dstStage,
                                               VkAccessFlags dstAccess, VkImageLayout newLayout, VkImage img,
                                               const VkImageSubresourceRange& range)
{
    ANKI_ASSERT(img);
    commandCommon();

    VkImageMemoryBarrier inf = {};
    inf.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    inf.srcAccessMask = srcAccess;
    inf.dstAccessMask = dstAccess;
    inf.oldLayout = prevLayout;
    inf.newLayout = newLayout;
    inf.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    inf.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    inf.image = img;
    inf.subresourceRange = range;

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    if(m_imgBarriers.getSize() <= m_imgBarrierCount)
    {
        m_imgBarriers.resize(m_alloc, max<U32>(2, m_imgBarrierCount * 2));
    }
    m_imgBarriers[m_imgBarrierCount++] = inf;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_CMD(vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &inf), ANY_OTHER_COMMAND);
    ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);
#endif
}
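
// The texture barrier helpers below translate TextureUsageBit transitions into the stage/access/layout triplets that
// setImageBarrier() needs. Mip levels other than 0 are special-cased for GENERATE_MIPMAPS, since generateMipmapsX()
// performs those transitions itself.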
inline void CommandBufferImpl::setTextureBarrierRange(TexturePtr tex, TextureUsageBit prevUsage,
                                                      TextureUsageBit nextUsage, const VkImageSubresourceRange& range)
{
    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    ANKI_ASSERT(impl.usageValid(prevUsage));
    ANKI_ASSERT(impl.usageValid(nextUsage));
    ANKI_ASSERT(((nextUsage & TextureUsageBit::GENERATE_MIPMAPS) == TextureUsageBit::GENERATE_MIPMAPS
                 || (nextUsage & TextureUsageBit::GENERATE_MIPMAPS) == TextureUsageBit::NONE)
                && "GENERATE_MIPMAPS should be alone");

    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkImageLayout oldLayout;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    VkImageLayout newLayout;
    impl.computeBarrierInfo(prevUsage, nextUsage, range.baseMipLevel, srcStage, srcAccess, dstStage, dstAccess);
    oldLayout = impl.computeLayout(prevUsage, range.baseMipLevel);
    newLayout = impl.computeLayout(nextUsage, range.baseMipLevel);

    setImageBarrier(srcStage, srcAccess, oldLayout, dstStage, dstAccess, newLayout, impl.m_imageHandle, range);

    m_microCmdb->pushObjectRef(tex);
}

inline void CommandBufferImpl::setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
                                                 const TextureSubresourceInfo& subresource_)
{
    TextureSubresourceInfo subresource = subresource_;
    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);

    // The transition of the non zero mip levels happens inside CommandBufferImpl::generateMipmapsX so limit the
    // subresource
    if(nextUsage == TextureUsageBit::GENERATE_MIPMAPS)
    {
        ANKI_ASSERT(impl.isSubresourceGoodForMipmapGeneration(subresource));
        subresource.m_firstMipmap = 0;
        subresource.m_mipmapCount = 1;
    }

    ANKI_ASSERT(tex->isSubresourceValid(subresource));

    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(subresource, range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}

inline void CommandBufferImpl::setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage,
                                                        TextureUsageBit nextUsage, const TextureSurfaceInfo& surf)
{
    if(ANKI_UNLIKELY(surf.m_level > 0 && nextUsage == TextureUsageBit::GENERATE_MIPMAPS))
    {
        // This transition happens inside CommandBufferImpl::generateMipmapsX. No need to do something
        return;
    }

    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(TextureSubresourceInfo(surf, impl.getDepthStencilAspect()), range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}

inline void CommandBufferImpl::setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage,
                                                       TextureUsageBit nextUsage, const TextureVolumeInfo& vol)
{
    if(vol.m_level > 0)
    {
        ANKI_ASSERT(!(nextUsage & TextureUsageBit::GENERATE_MIPMAPS)
                    && "This transition happens inside CommandBufferImpl::generateMipmaps");
    }

    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(TextureSubresourceInfo(vol, impl.getDepthStencilAspect()), range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}
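
// Buffer barriers follow the same pattern: the raw overload takes explicit Vulkan stage/access masks, while the
// BufferPtr overload derives them from BufferUsageBit through BufferImpl::computeBarrierInfo().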
inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
                                                VkPipelineStageFlags dstStage, VkAccessFlags dstAccess, PtrSize offset,
                                                PtrSize size, VkBuffer buff)
{
    ANKI_ASSERT(buff);
    commandCommon();

    VkBufferMemoryBarrier b = {};
    b.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    b.srcAccessMask = srcAccess;
    b.dstAccessMask = dstAccess;
    b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.buffer = buff;
    b.offset = offset;
    b.size = size;

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    if(m_buffBarriers.getSize() <= m_buffBarrierCount)
    {
        m_buffBarriers.resize(m_alloc, max<U32>(2, m_buffBarrierCount * 2));
    }
    m_buffBarriers[m_buffBarrierCount++] = b;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_CMD(vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 1, &b, 0, nullptr), ANY_OTHER_COMMAND);
    ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);
#endif
}

inline void CommandBufferImpl::setBufferBarrier(BufferPtr& buff, BufferUsageBit before, BufferUsageBit after,
                                                PtrSize offset, PtrSize size)
{
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);

    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    impl.computeBarrierInfo(before, after, srcStage, srcAccess, dstStage, dstAccess);

    setBufferBarrier(srcStage, srcAccess, dstStage, dstAccess, offset, size, impl.getHandle());

    m_microCmdb->pushObjectRef(buff);
}
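
// Acceleration structure transitions are recorded as a plain VkMemoryBarrier (a global memory dependency) whose
// access masks come from AccelerationStructureImpl::computeBarrierInfo(). Only the batched path is implemented; the
// immediate path is still a TODO.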
inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(AccelerationStructurePtr& as,
                                                                       AccelerationStructureUsageBit prevUsage,
                                                                       AccelerationStructureUsageBit nextUsage)
{
    commandCommon();

    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    AccelerationStructureImpl::computeBarrierInfo(prevUsage, nextUsage, srcStage, srcAccess, dstStage, dstAccess);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    VkMemoryBarrier memBarrier = {};
    memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    memBarrier.srcAccessMask = srcAccess;
    memBarrier.dstAccessMask = dstAccess;

    if(m_memBarriers.getSize() <= m_memBarrierCount)
    {
        m_memBarriers.resize(m_alloc, max<U32>(2, m_memBarrierCount * 2));
    }
    m_memBarriers[m_memBarrierCount++] = memBarrier;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_ASSERT(!"TODO");
#endif
}
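
// Draw commands. Each one records the primitive topology in the state tracker and runs drawcallCommon(), which binds
// the pipeline, descriptor sets, viewport and scissor lazily before the actual vkCmdDraw* is recorded.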
inline void CommandBufferImpl::drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first,
                                          U32 baseInstance)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    ANKI_CMD(vkCmdDraw(m_handle, count, instanceCount, first, baseInstance), ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawElements(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex,
                                            U32 baseVertex, U32 baseInstance)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    ANKI_CMD(vkCmdDrawIndexed(m_handle, count, instanceCount, firstIndex, baseVertex, baseInstance), ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
                                                  BufferPtr& buff)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::INDIRECT_DRAW));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(DrawArraysIndirectInfo) * drawCount) <= impl.getSize());

    ANKI_CMD(vkCmdDrawIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawArraysIndirectInfo)),
             ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
                                                    BufferPtr& buff)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::INDIRECT_DRAW));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(DrawElementsIndirectInfo) * drawCount) <= impl.getSize());

    ANKI_CMD(vkCmdDrawIndexedIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawElementsIndirectInfo)),
             ANY_OTHER_COMMAND);
}
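
// Compute dispatch. The compute pipeline itself is bound in bindShaderProgram(), so only the dirty descriptor sets
// get rebound here before vkCmdDispatch is recorded.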
inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
{
    ANKI_ASSERT(m_computeProg);
    ANKI_ASSERT(m_computeProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");

    commandCommon();

    getGrManagerImpl().beginMarker(m_handle, m_computeProg->getName());

    // Bind descriptors
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(m_computeProg->getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_COMPUTE,
                                                 m_computeProg->getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    ANKI_CMD(vkCmdDispatch(m_handle, groupCountX, groupCountY, groupCountZ), ANY_OTHER_COMMAND);

    getGrManagerImpl().endMarker(m_handle);
}
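
// Ray tracing dispatch. The shader binding table is assumed to contain equally sized records laid out back to back:
//   [1 ray-gen record][rayTypeCount miss records][hitGroupSbtRecordCount hit-group records]
// starting at sbtBuffer + sbtBufferOffset; callable records are unused. The regions passed to vkCmdTraceRaysKHR are
// derived from that layout.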
inline void CommandBufferImpl::traceRaysInternal(BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize32,
                                                 U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height,
                                                 U32 depth)
{
    const PtrSize sbtRecordSize = sbtRecordSize32;
    ANKI_ASSERT(hitGroupSbtRecordCount > 0);
    ANKI_ASSERT(width > 0 && height > 0 && depth > 0);
    ANKI_ASSERT(m_rtProg);
    const ShaderProgramImpl& sprog = static_cast<const ShaderProgramImpl&>(*m_rtProg);
    ANKI_ASSERT(sprog.getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");

    ANKI_ASSERT(rayTypeCount == sprog.getMissShaderCount() && "All the miss shaders should be in use");
    ANKI_ASSERT((hitGroupSbtRecordCount % rayTypeCount) == 0);
    const PtrSize sbtRecordCount = 1 + rayTypeCount + hitGroupSbtRecordCount;
    const PtrSize sbtBufferSize = sbtRecordCount * sbtRecordSize;
    (void)sbtBufferSize;
    ANKI_ASSERT(sbtBufferSize + sbtBufferOffset <= sbtBuffer->getSize());
    ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_sbtRecordAlignment, sbtBufferOffset));

    commandCommon();

    getGrManagerImpl().beginMarker(m_handle, m_rtProg->getName());

    // Bind descriptors
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(sprog.getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
                                                 sprog.getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    Array<VkStridedDeviceAddressRegionKHR, 4> regions;
    const U64 stbBufferAddress = sbtBuffer->getGpuAddress() + sbtBufferOffset;
    ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_sbtRecordAlignment, stbBufferAddress));

    // Rgen
    regions[0].deviceAddress = stbBufferAddress;
    regions[0].stride = sbtRecordSize;
    regions[0].size = sbtRecordSize;

    // Miss
    regions[1].deviceAddress = regions[0].deviceAddress + regions[0].size;
    regions[1].stride = sbtRecordSize;
    regions[1].size = sbtRecordSize * rayTypeCount;

    // Hit
    regions[2].deviceAddress = regions[1].deviceAddress + regions[1].size;
    regions[2].stride = sbtRecordSize * rayTypeCount;
    regions[2].size = sbtRecordSize * hitGroupSbtRecordCount;

    // Callable, nothing for now
    regions[3] = VkStridedDeviceAddressRegionKHR();

    ANKI_CMD(vkCmdTraceRaysKHR(m_handle, &regions[0], &regions[1], &regions[2], &regions[3], width, height, depth),
             ANY_OTHER_COMMAND);

    getGrManagerImpl().endMarker(m_handle);
}
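
// Occlusion and timestamp query helpers. With ANKI_BATCH_COMMANDS enabled, pool resets are collected as
// QueryResetAtoms and issued later by flushQueryResets(); otherwise vkCmdResetQueryPool is recorded immediately.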
inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::RESET_QUERY);

    QueryResetAtom atom;
    atom.m_pool = handle;
    atom.m_queryIdx = idx;
    m_queryResetAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 1), ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

    ANKI_CMD(vkCmdBeginQuery(m_handle, handle, idx, 0), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

    ANKI_CMD(vkCmdEndQuery(m_handle, handle, idx), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::resetTimestampQueryInternal(TimestampQueryPtr& query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::RESET_QUERY);

    QueryResetAtom atom;
    atom.m_pool = handle;
    atom.m_queryIdx = idx;
    m_queryResetAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 1), ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::writeTimestampInternal(TimestampQueryPtr& query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryIndex();

    ANKI_CMD(vkCmdWriteTimestamp(m_handle, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, handle, idx), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}
inline void CommandBufferImpl::clearTextureView(TextureViewPtr texView, const ClearValue& clearValue)
{
    commandCommon();

    const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
    const TextureImpl& tex = view.getTextureImpl();

    VkClearColorValue vclear;
    static_assert(sizeof(vclear) == sizeof(clearValue), "See file");
    memcpy(&vclear, &clearValue, sizeof(clearValue));

    if(!view.getSubresource().m_depthStencilAspect)
    {
        VkImageSubresourceRange vkRange = view.getVkImageSubresourceRange();
        ANKI_CMD(vkCmdClearColorImage(m_handle, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &vclear, 1,
                                      &vkRange),
                 ANY_OTHER_COMMAND);
    }
    else
    {
        ANKI_ASSERT(!"TODO");
    }

    m_microCmdb->pushObjectRef(texView);
}
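
// Executes a finalized secondary command buffer inside the current render pass. The render pass is begun lazily with
// VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS and, with batching enabled, several secondaries are collected and
// submitted through a single vkCmdExecuteCommands.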
inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
{
    commandCommon();
    ANKI_ASSERT(insideRenderPass());
    ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM
                || m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
    ANKI_ASSERT(static_cast<const CommandBufferImpl&>(*cmdb).m_finalized);

    m_subpassContents = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;

    if(ANKI_UNLIKELY(m_rpCommandCount == 0))
    {
        beginRenderPassInternal();
    }

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::PUSH_SECOND_LEVEL);

    if(m_secondLevelAtoms.getSize() <= m_secondLevelAtomCount)
    {
        m_secondLevelAtoms.resize(m_alloc, max<U32>(8, m_secondLevelAtomCount * 2));
    }
    m_secondLevelAtoms[m_secondLevelAtomCount++] = static_cast<const CommandBufferImpl&>(*cmdb).m_handle;
#else
    ANKI_CMD(vkCmdExecuteCommands(m_handle, 1, &static_cast<const CommandBufferImpl&>(*cmdb).m_handle),
             ANY_OTHER_COMMAND);
#endif

    ++m_rpCommandCount;

    m_microCmdb->pushObjectRef(cmdb);
}
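
// Shared per-drawcall work: lazily begin the render pass, fetch or create the graphics pipeline from the cached
// state, rebind dirty descriptor sets and flush viewport/scissor only when they changed since the last drawcall.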
inline void CommandBufferImpl::drawcallCommon()
{
    // Preconditions
    commandCommon();
    ANKI_ASSERT(m_graphicsProg);
    ANKI_ASSERT(insideRenderPass() || secondLevel());
    ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || m_subpassContents == VK_SUBPASS_CONTENTS_INLINE);
    ANKI_ASSERT(m_graphicsProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");

    m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;

    if(ANKI_UNLIKELY(m_rpCommandCount == 0) && !secondLevel())
    {
        beginRenderPassInternal();
    }

    ++m_rpCommandCount;

    // Get or create ppline
    Pipeline ppline;
    Bool stateDirty;
    m_graphicsProg->getPipelineFactory().getOrCreatePipeline(m_state, ppline, stateDirty);

    if(stateDirty)
    {
        ANKI_CMD(vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_GRAPHICS, ppline.getHandle()), ANY_OTHER_COMMAND);
    }

    // Bind dsets
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(m_graphicsProg->getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                                 m_graphicsProg->getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    // Flush viewport
    if(ANKI_UNLIKELY(m_viewportDirty))
    {
        const Bool flipvp = flipViewport();

        U32 fbWidth, fbHeight;
        static_cast<const FramebufferImpl&>(*m_activeFb).getAttachmentsSize(fbWidth, fbHeight);

        VkViewport vp = computeViewport(&m_viewport[0], fbWidth, fbHeight, flipvp);

        // Additional optimization
        if(memcmp(&vp, &m_lastViewport, sizeof(vp)) != 0)
        {
            ANKI_CMD(vkCmdSetViewport(m_handle, 0, 1, &vp), ANY_OTHER_COMMAND);
            m_lastViewport = vp;
        }

        m_viewportDirty = false;
    }

    // Flush scissor
    if(ANKI_UNLIKELY(m_scissorDirty))
    {
        const Bool flipvp = flipViewport();

        U32 fbWidth, fbHeight;
        static_cast<const FramebufferImpl&>(*m_activeFb).getAttachmentsSize(fbWidth, fbHeight);

        VkRect2D scissor = computeScissor(&m_scissor[0], fbWidth, fbHeight, flipvp);

        // Additional optimization
        if(memcmp(&scissor, &m_lastScissor, sizeof(scissor)) != 0)
        {
            ANKI_CMD(vkCmdSetScissor(m_handle, 0, 1, &scissor), ANY_OTHER_COMMAND);
            m_lastScissor = scissor;
        }

        m_scissorDirty = false;
    }

    // Some checks
#if ANKI_ENABLE_ASSERTIONS
    if(m_state.getPrimitiveTopology() == PrimitiveTopology::LINES
       || m_state.getPrimitiveTopology() == PrimitiveTopology::LINE_STRIP)
    {
        ANKI_ASSERT(m_lineWidthSet == true);
    }
#endif

    ANKI_TRACE_INC_COUNTER(GR_DRAWCALLS, 1);
}
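
// Bookkeeping that runs before every recorded command: it starts the underlying VkCommandBuffer on first use and
// enforces that recording happens on the thread that created the command buffer.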
inline void CommandBufferImpl::commandCommon()
{
    ANKI_ASSERT(!m_finalized);
#if ANKI_EXTRA_CHECKS
    ++m_commandCount;
#endif
    m_empty = false;

    if(ANKI_UNLIKELY(!m_beganRecording))
    {
        beginRecording();
        m_beganRecording = true;
    }

    ANKI_ASSERT(Thread::getCurrentThreadId() == m_tid
                && "Commands must be recorded and flushed by the thread that created this command buffer");
    ANKI_ASSERT(m_handle);
}
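
// Batching glue: whenever the command type changes, pending commands of the previous type (barriers, query resets,
// query result writes or secondary command buffers) are flushed to Vulkan before the new command type is recorded.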
inline void CommandBufferImpl::flushBatches(CommandBufferCommandType type)
{
    if(type != m_lastCmdType)
    {
        switch(m_lastCmdType)
        {
        case CommandBufferCommandType::SET_BARRIER:
            flushBarriers();
            break;
        case CommandBufferCommandType::RESET_QUERY:
            flushQueryResets();
            break;
        case CommandBufferCommandType::WRITE_QUERY_RESULT:
            flushWriteQueryResults();
            break;
        case CommandBufferCommandType::PUSH_SECOND_LEVEL:
            ANKI_ASSERT(m_secondLevelAtomCount > 0);
            vkCmdExecuteCommands(m_handle, m_secondLevelAtomCount, &m_secondLevelAtoms[0]);
            m_secondLevelAtomCount = 0;
            break;
        case CommandBufferCommandType::ANY_OTHER_COMMAND:
            break;
        default:
            ANKI_ASSERT(0);
        }

        m_lastCmdType = type;
    }
}
inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
{
    commandCommon();
    ANKI_ASSERT(!insideRenderPass());
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::TRANSFER_DESTINATION));

    ANKI_ASSERT(offset < impl.getSize());
    ANKI_ASSERT((offset % 4) == 0 && "Should be multiple of 4");

    size = (size == MAX_PTR_SIZE) ? (impl.getActualSize() - offset) : size;
    alignRoundUp(4, size); // Needs to be multiple of 4
    ANKI_ASSERT(offset + size <= impl.getActualSize());
    ANKI_ASSERT((size % 4) == 0 && "Should be multiple of 4");

    ANKI_CMD(vkCmdFillBuffer(m_handle, impl.getHandle(), offset, size, value), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(buff);
}

inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset,
                                                                 BufferPtr buff)
{
    commandCommon();
    ANKI_ASSERT(!insideRenderPass());
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::TRANSFER_DESTINATION));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(U32)) <= impl.getSize());
    const OcclusionQueryImpl& q = static_cast<const OcclusionQueryImpl&>(*query);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::WRITE_QUERY_RESULT);

    WriteQueryAtom atom;
    atom.m_pool = q.m_handle.getQueryPool();
    atom.m_queryIdx = q.m_handle.getQueryIndex();
    atom.m_buffer = impl.getHandle();
    atom.m_offset = offset;
    m_writeQueryAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdCopyQueryPoolResults(m_handle, q.m_handle.m_pool, q.m_handle.m_queryIndex, 1, impl.getHandle(),
                                       offset, sizeof(U32), VK_QUERY_RESULT_PARTIAL_BIT),
             ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
    m_microCmdb->pushObjectRef(buff);
}
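
// Binds a shader program and routes it to the graphics, compute or ray tracing slot. Compute and ray tracing
// pipelines are bound immediately; the graphics pipeline bind is deferred to drawcallCommon(), presumably because it
// depends on state that is only final at draw time.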
inline void CommandBufferImpl::bindShaderProgram(ShaderProgramPtr& prog)
{
    commandCommon();
    ShaderProgramImpl& impl = static_cast<ShaderProgramImpl&>(*prog);

    if(impl.isGraphics())
    {
        m_graphicsProg = &impl;
        m_computeProg = nullptr; // Unbind the compute prog. Unlike Vulkan's separate bind points, binding one
                                 // program type unbinds the others
        m_rtProg = nullptr; // See above
        m_state.bindShaderProgram(&impl);
    }
    else if(!!(impl.getStages() & ShaderTypeBit::COMPUTE))
    {
        m_computeProg = &impl;
        m_graphicsProg = nullptr; // See comment in the if()
        m_rtProg = nullptr; // See above

        // Bind the pipeline now
        ANKI_CMD(vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_COMPUTE, impl.getComputePipelineHandle()),
                 ANY_OTHER_COMMAND);
    }
    else
    {
        ANKI_ASSERT(!!(impl.getStages() & ShaderTypeBit::ALL_RAY_TRACING));
        m_computeProg = nullptr;
        m_graphicsProg = nullptr;
        m_rtProg = &impl;

        // Bind now
        ANKI_CMD(
            vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, impl.getRayTracingPipelineHandle()),
            ANY_OTHER_COMMAND);
    }

    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(impl.getReflectionInfo().m_descriptorSetMask.get(i))
        {
            m_dsetState[i].setLayout(impl.getDescriptorSetLayout(i));
        }
        else
        {
            // According to the spec the bound DS may be disturbed if the ppline layout is not compatible. Play it safe
            // and dirty the slot. That will force rebind of the DS at drawcall time.
            m_dsetState[i].setLayout(DescriptorSetLayout());
        }
    }

    m_microCmdb->pushObjectRef(prog);

#if ANKI_EXTRA_CHECKS
    m_setPushConstantsSize = 0;
#endif
}
inline void CommandBufferImpl::copyBufferToBuffer(BufferPtr& src, PtrSize srcOffset, BufferPtr& dst, PtrSize dstOffset,
                                                  PtrSize range)
{
    ANKI_ASSERT(static_cast<const BufferImpl&>(*src).usageValid(BufferUsageBit::TRANSFER_SOURCE));
    ANKI_ASSERT(static_cast<const BufferImpl&>(*dst).usageValid(BufferUsageBit::TRANSFER_DESTINATION));
    ANKI_ASSERT(srcOffset + range <= src->getSize());
    ANKI_ASSERT(dstOffset + range <= dst->getSize());

    commandCommon();

    VkBufferCopy region = {};
    region.srcOffset = srcOffset;
    region.dstOffset = dstOffset;
    region.size = range;

    ANKI_CMD(vkCmdCopyBuffer(m_handle, static_cast<const BufferImpl&>(*src).getHandle(),
                             static_cast<const BufferImpl&>(*dst).getHandle(), 1, &region),
             ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(src);
    m_microCmdb->pushObjectRef(dst);
}

inline Bool CommandBufferImpl::flipViewport() const
{
    return static_cast<const FramebufferImpl&>(*m_activeFb).hasPresentableTexture();
}

inline void CommandBufferImpl::setPushConstants(const void* data, U32 dataSize)
{
    ANKI_ASSERT(data && dataSize && dataSize % 16 == 0);
    const ShaderProgramImpl& prog = getBoundProgram();
    ANKI_ASSERT(prog.getReflectionInfo().m_pushConstantsSize == dataSize
                && "The bound program should have push constants equal to the \"dataSize\" parameter");

    commandCommon();

    ANKI_CMD(vkCmdPushConstants(m_handle, prog.getPipelineLayout().getHandle(), VK_SHADER_STAGE_ALL, 0, dataSize, data),
             ANY_OTHER_COMMAND);

#if ANKI_EXTRA_CHECKS
    m_setPushConstantsSize = dataSize;
#endif
}
inline void CommandBufferImpl::setRasterizationOrder(RasterizationOrder order)
{
    commandCommon();

    if(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::AMD_RASTERIZATION_ORDER))
    {
        m_state.setRasterizationOrder(order);
    }
}

inline void CommandBufferImpl::setLineWidth(F32 width)
{
    commandCommon();
    vkCmdSetLineWidth(m_handle, width);

#if ANKI_ENABLE_ASSERTIONS
    m_lineWidthSet = true;
#endif
}

} // end namespace anki