// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/CommandBufferImpl.h>
#include <AnKi/Gr/Vulkan/TextureImpl.h>
#include <AnKi/Gr/OcclusionQuery.h>
#include <AnKi/Gr/Vulkan/OcclusionQueryImpl.h>
#include <AnKi/Gr/TimestampQuery.h>
#include <AnKi/Gr/Vulkan/TimestampQueryImpl.h>
#include <AnKi/Util/Tracer.h>

namespace anki {

inline void CommandBufferImpl::setStencilCompareMask(FaceSelectionBit face, U32 mask)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;
    if(!!(face & FaceSelectionBit::FRONT) && m_stencilCompareMasks[0] != mask)
    {
        m_stencilCompareMasks[0] = mask;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilCompareMasks[1] != mask)
    {
        m_stencilCompareMasks[1] = mask;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilCompareMask(m_handle, flags, mask), ANY_OTHER_COMMAND);
    }
}

inline void CommandBufferImpl::setStencilWriteMask(FaceSelectionBit face, U32 mask)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;
    if(!!(face & FaceSelectionBit::FRONT) && m_stencilWriteMasks[0] != mask)
    {
        m_stencilWriteMasks[0] = mask;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilWriteMasks[1] != mask)
    {
        m_stencilWriteMasks[1] = mask;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilWriteMask(m_handle, flags, mask), ANY_OTHER_COMMAND);
    }
}

inline void CommandBufferImpl::setStencilReference(FaceSelectionBit face, U32 ref)
{
    commandCommon();

    VkStencilFaceFlags flags = 0;
    if(!!(face & FaceSelectionBit::FRONT) && m_stencilReferenceMasks[0] != ref)
    {
        m_stencilReferenceMasks[0] = ref;
        flags = VK_STENCIL_FACE_FRONT_BIT;
    }

    if(!!(face & FaceSelectionBit::BACK) && m_stencilReferenceMasks[1] != ref)
    {
        m_stencilReferenceMasks[1] = ref;
        flags |= VK_STENCIL_FACE_BACK_BIT;
    }

    if(flags)
    {
        ANKI_CMD(vkCmdSetStencilReference(m_handle, flags, ref), ANY_OTHER_COMMAND);
    }
}
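
// Records a VkImageMemoryBarrier. With ANKI_BATCH_COMMANDS the barrier is accumulated in m_imgBarriers (together
// with the merged src/dst stage masks) and emitted later by flushBarriers(); otherwise a vkCmdPipelineBarrier is
// recorded immediately.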

inline void CommandBufferImpl::setImageBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
                                               VkImageLayout prevLayout, VkPipelineStageFlags dstStage,
                                               VkAccessFlags dstAccess, VkImageLayout newLayout, VkImage img,
                                               const VkImageSubresourceRange& range)
{
    ANKI_ASSERT(img);
    commandCommon();

    VkImageMemoryBarrier inf = {};
    inf.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    inf.srcAccessMask = srcAccess;
    inf.dstAccessMask = dstAccess;
    inf.oldLayout = prevLayout;
    inf.newLayout = newLayout;
    inf.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    inf.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    inf.image = img;
    inf.subresourceRange = range;

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    if(m_imgBarriers.getSize() <= m_imgBarrierCount)
    {
        m_imgBarriers.resize(m_alloc, max<U32>(2, m_imgBarrierCount * 2));
    }
    m_imgBarriers[m_imgBarrierCount++] = inf;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_CMD(vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &inf), ANY_OTHER_COMMAND);
    ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);
#endif
}
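
// The texture barrier helpers below translate TextureUsageBit transitions into Vulkan stage/access masks and image
// layouts via TextureImpl::computeBarrierInfo()/computeLayout() and then forward to setImageBarrier().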

inline void CommandBufferImpl::setTextureBarrierRange(TexturePtr tex, TextureUsageBit prevUsage,
                                                      TextureUsageBit nextUsage, const VkImageSubresourceRange& range)
{
    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    ANKI_ASSERT(impl.usageValid(prevUsage));
    ANKI_ASSERT(impl.usageValid(nextUsage));
    ANKI_ASSERT(((nextUsage & TextureUsageBit::GENERATE_MIPMAPS) == TextureUsageBit::GENERATE_MIPMAPS
                 || (nextUsage & TextureUsageBit::GENERATE_MIPMAPS) == TextureUsageBit::NONE)
                && "GENERATE_MIPMAPS should be alone");

    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkImageLayout oldLayout;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    VkImageLayout newLayout;
    impl.computeBarrierInfo(prevUsage, nextUsage, range.baseMipLevel, srcStage, srcAccess, dstStage, dstAccess);
    oldLayout = impl.computeLayout(prevUsage, range.baseMipLevel);
    newLayout = impl.computeLayout(nextUsage, range.baseMipLevel);

    setImageBarrier(srcStage, srcAccess, oldLayout, dstStage, dstAccess, newLayout, impl.m_imageHandle, range);

    m_microCmdb->pushObjectRef(tex);
}

inline void CommandBufferImpl::setTextureBarrier(TexturePtr tex, TextureUsageBit prevUsage, TextureUsageBit nextUsage,
                                                 const TextureSubresourceInfo& subresource_)
{
    TextureSubresourceInfo subresource = subresource_;
    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);

    // The transition of the non-zero mip levels happens inside CommandBufferImpl::generateMipmapsX so limit the
    // subresource
    if(nextUsage == TextureUsageBit::GENERATE_MIPMAPS)
    {
        ANKI_ASSERT(impl.isSubresourceGoodForMipmapGeneration(subresource));
        subresource.m_firstMipmap = 0;
        subresource.m_mipmapCount = 1;
    }

    ANKI_ASSERT(tex->isSubresourceValid(subresource));

    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(subresource, range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}

inline void CommandBufferImpl::setTextureSurfaceBarrier(TexturePtr tex, TextureUsageBit prevUsage,
                                                        TextureUsageBit nextUsage, const TextureSurfaceInfo& surf)
{
    if(ANKI_UNLIKELY(surf.m_level > 0 && nextUsage == TextureUsageBit::GENERATE_MIPMAPS))
    {
        // This transition happens inside CommandBufferImpl::generateMipmapsX. No need to do anything
        return;
    }

    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(TextureSubresourceInfo(surf, impl.getDepthStencilAspect()), range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}

inline void CommandBufferImpl::setTextureVolumeBarrier(TexturePtr tex, TextureUsageBit prevUsage,
                                                       TextureUsageBit nextUsage, const TextureVolumeInfo& vol)
{
    if(vol.m_level > 0)
    {
        ANKI_ASSERT(!(nextUsage & TextureUsageBit::GENERATE_MIPMAPS)
                    && "This transition happens inside CommandBufferImpl::generateMipmaps");
    }

    const TextureImpl& impl = static_cast<const TextureImpl&>(*tex);
    VkImageSubresourceRange range;
    impl.computeVkImageSubresourceRange(TextureSubresourceInfo(vol, impl.getDepthStencilAspect()), range);
    setTextureBarrierRange(tex, prevUsage, nextUsage, range);
}

inline void CommandBufferImpl::setBufferBarrier(VkPipelineStageFlags srcStage, VkAccessFlags srcAccess,
                                                VkPipelineStageFlags dstStage, VkAccessFlags dstAccess, PtrSize offset,
                                                PtrSize size, VkBuffer buff)
{
    ANKI_ASSERT(buff);
    commandCommon();

    VkBufferMemoryBarrier b = {};
    b.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    b.srcAccessMask = srcAccess;
    b.dstAccessMask = dstAccess;
    b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    b.buffer = buff;
    b.offset = offset;
    b.size = size;

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    if(m_buffBarriers.getSize() <= m_buffBarrierCount)
    {
        m_buffBarriers.resize(m_alloc, max<U32>(2, m_buffBarrierCount * 2));
    }
    m_buffBarriers[m_buffBarrierCount++] = b;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_CMD(vkCmdPipelineBarrier(m_handle, srcStage, dstStage, 0, 0, nullptr, 1, &b, 0, nullptr), ANY_OTHER_COMMAND);
    ANKI_TRACE_INC_COUNTER(VK_PIPELINE_BARRIERS, 1);
#endif
}

inline void CommandBufferImpl::setBufferBarrier(BufferPtr& buff, BufferUsageBit before, BufferUsageBit after,
                                                PtrSize offset, PtrSize size)
{
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    impl.computeBarrierInfo(before, after, srcStage, srcAccess, dstStage, dstAccess);
    setBufferBarrier(srcStage, srcAccess, dstStage, dstAccess, offset, size, impl.getHandle());
    m_microCmdb->pushObjectRef(buff);
}

inline void CommandBufferImpl::setAccelerationStructureBarrierInternal(AccelerationStructurePtr& as,
                                                                       AccelerationStructureUsageBit prevUsage,
                                                                       AccelerationStructureUsageBit nextUsage)
{
    commandCommon();

    VkPipelineStageFlags srcStage;
    VkAccessFlags srcAccess;
    VkPipelineStageFlags dstStage;
    VkAccessFlags dstAccess;
    AccelerationStructureImpl::computeBarrierInfo(prevUsage, nextUsage, srcStage, srcAccess, dstStage, dstAccess);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::SET_BARRIER);

    VkMemoryBarrier memBarrier = {};
    memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    memBarrier.srcAccessMask = srcAccess;
    memBarrier.dstAccessMask = dstAccess;

    if(m_memBarriers.getSize() <= m_memBarrierCount)
    {
        m_memBarriers.resize(m_alloc, max<U32>(2, m_memBarrierCount * 2));
    }
    m_memBarriers[m_memBarrierCount++] = memBarrier;

    m_srcStageMask |= srcStage;
    m_dstStageMask |= dstStage;
#else
    ANKI_ASSERT(!"TODO");
#endif
}
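
// The draw* entry points all follow the same pattern: set the primitive topology on the cached state, run
// drawcallCommon() (which binds the pipeline and descriptor sets, flushes viewport/scissor and begins the render
// pass on the first drawcall) and then record the corresponding Vulkan draw command.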

inline void CommandBufferImpl::drawArrays(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 first,
                                          U32 baseInstance)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    ANKI_CMD(vkCmdDraw(m_handle, count, instanceCount, first, baseInstance), ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawElements(PrimitiveTopology topology, U32 count, U32 instanceCount, U32 firstIndex,
                                            U32 baseVertex, U32 baseInstance)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    ANKI_CMD(vkCmdDrawIndexed(m_handle, count, instanceCount, firstIndex, baseVertex, baseInstance), ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawArraysIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
                                                  BufferPtr& buff)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::INDIRECT_DRAW));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(DrawArraysIndirectInfo) * drawCount) <= impl.getSize());

    ANKI_CMD(vkCmdDrawIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawArraysIndirectInfo)),
             ANY_OTHER_COMMAND);
}

inline void CommandBufferImpl::drawElementsIndirect(PrimitiveTopology topology, U32 drawCount, PtrSize offset,
                                                    BufferPtr& buff)
{
    m_state.setPrimitiveTopology(topology);
    drawcallCommon();
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::INDIRECT_DRAW));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(DrawElementsIndirectInfo) * drawCount) <= impl.getSize());

    ANKI_CMD(vkCmdDrawIndexedIndirect(m_handle, impl.getHandle(), offset, drawCount, sizeof(DrawElementsIndirectInfo)),
             ANY_OTHER_COMMAND);
}
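
// Validates that push constants were provided for the bound compute program, (re)binds any dirty descriptor sets on
// the compute bind point and records vkCmdDispatch, wrapped in a debug marker named after the program.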

inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
{
    ANKI_ASSERT(m_computeProg);
    ANKI_ASSERT(m_computeProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");
    commandCommon();

    getGrManagerImpl().beginMarker(m_handle, m_computeProg->getName());

    // Bind descriptors
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(m_computeProg->getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_COMPUTE,
                                                 m_computeProg->getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    ANKI_CMD(vkCmdDispatch(m_handle, groupCountX, groupCountY, groupCountZ), ANY_OTHER_COMMAND);

    getGrManagerImpl().endMarker(m_handle);
}
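
// Shader binding table layout assumed by the code below: one ray-gen record, then rayTypeCount miss records, then
// the hit-group records (hitGroupSbtRecordCount must be a multiple of rayTypeCount), each sbtRecordSize bytes,
// starting at sbtBufferOffset inside sbtBuffer.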

inline void CommandBufferImpl::traceRaysInternal(BufferPtr& sbtBuffer, PtrSize sbtBufferOffset, U32 sbtRecordSize32,
                                                 U32 hitGroupSbtRecordCount, U32 rayTypeCount, U32 width, U32 height,
                                                 U32 depth)
{
    const PtrSize sbtRecordSize = sbtRecordSize32;
    ANKI_ASSERT(hitGroupSbtRecordCount > 0);
    ANKI_ASSERT(width > 0 && height > 0 && depth > 0);
    ANKI_ASSERT(m_rtProg);
    const ShaderProgramImpl& sprog = static_cast<const ShaderProgramImpl&>(*m_rtProg);
    ANKI_ASSERT(sprog.getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");

    ANKI_ASSERT(rayTypeCount == sprog.getMissShaderCount() && "All the miss shaders should be in use");
    ANKI_ASSERT((hitGroupSbtRecordCount % rayTypeCount) == 0);
    const PtrSize sbtRecordCount = 1 + rayTypeCount + hitGroupSbtRecordCount;
    const PtrSize sbtBufferSize = sbtRecordCount * sbtRecordSize;
    (void)sbtBufferSize;
    ANKI_ASSERT(sbtBufferSize + sbtBufferOffset <= sbtBuffer->getSize());
    ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_sbtRecordAlignment, sbtBufferOffset));

    commandCommon();

    getGrManagerImpl().beginMarker(m_handle, m_rtProg->getName());

    // Bind descriptors
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(sprog.getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
                                                 sprog.getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    Array<VkStridedDeviceAddressRegionKHR, 4> regions;
    const U64 sbtBufferAddress = sbtBuffer->getGpuAddress() + sbtBufferOffset;
    ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_sbtRecordAlignment, sbtBufferAddress));

    // Rgen
    regions[0].deviceAddress = sbtBufferAddress;
    regions[0].stride = sbtRecordSize;
    regions[0].size = sbtRecordSize;

    // Miss
    regions[1].deviceAddress = regions[0].deviceAddress + regions[0].size;
    regions[1].stride = sbtRecordSize;
    regions[1].size = sbtRecordSize * rayTypeCount;

    // Hit
    regions[2].deviceAddress = regions[1].deviceAddress + regions[1].size;
    regions[2].stride = sbtRecordSize * rayTypeCount;
    regions[2].size = sbtRecordSize * hitGroupSbtRecordCount;

    // Callable, nothing for now
    regions[3] = VkStridedDeviceAddressRegionKHR();

    ANKI_CMD(vkCmdTraceRaysKHR(m_handle, &regions[0], &regions[1], &regions[2], &regions[3], width, height, depth),
             ANY_OTHER_COMMAND);

    getGrManagerImpl().endMarker(m_handle);
}
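
// Occlusion/timestamp query helpers. Query resets are batched as QueryResetAtoms when ANKI_BATCH_COMMANDS is enabled
// and are flushed by flushQueryResets(); begin/end/write commands are recorded directly.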

inline void CommandBufferImpl::resetOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::RESET_QUERY);

    QueryResetAtom atom;
    atom.m_pool = handle;
    atom.m_queryIdx = idx;
    m_queryResetAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 1), ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::beginOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

    ANKI_CMD(vkCmdBeginQuery(m_handle, handle, idx, 0), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::endOcclusionQuery(OcclusionQueryPtr query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const OcclusionQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

    ANKI_CMD(vkCmdEndQuery(m_handle, handle, idx), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::resetTimestampQueryInternal(TimestampQueryPtr& query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryIndex();
    ANKI_ASSERT(handle);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::RESET_QUERY);

    QueryResetAtom atom;
    atom.m_pool = handle;
    atom.m_queryIdx = idx;
    m_queryResetAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdResetQueryPool(m_handle, handle, idx, 1), ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::writeTimestampInternal(TimestampQueryPtr& query)
{
    commandCommon();

    const VkQueryPool handle = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryPool();
    const U32 idx = static_cast<const TimestampQueryImpl&>(*query).m_handle.getQueryIndex();

    ANKI_CMD(vkCmdWriteTimestamp(m_handle, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, handle, idx), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(query);
}

inline void CommandBufferImpl::clearTextureView(TextureViewPtr texView, const ClearValue& clearValue)
{
    commandCommon();

    const TextureViewImpl& view = static_cast<const TextureViewImpl&>(*texView);
    const TextureImpl& tex = view.getTextureImpl();

    VkClearColorValue vclear;
    static_assert(sizeof(vclear) == sizeof(clearValue), "See file");
    memcpy(&vclear, &clearValue, sizeof(clearValue));

    if(!view.getSubresource().m_depthStencilAspect)
    {
        VkImageSubresourceRange vkRange = view.getVkImageSubresourceRange();
        ANKI_CMD(vkCmdClearColorImage(m_handle, tex.m_imageHandle, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &vclear, 1,
                                      &vkRange),
                 ANY_OTHER_COMMAND);
    }
    else
    {
        ANKI_ASSERT(!"TODO");
    }

    m_microCmdb->pushObjectRef(texView);
}

inline void CommandBufferImpl::pushSecondLevelCommandBuffer(CommandBufferPtr cmdb)
{
    commandCommon();
    ANKI_ASSERT(insideRenderPass());
    ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM
                || m_subpassContents == VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
    ANKI_ASSERT(static_cast<const CommandBufferImpl&>(*cmdb).m_finalized);

    m_subpassContents = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;

    if(ANKI_UNLIKELY(m_rpCommandCount == 0))
    {
        beginRenderPassInternal();
    }

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::PUSH_SECOND_LEVEL);

    if(m_secondLevelAtoms.getSize() <= m_secondLevelAtomCount)
    {
        m_secondLevelAtoms.resize(m_alloc, max<U32>(8, m_secondLevelAtomCount * 2));
    }
    m_secondLevelAtoms[m_secondLevelAtomCount++] = static_cast<const CommandBufferImpl&>(*cmdb).m_handle;
#else
    ANKI_CMD(vkCmdExecuteCommands(m_handle, 1, &static_cast<const CommandBufferImpl&>(*cmdb).m_handle),
             ANY_OTHER_COMMAND);
#endif

    ++m_rpCommandCount;

    m_microCmdb->pushObjectRef(cmdb);
}
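
// Shared per-drawcall path: asserts the preconditions, lazily begins the render pass on the first drawcall of a
// primary command buffer, fetches (or creates) the graphics pipeline for the current state, rebinds dirty descriptor
// sets and flushes viewport/scissor state before the caller records the actual vkCmdDraw*.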

inline void CommandBufferImpl::drawcallCommon()
{
    // Preconditions
    commandCommon();
    ANKI_ASSERT(m_graphicsProg);
    ANKI_ASSERT(insideRenderPass() || secondLevel());
    ANKI_ASSERT(m_subpassContents == VK_SUBPASS_CONTENTS_MAX_ENUM || m_subpassContents == VK_SUBPASS_CONTENTS_INLINE);
    ANKI_ASSERT(m_graphicsProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
                && "Forgot to set pushConstants");
    m_subpassContents = VK_SUBPASS_CONTENTS_INLINE;

    if(ANKI_UNLIKELY(m_rpCommandCount == 0) && !secondLevel())
    {
        beginRenderPassInternal();
    }
    ++m_rpCommandCount;

    // Get or create ppline
    Pipeline ppline;
    Bool stateDirty;
    m_graphicsProg->getPipelineFactory().getOrCreatePipeline(m_state, ppline, stateDirty);
    if(stateDirty)
    {
        ANKI_CMD(vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_GRAPHICS, ppline.getHandle()), ANY_OTHER_COMMAND);
    }

    // Bind dsets
    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(m_graphicsProg->getReflectionInfo().m_descriptorSetMask.get(i))
        {
            DescriptorSet dset;
            Bool dirty;
            Array<PtrSize, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsetsPtrSize;
            U32 dynamicOffsetCount;
            if(getGrManagerImpl().getDescriptorSetFactory().newDescriptorSet(
                   m_tid, m_alloc, m_dsetState[i], dset, dirty, dynamicOffsetsPtrSize, dynamicOffsetCount))
            {
                ANKI_VK_LOGF("Cannot recover");
            }

            if(dirty)
            {
                // Vulkan should have had the dynamic offsets as VkDeviceSize and not U32. Workaround that.
                Array<U32, MAX_BINDINGS_PER_DESCRIPTOR_SET> dynamicOffsets;
                for(U32 i = 0; i < dynamicOffsetCount; ++i)
                {
                    dynamicOffsets[i] = U32(dynamicOffsetsPtrSize[i]);
                }

                VkDescriptorSet dsHandle = dset.getHandle();
                ANKI_CMD(vkCmdBindDescriptorSets(m_handle, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                                 m_graphicsProg->getPipelineLayout().getHandle(), i, 1, &dsHandle,
                                                 dynamicOffsetCount, &dynamicOffsets[0]),
                         ANY_OTHER_COMMAND);
            }
        }
    }

    // Flush viewport
    if(ANKI_UNLIKELY(m_viewportDirty))
    {
        const Bool flipvp = flipViewport();

        U32 fbWidth, fbHeight;
        static_cast<const FramebufferImpl&>(*m_activeFb).getAttachmentsSize(fbWidth, fbHeight);

        VkViewport vp = computeViewport(&m_viewport[0], fbWidth, fbHeight, flipvp);

        // Additional optimization
        if(memcmp(&vp, &m_lastViewport, sizeof(vp)) != 0)
        {
            ANKI_CMD(vkCmdSetViewport(m_handle, 0, 1, &vp), ANY_OTHER_COMMAND);
            m_lastViewport = vp;
        }

        m_viewportDirty = false;
    }

    // Flush scissor
    if(ANKI_UNLIKELY(m_scissorDirty))
    {
        const Bool flipvp = flipViewport();

        U32 fbWidth, fbHeight;
        static_cast<const FramebufferImpl&>(*m_activeFb).getAttachmentsSize(fbWidth, fbHeight);

        VkRect2D scissor = computeScissor(&m_scissor[0], fbWidth, fbHeight, flipvp);

        // Additional optimization
        if(memcmp(&scissor, &m_lastScissor, sizeof(scissor)) != 0)
        {
            ANKI_CMD(vkCmdSetScissor(m_handle, 0, 1, &scissor), ANY_OTHER_COMMAND);
            m_lastScissor = scissor;
        }

        m_scissorDirty = false;
    }

    // Some checks
#if ANKI_ENABLE_ASSERTIONS
    if(m_state.getPrimitiveTopology() == PrimitiveTopology::LINES
       || m_state.getPrimitiveTopology() == PrimitiveTopology::LINE_STRIP)
    {
        ANKI_ASSERT(m_lineWidthSet == true);
    }
#endif

    ANKI_TRACE_INC_COUNTER(GR_DRAWCALLS, 1);
}
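
// Every recorded command funnels through commandCommon(): it lazily starts the Vulkan recording on the first command
// and asserts that recording happens on the thread that created the command buffer.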

inline void CommandBufferImpl::commandCommon()
{
    ANKI_ASSERT(!m_finalized);
#if ANKI_EXTRA_CHECKS
    ++m_commandCount;
#endif
    m_empty = false;

    if(ANKI_UNLIKELY(!m_beganRecording))
    {
        beginRecording();
        m_beganRecording = true;
    }

    ANKI_ASSERT(Thread::getCurrentThreadId() == m_tid
                && "Commands must be recorded and flushed by the thread that created this command buffer");
    ANKI_ASSERT(m_handle);
}
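
// With ANKI_BATCH_COMMANDS, consecutive commands of the same type are accumulated. A change in command type flushes
// whatever is pending (barriers, query resets, query-result writes or second-level command buffers) before the new
// command is recorded.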

inline void CommandBufferImpl::flushBatches(CommandBufferCommandType type)
{
    if(type != m_lastCmdType)
    {
        switch(m_lastCmdType)
        {
        case CommandBufferCommandType::SET_BARRIER:
            flushBarriers();
            break;
        case CommandBufferCommandType::RESET_QUERY:
            flushQueryResets();
            break;
        case CommandBufferCommandType::WRITE_QUERY_RESULT:
            flushWriteQueryResults();
            break;
        case CommandBufferCommandType::PUSH_SECOND_LEVEL:
            ANKI_ASSERT(m_secondLevelAtomCount > 0);
            vkCmdExecuteCommands(m_handle, m_secondLevelAtomCount, &m_secondLevelAtoms[0]);
            m_secondLevelAtomCount = 0;
            break;
        case CommandBufferCommandType::ANY_OTHER_COMMAND:
            break;
        default:
            ANKI_ASSERT(0);
        }

        m_lastCmdType = type;
    }
}
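
// Fills a buffer range with a 32-bit value via vkCmdFillBuffer. Passing size == MAX_PTR_SIZE means "up to the end of
// the buffer"; the offset must be a multiple of 4 and the size is rounded up to a multiple of 4, as Vulkan requires.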

inline void CommandBufferImpl::fillBuffer(BufferPtr buff, PtrSize offset, PtrSize size, U32 value)
{
    commandCommon();
    ANKI_ASSERT(!insideRenderPass());
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::TRANSFER_DESTINATION));

    ANKI_ASSERT(offset < impl.getSize());
    ANKI_ASSERT((offset % 4) == 0 && "Should be multiple of 4");

    size = (size == MAX_PTR_SIZE) ? (impl.getActualSize() - offset) : size;
    alignRoundUp(4, size); // Needs to be multiple of 4
    ANKI_ASSERT(offset + size <= impl.getActualSize());
    ANKI_ASSERT((size % 4) == 0 && "Should be multiple of 4");

    ANKI_CMD(vkCmdFillBuffer(m_handle, impl.getHandle(), offset, size, value), ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(buff);
}

inline void CommandBufferImpl::writeOcclusionQueryResultToBuffer(OcclusionQueryPtr query, PtrSize offset,
                                                                 BufferPtr buff)
{
    commandCommon();
    ANKI_ASSERT(!insideRenderPass());
    const BufferImpl& impl = static_cast<const BufferImpl&>(*buff);
    ANKI_ASSERT(impl.usageValid(BufferUsageBit::TRANSFER_DESTINATION));
    ANKI_ASSERT((offset % 4) == 0);
    ANKI_ASSERT((offset + sizeof(U32)) <= impl.getSize());
    const OcclusionQueryImpl& q = static_cast<const OcclusionQueryImpl&>(*query);

#if ANKI_BATCH_COMMANDS
    flushBatches(CommandBufferCommandType::WRITE_QUERY_RESULT);

    WriteQueryAtom atom;
    atom.m_pool = q.m_handle.getQueryPool();
    atom.m_queryIdx = q.m_handle.getQueryIndex();
    atom.m_buffer = impl.getHandle();
    atom.m_offset = offset;
    m_writeQueryAtoms.emplaceBack(m_alloc, atom);
#else
    ANKI_CMD(vkCmdCopyQueryPoolResults(m_handle, q.m_handle.getQueryPool(), q.m_handle.getQueryIndex(), 1,
                                       impl.getHandle(), offset, sizeof(U32), VK_QUERY_RESULT_PARTIAL_BIT),
             ANY_OTHER_COMMAND);
#endif

    m_microCmdb->pushObjectRef(query);
    m_microCmdb->pushObjectRef(buff);
}
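
// Routes the program to the graphics, compute or ray-tracing slot. Compute and ray-tracing pipelines are bound
// immediately; the graphics pipeline is created and bound lazily in drawcallCommon() since it depends on the rest of
// the dynamic state. Descriptor-set layouts are refreshed for the sets the program uses and reset for the rest.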

inline void CommandBufferImpl::bindShaderProgram(ShaderProgramPtr& prog)
{
    commandCommon();
    ShaderProgramImpl& impl = static_cast<ShaderProgramImpl&>(*prog);

    if(impl.isGraphics())
    {
        m_graphicsProg = &impl;
        m_computeProg = nullptr; // Unbind the compute prog. Unlike Vulkan, the bind points are not independent here
        m_rtProg = nullptr; // See above
        m_state.bindShaderProgram(&impl);
    }
    else if(!!(impl.getStages() & ShaderTypeBit::COMPUTE))
    {
        m_computeProg = &impl;
        m_graphicsProg = nullptr; // See comment in the if()
        m_rtProg = nullptr; // See above

        // Bind the pipeline now
        ANKI_CMD(vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_COMPUTE, impl.getComputePipelineHandle()),
                 ANY_OTHER_COMMAND);
    }
    else
    {
        ANKI_ASSERT(!!(impl.getStages() & ShaderTypeBit::ALL_RAY_TRACING));
        m_computeProg = nullptr;
        m_graphicsProg = nullptr;
        m_rtProg = &impl;

        // Bind now
        ANKI_CMD(
            vkCmdBindPipeline(m_handle, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, impl.getRayTracingPipelineHandle()),
            ANY_OTHER_COMMAND);
    }

    for(U32 i = 0; i < MAX_DESCRIPTOR_SETS; ++i)
    {
        if(impl.getReflectionInfo().m_descriptorSetMask.get(i))
        {
            m_dsetState[i].setLayout(impl.getDescriptorSetLayout(i));
        }
        else
        {
            // According to the spec the bound DS may be disturbed if the ppline layout is not compatible. Play it safe
            // and dirty the slot. That will force rebind of the DS at drawcall time.
            m_dsetState[i].setLayout(DescriptorSetLayout());
        }
    }

    m_microCmdb->pushObjectRef(prog);

#if ANKI_EXTRA_CHECKS
    m_setPushConstantsSize = 0;
#endif
}

inline void CommandBufferImpl::copyBufferToBuffer(BufferPtr& src, PtrSize srcOffset, BufferPtr& dst, PtrSize dstOffset,
                                                  PtrSize range)
{
    ANKI_ASSERT(static_cast<const BufferImpl&>(*src).usageValid(BufferUsageBit::TRANSFER_SOURCE));
    ANKI_ASSERT(static_cast<const BufferImpl&>(*dst).usageValid(BufferUsageBit::TRANSFER_DESTINATION));
    ANKI_ASSERT(srcOffset + range <= src->getSize());
    ANKI_ASSERT(dstOffset + range <= dst->getSize());

    commandCommon();

    VkBufferCopy region = {};
    region.srcOffset = srcOffset;
    region.dstOffset = dstOffset;
    region.size = range;

    ANKI_CMD(vkCmdCopyBuffer(m_handle, static_cast<const BufferImpl&>(*src).getHandle(),
                             static_cast<const BufferImpl&>(*dst).getHandle(), 1, &region),
             ANY_OTHER_COMMAND);

    m_microCmdb->pushObjectRef(src);
    m_microCmdb->pushObjectRef(dst);
}

inline Bool CommandBufferImpl::flipViewport() const
{
    return static_cast<const FramebufferImpl&>(*m_activeFb).hasPresentableTexture();
}
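
// Records vkCmdPushConstants for all shader stages. The data size must match the push-constant block size reported
// by the bound program's reflection; in ANKI_EXTRA_CHECKS builds the size is remembered so the dispatch/draw/trace
// paths can assert that push constants were actually set.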

inline void CommandBufferImpl::setPushConstants(const void* data, U32 dataSize)
{
    ANKI_ASSERT(data && dataSize && dataSize % 16 == 0);
    const ShaderProgramImpl& prog = getBoundProgram();
    ANKI_ASSERT(prog.getReflectionInfo().m_pushConstantsSize == dataSize
                && "The bound program should have push constants equal to the \"dataSize\" parameter");

    commandCommon();

    ANKI_CMD(vkCmdPushConstants(m_handle, prog.getPipelineLayout().getHandle(), VK_SHADER_STAGE_ALL, 0, dataSize, data),
             ANY_OTHER_COMMAND);

#if ANKI_EXTRA_CHECKS
    m_setPushConstantsSize = dataSize;
#endif
}

inline void CommandBufferImpl::setRasterizationOrder(RasterizationOrder order)
{
    commandCommon();

    if(!!(getGrManagerImpl().getExtensions() & VulkanExtensions::AMD_RASTERIZATION_ORDER))
    {
        m_state.setRasterizationOrder(order);
    }
}

inline void CommandBufferImpl::setLineWidth(F32 width)
{
    commandCommon();
    vkCmdSetLineWidth(m_handle, width);

#if ANKI_ENABLE_ASSERTIONS
    m_lineWidthSet = true;
#endif
}

} // end namespace anki