VkCommandBufferFactory.cpp

// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/VkCommandBufferFactory.h>
#include <AnKi/Gr/Vulkan/VkGrManager.h>
#include <AnKi/Util/Tracer.h>
#include <AnKi/Core/StatsSet.h>

namespace anki {

ANKI_SVAR(CommandBufferCount, StatCategory::kGr, "CommandBufferCount", StatFlag::kNone)
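
// Returns the VkCommandBuffer to the pool it was allocated from. When async compute is mapped to a
// low-priority queue, compute command buffers are allocated from the general family's pool (index 0),
// so the pool index is remapped accordingly before freeing.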
MicroCommandBuffer::~MicroCommandBuffer()
{
    m_dsAllocator.destroy();

    if(m_handle)
    {
        const U32 queueFamilyIdx =
            (m_queue == GpuQueueType::kCompute && getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue) ? 0 : U32(m_queue);
        vkFreeCommandBuffers(getVkDevice(), m_threadAlloc->m_pools[queueFamilyIdx], 1, &m_handle);
        m_handle = {};

        g_svarCommandBufferCount.decrement(1_U64);
    }
}
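
// Recycles the command buffer instead of destroying it: the descriptor-set allocator is reset and the
// object is handed back to its owning thread allocator for reuse.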
void MicroCommandBuffer::releaseInternal()
{
    ANKI_TRACE_FUNCTION();

    m_dsAllocator.reset();
    m_threadAlloc->recycleCommandBuffer(this);
}
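
// Creates one VkCommandPool per queue family reported by the GrManager. The RESET_COMMAND_BUFFER flag
// allows individual command buffers to be reset and re-recorded, which the recycling scheme here
// appears to rely on.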
Error CommandBufferThreadAllocator::init()
{
    ConstWeakArray<U32> families = getGrManagerImpl().getQueueFamilies();
    for(U32 i = 0; i < families.getSize(); ++i)
    {
        VkCommandPoolCreateInfo ci = {};
        ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
        ci.queueFamilyIndex = families[i];

        ANKI_VK_CHECK(vkCreateCommandPool(getVkDevice(), &ci, nullptr, &m_pools[i]));
    }

    return Error::kNone;
}
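
// Destroys all cached command buffers held by the recyclers and then the command pools themselves.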
void CommandBufferThreadAllocator::destroy()
{
    for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
    {
        for(GpuQueueType queue : EnumIterable<GpuQueueType>())
        {
            m_recyclers[smallBatch][queue].destroy();
        }
    }

    for(VkCommandPool& pool : m_pools)
    {
        if(pool)
        {
            vkDestroyCommandPool(getVkDevice(), pool, nullptr);
            pool = VK_NULL_HANDLE;
        }
    }
}
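
// Returns a command buffer for the requested kind of work. The flags are first mapped to a
// (queue, pool) pair: general work or disabled async compute uses the general family (pool 0), the
// low-priority-queue mode keeps compute command buffers on the general family's pool, and a dedicated
// compute queue uses pool 1. A recycled MicroCommandBuffer is reused when available; otherwise a new
// primary VkCommandBuffer is allocated.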
Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags, MicroCommandBufferPtr& outPtr)
{
    ANKI_ASSERT(!!(cmdbFlags & CommandBufferFlag::kComputeWork) ^ !!(cmdbFlags & CommandBufferFlag::kGeneralWork));

    const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::kSmallBatch);

    GpuQueueType queue;
    U32 queueFamilyIdx;
    if(!!(cmdbFlags & CommandBufferFlag::kGeneralWork) || getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kDisabled)
    {
        queue = GpuQueueType::kGeneral;
        queueFamilyIdx = 0;
    }
    else if(getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue)
    {
        queue = GpuQueueType::kCompute;
        queueFamilyIdx = 0;
    }
    else
    {
        queue = GpuQueueType::kCompute;
        queueFamilyIdx = 1;
    }

    MicroObjectRecycler<MicroCommandBuffer>& recycler = m_recyclers[smallBatch][queueFamilyIdx];

    MicroCommandBuffer* out = recycler.findToReuse();

    if(out == nullptr) [[unlikely]]
    {
        // Create a new one
        VkCommandBufferAllocateInfo ci = {};
        ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        ci.commandPool = m_pools[queueFamilyIdx];
        ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        ci.commandBufferCount = 1;

        ANKI_TRACE_INC_COUNTER(VkCommandBufferCreate, 1);
        g_svarCommandBufferCount.increment(1_U64);

        VkCommandBuffer cmdb;
        ANKI_VK_CHECK(vkAllocateCommandBuffers(getVkDevice(), &ci, &cmdb));

        MicroCommandBuffer* newCmdb = newInstance<MicroCommandBuffer>(GrMemoryPool::getSingleton(), this);

        newCmdb->m_handle = cmdb;
        newCmdb->m_flags = cmdbFlags;
        newCmdb->m_queue = queue;

        out = newCmdb;
    }

    ANKI_ASSERT(out && out->m_refcount.load() == 0);
    outPtr.reset(out);
    return Error::kNone;
}
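
// Puts a released command buffer back into the recycler that matches its batch size and pool,
// using the same pool-index remap as the destructor.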
void CommandBufferThreadAllocator::recycleCommandBuffer(MicroCommandBuffer* ptr)
{
    ANKI_ASSERT(ptr);

    const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::kSmallBatch);
    const U32 queueFamilyIdx =
        (ptr->m_queue == GpuQueueType::kCompute && getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue)
            ? 0
            : U32(ptr->m_queue);

    m_recyclers[smallBatch][queueFamilyIdx].recycle(ptr);
}
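
// Tears down every per-thread allocator. All recycler caches are trimmed up front, before any
// allocator (and its VkCommandPools) is destroyed.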
void CommandBufferFactory::destroy()
{
    // First trim the caches for all recyclers.
    for(CommandBufferThreadAllocator* talloc : m_threadAllocs)
    {
        for(U32 smallBatch = 0; smallBatch < 2; ++smallBatch)
        {
            for(GpuQueueType queue : EnumIterable<GpuQueueType>())
            {
                talloc->m_recyclers[smallBatch][queue].trimCache();
            }
        }
    }

    for(CommandBufferThreadAllocator* talloc : m_threadAllocs)
    {
        talloc->destroy();
        deleteInstance(GrMemoryPool::getSingleton(), talloc);
    }

    m_threadAllocs.destroy();
}
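
// Looks up (or lazily creates) the calling thread's CommandBufferThreadAllocator and delegates to it.
// The fast path is a binary search over the sorted allocator array under a read lock; the slow path
// retries under the write lock before creating a new allocator, i.e. double-checked locking.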
Error CommandBufferFactory::newCommandBuffer(ThreadId tid, CommandBufferFlag cmdbFlags, MicroCommandBufferPtr& ptr)
{
    CommandBufferThreadAllocator* alloc = nullptr;

    // Get the thread allocator
    {
        class Comp
        {
        public:
            Bool operator()(const CommandBufferThreadAllocator* a, ThreadId tid) const
            {
                return a->m_tid < tid;
            }

            Bool operator()(ThreadId tid, const CommandBufferThreadAllocator* a) const
            {
                return tid < a->m_tid;
            }
        };

        // Find using binary search
        {
            RLockGuard lock(m_threadAllocMtx);
            auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
            alloc = (it != m_threadAllocs.getEnd()) ? (*it) : nullptr;
        }

        if(alloc == nullptr) [[unlikely]]
        {
            WLockGuard lock(m_threadAllocMtx);

            // Check again
            auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
            alloc = (it != m_threadAllocs.getEnd()) ? (*it) : nullptr;

            if(alloc == nullptr)
            {
                alloc = newInstance<CommandBufferThreadAllocator>(GrMemoryPool::getSingleton(), tid);

                m_threadAllocs.resize(m_threadAllocs.getSize() + 1);
                m_threadAllocs[m_threadAllocs.getSize() - 1] = alloc;

                // Sort for fast find
                std::sort(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(),
                          [](const CommandBufferThreadAllocator* a, const CommandBufferThreadAllocator* b) {
                              return a->m_tid < b->m_tid;
                          });

                ANKI_CHECK(alloc->init());
            }
        }
    }

    ANKI_ASSERT(alloc);
    ANKI_ASSERT(alloc->m_tid == tid);

    ANKI_CHECK(alloc->newCommandBuffer(cmdbFlags, ptr));

    return Error::kNone;
}

} // end namespace anki