// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/VkBuffer.h>
#include <AnKi/Gr/Vulkan/VkGrManager.h>

namespace anki {

Buffer* Buffer::newInstance(const BufferInitInfo& init)
{
    BufferImpl* impl = anki::newInstance<BufferImpl>(GrMemoryPool::getSingleton(), init.getName());
    const Error err = impl->init(init);
    if(err)
    {
        deleteInstance(GrMemoryPool::getSingleton(), impl);
        impl = nullptr;
    }
    return impl;
}

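// A minimal usage sketch of the creation and mapping API (assumes a host-visible staging
// buffer; `data` and `size` are hypothetical caller-provided values):
//
//   BufferInitInfo inf("Staging");
//   inf.m_size = size;
//   inf.m_usage = BufferUsageBit::kCopySource;
//   inf.m_mapAccess = BufferMapAccessBit::kWrite;
//   Buffer* buf = Buffer::newInstance(inf);
//
//   void* ptr = buf->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
//   memcpy(ptr, data, size);
//   buf->flush(0, kMaxPtrSize); // No-op on host-coherent memory
//   buf->unmap();
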
void* Buffer::map(PtrSize offset, PtrSize range, [[maybe_unused]] BufferMapAccessBit access)
{
    ANKI_VK_SELF(BufferImpl);
    ANKI_ASSERT(self.isCreated());
    ANKI_ASSERT(access != BufferMapAccessBit::kNone);
    ANKI_ASSERT((access & m_access) != BufferMapAccessBit::kNone);
    ANKI_ASSERT(!self.m_mapped);
    ANKI_ASSERT(offset < m_size);

    if(range == kMaxPtrSize)
    {
        range = m_size - offset;
    }
    ANKI_ASSERT(offset + range <= m_size);

    void* ptr = GpuMemoryManager::getSingleton().getMappedAddress(self.m_memHandle);
    ANKI_ASSERT(ptr);

#if ANKI_ASSERTIONS_ENABLED
    self.m_mapped = true;
#endif

    return static_cast<void*>(static_cast<U8*>(ptr) + offset);
}

void Buffer::unmap()
{
#if ANKI_ASSERTIONS_ENABLED
    ANKI_VK_SELF(BufferImpl);
    ANKI_ASSERT(self.isCreated());
    ANKI_ASSERT(self.m_mapped);
    self.m_mapped = false;
#endif
}

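// Flushing and invalidation only matter for non-coherent mappings: flush() makes CPU
// writes visible to the GPU, invalidate() makes GPU writes visible to the CPU. When the
// chosen memory type has VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, init() leaves m_needsFlush
// and m_needsInvalidate false and both calls below become no-ops.
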
void Buffer::flush(PtrSize offset, PtrSize range) const
{
    ANKI_VK_SELF_CONST(BufferImpl);
    ANKI_ASSERT(!!(m_access & BufferMapAccessBit::kWrite) && "No need to flush when the CPU doesn't write");

    if(self.m_needsFlush)
    {
        VkMappedMemoryRange vkrange = self.setVkMappedMemoryRange(offset, range);
        ANKI_VK_CHECKF(vkFlushMappedMemoryRanges(getVkDevice(), 1, &vkrange));

#if ANKI_ASSERTIONS_ENABLED
        self.m_flushCount.fetchAdd(1);
#endif
    }
}

void Buffer::invalidate(PtrSize offset, PtrSize range) const
{
    ANKI_VK_SELF_CONST(BufferImpl);
    ANKI_ASSERT(!!(m_access & BufferMapAccessBit::kRead) && "No need to invalidate when the CPU doesn't read");

    if(self.m_needsInvalidate)
    {
        VkMappedMemoryRange vkrange = self.setVkMappedMemoryRange(offset, range);
        ANKI_VK_CHECKF(vkInvalidateMappedMemoryRanges(getVkDevice(), 1, &vkrange));

#if ANKI_ASSERTIONS_ENABLED
        self.m_invalidateCount.fetchAdd(1);
#endif
    }
}

BufferImpl::~BufferImpl()
{
    ANKI_ASSERT(!m_mapped);

#if ANKI_ASSERTIONS_ENABLED
    if(m_needsFlush && m_flushCount.load() == 0)
    {
        ANKI_VK_LOGW("Buffer needed flushing but you never flushed: %s", getName().cstr());
    }

    if(m_needsInvalidate && m_invalidateCount.load() == 0)
    {
        ANKI_VK_LOGW("Buffer needed invalidation but you never invalidated: %s", getName().cstr());
    }
#endif

    for(VkBufferView view : m_views)
    {
        vkDestroyBufferView(getVkDevice(), view, nullptr);
    }

    if(m_handle)
    {
        vkDestroyBuffer(getVkDevice(), m_handle, nullptr);
    }

    if(m_memHandle)
    {
        GpuMemoryManager::getSingleton().freeMemory(m_memHandle);
    }
}

Error BufferImpl::init(const BufferInitInfo& inf)
{
    ANKI_ASSERT(!isCreated());

    const Bool exposeGpuAddress = !!(inf.m_usage & ~BufferUsageBit::kAllCopy);
    PtrSize size = inf.m_size;
    BufferMapAccessBit access = inf.m_mapAccess;
    BufferUsageBit usage = inf.m_usage;
    ANKI_ASSERT(size > 0);
    ANKI_ASSERT(usage != BufferUsageBit::kNone);

    m_mappedMemoryRangeAlignment = getGrManagerImpl().getVulkanCapabilities().m_nonCoherentAtomSize;

    // Align the size to satisfy fill buffer
    alignRoundUp(4, size);

    // Align to satisfy the flush and invalidate
    alignRoundUp(m_mappedMemoryRangeAlignment, size);
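    // Note: rounding the size up to nonCoherentAtomSize means a flush/invalidate range
    // clamped to the end of the buffer can always be expanded to a legal
    // VkMappedMemoryRange, whose size must be a multiple of nonCoherentAtomSize unless
    // it reaches the end of the allocation.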

    // Create the buffer
    VkBufferCreateInfo ci = {};
    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    ci.size = size;
    ci.usage = convertBufferUsageBit(usage);
    if(exposeGpuAddress)
    {
        ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    }
    ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
    ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
    ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;

    ANKI_VK_CHECK(vkCreateBuffer(getVkDevice(), &ci, nullptr, &m_handle));
    getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_OBJECT_TYPE_BUFFER, m_handle);

    // Get mem requirements
    VkMemoryRequirements req;
    vkGetBufferMemoryRequirements(getVkDevice(), m_handle, &req);
    U32 memIdx = kMaxU32;
    const Bool isDiscreteGpu = getGrManagerImpl().getDeviceCapabilities().m_discreteGpu;
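
    // Memory type selection: ask for the most specific combination of properties first
    // and progressively relax until findMemoryType() succeeds. The three branches below
    // cover write-only mapping (uploads), readable mapping (readbacks) and no mapping
    // (device-only) respectively.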
    if(access == BufferMapAccessBit::kWrite)
    {
        // Only write, probably for uploads

        // 1st try: Device & host & coherent but not cached
        VkMemoryPropertyFlags prefer = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
        VkMemoryPropertyFlags avoid = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

        if(isDiscreteGpu)
        {
            if((usage & (~BufferUsageBit::kAllCopy)) != BufferUsageBit::kNone)
            {
                // Will be used for something other than transfer, try to put it in the device
                prefer |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            }
            else
            {
                // Will be used only for transfers, don't want it in the device
                avoid |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            }
        }

        memIdx = GpuMemoryManager::getSingleton().findMemoryType(req.memoryTypeBits, prefer, avoid);

        // 2nd try: host & coherent
        if(memIdx == kMaxU32)
        {
            prefer = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            avoid = 0;

            if(isDiscreteGpu)
            {
                ANKI_VK_LOGW("Using a fallback mode for write-only buffer");

                if((usage & (~BufferUsageBit::kAllCopy)) == BufferUsageBit::kNone)
                {
                    // Will be used only for transfers, don't want it in the device
                    avoid |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
                }
            }

            memIdx = GpuMemoryManager::getSingleton().findMemoryType(req.memoryTypeBits, prefer, avoid);
        }
    }
    else if(!!(access & BufferMapAccessBit::kRead))
    {
        // Read or read/write

        // Cached & coherent
        memIdx = GpuMemoryManager::getSingleton().findMemoryType(
            req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 0);

        // Fallback: Just cached
        if(memIdx == kMaxU32)
        {
            if(isDiscreteGpu)
            {
                ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
            }

            memIdx = GpuMemoryManager::getSingleton().findMemoryType(req.memoryTypeBits,
                                                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 0);
        }
    }
    else
    {
        // Not mapped
        ANKI_ASSERT(access == BufferMapAccessBit::kNone);

        // Device only
        memIdx = GpuMemoryManager::getSingleton().findMemoryType(req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

        // Fallback: Device with anything else
        if(memIdx == kMaxU32)
        {
            memIdx = GpuMemoryManager::getSingleton().findMemoryType(req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
        }
    }

    if(memIdx == kMaxU32)
    {
        ANKI_VK_LOGE("Failed to find appropriate memory type for buffer: %s", getName().cstr());
        return Error::kFunctionFailed;
    }

    const VkPhysicalDeviceMemoryProperties& props = getGrManagerImpl().getMemoryProperties();
    m_memoryFlags = props.memoryTypes[memIdx].propertyFlags;

    if(!!(access & BufferMapAccessBit::kRead) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
    {
        m_needsInvalidate = true;
    }

    if(!!(access & BufferMapAccessBit::kWrite) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
    {
        m_needsFlush = true;
    }

    // Allocate
    const U32 alignment = U32(max(m_mappedMemoryRangeAlignment, req.alignment));
    GpuMemoryManager::getSingleton().allocateMemory(memIdx, req.size, alignment, m_memHandle);

    // Bind mem to buffer
    {
        ANKI_TRACE_SCOPED_EVENT(VkBindObject);
        ANKI_VK_CHECK(vkBindBufferMemory(getVkDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
    }

    // Get GPU buffer address
    if(exposeGpuAddress)
    {
        VkBufferDeviceAddressInfo info = {};
        info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
        info.buffer = m_handle;
        m_gpuAddress = vkGetBufferDeviceAddress(getVkDevice(), &info);

        if(m_gpuAddress == 0)
        {
            ANKI_VK_LOGE("vkGetBufferDeviceAddress() failed");
            return Error::kFunctionFailed;
        }
    }

    m_access = access;
    m_size = inf.m_size;
    m_actualSize = size;
    m_usage = usage;
    return Error::kNone;
}

VkPipelineStageFlags BufferImpl::computePplineStage(BufferUsageBit usage)
{
    const Bool rt = getGrManagerImpl().getDeviceCapabilities().m_rayTracingEnabled;
    VkPipelineStageFlags stageMask = 0;

    if(!!(usage & BufferUsageBit::kAllIndirect))
    {
        stageMask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    }

    if(!!(usage & BufferUsageBit::kVertexOrIndex))
    {
        stageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    }

    if(!!(usage & BufferUsageBit::kAllGeometry))
    {
        stageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
                     | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;

        if(getGrManagerImpl().getDeviceCapabilities().m_meshShaders)
        {
            stageMask |= VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT;
        }
    }

    if(!!(usage & BufferUsageBit::kAllPixel))
    {
        stageMask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }

    if(!!(usage & (BufferUsageBit::kAllCompute & ~BufferUsageBit::kIndirectCompute)))
    {
        stageMask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    }

    if(!!(usage & (BufferUsageBit::kAccelerationStructureBuild | BufferUsageBit::kAccelerationStructureBuildScratch)) && rt)
    {
        stageMask |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
    }

    if(!!(usage & (BufferUsageBit::kAllDispatchRays & ~BufferUsageBit::kIndirectDispatchRays)) && rt)
    {
        stageMask |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
    }

    if(!!(usage & BufferUsageBit::kAllCopy))
    {
        stageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }

    if(!stageMask)
    {
        stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    }

    ANKI_ASSERT(stageMask);
    return stageMask;
}

VkAccessFlags BufferImpl::computeAccessMask(BufferUsageBit usage)
{
    VkAccessFlags mask = 0;

    constexpr BufferUsageBit kShaderRead = BufferUsageBit::kAllShaderResource & BufferUsageBit::kAllRead;
    constexpr BufferUsageBit kShaderWrite = BufferUsageBit::kAllShaderResource & BufferUsageBit::kAllWrite;

    if(!!(usage & BufferUsageBit::kAllConstant))
    {
        mask |= VK_ACCESS_UNIFORM_READ_BIT;
    }

    if(!!(usage & kShaderRead))
    {
        mask |= VK_ACCESS_SHADER_READ_BIT;
    }

    if(!!(usage & kShaderWrite))
    {
        mask |= VK_ACCESS_SHADER_WRITE_BIT;
    }

    if(!!(usage & BufferUsageBit::kVertexOrIndex))
    {
        mask |= VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::kAllIndirect))
    {
        mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::kCopyDestination))
    {
        mask |= VK_ACCESS_TRANSFER_WRITE_BIT;
    }

    if(!!(usage & BufferUsageBit::kCopySource))
    {
        mask |= VK_ACCESS_TRANSFER_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::kAccelerationStructureBuild))
    {
        mask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
    }

    if(!!(usage & BufferUsageBit::kAccelerationStructureBuildScratch))
    {
        mask |= VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
    }

    return mask;
}

VkBufferMemoryBarrier BufferImpl::computeBarrierInfo(BufferUsageBit before, BufferUsageBit after, VkPipelineStageFlags& srcStages,
                                                     VkPipelineStageFlags& dstStages) const
{
    ANKI_ASSERT(usageValid(before) && usageValid(after));
    ANKI_ASSERT(!!after);

    VkBufferMemoryBarrier barrier = {};
    barrier.buffer = m_handle;
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.srcAccessMask = computeAccessMask(before);
    barrier.dstAccessMask = computeAccessMask(after);
    barrier.offset = 0;
    barrier.size = VK_WHOLE_SIZE; // Cover the whole buffer; callers don't care about the exact range

    srcStages |= computePplineStage(before);
    dstStages |= computePplineStage(after);

    return barrier;
}

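// A sketch of how the barrier is typically consumed (the actual call site lives in the
// command buffer code; `cmdb`, `beforeUsage` and `afterUsage` are hypothetical):
//
//   VkPipelineStageFlags srcStages = 0, dstStages = 0;
//   const VkBufferMemoryBarrier barrier = impl.computeBarrierInfo(beforeUsage, afterUsage, srcStages, dstStages);
//   vkCmdPipelineBarrier(cmdb, srcStages, dstStages, 0, 0, nullptr, 1, &barrier, 0, nullptr);
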
VkBufferView BufferImpl::getOrCreateBufferView(Format fmt, PtrSize offset, PtrSize range) const
{
    if(range == kMaxPtrSize)
    {
        ANKI_ASSERT(m_size >= offset);
        range = m_size - offset;
        range = getAlignedRoundDown(getFormatInfo(fmt).m_texelSize, range);
    }

    // Checks
    ANKI_ASSERT(offset + range <= m_size);
    ANKI_ASSERT(isAligned(getGrManagerImpl().getDeviceCapabilities().m_texelBufferBindOffsetAlignment, offset) && "Offset not aligned");
    ANKI_ASSERT((range % getFormatInfo(fmt).m_texelSize) == 0 && "Range is not a multiple of the texel size");

    [[maybe_unused]] const PtrSize elementCount = range / getFormatInfo(fmt).m_texelSize;
    ANKI_ASSERT(elementCount <= getGrManagerImpl().getVulkanCapabilities().m_maxTexelBufferElements);

    // Hash
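    // The struct is packed so that computeHash() over sizeof(toHash) sees no padding
    // bytes; uninitialized padding would make identical keys hash differently.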
    ANKI_BEGIN_PACKED_STRUCT
    class HashData
    {
    public:
        PtrSize m_offset;
        PtrSize m_range;
        Format m_fmt;
    } toHash;
    ANKI_END_PACKED_STRUCT

    toHash.m_fmt = fmt;
    toHash.m_offset = offset;
    toHash.m_range = range;
    const U64 hash = computeHash(&toHash, sizeof(toHash));
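
    // Double-checked locking: take the cheap read lock for the common case where the
    // view already exists, and only fall back to the write lock, re-checking under it,
    // when a new view may have to be created.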
    // Check if exists
    {
        RLockGuard lock(m_viewsMtx);
        auto it = m_views.find(hash);
        if(it != m_views.getEnd())
        {
            return *it;
        }
    }

    WLockGuard lock(m_viewsMtx);

    // Check again
    auto it = m_views.find(hash);
    if(it != m_views.getEnd())
    {
        return *it;
    }

    // Doesn't exist, need to create it
    VkBufferViewCreateInfo viewCreateInfo = {};
    viewCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
    viewCreateInfo.buffer = m_handle;
    viewCreateInfo.format = convertFormat(fmt);
    viewCreateInfo.offset = offset;
    viewCreateInfo.range = range;

    VkBufferView view;
    ANKI_VK_CHECKF(vkCreateBufferView(getVkDevice(), &viewCreateInfo, nullptr, &view));

    m_views.emplace(hash, view);

    return view;
}

} // end namespace anki
  407. } // end namespace anki