// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/BufferImpl.h>
#include <AnKi/Gr/Vulkan/GrManagerImpl.h>

namespace anki {

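// The Vulkan handles can't be destroyed immediately because the GPU may still be using the buffer, so they are
// handed to the frame garbage collector instead.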
BufferImpl::~BufferImpl()
{
    ANKI_ASSERT(!m_mapped);

    BufferGarbage* garbage = getAllocator().newInstance<BufferGarbage>();
    garbage->m_bufferHandle = m_handle;
    garbage->m_memoryHandle = m_memHandle;
    getGrManagerImpl().getFrameGarbageCollector().newBufferGarbage(garbage);

#if ANKI_EXTRA_CHECKS
    if(m_needsFlush && m_flushCount.load() == 0)
    {
        ANKI_VK_LOGW("Buffer needed flushing but you never flushed: %s", getName().cstr());
    }

    if(m_needsInvalidate && m_invalidateCount.load() == 0)
    {
        ANKI_VK_LOGW("Buffer needed invalidation but you never invalidated: %s", getName().cstr());
    }
#endif
}

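// Creates the VkBuffer, picks a compatible memory type based on map access and usage, allocates and binds the
// memory and, when supported, queries the buffer's GPU device address.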
Error BufferImpl::init(const BufferInitInfo& inf)
{
    ANKI_ASSERT(!isCreated());

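    // Only expose a GPU device address if the extension is present and the buffer is used for more than transfers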
    const Bool exposeGpuAddress = !!(getGrManagerImpl().getExtensions() & VulkanExtensions::KHR_BUFFER_DEVICE_ADDRESS)
                                  && !!(inf.m_usage & ~BufferUsageBit::ALL_TRANSFER);

    PtrSize size = inf.m_size;
    BufferMapAccessBit access = inf.m_mapAccess;
    BufferUsageBit usage = inf.m_usage;
    ANKI_ASSERT(size > 0);
    ANKI_ASSERT(usage != BufferUsageBit::NONE);

    m_mappedMemoryRangeAlignment = getGrManagerImpl().getPhysicalDeviceProperties().limits.nonCoherentAtomSize;

    // Align the size to satisfy fill buffer
    alignRoundUp(4, size);

    // Align to satisfy the flush and invalidate
    alignRoundUp(m_mappedMemoryRangeAlignment, size);

    // Create the buffer
    VkBufferCreateInfo ci = {};
    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    ci.size = size;
    ci.usage = convertBufferUsageBit(usage);
    if(exposeGpuAddress)
    {
        ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR;
    }
    ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
    ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
    ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;

    ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
    getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, m_handle);

    // Get mem requirements
    VkMemoryRequirements req;
    vkGetBufferMemoryRequirements(getDevice(), m_handle, &req);
    U32 memIdx = MAX_U32;
    const Bool isDiscreteGpu = getGrManagerImpl().getDeviceCapabilities().m_discreteGpu;

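    // Choose a memory type based on how the buffer will be mapped:
    // - write-only: host-visible and coherent, device-local if the buffer does more than transfers
    // - readable: host-visible and cached, preferably coherent
    // - not mapped: device-local only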
    if(access == BufferMapAccessBit::WRITE)
    {
        // Only write, probably for uploads

        // 1st try: Device & host & coherent but not cached
        VkMemoryPropertyFlags prefer = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
        VkMemoryPropertyFlags avoid = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

        if(isDiscreteGpu)
        {
            if((usage & (~BufferUsageBit::ALL_TRANSFER)) != BufferUsageBit::NONE)
            {
                // Will be used for something other than transfer, try to put it in the device
                prefer |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            }
            else
            {
                // Will be used only for transfers, don't want it in the device
                avoid |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            }
        }

        memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits, prefer, avoid);

        // 2nd try: host & coherent
        if(memIdx == MAX_U32)
        {
            prefer = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            avoid = 0;

            if(isDiscreteGpu)
            {
                ANKI_VK_LOGW("Using a fallback mode for write-only buffer");

                if((usage & (~BufferUsageBit::ALL_TRANSFER)) == BufferUsageBit::NONE)
                {
                    // Will be used only for transfers, don't want it in the device
                    avoid |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
                }
            }

            memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits, prefer, avoid);
        }
    }
    else if(!!(access & BufferMapAccessBit::READ))
    {
        // Read or read/write

        // Cached & coherent
        memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
                                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
                                                                             | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
                                                                             | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                                                                         0);

        // Fallback: Just cached
        if(memIdx == MAX_U32)
        {
            if(isDiscreteGpu)
            {
                ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
            }

            memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
                req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 0);
        }
    }
    else
    {
        // Not mapped
        ANKI_ASSERT(access == BufferMapAccessBit::NONE);

        // Device only
        memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
            req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

        // Fallback: Device with anything else
        if(memIdx == MAX_U32)
        {
            memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
                                                                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
        }
    }

    if(memIdx == MAX_U32)
    {
        ANKI_VK_LOGE("Failed to find appropriate memory type for buffer: %s", getName().cstr());
        return Error::FUNCTION_FAILED;
    }

    const VkPhysicalDeviceMemoryProperties& props = getGrManagerImpl().getMemoryProperties();
    m_memoryFlags = props.memoryTypes[memIdx].propertyFlags;

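    // Non-coherent host-visible memory needs explicit invalidations before CPU reads and flushes after CPU writes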
    if(!!(access & BufferMapAccessBit::READ) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
    {
        m_needsInvalidate = true;
    }

    if(!!(access & BufferMapAccessBit::WRITE) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
    {
        m_needsFlush = true;
    }

    // Allocate
    const U32 alignment = U32(max(m_mappedMemoryRangeAlignment, req.alignment));
    getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, alignment, true, m_memHandle);

    // Bind mem to buffer
    {
        ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
        ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
    }

    // Get GPU buffer address
    if(exposeGpuAddress)
    {
        VkBufferDeviceAddressInfoKHR info = {};
        info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR;
        info.buffer = m_handle;
        m_gpuAddress = vkGetBufferDeviceAddressKHR(getDevice(), &info);

        if(m_gpuAddress == 0)
        {
            ANKI_VK_LOGE("vkGetBufferDeviceAddressKHR() failed");
            return Error::FUNCTION_FAILED;
        }
    }

    m_access = access;
    m_size = inf.m_size;
    m_actualSize = size;
    m_usage = usage;
    return Error::NONE;
}

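// Returns a CPU pointer into the buffer's persistently mapped allocation. A range of MAX_PTR_SIZE maps everything
// from the offset to the end of the buffer.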
void* BufferImpl::map(PtrSize offset, PtrSize range, BufferMapAccessBit access)
{
    ANKI_ASSERT(isCreated());
    ANKI_ASSERT(access != BufferMapAccessBit::NONE);
    ANKI_ASSERT((access & m_access) != BufferMapAccessBit::NONE);
    ANKI_ASSERT(!m_mapped);
    ANKI_ASSERT(offset < m_size);

    if(range == MAX_PTR_SIZE)
    {
        range = m_size - offset;
    }
    ANKI_ASSERT(offset + range <= m_size);

    void* ptr = getGrManagerImpl().getGpuMemoryManager().getMappedAddress(m_memHandle);
    ANKI_ASSERT(ptr);

#if ANKI_EXTRA_CHECKS
    m_mapped = true;
#endif

    return static_cast<void*>(static_cast<U8*>(ptr) + offset);
}

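// Maps a BufferUsageBit combination to the Vulkan pipeline stages that may access the buffer.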
VkPipelineStageFlags BufferImpl::computePplineStage(BufferUsageBit usage)
{
    VkPipelineStageFlags stageMask = 0;

    if(!!(usage & BufferUsageBit::ALL_INDIRECT))
    {
        stageMask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    }

    if(!!(usage & (BufferUsageBit::INDEX | BufferUsageBit::VERTEX)))
    {
        stageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    }

    if(!!(usage & BufferUsageBit::ALL_GEOMETRY))
    {
        stageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
                     | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
    }

    if(!!(usage & BufferUsageBit::ALL_FRAGMENT))
    {
        stageMask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }

    if(!!(usage & (BufferUsageBit::ALL_COMPUTE & ~BufferUsageBit::INDIRECT_COMPUTE)))
    {
        stageMask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    }

    if(!!(usage & BufferUsageBit::ACCELERATION_STRUCTURE_BUILD))
    {
        stageMask |= VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
    }

    if(!!(usage & (BufferUsageBit::ALL_TRACE_RAYS & ~BufferUsageBit::INDIRECT_TRACE_RAYS)))
    {
        stageMask |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
    }

    if(!!(usage & BufferUsageBit::ALL_TRANSFER))
    {
        stageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }

    if(!stageMask)
    {
        stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    }

    ANKI_ASSERT(stageMask);
    return stageMask;
}

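// Maps a BufferUsageBit combination to the Vulkan access flags used in buffer barriers.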
VkAccessFlags BufferImpl::computeAccessMask(BufferUsageBit usage)
{
    VkAccessFlags mask = 0;

    constexpr BufferUsageBit SHADER_READ =
        BufferUsageBit::STORAGE_GEOMETRY_READ | BufferUsageBit::STORAGE_FRAGMENT_READ
        | BufferUsageBit::STORAGE_COMPUTE_READ | BufferUsageBit::STORAGE_TRACE_RAYS_READ
        | BufferUsageBit::TEXTURE_GEOMETRY_READ | BufferUsageBit::TEXTURE_FRAGMENT_READ
        | BufferUsageBit::TEXTURE_COMPUTE_READ | BufferUsageBit::TEXTURE_TRACE_RAYS_READ;

    constexpr BufferUsageBit SHADER_WRITE =
        BufferUsageBit::STORAGE_GEOMETRY_WRITE | BufferUsageBit::STORAGE_FRAGMENT_WRITE
        | BufferUsageBit::STORAGE_COMPUTE_WRITE | BufferUsageBit::STORAGE_TRACE_RAYS_WRITE
        | BufferUsageBit::TEXTURE_GEOMETRY_WRITE | BufferUsageBit::TEXTURE_FRAGMENT_WRITE
        | BufferUsageBit::TEXTURE_COMPUTE_WRITE | BufferUsageBit::TEXTURE_TRACE_RAYS_WRITE;

    if(!!(usage & BufferUsageBit::ALL_UNIFORM))
    {
        mask |= VK_ACCESS_UNIFORM_READ_BIT;
    }

    if(!!(usage & SHADER_READ))
    {
        mask |= VK_ACCESS_SHADER_READ_BIT;
    }

    if(!!(usage & SHADER_WRITE))
    {
        mask |= VK_ACCESS_SHADER_WRITE_BIT;
    }

    if(!!(usage & BufferUsageBit::INDEX))
    {
        mask |= VK_ACCESS_INDEX_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::VERTEX))
    {
        mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::ALL_INDIRECT))
    {
        mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::TRANSFER_DESTINATION))
    {
        mask |= VK_ACCESS_TRANSFER_WRITE_BIT;
    }

    if(!!(usage & BufferUsageBit::TRANSFER_SOURCE))
    {
        mask |= VK_ACCESS_TRANSFER_READ_BIT;
    }

    if(!!(usage & BufferUsageBit::ACCELERATION_STRUCTURE_BUILD))
    {
        mask |= VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
    }

    return mask;
}

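// Computes the source and destination stage/access masks for a barrier that transitions the buffer from the
// "before" usage to the "after" usage.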
void BufferImpl::computeBarrierInfo(BufferUsageBit before, BufferUsageBit after, VkPipelineStageFlags& srcStages,
                                    VkAccessFlags& srcAccesses, VkPipelineStageFlags& dstStages,
                                    VkAccessFlags& dstAccesses) const
{
    ANKI_ASSERT(usageValid(before) && usageValid(after));
    ANKI_ASSERT(!!after);

    srcStages = computePplineStage(before);
    dstStages = computePplineStage(after);
    srcAccesses = computeAccessMask(before);
    dstAccesses = computeAccessMask(after);
}

} // end namespace anki