BsVulkanHardwareBuffer.cpp

//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
#include "BsVulkanHardwareBuffer.h"
#include "BsVulkanRenderAPI.h"
#include "BsVulkanDevice.h"
#include "BsVulkanUtility.h"
#include "BsVulkanCommandBufferManager.h"
#include "BsVulkanCommandBuffer.h"

namespace BansheeEngine
{
    VulkanBuffer::VulkanBuffer(VulkanResourceManager* owner, VkBuffer buffer, VkBufferView view, VkDeviceMemory memory)
        :VulkanResource(owner, false), mBuffer(buffer), mView(view), mMemory(memory)
    {
    }

    VulkanBuffer::~VulkanBuffer()
    {
        VulkanDevice& device = mOwner->getDevice();

        if (mView != VK_NULL_HANDLE)
            vkDestroyBufferView(device.getLogical(), mView, gVulkanAllocator);

        vkDestroyBuffer(device.getLogical(), mBuffer, gVulkanAllocator);
        device.freeMemory(mMemory);
    }

    UINT8* VulkanBuffer::map(VkDeviceSize offset, VkDeviceSize length) const
    {
        VulkanDevice& device = mOwner->getDevice();

        UINT8* data;
        VkResult result = vkMapMemory(device.getLogical(), mMemory, offset, length, 0, (void**)&data);
        assert(result == VK_SUCCESS);

        return data;
    }

    void VulkanBuffer::unmap()
    {
        VulkanDevice& device = mOwner->getDevice();

        vkUnmapMemory(device.getLogical(), mMemory);
    }

    void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset,
        VkDeviceSize dstOffset, VkDeviceSize length)
    {
        VkBufferCopy region;
        region.size = length;
        region.srcOffset = srcOffset;
        region.dstOffset = dstOffset;

        vkCmdCopyBuffer(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), 1, &region);
    }
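
    // A minimal usage sketch for the wrapper above, assuming hypothetical objects "stagingBuffer"
    // (a VulkanBuffer backed by host-visible memory), "gpuBuffer" (a device-local VulkanBuffer) and
    // "transferCB" (a VulkanTransferBuffer from the command buffer manager); none of these names
    // exist in this file:
    //
    //     UINT8* dst = stagingBuffer->map(0, dataSize); // maps the backing VkDeviceMemory
    //     memcpy(dst, srcData, dataSize);               // host write; coherent memory needs no flush
    //     stagingBuffer->unmap();
    //     stagingBuffer->copy(transferCB, gpuBuffer, 0, 0, dataSize); // records vkCmdCopyBuffer
    //
    // Note that copy() only records the command; nothing executes until the transfer command buffer
    // is actually submitted (see the flush() usage further below).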

    VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage,
        UINT32 size, GpuDeviceFlags deviceMask)
        : HardwareBuffer(size), mBuffers(), mStagingBuffer(nullptr), mMappedDeviceIdx(-1), mMappedGlobalQueueIdx(-1)
        , mMappedOffset(0), mMappedSize(0), mMappedLockOptions(GBL_WRITE_ONLY)
        , mDirectlyMappable((usage & GBU_DYNAMIC) != 0)
        , mSupportsGPUWrites(type == BT_STORAGE), mRequiresView(false), mReadable((usage & GBU_READABLE) != 0)
        , mIsMapped(false)
    {
        VkBufferUsageFlags usageFlags = 0;
        switch(type)
        {
        case BT_VERTEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
            break;
        case BT_INDEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
            break;
        case BT_UNIFORM:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        case BT_GENERIC:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        case BT_STORAGE:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        }

        mBufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        mBufferCI.pNext = nullptr;
        mBufferCI.flags = 0;
        mBufferCI.size = size;
        mBufferCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
        mBufferCI.usage = usageFlags;
        mBufferCI.queueFamilyIndexCount = 0;
        mBufferCI.pQueueFamilyIndices = nullptr;

        mViewCI.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
        mViewCI.pNext = nullptr;
        mViewCI.flags = 0;
        mViewCI.format = VulkanUtility::getBufferFormat(format);
        mViewCI.offset = 0;
        mViewCI.range = VK_WHOLE_SIZE;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());

        VulkanDevice* devices[BS_MAX_DEVICES];
        VulkanUtility::getDevices(rapi, deviceMask, devices);

        // Allocate buffers per-device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (devices[i] == nullptr)
                continue;

            mBuffers[i] = createBuffer(*devices[i], false, mReadable);
        }
    }
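
    // Construction sketch (hedged; the higher-level buffer classes normally do this, not user code):
    // a dynamic, GPU-writable buffer created on the default device set. BT_STORAGE selects
    // VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT and therefore also requires a VkBufferView, while
    // GBU_DYNAMIC makes the buffer directly host-mappable. The format value and device flags below
    // are illustrative placeholders.
    //
    //     GpuBufferFormat format = /* element format matching the shader-side view */;
    //     VulkanHardwareBuffer buffer(BT_STORAGE, format, GBU_DYNAMIC, elementCount * elementSize, GDF_DEFAULT);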

    VulkanHardwareBuffer::~VulkanHardwareBuffer()
    {
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            mBuffers[i]->destroy();
        }

        assert(mStagingBuffer == nullptr);
    }

    VulkanBuffer* VulkanHardwareBuffer::createBuffer(VulkanDevice& device, bool staging, bool readable)
    {
        VkBufferUsageFlags usage = mBufferCI.usage;
        if (staging)
        {
            mBufferCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

            // Staging buffers are used as a destination for reads
            if (readable)
                mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        }
        else if(readable) // Non-staging readable
            mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        VkMemoryPropertyFlags flags = (mDirectlyMappable || staging) ?
            (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached memory
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

        VkDevice vkDevice = device.getLogical();

        VkBuffer buffer;
        VkResult result = vkCreateBuffer(vkDevice, &mBufferCI, gVulkanAllocator, &buffer);
        assert(result == VK_SUCCESS);

        VkMemoryRequirements memReqs;
        vkGetBufferMemoryRequirements(vkDevice, buffer, &memReqs);

        VkDeviceMemory memory = device.allocateMemory(memReqs, flags);
        result = vkBindBufferMemory(vkDevice, buffer, memory, 0);
        assert(result == VK_SUCCESS);

        VkBufferView view;
        if (mRequiresView && !staging)
        {
            mViewCI.buffer = buffer;

            result = vkCreateBufferView(vkDevice, &mViewCI, gVulkanAllocator, &view);
            assert(result == VK_SUCCESS);
        }
        else
            view = VK_NULL_HANDLE;

        mBufferCI.usage = usage; // Restore original usage
        return device.getResourceManager().create<VulkanBuffer>(buffer, view, memory);
    }
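
    // The "try using cached memory" note above concerns CPU readbacks: host reads from uncached,
    // write-combined memory are very slow. A common Vulkan pattern (a hedged sketch, not something
    // this file implements) is to prefer HOST_CACHED memory for staging buffers that will be read
    // back on the CPU, falling back to plain HOST_VISIBLE | HOST_COHERENT when no such type exists:
    //
    //     // Preferred properties for a readback staging buffer:
    //     VkMemoryPropertyFlags wanted = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    //         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    //     // Walk vkGetPhysicalDeviceMemoryProperties() and pick a memory type whose propertyFlags
    //     // contain all of "wanted"; if none qualifies, retry without HOST_CACHED. The
    //     // device.allocateMemory() helper used above would need this fallback added to it.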

    void* VulkanHardwareBuffer::map(UINT32 offset, UINT32 length, GpuLockOptions options, UINT32 deviceIdx, UINT32 queueIdx)
    {
        if ((offset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(offset) + ") + length(" + toString(length) + ") "
                "is larger than the buffer " + toString(mSize) + ".");

            return nullptr;
        }

        VulkanBuffer* buffer = mBuffers[deviceIdx];
        if (buffer == nullptr)
            return nullptr;

        mIsMapped = true;
        mMappedDeviceIdx = deviceIdx;
        mMappedGlobalQueueIdx = queueIdx;
        mMappedOffset = offset;
        mMappedSize = length;
        mMappedLockOptions = options;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
        VulkanDevice& device = *rapi._getDevice(deviceIdx);

        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        VkAccessFlags accessFlags;
        if (options == GBL_READ_ONLY)
            accessFlags = VK_ACCESS_HOST_READ_BIT;
        else if (options == GBL_READ_WRITE)
            accessFlags = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
        else
            accessFlags = VK_ACCESS_HOST_WRITE_BIT;

        // If memory is host visible try mapping it directly
        if(mDirectlyMappable)
        {
            // If GPU has the ability to write to the buffer we must issue a pipeline barrier to prevent any memory hazards
            // - Additionally it might be possible the GPU is /currently/ writing to the buffer, in which case we need to
            //   wait for those writes to finish before continuing
            if(mSupportsGPUWrites) // Note: It might be tempting to do this step only if the buffer is currently being
                                   // written to, but that doesn't guarantee memory visibility if it was written to recently
            {
                // First try to avoid the expensive wait operation and barrier
                if(options == GBL_WRITE_ONLY_NO_OVERWRITE) // Caller guarantees he won't touch the same data as the GPU, so just map
                    return buffer->map(offset, length);

                if(options == GBL_WRITE_ONLY_DISCARD) // Caller doesn't care about buffer contents, so just discard the
                {                                     // existing buffer and create a new one
                    buffer->destroy();

                    buffer = createBuffer(device, false, mReadable);
                    mBuffers[deviceIdx] = buffer;

                    return buffer->map(offset, length);
                }

                // Otherwise we need to wait until (potential) GPU write completes, and issue a barrier so:
                //  - If reading: the device makes the written memory available for read (read-after-write hazard)
                //  - If writing: ensures our writes properly overlap with GPU writes (write-after-write hazard)
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

                // Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
                UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
                transferCB->appendMask(writeUseMask);

                // Issue barrier to avoid memory hazards
                transferCB->memoryBarrier(buffer->getHandle(),
                    VK_ACCESS_SHADER_WRITE_BIT,
                    accessFlags,
                    // Last stages that could have written to the buffer:
                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                    VK_PIPELINE_STAGE_HOST_BIT
                );

                // Submit the command buffer and wait until it finishes
                transferCB->flush(true);
                assert(!buffer->isUsed());
            }

            return buffer->map(offset, length);
        }
        else // Otherwise we use a staging buffer
        {
            bool needRead = options == GBL_READ_WRITE || options == GBL_READ_ONLY;

            // Allocate a staging buffer
            mStagingBuffer = createBuffer(device, true, needRead);

            if (needRead) // If reading, we need to copy the current contents of the buffer to the staging buffer
            {
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

                // Similar to above, if buffer supports GPU writes, we need to wait on any potential writes to complete
                if(mSupportsGPUWrites)
                {
                    // Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
                    UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
                    transferCB->appendMask(writeUseMask);
                }

                // Queue copy command
                buffer->copy(transferCB, mStagingBuffer, offset, offset, length);

                // Ensure data written to the staging buffer is visible
                transferCB->memoryBarrier(mStagingBuffer->getHandle(),
                    VK_ACCESS_TRANSFER_WRITE_BIT,
                    accessFlags,
                    VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_PIPELINE_STAGE_HOST_BIT
                );

                // Submit the command buffer and wait until it finishes
                transferCB->flush(true);
                assert(!buffer->isUsed());
            }

            return mStagingBuffer->map(offset, length);
        }
    }
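
    // Caller-side sketch of the three cost tiers map() distinguishes for a directly mappable,
    // GPU-writable buffer (the lock()/unlock() wrappers come from the HardwareBuffer base class, as
    // used by readData()/writeData() below):
    //
    //     // Cheapest: caller promises not to touch data the GPU may be using; direct map, no sync.
    //     void* p = buffer.lock(0, size, GBL_WRITE_ONLY_NO_OVERWRITE, deviceIdx, queueIdx);
    //
    //     // Cheap: old contents are irrelevant; the VkBuffer is discarded and a fresh one is mapped.
    //     void* p = buffer.lock(0, size, GBL_WRITE_ONLY_DISCARD, deviceIdx, queueIdx);
    //
    //     // Expensive: any read of a GPU-writable buffer triggers a host barrier plus flush(true),
    //     // i.e. a full CPU stall until pending GPU work on the buffer completes.
    //     void* p = buffer.lock(0, size, GBL_READ_ONLY, deviceIdx, queueIdx);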

    void VulkanHardwareBuffer::unmap()
    {
        // Possibly map() failed with some error
        if (!mIsMapped)
            return;

        // Note: If we did any writes they need to be made visible to the GPU. However there is no need to execute
        // a pipeline barrier because (as per spec) host writes are implicitly visible to the device.

        if(mDirectlyMappable)
            mBuffers[mMappedDeviceIdx]->unmap();
        else
        {
            bool isWrite = mMappedLockOptions != GBL_READ_ONLY;

            // If the caller wrote anything to the staging buffer, we need to upload it back to the main buffer
            if(isWrite)
            {
                VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
                VulkanDevice& device = *rapi._getDevice(mMappedDeviceIdx);

                VulkanCommandBufferManager& cbManager = gVulkanCBManager();
                GpuQueueType queueType;
                UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(mMappedGlobalQueueIdx, queueType);

                VulkanBuffer* buffer = mBuffers[mMappedDeviceIdx];
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(mMappedDeviceIdx, queueType, localQueueIdx);

                // If the buffer is used in any way on the GPU, we need to wait for that use to finish before
                // we issue our copy
                UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);
                if(useMask != 0) // Buffer is currently used on the GPU
                {
                    // Try to avoid the wait
                    if (mMappedLockOptions == GBL_WRITE_ONLY_NO_OVERWRITE) // Caller guarantees he won't touch the same data as the GPU, so just copy
                    {
                        // Fall through to copy()
                    }
                    else if (mMappedLockOptions == GBL_WRITE_ONLY_DISCARD) // Caller doesn't care about buffer contents, so just discard the
                    {                                                      // existing buffer and create a new one
                        buffer->destroy();

                        buffer = createBuffer(device, false, mReadable);
                        mBuffers[mMappedDeviceIdx] = buffer;
                    }
                    else // Otherwise we have no choice but to issue a dependency between the queues
                        transferCB->appendMask(useMask);
                }

                // Queue copy command
                mStagingBuffer->copy(transferCB, buffer, mMappedOffset, mMappedOffset, mMappedSize);

                // Notify the command buffer that these resources are being used on it
                transferCB->getCB()->registerResource(mStagingBuffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

                // We don't actually flush the transfer buffer here since it's an expensive operation, but it's instead
                // done automatically before next "normal" command buffer submission.
            }

            mStagingBuffer->unmap();
            mStagingBuffer->destroy();
            mStagingBuffer = nullptr;
        }

        mIsMapped = false;
    }
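
    // Note the asymmetry with map(): the staging-to-device copy queued above is not flushed here, it
    // is submitted automatically before the next "normal" command buffer. A typical write therefore
    // looks like this on the caller side (hedged sketch, names hypothetical):
    //
    //     void* p = buffer.lock(0, size, GBL_WRITE_ONLY_DISCARD, deviceIdx, queueIdx);
    //     memcpy(p, newData, size);
    //     buffer.unlock(); // only records the staging -> device copy; no CPU/GPU stall here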

    void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
        UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
    {
        if ((dstOffset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(dstOffset) + ") + length(" + toString(length) + ") "
                "is larger than the destination buffer " + toString(mSize) + ". Copy operation aborted.");

            return;
        }

        if ((srcOffset + length) > srcBuffer.getSize())
        {
            LOGERR("Provided offset(" + toString(srcOffset) + ") + length(" + toString(length) + ") "
                "is larger than the source buffer " + toString(srcBuffer.getSize()) + ". Copy operation aborted.");

            return;
        }

        VulkanHardwareBuffer& vkSource = static_cast<VulkanHardwareBuffer&>(srcBuffer);
        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());

        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        // Perform copy on every device that has both buffers
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            VulkanBuffer* src = vkSource.mBuffers[i];
            VulkanBuffer* dst = mBuffers[i];

            if (src == nullptr || dst == nullptr)
                continue;

            VulkanDevice& device = *rapi._getDevice(i);
            VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(i, queueType, localQueueIdx);

            // If either the source or destination buffer is currently being written to, we need to sync the copy
            // operation so it executes after both are done

            // If destination is being used on the GPU we need to wait until it finishes before writing to it
            UINT32 dstUseMask = dst->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

            // If discard is enabled and destination is used, instead of waiting just discard the existing buffer and make a new one
            if(dstUseMask != 0 && discardWholeBuffer)
            {
                dst->destroy();

                dst = createBuffer(device, false, mReadable);
                mBuffers[i] = dst;

                dstUseMask = 0;
            }

            // If source buffer is being written to on the GPU we need to wait until it finishes, before executing copy
            UINT32 srcUseMask = src->getUseInfo(VulkanUseFlag::Write);

            // Wait if anything is using the buffers
            if(dstUseMask != 0 || srcUseMask != 0)
                transferCB->appendMask(dstUseMask | srcUseMask);

            src->copy(transferCB, dst, srcOffset, dstOffset, length);

            // Notify the command buffer that these resources are being used on it
            transferCB->getCB()->registerResource(src, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
            transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

            // We don't actually flush the transfer buffer here since it's an expensive operation, but it's instead
            // done automatically before next "normal" command buffer submission.
        }
    }
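
    // copyData() records a pure device-side vkCmdCopyBuffer per device, so it is the way to move data
    // between two VulkanHardwareBuffers without ever touching host memory. Hedged caller-side sketch:
    //
    //     dstBuffer.copyData(srcBuffer, 0, 0, srcBuffer.getSize(), true, queueIdx);
    //
    // With discardWholeBuffer = true, a destination still in use on the GPU is replaced by a fresh
    // allocation instead of forcing the transfer queue to wait on the existing use mask.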

    void VulkanHardwareBuffer::readData(UINT32 offset, UINT32 length, void* dest, UINT32 deviceIdx, UINT32 queueIdx)
    {
        void* lockedData = lock(offset, length, GBL_READ_ONLY, deviceIdx, queueIdx);
        memcpy(dest, lockedData, length);
        unlock();
    }

    void VulkanHardwareBuffer::writeData(UINT32 offset, UINT32 length, const void* source, BufferWriteType writeFlags,
        UINT32 queueIdx)
    {
        GpuLockOptions lockOptions = GBL_WRITE_ONLY;
        if (writeFlags == BWT_NO_OVERWRITE)
            lockOptions = GBL_WRITE_ONLY_NO_OVERWRITE;
        else if (writeFlags == BWT_DISCARD)
            lockOptions = GBL_WRITE_ONLY_DISCARD;

        // Write to every device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            void* lockedData = lock(offset, length, lockOptions, i, queueIdx);
            memcpy(lockedData, source, length);
            unlock();
        }
    }
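
    // readData() pulls from a single device, while writeData() broadcasts the same source data to
    // every device that owns a copy of the buffer, mapping each with the lock option derived from
    // BufferWriteType. Hedged example:
    //
    //     float params[4] = { 1.0f, 0.0f, 0.0f, 1.0f };
    //     buffer.writeData(0, sizeof(params), params, BWT_DISCARD, queueIdx); // GBL_WRITE_ONLY_DISCARD per device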
}