BsVulkanHardwareBuffer.cpp

//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
#include "BsVulkanHardwareBuffer.h"
#include "BsVulkanRenderAPI.h"
#include "BsVulkanDevice.h"
#include "BsVulkanUtility.h"
#include "BsVulkanCommandBufferManager.h"
#include "BsVulkanCommandBuffer.h"
#include "BsVulkanTexture.h"

namespace bs
{
    VulkanBuffer::VulkanBuffer(VulkanResourceManager* owner, VkBuffer buffer, VkBufferView view, VkDeviceMemory memory,
        UINT32 rowPitch, UINT32 slicePitch)
        : VulkanResource(owner, false), mBuffer(buffer), mView(view), mMemory(memory), mRowPitch(rowPitch)
        , mSliceHeight(rowPitch != 0 ? slicePitch / rowPitch : 0) // Guard against division by zero when no pitch is provided
    {
    }
    VulkanBuffer::~VulkanBuffer()
    {
        VulkanDevice& device = mOwner->getDevice();

        if (mView != VK_NULL_HANDLE)
            vkDestroyBufferView(device.getLogical(), mView, gVulkanAllocator);

        vkDestroyBuffer(device.getLogical(), mBuffer, gVulkanAllocator);
        device.freeMemory(mMemory);
    }

    UINT8* VulkanBuffer::map(VkDeviceSize offset, VkDeviceSize length) const
    {
        VulkanDevice& device = mOwner->getDevice();

        UINT8* data;
        VkResult result = vkMapMemory(device.getLogical(), mMemory, offset, length, 0, (void**)&data);
        assert(result == VK_SUCCESS);

        return data;
    }

    void VulkanBuffer::unmap()
    {
        VulkanDevice& device = mOwner->getDevice();

        vkUnmapMemory(device.getLogical(), mMemory);
    }

    void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset,
        VkDeviceSize dstOffset, VkDeviceSize length)
    {
        VkBufferCopy region;
        region.size = length;
        region.srcOffset = srcOffset;
        region.dstOffset = dstOffset;

        vkCmdCopyBuffer(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), 1, &region);
    }
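
    // Note: VkBufferImageCopy expects bufferRowLength/bufferImageHeight in texels, with 0 meaning the data is tightly
    // packed according to imageExtent; the values used below come from the row/slice pitch supplied at construction.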
    void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanImage* destination, const VkExtent3D& extent,
        const VkImageSubresourceLayers& range, VkImageLayout layout)
    {
        VkBufferImageCopy region;
        region.bufferRowLength = mRowPitch;
        region.bufferImageHeight = mSliceHeight;
        region.bufferOffset = 0;
        region.imageOffset.x = 0;
        region.imageOffset.y = 0;
        region.imageOffset.z = 0;
        region.imageExtent = extent;
        region.imageSubresource = range;

        vkCmdCopyBufferToImage(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), layout, 1, &region);
    }
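
    // Note: vkCmdUpdateBuffer requires the update size to be at most 65536 bytes and both offset and size to be
    // multiples of 4, which is why this path is only used for small, aligned writes
    // (see VulkanHardwareBuffer::map()/unmap()).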
    void VulkanBuffer::update(VulkanTransferBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length)
    {
        vkCmdUpdateBuffer(cb->getCB()->getHandle(), mBuffer, offset, length, (uint32_t*)data);
    }
    VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage,
        UINT32 size, GpuDeviceFlags deviceMask)
        : HardwareBuffer(size), mBuffers(), mStagingBuffer(nullptr), mStagingMemory(nullptr), mMappedDeviceIdx(-1)
        , mMappedGlobalQueueIdx(-1), mMappedOffset(0), mMappedSize(0), mMappedLockOptions(GBL_WRITE_ONLY)
        , mDirectlyMappable((usage & GBU_DYNAMIC) != 0), mSupportsGPUWrites(type == BT_STORAGE), mRequiresView(false)
        , mIsMapped(false)
    {
        VkBufferUsageFlags usageFlags = 0;
        switch(type)
        {
        case BT_VERTEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
            break;
        case BT_INDEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
            break;
        case BT_UNIFORM:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        case BT_GENERIC:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        case BT_STORAGE:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        }

        mBufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        mBufferCI.pNext = nullptr;
        mBufferCI.flags = 0;
        mBufferCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
        mBufferCI.usage = usageFlags;
        mBufferCI.queueFamilyIndexCount = 0;
        mBufferCI.pQueueFamilyIndices = nullptr;

        mViewCI.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
        mViewCI.pNext = nullptr;
        mViewCI.flags = 0;
        mViewCI.format = VulkanUtility::getBufferFormat(format);
        mViewCI.offset = 0;
        mViewCI.range = VK_WHOLE_SIZE;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
        VulkanDevice* devices[BS_MAX_DEVICES];
        VulkanUtility::getDevices(rapi, deviceMask, devices);

        // Allocate buffers per-device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (devices[i] == nullptr)
                continue;

            mBuffers[i] = createBuffer(*devices[i], size, false, true);
        }
    }
    VulkanHardwareBuffer::~VulkanHardwareBuffer()
    {
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            mBuffers[i]->destroy();
        }

        assert(mStagingBuffer == nullptr);
    }
    VulkanBuffer* VulkanHardwareBuffer::createBuffer(VulkanDevice& device, UINT32 size, bool staging, bool readable)
    {
        VkBufferUsageFlags usage = mBufferCI.usage;
        if (staging)
        {
            mBufferCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

            // Staging buffers are used as a destination for reads
            if (readable)
                mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        }
        else if (readable) // Non-staging, readable
            mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        mBufferCI.size = size;

        VkMemoryPropertyFlags flags = (mDirectlyMappable || staging) ?
            (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached memory
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

        VkDevice vkDevice = device.getLogical();

        VkBuffer buffer;
        VkResult result = vkCreateBuffer(vkDevice, &mBufferCI, gVulkanAllocator, &buffer);
        assert(result == VK_SUCCESS);

        VkMemoryRequirements memReqs;
        vkGetBufferMemoryRequirements(vkDevice, buffer, &memReqs);

        VkDeviceMemory memory = device.allocateMemory(memReqs, flags);
        result = vkBindBufferMemory(vkDevice, buffer, memory, 0);
        assert(result == VK_SUCCESS);

        VkBufferView view;
        if (mRequiresView && !staging)
        {
            mViewCI.buffer = buffer;

            result = vkCreateBufferView(vkDevice, &mViewCI, gVulkanAllocator, &view);
            assert(result == VK_SUCCESS);
        }
        else
            view = VK_NULL_HANDLE;

        mBufferCI.usage = usage; // Restore original usage
        return device.getResourceManager().create<VulkanBuffer>(buffer, view, memory);
    }
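
    // Note: Depending on the lock options and current GPU usage, map() resolves to one of three strategies: mapping the
    // host-visible buffer directly, handing out a small CPU-side staging allocation that unmap() uploads with
    // vkCmdUpdateBuffer, or creating a full staging buffer that unmap() copies back onto the device buffer.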
    void* VulkanHardwareBuffer::map(UINT32 offset, UINT32 length, GpuLockOptions options, UINT32 deviceIdx, UINT32 queueIdx)
    {
        if ((offset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(offset) + ") + length(" + toString(length) + ") "
                "is larger than the buffer " + toString(mSize) + ".");

            return nullptr;
        }

        VulkanBuffer* buffer = mBuffers[deviceIdx];

        if (buffer == nullptr)
            return nullptr;

        mIsMapped = true;
        mMappedDeviceIdx = deviceIdx;
        mMappedGlobalQueueIdx = queueIdx;
        mMappedOffset = offset;
        mMappedSize = length;
        mMappedLockOptions = options;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
        VulkanDevice& device = *rapi._getDevice(deviceIdx);

        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        VkAccessFlags accessFlags;
        if (options == GBL_READ_ONLY)
            accessFlags = VK_ACCESS_HOST_READ_BIT;
        else if (options == GBL_READ_WRITE)
            accessFlags = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
        else
            accessFlags = VK_ACCESS_HOST_WRITE_BIT;
        // If the memory is host visible, try mapping it directly
        if (mDirectlyMappable)
        {
            // Check if the GPU is currently reading from or writing to the buffer
            UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

            // Note: Even if the GPU isn't currently using the buffer, we still treat it as in use if it supports GPU
            // writes, because a write could have completed without yet being visible to the host, in which case we need
            // to issue the pipeline barrier below.
            bool isUsedOnGPU = useMask != 0 || mSupportsGPUWrites;

            // We're safe to map directly since the GPU isn't using the buffer
            if (!isUsedOnGPU)
            {
                // If some CB has an operation queued that will be using the current contents of the buffer, create a new
                // buffer so we don't modify the previous use of the buffer
                if (buffer->isBound())
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Copy contents of the current buffer to the new one, unless the caller explicitly states they don't
                    // care about the current contents
                    if (options != GBL_WRITE_ONLY_DISCARD)
                    {
                        UINT8* src = buffer->map(offset, length);
                        UINT8* dst = newBuffer->map(offset, length);

                        memcpy(dst, src, length);

                        buffer->unmap();
                        newBuffer->unmap();
                    }

                    buffer->destroy();
                    buffer = newBuffer;
                    mBuffers[deviceIdx] = buffer;
                }

                return buffer->map(offset, length);
            }

            // The caller guarantees they won't touch the same data as the GPU, so map even though the GPU is using the buffer
            if (options == GBL_WRITE_ONLY_NO_OVERWRITE)
                return buffer->map(offset, length);

            // The caller doesn't care about the buffer contents, so just discard the existing buffer and create a new one
            if (options == GBL_WRITE_ONLY_DISCARD)
            {
                buffer->destroy();

                buffer = createBuffer(device, mSize, false, true);
                mBuffers[deviceIdx] = buffer;

                return buffer->map(offset, length);
            }

            // No GPU writes are supported and we're only reading, so there is no need to wait on anything
            if (options == GBL_READ_ONLY && !mSupportsGPUWrites)
                return buffer->map(offset, length);

            // We need to read the buffer contents while GPU writes are potentially enabled
            if (options == GBL_READ_ONLY || options == GBL_READ_WRITE)
            {
                // We need to wait until any (potential) GPU read/write completes
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

                // Ensure flush() will wait for all queues currently using the buffer (if any) to finish.
                // If only reading, wait for all writes to complete; otherwise wait on both writes and reads.
                if (options == GBL_READ_ONLY)
                    useMask = buffer->getUseInfo(VulkanUseFlag::Write);
                else
                    useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

                transferCB->appendMask(useMask);

                // Make any writes visible before mapping
                if (mSupportsGPUWrites)
                {
                    // Issue a barrier so:
                    //  - If reading: the device makes the written memory available for read (read-after-write hazard)
                    //  - If writing: our writes properly overlap with GPU writes (write-after-write hazard)
                    transferCB->memoryBarrier(buffer->getHandle(),
                        VK_ACCESS_SHADER_WRITE_BIT,
                        accessFlags,
                        // Last stages that could have written to the buffer:
                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_HOST_BIT
                    );
                }

                // Submit the command buffer and wait until it finishes
                transferCB->flush(true);

                // If some CB has an operation queued that will be using the current contents of the buffer, create a new
                // buffer so we don't modify the previous use of the buffer
                if (buffer->isBound())
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Copy contents of the current buffer to the new one
                    UINT8* src = buffer->map(offset, length);
                    UINT8* dst = newBuffer->map(offset, length);

                    memcpy(dst, src, length);

                    buffer->unmap();
                    newBuffer->unmap();

                    buffer->destroy();
                    buffer = newBuffer;
                    mBuffers[deviceIdx] = buffer;
                }

                return buffer->map(offset, length);
            }

            // Otherwise, we're doing a write-only operation, in which case it's best to use the staging buffer to avoid
            // waiting and blocking, so fall through
        }
        // Can't use direct mapping, so use a staging buffer or staging memory

        // We might need to copy the current contents of the buffer to the staging buffer. Even if the user doesn't plan
        // on reading, this is still required because we will eventually copy all of the contents back to the original
        // buffer, and we can't write potentially uninitialized data. The only exception is when the caller specifies the
        // buffer contents should be discarded, in which case they guarantee they will overwrite the entire locked area
        // with their own contents.
        bool needRead = options != GBL_WRITE_ONLY_DISCARD_RANGE && options != GBL_WRITE_ONLY_DISCARD;

        // See if we can use the cheaper staging memory, rather than a staging buffer (the offset/size limits match those
        // of vkCmdUpdateBuffer, which unmap() uses to upload this memory)
        if (!needRead && offset % 4 == 0 && length % 4 == 0 && length <= 65536)
        {
            mStagingMemory = (UINT8*)bs_alloc(length);
            return mStagingMemory;
        }

        // Create a staging buffer
        mStagingBuffer = createBuffer(device, length, true, needRead);

        if (needRead)
        {
            VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

            // Similar to above, if the buffer supports GPU writes, we need to wait on any potential writes to complete
            if (mSupportsGPUWrites)
            {
                // Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
                UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
                transferCB->appendMask(writeUseMask);
            }

            // Queue the copy command
            buffer->copy(transferCB, mStagingBuffer, offset, 0, length);

            // Ensure the data written to the staging buffer is visible to the host
            transferCB->memoryBarrier(mStagingBuffer->getHandle(),
                VK_ACCESS_TRANSFER_WRITE_BIT,
                accessFlags,
                VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_PIPELINE_STAGE_HOST_BIT
            );

            // Submit the command buffer and wait until it finishes
            transferCB->flush(true);

            assert(!buffer->isUsed());
        }

        return mStagingBuffer->map(0, length);
    }
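
    // Note: unmap() either releases the direct mapping, or (for the staging paths) queues an upload of the written data
    // back onto the device buffer via a transfer command buffer.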
    void VulkanHardwareBuffer::unmap()
    {
        // Possibly map() failed with some error
        if (!mIsMapped)
            return;

        // Note: If we did any writes they need to be made visible to the GPU. However there is no need to execute
        // a pipeline barrier because (as per spec) host writes are implicitly visible to the device.

        if (mStagingMemory == nullptr && mStagingBuffer == nullptr) // We directly mapped the buffer
        {
            mBuffers[mMappedDeviceIdx]->unmap();
        }
        else
        {
            if (mStagingBuffer != nullptr)
                mStagingBuffer->unmap();

            bool isWrite = mMappedLockOptions != GBL_READ_ONLY;

            // If the caller wrote anything to the staging buffer, we need to upload it back to the main buffer
            if (isWrite)
            {
                VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
                VulkanDevice& device = *rapi._getDevice(mMappedDeviceIdx);

                VulkanCommandBufferManager& cbManager = gVulkanCBManager();
                GpuQueueType queueType;
                UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(mMappedGlobalQueueIdx, queueType);

                VulkanBuffer* buffer = mBuffers[mMappedDeviceIdx];
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(mMappedDeviceIdx, queueType, localQueueIdx);

                // If the buffer is used in any way on the GPU, we need to wait for that use to finish before
                // we issue our copy
                UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);
                bool isNormalWrite = false;
                if (useMask != 0) // Buffer is currently used on the GPU
                {
                    // Try to avoid the wait by checking for special write conditions

                    // The caller guarantees they won't touch the same data as the GPU, so just copy
                    if (mMappedLockOptions == GBL_WRITE_ONLY_NO_OVERWRITE)
                    {
                        // Fall through to the copy below
                    }
                    // The caller doesn't care about the buffer contents, so just discard the existing buffer and create a new one
                    else if (mMappedLockOptions == GBL_WRITE_ONLY_DISCARD)
                    {
                        buffer->destroy();

                        buffer = createBuffer(device, mSize, false, true);
                        mBuffers[mMappedDeviceIdx] = buffer;
                    }
                    else // Otherwise we have no choice but to issue a dependency between the queues
                    {
                        transferCB->appendMask(useMask);
                        isNormalWrite = true;
                    }
                }

                // Check if the buffer will still be bound somewhere after the CBs using it finish
                if (isNormalWrite)
                {
                    UINT32 useCount = buffer->getUseCount();
                    UINT32 boundCount = buffer->getBoundCount();

                    bool isBoundWithoutUse = boundCount > useCount;

                    // If the buffer is queued for some operation on a CB, then we need to make a copy of the buffer to
                    // avoid modifying its use in the previous operation
                    if (isBoundWithoutUse)
                    {
                        VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                        // Avoid copying the original contents if the staging buffer completely covers them
                        if (mMappedOffset > 0 || mMappedSize != mSize)
                        {
                            buffer->copy(transferCB, newBuffer, 0, 0, mSize);

                            transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                        }

                        buffer->destroy();
                        buffer = newBuffer;
                        mBuffers[mMappedDeviceIdx] = buffer;
                    }
                }

                // Queue the copy/update command
                if (mStagingBuffer != nullptr)
                {
                    mStagingBuffer->copy(transferCB, buffer, 0, mMappedOffset, mMappedSize);
                    transferCB->getCB()->registerResource(mStagingBuffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                }
                else // Staging memory
                {
                    buffer->update(transferCB, mStagingMemory, mMappedOffset, mMappedSize);
                }

                transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

                // We don't actually flush the transfer buffer here since that's an expensive operation; it is instead
                // done automatically before the next "normal" command buffer submission.
            }

            if (mStagingBuffer != nullptr)
            {
                mStagingBuffer->destroy();
                mStagingBuffer = nullptr;
            }

            if (mStagingMemory != nullptr)
            {
                bs_free(mStagingMemory);
                mStagingMemory = nullptr;
            }
        }

        mIsMapped = false;
    }
    void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
        UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
    {
        if ((dstOffset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(dstOffset) + ") + length(" + toString(length) + ") "
                "is larger than the destination buffer " + toString(mSize) + ". Copy operation aborted.");

            return;
        }

        if ((srcOffset + length) > srcBuffer.getSize())
        {
            LOGERR("Provided offset(" + toString(srcOffset) + ") + length(" + toString(length) + ") "
                "is larger than the source buffer " + toString(srcBuffer.getSize()) + ". Copy operation aborted.");

            return;
        }

        VulkanHardwareBuffer& vkSource = static_cast<VulkanHardwareBuffer&>(srcBuffer);
        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        // Perform the copy on every device that has both buffers
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            VulkanBuffer* src = vkSource.mBuffers[i];
            VulkanBuffer* dst = mBuffers[i];

            if (src == nullptr || dst == nullptr)
                continue;

            VulkanDevice& device = *rapi._getDevice(i);
            VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(i, queueType, localQueueIdx);

            // If either the source or destination buffer is currently being written to, we need to sync the copy
            // operation so it executes after both are done.

            // If the destination is being used on the GPU we need to wait until it finishes before writing to it
            UINT32 dstUseMask = dst->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

            // If discard is enabled and the destination is in use, instead of waiting just discard the existing buffer
            // and make a new one
            bool isNormalWrite = true;
            if (dstUseMask != 0 && discardWholeBuffer)
            {
                dst->destroy();

                dst = createBuffer(device, mSize, false, true);
                mBuffers[i] = dst;

                dstUseMask = 0;
                isNormalWrite = false;
            }

            // If the source buffer is being written to on the GPU we need to wait until it finishes before executing the copy
            UINT32 srcUseMask = src->getUseInfo(VulkanUseFlag::Write);

            // Wait if anything is using the buffers
            if (dstUseMask != 0 || srcUseMask != 0)
                transferCB->appendMask(dstUseMask | srcUseMask);

            // Check if the destination buffer will still be bound somewhere after the CBs using it finish
            if (isNormalWrite)
            {
                UINT32 useCount = dst->getUseCount();
                UINT32 boundCount = dst->getBoundCount();

                bool isBoundWithoutUse = boundCount > useCount;

                // If the destination buffer is queued for some operation on a CB (ignoring the ones we're waiting for),
                // then we need to make a copy of the buffer to avoid modifying its use in the previous operation
                if (isBoundWithoutUse)
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Avoid copying the original contents if the copy completely covers them
                    if (dstOffset > 0 || length != mSize)
                    {
                        dst->copy(transferCB, newBuffer, 0, 0, mSize);

                        transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                    }

                    dst->destroy();
                    dst = newBuffer;
                    mBuffers[i] = dst;
                }
            }

            src->copy(transferCB, dst, srcOffset, dstOffset, length);

            // Notify the command buffer that these resources are being used on it
            transferCB->getCB()->registerResource(src, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
            transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

            // We don't actually flush the transfer buffer here since that's an expensive operation; it is instead
            // done automatically before the next "normal" command buffer submission.
        }
    }
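
    // Note: readData()/writeData() below are convenience helpers built on lock()/unlock(), so they follow the same
    // mapping and synchronization rules as map()/unmap() above.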
    void VulkanHardwareBuffer::readData(UINT32 offset, UINT32 length, void* dest, UINT32 deviceIdx, UINT32 queueIdx)
    {
        void* lockedData = lock(offset, length, GBL_READ_ONLY, deviceIdx, queueIdx);
        memcpy(dest, lockedData, length);
        unlock();
    }

    void VulkanHardwareBuffer::writeData(UINT32 offset, UINT32 length, const void* source, BufferWriteType writeFlags,
        UINT32 queueIdx)
    {
        GpuLockOptions lockOptions = GBL_WRITE_ONLY_DISCARD_RANGE;
        if (writeFlags == BTW_NO_OVERWRITE)
            lockOptions = GBL_WRITE_ONLY_NO_OVERWRITE;
        else if (writeFlags == BWT_DISCARD)
            lockOptions = GBL_WRITE_ONLY_DISCARD;

        // Write to every device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            void* lockedData = lock(offset, length, lockOptions, i, queueIdx);

            memcpy(lockedData, source, length);

            unlock();
        }
    }
}