BsVulkanHardwareBuffer.cpp

//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
#include "BsVulkanHardwareBuffer.h"
#include "BsVulkanRenderAPI.h"
#include "BsVulkanDevice.h"
#include "BsVulkanUtility.h"
#include "BsVulkanCommandBufferManager.h"
#include "BsVulkanCommandBuffer.h"
#include "BsVulkanTexture.h"

namespace bs { namespace ct
{
    VulkanBuffer::VulkanBuffer(VulkanResourceManager* owner, VkBuffer buffer, VkBufferView view, VkDeviceMemory memory,
        UINT32 rowPitch, UINT32 slicePitch)
        : VulkanResource(owner, false), mBuffer(buffer), mView(view), mMemory(memory), mRowPitch(rowPitch)
    {
        if (rowPitch != 0)
            mSliceHeight = slicePitch / rowPitch;
        else
            mSliceHeight = 0;
    }

    VulkanBuffer::~VulkanBuffer()
    {
        VulkanDevice& device = mOwner->getDevice();

        if (mView != VK_NULL_HANDLE)
            vkDestroyBufferView(device.getLogical(), mView, gVulkanAllocator);

        vkDestroyBuffer(device.getLogical(), mBuffer, gVulkanAllocator);
        device.freeMemory(mMemory);
    }

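    // Maps the buffer memory for host access. The memory must have been allocated as host-visible, and the caller is
    // responsible for calling unmap() when done.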
    UINT8* VulkanBuffer::map(VkDeviceSize offset, VkDeviceSize length) const
    {
        VulkanDevice& device = mOwner->getDevice();

        UINT8* data;
        VkResult result = vkMapMemory(device.getLogical(), mMemory, offset, length, 0, (void**)&data);
        assert(result == VK_SUCCESS);

        return data;
    }

    void VulkanBuffer::unmap()
    {
        VulkanDevice& device = mOwner->getDevice();

        vkUnmapMemory(device.getLogical(), mMemory);
    }

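    // Queues a buffer-to-buffer copy on the provided transfer command buffer. The copy only executes once the command
    // buffer is submitted.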
    void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset,
        VkDeviceSize dstOffset, VkDeviceSize length)
    {
        VkBufferCopy region;
        region.size = length;
        region.srcOffset = srcOffset;
        region.dstOffset = dstOffset;

        vkCmdCopyBuffer(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), 1, &region);
    }

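    // Queues a copy from this buffer into an image subresource, using the row pitch and slice height this buffer was
    // created with to describe the source data layout.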
    void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanImage* destination, const VkExtent3D& extent,
        const VkImageSubresourceLayers& range, VkImageLayout layout)
    {
        VkBufferImageCopy region;
        region.bufferRowLength = mRowPitch;
        region.bufferImageHeight = mSliceHeight;
        region.bufferOffset = 0;
        region.imageOffset.x = 0;
        region.imageOffset.y = 0;
        region.imageOffset.z = 0;
        region.imageExtent = extent;
        region.imageSubresource = range;

        vkCmdCopyBufferToImage(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), layout, 1, &region);
    }

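    // Queues an inline update of the buffer with the provided data. Note that vkCmdUpdateBuffer only supports updates
    // whose size is a multiple of 4 and no larger than 65536 bytes.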
    void VulkanBuffer::update(VulkanTransferBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length)
    {
        vkCmdUpdateBuffer(cb->getCB()->getHandle(), mBuffer, offset, length, (uint32_t*)data);
    }

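    // Translates the engine buffer type into Vulkan usage flags, then allocates one internal buffer on every device
    // included in the provided device mask.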
    VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage,
        UINT32 size, GpuDeviceFlags deviceMask)
        : HardwareBuffer(size), mBuffers(), mStagingBuffer(nullptr), mStagingMemory(nullptr), mMappedDeviceIdx(-1)
        , mMappedGlobalQueueIdx(-1), mMappedOffset(0), mMappedSize(0), mMappedLockOptions(GBL_WRITE_ONLY)
        , mDirectlyMappable((usage & GBU_DYNAMIC) != 0), mSupportsGPUWrites(type == BT_STORAGE), mRequiresView(false)
        , mIsMapped(false)
    {
        VkBufferUsageFlags usageFlags = 0;
        switch(type)
        {
        case BT_VERTEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
            break;
        case BT_INDEX:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
            break;
        case BT_UNIFORM:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        case BT_GENERIC:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        case BT_STORAGE:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
            mRequiresView = true;
            break;
        case BT_STRUCTURED:
            usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        }

        mBufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        mBufferCI.pNext = nullptr;
        mBufferCI.flags = 0;
        mBufferCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
        mBufferCI.usage = usageFlags;
        mBufferCI.queueFamilyIndexCount = 0;
        mBufferCI.pQueueFamilyIndices = nullptr;

        mViewCI.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
        mViewCI.pNext = nullptr;
        mViewCI.flags = 0;
        mViewCI.format = VulkanUtility::getBufferFormat(format);
        mViewCI.offset = 0;
        mViewCI.range = VK_WHOLE_SIZE;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());

        VulkanDevice* devices[BS_MAX_DEVICES];
        VulkanUtility::getDevices(rapi, deviceMask, devices);

        // Allocate buffers per-device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (devices[i] == nullptr)
                continue;

            mBuffers[i] = createBuffer(*devices[i], size, false, true);
        }
    }

    VulkanHardwareBuffer::~VulkanHardwareBuffer()
    {
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            mBuffers[i]->destroy();
        }

        assert(mStagingBuffer == nullptr);
    }

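    // Creates a VkBuffer and its backing memory on the specified device. Staging buffers and directly mappable buffers
    // are placed in host-visible memory, everything else in device-local memory. A buffer view is created only for
    // buffer types that require one (and never for staging buffers).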
    VulkanBuffer* VulkanHardwareBuffer::createBuffer(VulkanDevice& device, UINT32 size, bool staging, bool readable)
    {
        VkBufferUsageFlags usage = mBufferCI.usage;
        if (staging)
        {
            mBufferCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

            // Staging buffers are used as a destination for reads
            if (readable)
                mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        }
        else if (readable) // Non-staging readable
            mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        mBufferCI.size = size;

        VkMemoryPropertyFlags flags = (mDirectlyMappable || staging) ?
            (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached memory
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

        VkDevice vkDevice = device.getLogical();

        VkBuffer buffer;
        VkResult result = vkCreateBuffer(vkDevice, &mBufferCI, gVulkanAllocator, &buffer);
        assert(result == VK_SUCCESS);

        VkMemoryRequirements memReqs;
        vkGetBufferMemoryRequirements(vkDevice, buffer, &memReqs);

        VkDeviceMemory memory = device.allocateMemory(memReqs, flags);
        result = vkBindBufferMemory(vkDevice, buffer, memory, 0);
        assert(result == VK_SUCCESS);

        VkBufferView view;
        if (mRequiresView && !staging)
        {
            mViewCI.buffer = buffer;

            result = vkCreateBufferView(vkDevice, &mViewCI, gVulkanAllocator, &view);
            assert(result == VK_SUCCESS);
        }
        else
            view = VK_NULL_HANDLE;

        mBufferCI.usage = usage; // Restore original usage
        return device.getResourceManager().create<VulkanBuffer>(buffer, view, memory);
    }

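    // Maps a range of the buffer for CPU access. Depending on the lock options and current GPU usage this either maps
    // the buffer memory directly, discards and re-creates the internal buffer, or falls back to a staging buffer
    // (or a small block of staging memory) whose contents are transferred back in unmap().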
    void* VulkanHardwareBuffer::map(UINT32 offset, UINT32 length, GpuLockOptions options, UINT32 deviceIdx, UINT32 queueIdx)
    {
        if ((offset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(offset) + ") + length(" + toString(length) + ") "
                "is larger than the buffer " + toString(mSize) + ".");

            return nullptr;
        }

        if (length == 0)
            return nullptr;

        VulkanBuffer* buffer = mBuffers[deviceIdx];
        if (buffer == nullptr)
            return nullptr;

        mIsMapped = true;
        mMappedDeviceIdx = deviceIdx;
        mMappedGlobalQueueIdx = queueIdx;
        mMappedOffset = offset;
        mMappedSize = length;
        mMappedLockOptions = options;

        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());
        VulkanDevice& device = *rapi._getDevice(deviceIdx);

        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        VkAccessFlags accessFlags;
        if (options == GBL_READ_ONLY)
            accessFlags = VK_ACCESS_HOST_READ_BIT;
        else if (options == GBL_READ_WRITE)
            accessFlags = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
        else
            accessFlags = VK_ACCESS_HOST_WRITE_BIT;

        // If the memory is host-visible, try mapping it directly
        if (mDirectlyMappable)
        {
            // Check if the GPU is currently reading from or writing to the buffer
            UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

            // Note: Even if the GPU isn't currently using the buffer, if the buffer supports GPU writes we still
            // consider it in use, because a write could have completed without yet being visible, so we need to issue
            // a pipeline barrier below.
            bool isUsedOnGPU = useMask != 0 || mSupportsGPUWrites;

            // We're safe to map directly since GPU isn't using the buffer
            if (!isUsedOnGPU)
            {
                // If some CB has an operation queued that will be using the current contents of the buffer, create a new
                // buffer so we don't modify the previous use of the buffer
                if (buffer->isBound())
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Copy contents of the current buffer to the new one, unless caller explicitly specifies he doesn't
                    // care about the current contents
                    if (options != GBL_WRITE_ONLY_DISCARD)
                    {
                        UINT8* src = buffer->map(offset, length);
                        UINT8* dst = newBuffer->map(offset, length);

                        memcpy(dst, src, length);

                        buffer->unmap();
                        newBuffer->unmap();
                    }

                    buffer->destroy();
                    buffer = newBuffer;
                    mBuffers[deviceIdx] = buffer;
                }

                return buffer->map(offset, length);
            }

            // Caller guarantees he won't touch the same data as the GPU, so just map even though the GPU is using the buffer
            if (options == GBL_WRITE_ONLY_NO_OVERWRITE)
                return buffer->map(offset, length);

            // Caller doesn't care about buffer contents, so just discard the existing buffer and create a new one
            if (options == GBL_WRITE_ONLY_DISCARD)
            {
                buffer->destroy();

                buffer = createBuffer(device, mSize, false, true);
                mBuffers[deviceIdx] = buffer;

                return buffer->map(offset, length);
            }

            // We need to read the buffer contents
            if (options == GBL_READ_ONLY || options == GBL_READ_WRITE)
            {
                // We need to wait until (potential) read/write operations complete
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

                // Ensure flush() will wait for all queues currently using the buffer (if any) to finish
                // If only reading, wait for all writes to complete, otherwise wait on both writes and reads
                if (options == GBL_READ_ONLY)
                    useMask = buffer->getUseInfo(VulkanUseFlag::Write);
                else
                    useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

                transferCB->appendMask(useMask);

                // Make any writes visible before mapping
                if (mSupportsGPUWrites)
                {
                    // Issue a barrier so:
                    //  - If reading: the device makes the written memory available for read (read-after-write hazard)
                    //  - If writing: ensures our writes properly overlap with GPU writes (write-after-write hazard)
                    transferCB->memoryBarrier(buffer->getHandle(),
                        VK_ACCESS_SHADER_WRITE_BIT,
                        accessFlags,
                        // Last stages that could have written to the buffer:
                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_HOST_BIT
                    );
                }

                // Submit the command buffer and wait until it finishes
                transferCB->flush(true);

                // If writing and some CB has an operation queued that will be using the current contents of the buffer,
                // create a new buffer so we don't modify the previous use of the buffer
                if (options == GBL_READ_WRITE && buffer->isBound())
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Copy contents of the current buffer to the new one
                    UINT8* src = buffer->map(offset, length);
                    UINT8* dst = newBuffer->map(offset, length);

                    memcpy(dst, src, length);

                    buffer->unmap();
                    newBuffer->unmap();

                    buffer->destroy();
                    buffer = newBuffer;
                    mBuffers[deviceIdx] = buffer;
                }

                return buffer->map(offset, length);
            }

            // Otherwise, we're doing write only, in which case it's best to use the staging buffer to avoid waiting
            // and blocking, so fall through
        }

        // Can't use direct mapping, so use a staging buffer or memory

        // We might need to copy the current contents of the buffer to the staging buffer. Even if the user doesn't plan on
        // reading, it is still required as we will eventually copy all of the contents back to the original buffer,
        // and we can't write potentially uninitialized data. The only exception is when the caller specifies the buffer
        // contents should be discarded, in which case he guarantees he will overwrite the entire locked area with his own
        // contents.
        bool needRead = options != GBL_WRITE_ONLY_DISCARD_RANGE && options != GBL_WRITE_ONLY_DISCARD;

        // See if we can use the cheaper staging memory, rather than a staging buffer
        if (!needRead && offset % 4 == 0 && length % 4 == 0 && length <= 65536)
        {
            mStagingMemory = (UINT8*)bs_alloc(length);
            return mStagingMemory;
        }

        // Create a staging buffer
        mStagingBuffer = createBuffer(device, length, true, needRead);

        if (needRead)
        {
            VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

            // Similar to above, if buffer supports GPU writes or is currently being written to, we need to wait on any
            // potential writes to complete
            UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
            if (mSupportsGPUWrites || writeUseMask != 0)
            {
                // Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
                transferCB->appendMask(writeUseMask);
            }

            // Queue copy command
            buffer->copy(transferCB, mStagingBuffer, offset, 0, length);

            // Ensure data written to the staging buffer is visible
            transferCB->memoryBarrier(mStagingBuffer->getHandle(),
                VK_ACCESS_TRANSFER_WRITE_BIT,
                accessFlags,
                VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_PIPELINE_STAGE_HOST_BIT
            );

            // Submit the command buffer and wait until it finishes
            transferCB->flush(true);

            assert(!buffer->isUsed());
        }

        return mStagingBuffer->map(0, length);
    }

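    // Finishes a previous map() call. If any writes were performed through a staging buffer or staging memory, this
    // queues the transfer of that data back into the device buffer.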
    void VulkanHardwareBuffer::unmap()
    {
        // Possibly map() failed with some error
        if (!mIsMapped)
            return;

        // Note: If we did any writes they need to be made visible to the GPU. However there is no need to execute
        // a pipeline barrier because (as per spec) host writes are implicitly visible to the device.

        if (mStagingMemory == nullptr && mStagingBuffer == nullptr) // We directly mapped the buffer
        {
            mBuffers[mMappedDeviceIdx]->unmap();
        }
        else
        {
            if (mStagingBuffer != nullptr)
                mStagingBuffer->unmap();

            bool isWrite = mMappedLockOptions != GBL_READ_ONLY;

            // If the caller wrote anything to the staging buffer, we need to upload it back to the main buffer
            if (isWrite)
            {
                VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());
                VulkanDevice& device = *rapi._getDevice(mMappedDeviceIdx);

                VulkanCommandBufferManager& cbManager = gVulkanCBManager();
                GpuQueueType queueType;
                UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(mMappedGlobalQueueIdx, queueType);

                VulkanBuffer* buffer = mBuffers[mMappedDeviceIdx];
                VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(mMappedDeviceIdx, queueType, localQueueIdx);

                // If the buffer is used in any way on the GPU, we need to wait for that use to finish before
                // we issue our copy
                UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);
                bool isNormalWrite = false;
                if (useMask != 0) // Buffer is currently used on the GPU
                {
                    // Try to avoid the wait by checking for special write conditions

                    // Caller guarantees he won't touch the same data as the GPU, so just copy
                    if (mMappedLockOptions == GBL_WRITE_ONLY_NO_OVERWRITE)
                    {
                        // Fall through to copy()
                    }
                    // Caller doesn't care about buffer contents, so just discard the existing buffer and create a new one
                    else if (mMappedLockOptions == GBL_WRITE_ONLY_DISCARD)
                    {
                        buffer->destroy();

                        buffer = createBuffer(device, mSize, false, true);
                        mBuffers[mMappedDeviceIdx] = buffer;
                    }
                    else // Otherwise we have no choice but to issue a dependency between the queues
                    {
                        transferCB->appendMask(useMask);
                        isNormalWrite = true;
                    }
                }
                else
                    isNormalWrite = true;

                // Check if the buffer will still be bound somewhere after the CBs using it finish
                if (isNormalWrite)
                {
                    UINT32 useCount = buffer->getUseCount();
                    UINT32 boundCount = buffer->getBoundCount();

                    bool isBoundWithoutUse = boundCount > useCount;

                    // If buffer is queued for some operation on a CB, then we need to make a copy of the buffer to
                    // avoid modifying its use in the previous operation
                    if (isBoundWithoutUse)
                    {
                        VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                        // Avoid copying original contents if the staging buffer completely covers it
                        if (mMappedOffset > 0 || mMappedSize != mSize)
                        {
                            buffer->copy(transferCB, newBuffer, 0, 0, mSize);

                            transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                        }

                        buffer->destroy();
                        buffer = newBuffer;
                        mBuffers[mMappedDeviceIdx] = buffer;
                    }
                }

                // Queue copy/update command
                if (mStagingBuffer != nullptr)
                {
                    mStagingBuffer->copy(transferCB, buffer, 0, mMappedOffset, mMappedSize);
                    transferCB->getCB()->registerResource(mStagingBuffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                }
                else // Staging memory
                {
                    buffer->update(transferCB, mStagingMemory, mMappedOffset, mMappedSize);
                }

                transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

                // We don't actually flush the transfer buffer here since it's an expensive operation, but it's instead
                // done automatically before next "normal" command buffer submission.
            }

            if (mStagingBuffer != nullptr)
            {
                mStagingBuffer->destroy();
                mStagingBuffer = nullptr;
            }

            if (mStagingMemory != nullptr)
            {
                bs_free(mStagingMemory);
                mStagingMemory = nullptr;
            }
        }

        mIsMapped = false;
    }

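    // Copies a region from another Vulkan hardware buffer into this one, on every device that contains both buffers.
    // The copy is queued on a transfer command buffer and submitted before the next regular command buffer submission.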
    void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
        UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
    {
        if ((dstOffset + length) > mSize)
        {
            LOGERR("Provided offset(" + toString(dstOffset) + ") + length(" + toString(length) + ") "
                "is larger than the destination buffer " + toString(mSize) + ". Copy operation aborted.");

            return;
        }

        if ((srcOffset + length) > srcBuffer.getSize())
        {
            LOGERR("Provided offset(" + toString(srcOffset) + ") + length(" + toString(length) + ") "
                "is larger than the source buffer " + toString(srcBuffer.getSize()) + ". Copy operation aborted.");

            return;
        }

        VulkanHardwareBuffer& vkSource = static_cast<VulkanHardwareBuffer&>(srcBuffer);
        VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());
        VulkanCommandBufferManager& cbManager = gVulkanCBManager();
        GpuQueueType queueType;
        UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

        // Perform copy on every device that has both buffers
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            VulkanBuffer* src = vkSource.mBuffers[i];
            VulkanBuffer* dst = mBuffers[i];

            if (src == nullptr || dst == nullptr)
                continue;

            VulkanDevice& device = *rapi._getDevice(i);
            VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(i, queueType, localQueueIdx);

            // If either the source or destination buffer is currently being written to, we need to sync the copy
            // operation so it executes after both are done

            // If destination is being used on the GPU we need to wait until it finishes before writing to it
            UINT32 dstUseMask = dst->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

            // If discard is enabled and destination is used, instead of waiting just discard the existing buffer and make a new one
            bool isNormalWrite = true;
            if (dstUseMask != 0 && discardWholeBuffer)
            {
                dst->destroy();

                dst = createBuffer(device, mSize, false, true);
                mBuffers[i] = dst;

                dstUseMask = 0;
                isNormalWrite = false;
            }

            // If source buffer is being written to on the GPU we need to wait until it finishes, before executing copy
            UINT32 srcUseMask = src->getUseInfo(VulkanUseFlag::Write);

            // Wait if anything is using the buffers
            if (dstUseMask != 0 || srcUseMask != 0)
                transferCB->appendMask(dstUseMask | srcUseMask);

            // Check if the destination buffer will still be bound somewhere after the CBs using it finish
            if (isNormalWrite)
            {
                UINT32 useCount = dst->getUseCount();
                UINT32 boundCount = dst->getBoundCount();

                bool isBoundWithoutUse = boundCount > useCount;

                // If destination buffer is queued for some operation on a CB (ignoring the ones we're waiting for), then we
                // need to make a copy of the buffer to avoid modifying its use in the previous operation
                if (isBoundWithoutUse)
                {
                    VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);

                    // Avoid copying original contents if the copy completely covers it
                    if (dstOffset > 0 || length != mSize)
                    {
                        dst->copy(transferCB, newBuffer, 0, 0, mSize);

                        transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
                    }

                    dst->destroy();
                    dst = newBuffer;
                    mBuffers[i] = dst;
                }
            }

            src->copy(transferCB, dst, srcOffset, dstOffset, length);

            // Notify the command buffer that these resources are being used on it
            transferCB->getCB()->registerResource(src, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
            transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);

            // We don't actually flush the transfer buffer here since it's an expensive operation, but it's instead
            // done automatically before next "normal" command buffer submission.
        }
    }

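    // Reads the buffer contents by locking the buffer for read-only access and copying the locked data into the
    // destination memory. This blocks until any queued GPU writes to the buffer complete.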
    void VulkanHardwareBuffer::readData(UINT32 offset, UINT32 length, void* dest, UINT32 deviceIdx, UINT32 queueIdx)
    {
        void* lockedData = lock(offset, length, GBL_READ_ONLY, deviceIdx, queueIdx);
        memcpy(dest, lockedData, length);
        unlock();
    }

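    // Writes the provided data to the buffer on every device, choosing the lock mode that matches the requested
    // write flags.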
    void VulkanHardwareBuffer::writeData(UINT32 offset, UINT32 length, const void* source, BufferWriteType writeFlags,
        UINT32 queueIdx)
    {
        GpuLockOptions lockOptions = GBL_WRITE_ONLY_DISCARD_RANGE;
        if (writeFlags == BWT_NO_OVERWRITE)
            lockOptions = GBL_WRITE_ONLY_NO_OVERWRITE;
        else if (writeFlags == BWT_DISCARD)
            lockOptions = GBL_WRITE_ONLY_DISCARD;

        // Write to every device
        for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
        {
            if (mBuffers[i] == nullptr)
                continue;

            void* lockedData = lock(offset, length, lockOptions, i, queueIdx);
            memcpy(lockedData, source, length);
            unlock();
        }
    }
}}