Ver Fonte

Buffer copy operations now execute on the provided command buffer instead of the transfer queue, similar to textures

BearishSun há 8 anos atrás
pai
commit
12037d7ba2
26 ficheiros alterados com 200 adições e 167 exclusões
  1. 9 13
      Source/BansheeCore/Include/BsHardwareBuffer.h
  2. 2 2
      Source/BansheeD3D11RenderAPI/Include/BsD3D11GpuBuffer.h
  3. 1 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11HardwareBuffer.h
  4. 1 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11IndexBuffer.h
  5. 1 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11VertexBuffer.h
  6. 3 3
      Source/BansheeD3D11RenderAPI/Source/BsD3D11GpuBuffer.cpp
  7. 42 26
      Source/BansheeD3D11RenderAPI/Source/BsD3D11HardwareBuffer.cpp
  8. 2 2
      Source/BansheeD3D11RenderAPI/Source/BsD3D11IndexBuffer.cpp
  9. 2 2
      Source/BansheeD3D11RenderAPI/Source/BsD3D11VertexBuffer.cpp
  10. 10 0
      Source/BansheeGLRenderAPI/Include/BsGLBuffer.h
  11. 3 3
      Source/BansheeGLRenderAPI/Include/BsGLGpuBuffer.h
  12. 4 0
      Source/BansheeGLRenderAPI/Include/BsGLIndexBuffer.h
  13. 4 0
      Source/BansheeGLRenderAPI/Include/BsGLVertexBuffer.h
  14. 6 0
      Source/BansheeGLRenderAPI/Source/BsGLBuffer.cpp
  15. 20 6
      Source/BansheeGLRenderAPI/Source/BsGLGpuBuffer.cpp
  16. 22 1
      Source/BansheeGLRenderAPI/Source/BsGLIndexBuffer.cpp
  17. 22 2
      Source/BansheeGLRenderAPI/Source/BsGLVertexBuffer.cpp
  18. 4 4
      Source/BansheeVulkanRenderAPI/Include/BsVulkanGpuBuffer.h
  19. 4 4
      Source/BansheeVulkanRenderAPI/Include/BsVulkanHardwareBuffer.h
  20. 1 1
      Source/BansheeVulkanRenderAPI/Include/BsVulkanIndexBuffer.h
  21. 1 1
      Source/BansheeVulkanRenderAPI/Include/BsVulkanVertexBuffer.h
  22. 3 3
      Source/BansheeVulkanRenderAPI/Source/BsVulkanGpuBuffer.cpp
  23. 27 85
      Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp
  24. 3 3
      Source/BansheeVulkanRenderAPI/Source/BsVulkanIndexBuffer.cpp
  25. 1 1
      Source/BansheeVulkanRenderAPI/Source/BsVulkanTexture.cpp
  26. 2 2
      Source/BansheeVulkanRenderAPI/Source/BsVulkanVertexBuffer.cpp

+ 9 - 13
Source/BansheeCore/Include/BsHardwareBuffer.h

@@ -105,27 +105,23 @@ namespace bs
 		 * @param[in]	length				Size of the data to copy, in bytes.
 		 * @param[in]	discardWholeBuffer	Specify true if the data in the current buffer can be entirely discarded. This
 		 *									may improve performance.
-		 * @param[in]	queueIdx			Device queue to perform the copy operation on. See @ref queuesDoc.
+		 * @param[in]	commandBuffer		Command buffer to queue the copy operation on. If null, main command buffer is
+		 *									used.
 		 */
-		virtual void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0)
-		{
-			const void *srcData = srcBuffer.lock(
-				srcOffset, length, GBL_READ_ONLY, queueIdx);
-			this->writeData(dstOffset, length, srcData, discardWholeBuffer ? BWT_DISCARD : BWT_NORMAL, queueIdx);
-			srcBuffer.unlock();
-		}
+		virtual void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<ct::CommandBuffer>& commandBuffer = nullptr) = 0;
 
 		/**
 		 * Copy data from the provided buffer into this buffer. If buffers are not the same size, smaller size will be used.
 		 * 
-		 * @param[in]	srcBuffer	Hardware buffer to copy from.
-		 * @param[in]	queueIdx	Device queue to perform the copy operation on. See @ref queuesDoc.
+		 * @param[in]	srcBuffer		Hardware buffer to copy from.
+		 * @param[in]	commandBuffer	Command buffer to queue the copy operation on. If null, main command buffer is
+		 *								used.
 		 */
-		virtual void copyData(HardwareBuffer& srcBuffer, UINT32 queueIdx = 0)
+		virtual void copyData(HardwareBuffer& srcBuffer, const SPtr<ct::CommandBuffer>& commandBuffer = nullptr)
 		{
 			UINT32 sz = std::min(getSize(), srcBuffer.getSize());
-			copyData(srcBuffer, 0, 0, sz, true, queueIdx);
+			copyData(srcBuffer, 0, 0, sz, true, commandBuffer);
 		}
 			
 		/** Returns the size of this buffer in bytes. */

+ 2 - 2
Source/BansheeD3D11RenderAPI/Include/BsD3D11GpuBuffer.h

@@ -33,8 +33,8 @@ namespace bs { namespace ct
 			BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
 
 		/** @copydoc GpuBuffer::copyData */
-		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/**
 		 * Creates a buffer view that may be used for binding a buffer to a slot in the pipeline. Views allow you to specify

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11HardwareBuffer.h

@@ -52,7 +52,7 @@ namespace bs { namespace ct
 
 		/** @copydoc HardwareBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, 
-			UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			UINT32 length, bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/**	Returns the internal DX11 buffer object. */
 		ID3D11Buffer* getD3DBuffer() const { return mD3DBuffer; }

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11IndexBuffer.h

@@ -29,7 +29,7 @@ namespace bs { namespace ct
 
 		/** @copydoc IndexBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
-			bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/**	Gets the internal DX11 index buffer object. */
 		ID3D11Buffer* getD3DIndexBuffer() const { return mBuffer->getD3DBuffer(); }		

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11VertexBuffer.h

@@ -29,7 +29,7 @@ namespace bs { namespace ct
 
 		/** @copydoc VertexBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
-			bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/**	Get the D3D-specific vertex buffer */
 		ID3D11Buffer* getD3DVertexBuffer() const { return mBuffer->getD3DBuffer(); }		

+ 3 - 3
Source/BansheeD3D11RenderAPI/Source/BsD3D11GpuBuffer.cpp

@@ -110,12 +110,12 @@ namespace bs { namespace ct
 		mBuffer->writeData(offset, length, source, writeFlags);
 	}
 
-	void D3D11GpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+	void D3D11GpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+		bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
 		D3D11GpuBuffer* d3d11SrcBuffer = static_cast<D3D11GpuBuffer*>(&srcBuffer);
 
-		mBuffer->copyData(*d3d11SrcBuffer->mBuffer, srcOffset, dstOffset, length, discardWholeBuffer);
+		mBuffer->copyData(*d3d11SrcBuffer->mBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 
 	ID3D11Buffer* D3D11GpuBuffer::getDX11Buffer() const

+ 42 - 26
Source/BansheeD3D11RenderAPI/Source/BsD3D11HardwareBuffer.cpp

@@ -5,6 +5,7 @@
 #include "BsD3D11Device.h"
 #include "BsException.h"
 #include "BsDebug.h"
+#include "BsD3D11CommandBuffer.h"
 
 namespace bs { namespace ct
 {
@@ -230,37 +231,52 @@ namespace bs { namespace ct
 	}
 
 	void D3D11HardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<ct::CommandBuffer>& commandBuffer)
 	{
-		// If we're copying same-size buffers in their entirety
-		if (srcOffset == 0 && dstOffset == 0 &&
-			length == mSize && mSize == srcBuffer.getSize())
+		auto executeRef = [this](HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length)
 		{
-			mDevice.getImmediateContext()->CopyResource(mD3DBuffer, static_cast<D3D11HardwareBuffer&>(srcBuffer).getD3DBuffer());
-			if (mDevice.hasError())
+			// If we're copying same-size buffers in their entirety
+			if (srcOffset == 0 && dstOffset == 0 &&
+				length == mSize && mSize == srcBuffer.getSize())
 			{
-				String errorDescription = mDevice.getErrorDescription();
-				BS_EXCEPT(RenderingAPIException, "Cannot copy D3D11 resource\nError Description:" + errorDescription);
+				mDevice.getImmediateContext()->CopyResource(mD3DBuffer, 
+					static_cast<D3D11HardwareBuffer&>(srcBuffer).getD3DBuffer());
+				if (mDevice.hasError())
+				{
+					String errorDescription = mDevice.getErrorDescription();
+					BS_EXCEPT(RenderingAPIException, "Cannot copy D3D11 resource\nError Description:" + errorDescription);
+				}
 			}
-		}
-		else
-		{
-			// Copy subregion
-			D3D11_BOX srcBox;
-			srcBox.left = (UINT)srcOffset;
-			srcBox.right = (UINT)srcOffset + length;
-			srcBox.top = 0;
-			srcBox.bottom = 1;
-			srcBox.front = 0;
-			srcBox.back = 1;
-
-			mDevice.getImmediateContext()->CopySubresourceRegion(mD3DBuffer, 0, (UINT)dstOffset, 0, 0, 
-				static_cast<D3D11HardwareBuffer&>(srcBuffer).getD3DBuffer(), 0, &srcBox);
-			if (mDevice.hasError())
+			else
 			{
-				String errorDescription = mDevice.getErrorDescription();
-				BS_EXCEPT(RenderingAPIException, "Cannot copy D3D11 subresource region\nError Description:" + errorDescription);
+				// Copy subregion
+				D3D11_BOX srcBox;
+				srcBox.left = (UINT)srcOffset;
+				srcBox.right = (UINT)srcOffset + length;
+				srcBox.top = 0;
+				srcBox.bottom = 1;
+				srcBox.front = 0;
+				srcBox.back = 1;
+
+				mDevice.getImmediateContext()->CopySubresourceRegion(mD3DBuffer, 0, (UINT)dstOffset, 0, 0,
+					static_cast<D3D11HardwareBuffer&>(srcBuffer).getD3DBuffer(), 0, &srcBox);
+				if (mDevice.hasError())
+				{
+					String errorDescription = mDevice.getErrorDescription();
+					BS_EXCEPT(RenderingAPIException, "Cannot copy D3D11 subresource region\nError Description:" + 
+						errorDescription);
+				}
 			}
+		};
+
+		if (commandBuffer == nullptr)
+			executeRef(srcBuffer, srcOffset, dstOffset, length);
+		else
+		{
+			auto execute = [&]() { executeRef(srcBuffer, srcOffset, dstOffset, length); };
+
+			SPtr<D3D11CommandBuffer> cb = std::static_pointer_cast<D3D11CommandBuffer>(commandBuffer);
+			cb->queueCommand(execute);
 		}
 	}
 
@@ -314,4 +330,4 @@ namespace bs { namespace ct
 			LOGERR("Trying to write into a buffer with unsupported usage: " + toString(mDesc.Usage));
 		}
 	}
-}}
+}}

+ 2 - 2
Source/BansheeD3D11RenderAPI/Source/BsD3D11IndexBuffer.cpp

@@ -66,8 +66,8 @@ namespace bs { namespace ct
 	}
 
 	void D3D11IndexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer);
+		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 }}

+ 2 - 2
Source/BansheeD3D11RenderAPI/Source/BsD3D11VertexBuffer.cpp

@@ -58,9 +58,9 @@ namespace bs { namespace ct
 	}
 
 	void D3D11VertexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer);
+		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 
 	void D3D11VertexBuffer::initialize()

+ 10 - 0
Source/BansheeGLRenderAPI/Include/BsGLBuffer.h

@@ -64,6 +64,16 @@ namespace bs { namespace ct
         void writeData(UINT32 offset, UINT32 length, const void* source, 
 			BufferWriteType writeFlags = BWT_NORMAL);
 
+		/**
+		 * Copies data from a specific portion of this buffer into a specific portion of the provided buffer.
+		 *
+		 * @param[in]	dstBuffer			Buffer to copy to.
+		 * @param[in]	srcOffset			Offset into this buffer to start copying from, in bytes.
+		 * @param[in]	dstOffset			Offset into the provided buffer to start copying to, in bytes.
+		 * @param[in]	length				Size of the data to copy, in bytes.
+		 */
+		void copyData(GLBuffer& dstBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length);
+
 		/**	Returns internal OpenGL buffer ID. */
         GLuint getGLBufferId() const { return mBufferId; }
 

+ 3 - 3
Source/BansheeGLRenderAPI/Include/BsGLGpuBuffer.h

@@ -29,11 +29,11 @@ namespace bs { namespace ct
 
 		/** @copydoc GpuBuffer::writeData */
         void writeData(UINT32 offset, UINT32 length, const void* source,
-				BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
+			BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
 
 		/** @copydoc GpuBuffer::copyData */
-		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/**	
 		 * Returns internal OpenGL buffer ID. If binding the buffer to the pipeline, bind the texture using

+ 4 - 0
Source/BansheeGLRenderAPI/Include/BsGLIndexBuffer.h

@@ -26,6 +26,10 @@ namespace bs { namespace ct
         void writeData(UINT32 offset, UINT32 length, const void* source, 
 			BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
 
+		/** @copydoc IndexBuffer::copyData */
+		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<ct::CommandBuffer>& commandBuffer = nullptr) override;
+
 		/**	Returns internal OpenGL index buffer handle. */
         GLuint getGLBufferId() const { return mBuffer.getGLBufferId(); }
 

+ 4 - 0
Source/BansheeGLRenderAPI/Include/BsGLVertexBuffer.h

@@ -27,6 +27,10 @@ namespace bs { namespace ct
         void writeData(UINT32 offset, UINT32 length, const void* source, 
 			BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
 
+		/** @copydoc VertexBuffer::copyData */
+		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
+
 		/**	Returns internal OpenGL buffer ID. */
         GLuint getGLBufferId() const { return mBuffer.getGLBufferId(); }
 

+ 6 - 0
Source/BansheeGLRenderAPI/Source/BsGLBuffer.cpp

@@ -113,4 +113,10 @@ namespace bs { namespace ct
 		memcpy(bufferData, pSource, length);
 		unlock();
 	}
+
+	/**
+	 * Copies a range of bytes from this buffer into the provided destination buffer.
+	 *
+	 * @param[in]	dstBuffer	Buffer that receives the data.
+	 * @param[in]	srcOffset	Offset into this buffer to start reading from, in bytes.
+	 * @param[in]	dstOffset	Offset into @p dstBuffer to start writing to, in bytes.
+	 * @param[in]	length		Number of bytes to copy.
+	 *
+	 * @note	Rebinds the GL_COPY_READ_BUFFER and GL_COPY_WRITE_BUFFER targets as a side effect.
+	 */
+	void GLBuffer::copyData(GLBuffer& dstBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length)
+	{
+		// glCopyBufferSubData() operates on buffer binding targets, not on buffer names, so both buffers need to be
+		// bound to the dedicated copy targets first. This buffer is the source and dstBuffer is the destination.
+		glBindBuffer(GL_COPY_READ_BUFFER, getGLBufferId());
+		glBindBuffer(GL_COPY_WRITE_BUFFER, dstBuffer.getGLBufferId());
+		glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, srcOffset, dstOffset, length);
+	}
 }}

+ 20 - 6
Source/BansheeGLRenderAPI/Source/BsGLGpuBuffer.cpp

@@ -5,6 +5,7 @@
 #include "BsRenderStats.h"
 #include "BsGLPixelFormat.h"
 #include "BsGLHardwareBufferManager.h"
+#include "BsGLCommandBuffer.h"
 
 namespace bs { namespace ct
 {
@@ -95,12 +96,25 @@ namespace bs { namespace ct
 		BS_INC_RENDER_STAT_CAT(ResWrite, RenderStatObject_GpuBuffer);
 	}
 
-	void GLGpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-								   UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+	void GLGpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+		bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		GLGpuBuffer& glSrcBuffer = static_cast<GLGpuBuffer&>(srcBuffer);
+		auto executeRef = [this](HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length)
+		{
+			GLGpuBuffer& glSrcBuffer = static_cast<GLGpuBuffer&>(srcBuffer);
+
+			GLuint srcId = glSrcBuffer.getGLBufferId();
+			glCopyBufferSubData(srcId, getGLBufferId(), srcOffset, dstOffset, length);
+		};
+
+		if (commandBuffer == nullptr)
+			executeRef(srcBuffer, srcOffset, dstOffset, length);
+		else
+		{
+			auto execute = [&]() { executeRef(srcBuffer, srcOffset, dstOffset, length); };
 
-		GLuint srcId = glSrcBuffer.getGLBufferId();
-		glCopyBufferSubData(srcId, getGLBufferId(), srcOffset, dstOffset, length);
+			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
+			cb->queueCommand(execute);
+		}
 	}
-}}
+}}

+ 22 - 1
Source/BansheeGLRenderAPI/Source/BsGLIndexBuffer.cpp

@@ -4,6 +4,7 @@
 #include "BsGLHardwareBufferManager.h"
 #include "BsRenderStats.h"
 #include "BsException.h"
+#include "BsGLCommandBuffer.h"
 
 namespace bs { namespace ct
 {
@@ -46,4 +47,24 @@ namespace bs { namespace ct
 	{
 		mBuffer.writeData(offset, length, pSource, writeFlags);
 	}
-}}
+
+	/**
+	 * Copies a range of bytes from the provided index buffer into this one. The copy is executed immediately when
+	 * no command buffer is provided, otherwise it is handed to the command buffer via queueCommand().
+	 *
+	 * @param[in]	srcBuffer			Buffer to copy from. Cast to GLIndexBuffer without a runtime check.
+	 * @param[in]	srcOffset			Offset into @p srcBuffer to start reading from, in bytes.
+	 * @param[in]	dstOffset			Offset into this buffer to start writing to, in bytes.
+	 * @param[in]	length				Number of bytes to copy.
+	 * @param[in]	discardWholeBuffer	Not used by this implementation.
+	 * @param[in]	commandBuffer		Command buffer to queue the copy on, or null to execute right away.
+	 */
+	void GLIndexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+		bool discardWholeBuffer, const SPtr<ct::CommandBuffer>& commandBuffer)
+	{
+		GLIndexBuffer* glSrcBuffer = static_cast<GLIndexBuffer*>(&srcBuffer);
+
+		// Capture everything by value (the source buffer as a pointer). The closure may be stored by the command
+		// buffer and run after this function has returned, at which point by-reference captures of the parameters
+		// would dangle. NOTE(review): both buffers must still be alive when a queued command executes - confirm
+		// that callers guarantee this.
+		auto execute = [this, glSrcBuffer, srcOffset, dstOffset, length]()
+		{
+			glSrcBuffer->mBuffer.copyData(mBuffer, srcOffset, dstOffset, length);
+		};
+
+		if (commandBuffer == nullptr)
+			execute();
+		else
+		{
+			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
+			cb->queueCommand(execute);
+		}
+	}
+}}

+ 22 - 2
Source/BansheeGLRenderAPI/Source/BsGLVertexBuffer.cpp

@@ -4,7 +4,7 @@
 #include "BsGLVertexBuffer.h"
 #include "BsGLVertexArrayObjectManager.h"
 #include "BsRenderStats.h"
-#include "BsException.h"
+#include "BsGLCommandBuffer.h"
 
 namespace bs { namespace ct
 {
@@ -63,4 +63,24 @@ namespace bs { namespace ct
     {
 		mBuffer.writeData(offset, length, pSource, writeFlags);
     }
-}}
+
+	/**
+	 * Copies a range of bytes from the provided vertex buffer into this one. The copy is executed immediately when
+	 * no command buffer is provided, otherwise it is handed to the command buffer via queueCommand().
+	 *
+	 * @param[in]	srcBuffer			Buffer to copy from. Cast to GLVertexBuffer without a runtime check.
+	 * @param[in]	srcOffset			Offset into @p srcBuffer to start reading from, in bytes.
+	 * @param[in]	dstOffset			Offset into this buffer to start writing to, in bytes.
+	 * @param[in]	length				Number of bytes to copy.
+	 * @param[in]	discardWholeBuffer	Not used by this implementation.
+	 * @param[in]	commandBuffer		Command buffer to queue the copy on, or null to execute right away.
+	 */
+	void GLVertexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<ct::CommandBuffer>& commandBuffer)
+	{
+		GLVertexBuffer* glSrcBuffer = static_cast<GLVertexBuffer*>(&srcBuffer);
+
+		// Capture everything by value (the source buffer as a pointer). The closure may be stored by the command
+		// buffer and run after this function has returned, at which point by-reference captures of the parameters
+		// would dangle. NOTE(review): both buffers must still be alive when a queued command executes - confirm
+		// that callers guarantee this.
+		auto execute = [this, glSrcBuffer, srcOffset, dstOffset, length]()
+		{
+			glSrcBuffer->mBuffer.copyData(mBuffer, srcOffset, dstOffset, length);
+		};
+
+		if (commandBuffer == nullptr)
+			execute();
+		else
+		{
+			SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
+			cb->queueCommand(execute);
+		}
+	}
+}}

+ 4 - 4
Source/BansheeVulkanRenderAPI/Include/BsVulkanGpuBuffer.h

@@ -27,12 +27,12 @@ namespace bs { namespace ct
 		void readData(UINT32 offset, UINT32 length, void* dest, UINT32 deviceIdx = 0, UINT32 queueIdx = 0) override;
 
 		/** @copydoc GpuBuffer::writeData */
-        void writeData(UINT32 offset, UINT32 length, const void* source,
-			BufferWriteType writeFlags = BWT_NORMAL, UINT32 queueIdx = 0) override;
+        void writeData(UINT32 offset, UINT32 length, const void* source, BufferWriteType writeFlags = BWT_NORMAL, 
+			UINT32 queueIdx = 0) override;
 
 		/** @copydoc GpuBuffer::copyData */
-		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, 
-			UINT32 dstOffset, UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 		
 		/** 
 		 * Gets the resource wrapping the buffer object, on the specified device. If GPU param block buffer's device mask

+ 4 - 4
Source/BansheeVulkanRenderAPI/Include/BsVulkanHardwareBuffer.h

@@ -60,14 +60,14 @@ namespace bs { namespace ct
 		 * the destination buffer. Caller must ensure the provided offsets and length are within valid bounds of
 		 * both buffers.
 		 */
-		void copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset, VkDeviceSize dstOffset, 
+		void copy(VulkanCmdBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset, VkDeviceSize dstOffset,
 			VkDeviceSize length);
 
 		/** 
 		 * Queues a command on the provided command buffer. The command copies the contents of the current buffer to
 		 * the destination image subresource. 
 		 */
-		void copy(VulkanTransferBuffer* cb, VulkanImage* destination, const VkExtent3D& extent, 
+		void copy(VulkanCmdBuffer* cb, VulkanImage* destination, const VkExtent3D& extent,
 			const VkImageSubresourceLayers& range, VkImageLayout layout);
 
 		/** 
@@ -75,7 +75,7 @@ namespace bs { namespace ct
 		 * the destination buffer. Caller must ensure the provided offset and length are within valid bounds of
 		 * both buffers. Caller must ensure the offset and size are multiples of 4, and size is less than or equal to 65536.
 		 */
-		void update(VulkanTransferBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length);
+		void update(VulkanCmdBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length);
 
 	private:
 		VkBuffer mBuffer;
@@ -120,7 +120,7 @@ namespace bs { namespace ct
 
 		/** @copydoc HardwareBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, 
-			UINT32 length, bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			UINT32 length, bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/** 
 		 * Gets the resource wrapping the buffer object, on the specified device. If hardware buffer device mask doesn't 

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanIndexBuffer.h

@@ -27,7 +27,7 @@ namespace bs { namespace ct
 
 		/** @copydoc IndexBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
-			bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/** 
 		 * Gets the resource wrapping the buffer object, on the specified device. If GPU param block buffer's device mask

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanVertexBuffer.h

@@ -27,7 +27,7 @@ namespace bs { namespace ct
 
 		/** @copydoc VertexBuffer::copyData */
 		void copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
-			bool discardWholeBuffer = false, UINT32 queueIdx = 0) override;
+			bool discardWholeBuffer = false, const SPtr<CommandBuffer>& commandBuffer = nullptr) override;
 
 		/** 
 		 * Gets the resource wrapping the buffer object, on the specified device. If GPU param block buffer's device mask

+ 3 - 3
Source/BansheeVulkanRenderAPI/Source/BsVulkanGpuBuffer.cpp

@@ -84,10 +84,10 @@ namespace bs { namespace ct
 		BS_INC_RENDER_STAT_CAT(ResWrite, RenderStatObject_GpuBuffer);
 	}
 
-	void VulkanGpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+	void VulkanGpuBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+		bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, queueIdx);
+		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 
 	VulkanBuffer* VulkanGpuBuffer::getResource(UINT32 deviceIdx) const

+ 27 - 85
Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp

@@ -49,7 +49,7 @@ namespace bs { namespace ct
 		vkUnmapMemory(device.getLogical(), mMemory);
 	}
 
-	void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset, 
+	void VulkanBuffer::copy(VulkanCmdBuffer* cb, VulkanBuffer* destination, VkDeviceSize srcOffset,
 		VkDeviceSize dstOffset, VkDeviceSize length)
 	{
 		VkBufferCopy region;
@@ -57,10 +57,10 @@ namespace bs { namespace ct
 		region.srcOffset = srcOffset;
 		region.dstOffset = dstOffset;
 
-		vkCmdCopyBuffer(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), 1, &region);
+		vkCmdCopyBuffer(cb->getHandle(), mBuffer, destination->getHandle(), 1, &region);
 	}
 
-	void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanImage* destination, const VkExtent3D& extent, 
+	void VulkanBuffer::copy(VulkanCmdBuffer* cb, VulkanImage* destination, const VkExtent3D& extent, 
 		const VkImageSubresourceLayers& range, VkImageLayout layout)
 	{
 		VkBufferImageCopy region;
@@ -73,12 +73,12 @@ namespace bs { namespace ct
 		region.imageExtent = extent;
 		region.imageSubresource = range;
 
-		vkCmdCopyBufferToImage(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), layout, 1, &region);
+		vkCmdCopyBufferToImage(cb->getHandle(), mBuffer, destination->getHandle(), layout, 1, &region);
 	}
 
-	void VulkanBuffer::update(VulkanTransferBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length)
+	void VulkanBuffer::update(VulkanCmdBuffer* cb, UINT8* data, VkDeviceSize offset, VkDeviceSize length)
 	{
-		vkCmdUpdateBuffer(cb->getCB()->getHandle(), mBuffer, offset, length, (uint32_t*)data);
+		vkCmdUpdateBuffer(cb->getHandle(), mBuffer, offset, length, (uint32_t*)data);
 	}
 
 	VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage, 
@@ -393,7 +393,7 @@ namespace bs { namespace ct
 			}
 
 			// Queue copy command
-			buffer->copy(transferCB, mStagingBuffer, offset, 0, length);
+			buffer->copy(transferCB->getCB(), mStagingBuffer, offset, 0, length);
 
 			// Ensure data written to the staging buffer is visible
 			transferCB->memoryBarrier(mStagingBuffer->getHandle(),
@@ -491,7 +491,7 @@ namespace bs { namespace ct
 						// Avoid copying original contents if the staging buffer completely covers it
 						if (mMappedOffset > 0 || mMappedSize != mSize)
 						{
-							buffer->copy(transferCB, newBuffer, 0, 0, mSize);
+							buffer->copy(transferCB->getCB(), newBuffer, 0, 0, mSize);
 
 							transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
 						}
@@ -505,12 +505,12 @@ namespace bs { namespace ct
 				// Queue copy/update command
 				if (mStagingBuffer != nullptr)
 				{
-					mStagingBuffer->copy(transferCB, buffer, 0, mMappedOffset, mMappedSize);
+					mStagingBuffer->copy(transferCB->getCB(), buffer, 0, mMappedOffset, mMappedSize);
 					transferCB->getCB()->registerResource(mStagingBuffer, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
 				}
 				else // Staging memory
 				{
-					buffer->update(transferCB, mStagingMemory, mMappedOffset, mMappedSize);
+					buffer->update(transferCB->getCB(), mStagingMemory, mMappedOffset, mMappedSize);
 				}
 
 				transferCB->getCB()->registerResource(buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);
@@ -536,7 +536,7 @@ namespace bs { namespace ct
 	}
 
 	void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
 		if ((dstOffset + length) > mSize)
 		{
@@ -557,86 +557,28 @@ namespace bs { namespace ct
 		VulkanHardwareBuffer& vkSource = static_cast<VulkanHardwareBuffer&>(srcBuffer);
 
 		VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());
-		VulkanCommandBufferManager& cbManager = gVulkanCBManager();
-
-		GpuQueueType queueType;
-		UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);
-
-		// Perform copy on every device that has both buffers
-		for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
-		{
-			VulkanBuffer* src = vkSource.mBuffers[i];
-			VulkanBuffer* dst = mBuffers[i];
-
-			if (src == nullptr || dst == nullptr)
-				continue;
-
-			VulkanDevice& device = *rapi._getDevice(i);
-			VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(i, queueType, localQueueIdx);
-
-			// If either source or destination buffer is currently being written to do need to sync the copy operation so
-			// it executes after both are done
-
-			// If destination is being used on the GPU we need to wait until it finishes before writing to it
-			UINT32 dstUseMask = dst->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);
-
-			// If discard is enabled and destination is used, instead of waiting just discard the existing buffer and make a new one
-			bool isNormalWrite = true;
-			if(dstUseMask != 0 && discardWholeBuffer)
-			{
-				dst->destroy();
-
-				dst = createBuffer(device, mSize, false, true);
-				mBuffers[i] = dst;
-
-				dstUseMask = 0;
-				isNormalWrite = false;
-			}
-
-			// If source buffer is being written to on the GPU we need to wait until it finishes, before executing copy
-			UINT32 srcUseMask = src->getUseInfo(VulkanUseFlag::Write);
-
-			// Wait if anything is using the buffers
-			if(dstUseMask != 0 || srcUseMask != 0)
-				transferCB->appendMask(dstUseMask | srcUseMask);
-
-			// Check if the destination buffer will still be bound somewhere after the CBs using it finish
-			if (isNormalWrite)
-			{
-				UINT32 useCount = dst->getUseCount();
-				UINT32 boundCount = dst->getBoundCount();
-
-				bool isBoundWithoutUse = boundCount > useCount;
-
-				// If destination buffer is queued for some operation on a CB (ignoring the ones we're waiting for), then we
-				// need to make a copy of the buffer to avoid modifying its use in the previous operation
-				if (isBoundWithoutUse)
-				{
-					VulkanBuffer* newBuffer = createBuffer(device, mSize, false, true);
+		VulkanCmdBuffer* vkCB;
+		if (commandBuffer != nullptr)
+			vkCB = static_cast<VulkanCommandBuffer*>(commandBuffer.get())->getInternal();
+		else
+			vkCB = rapi._getMainCommandBuffer()->getInternal();
 
-					// Avoid copying original contents if the copy completely covers it
-					if (dstOffset > 0 || length != mSize)
-					{
-						dst->copy(transferCB, newBuffer, 0, 0, mSize);
+		UINT32 deviceIdx = vkCB->getDeviceIdx();
 
-						transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
-					}
+		VulkanBuffer* src = vkSource.mBuffers[deviceIdx];
+		VulkanBuffer* dst = mBuffers[deviceIdx];
 
-					dst->destroy();
-					dst = newBuffer;
-					mBuffers[i] = dst;
-				}
-			}
+		if (src == nullptr || dst == nullptr)
+			return;
 
-			src->copy(transferCB, dst, srcOffset, dstOffset, length);
+		if (vkCB->isInRenderPass())
+			vkCB->endRenderPass();
 
-			// Notify the command buffer that these resources are being used on it
-			transferCB->getCB()->registerResource(src, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
-			transferCB->getCB()->registerResource(dst, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);
+		src->copy(vkCB, dst, srcOffset, dstOffset, length);
 
-			// We don't actually flush the transfer buffer here since it's an expensive operation, but it's instead
-			// done automatically before next "normal" command buffer submission.
-		}
+		// Notify the command buffer that these resources are being used on it
+		vkCB->registerResource(src, VK_ACCESS_TRANSFER_READ_BIT, VulkanUseFlag::Read);
+		vkCB->registerResource(dst, VK_ACCESS_TRANSFER_WRITE_BIT, VulkanUseFlag::Write);
 	}
 
 	void VulkanHardwareBuffer::readData(UINT32 offset, UINT32 length, void* dest, UINT32 deviceIdx, UINT32 queueIdx)

+ 3 - 3
Source/BansheeVulkanRenderAPI/Source/BsVulkanIndexBuffer.cpp

@@ -62,10 +62,10 @@ namespace bs { namespace ct
 		BS_INC_RENDER_STAT_CAT(ResWrite, RenderStatObject_IndexBuffer);
 	}
 
-	void VulkanIndexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-										UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+	void VulkanIndexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset, UINT32 dstOffset, UINT32 length, 
+		bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, queueIdx);
+		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 
 	VulkanBuffer* VulkanIndexBuffer::getResource(UINT32 deviceIdx) const

+ 1 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanTexture.cpp

@@ -1347,7 +1347,7 @@ namespace bs { namespace ct
 									  curLayout, transferLayout, range);
 
 				// Queue copy command
-				mStagingBuffer->copy(transferCB, image, extent, rangeLayers, transferLayout);
+				mStagingBuffer->copy(transferCB->getCB(), image, extent, rangeLayers, transferLayout);
 
 				// Transfer back to original  (or optimal if initial layout was undefined/preinitialized)
 				VkImageLayout dstLayout = image->getOptimalLayout();

+ 2 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanVertexBuffer.cpp

@@ -64,9 +64,9 @@ namespace bs { namespace ct
 	}
 
 	void VulkanVertexBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
-		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, UINT32 queueIdx)
+		UINT32 dstOffset, UINT32 length, bool discardWholeBuffer, const SPtr<CommandBuffer>& commandBuffer)
 	{
-		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, queueIdx);
+		mBuffer->copyData(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer, commandBuffer);
 	}
 
 	VulkanBuffer* VulkanVertexBuffer::getResource(UINT32 deviceIdx) const