
Vulkan buffer memory mapping with handling for memory hazards and accesses from other device queues

BearishSun 9 years ago
parent
commit cc9a402774

+ 8 - 4
Source/BansheeCore/Include/BsCommonTypes.h

@@ -178,11 +178,15 @@ namespace BansheeEngine
 	{
 		/** 
 		 * Signifies that you don't plan on modifying the buffer often (or at all) after creation. Modifying such buffer 
-		 * will involve a larger performance hit.
+		 * will involve a larger performance hit. Mutually exclusive with GBU_DYNAMIC.
 		 */
-        GBU_STATIC = 1,
-		/** Signifies that you will modify this buffer fairly often (e.g. every frame). */
-		GBU_DYNAMIC = 2
+        GBU_STATIC = 0x01,
+		/** 
+		 * Signifies that you will modify this buffer fairly often (e.g. every frame). Mutually exclusive with GBU_STATIC. 
+		 */
+		GBU_DYNAMIC = 0x02,
+		/** Signifies that the buffer's data on the GPU can be read by the CPU. */
+		GBU_READABLE = 0x04
 	};
 
 	/** Types of generic GPU buffers that may be attached to GPU programs. */
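Since the usage values are now distinct bit flags, GBU_READABLE can be combined with either of the other two, and back-ends are expected to test individual bits rather than switch on the whole value (as the D3D11 and GL changes below do). A minimal sketch of that flag handling, assuming only the enum above; the helper names and main() are illustrative, not engine API:

// Minimal sketch of the new bit-flag semantics; mirrors the enum in BsCommonTypes.h.
#include <cstdint>

enum GpuBufferUsage : std::uint32_t
{
	GBU_STATIC   = 0x01, // mutually exclusive with GBU_DYNAMIC
	GBU_DYNAMIC  = 0x02, // mutually exclusive with GBU_STATIC
	GBU_READABLE = 0x04  // may be combined with either of the above
};

inline bool isDynamic(std::uint32_t usage)     { return (usage & GBU_DYNAMIC) != 0; }
inline bool isCPUReadable(std::uint32_t usage) { return (usage & GBU_READABLE) != 0; }

int main()
{
	std::uint32_t usage = GBU_DYNAMIC | GBU_READABLE; // e.g. a per-frame buffer that is also read back on the CPU
	return (isDynamic(usage) && isCPUReadable(usage)) ? 0 : 1;
}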

+ 1 - 1
Source/BansheeCore/Source/BsVertexBuffer.cpp

@@ -10,7 +10,7 @@ namespace BansheeEngine
 	{ }
 
 	VertexBufferCore::VertexBufferCore(const VERTEX_BUFFER_DESC& desc, GpuDeviceFlags deviceMask)
-		:HardwareBuffer(mProperties.mVertexSize * mProperties.mNumVertices), mProperties(desc.numVerts, desc.vertexSize)
+		:HardwareBuffer(desc.vertexSize * desc.numVerts), mProperties(desc.numVerts, desc.vertexSize)
 	{ }
 
 	SPtr<VertexBufferCore> VertexBufferCore::create(const VERTEX_BUFFER_DESC& desc, GpuDeviceFlags deviceMask)

+ 1 - 7
Source/BansheeD3D11RenderAPI/Source/BsD3D11Mappings.cpp

@@ -812,13 +812,7 @@ namespace BansheeEngine
 
 	bool D3D11Mappings::isDynamic(GpuBufferUsage usage)
 	{
-		switch (usage)
-		{
-		case GBU_DYNAMIC:
-			return true;
-		}
-
-		return false;
+		return (usage & GBU_DYNAMIC) != 0;
 	}
 
 	bool D3D11Mappings::isMappingWrite(D3D11_MAP map)

+ 7 - 9
Source/BansheeGLRenderAPI/Source/BsGLHardwareBufferManager.cpp

@@ -54,15 +54,13 @@ namespace BansheeEngine
 
     GLenum GLHardwareBufferCoreManager::getGLUsage(GpuBufferUsage usage)
     {
-        switch(usage)
-        {
-        case GBU_STATIC:
-            return GL_STATIC_DRAW;
-        case GBU_DYNAMIC:
-            return GL_DYNAMIC_DRAW;
-        default:
-            return GL_DYNAMIC_DRAW;
-        };
+		if(usage & GBU_STATIC)
+			return GL_STATIC_DRAW;
+
+		if(usage & GBU_DYNAMIC)
+			return GL_DYNAMIC_DRAW;
+
+        return GL_DYNAMIC_DRAW;
     }
 
     GLenum GLHardwareBufferCoreManager::getGLType(VertexElementType type)

+ 35 - 6
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -13,28 +13,54 @@ namespace BansheeEngine
 	 */
 
 	/** Wrapper around a command buffer used specifically for transfer operations. */
-	class VulkanTransferBufferInfo
+	class VulkanTransferBuffer
 	{
 	public:
-		VulkanTransferBufferInfo(UINT32 queueIdx);
+		VulkanTransferBuffer();
+		VulkanTransferBuffer(VulkanDevice* device, GpuQueueType type, UINT32 queueIdx);
+		~VulkanTransferBuffer();
 
 		/** 
 		 * OR's the provided sync mask with the internal sync mask. The sync mask determines on which queues should
-		 * the buffer wait on before executing. See CommandSyncMask.
+		 * the buffer wait on before executing. Sync mask is reset after a flush. See CommandSyncMask on how to generate
+		 * a sync mask.
 		 */
 		void appendMask(UINT32 syncMask) { mSyncMask |= syncMask; }
 
 		/** Resets the sync mask. */
 		void clearMask() { mSyncMask = 0; }
 
+		/** 
+		 * Issues a pipeline barrier on the provided buffer. See vkCmdPipelineBarrier in Vulkan spec. for usage
+		 * information.
+		 */
+		void memoryBarrier(VkBuffer buffer, VkAccessFlags srcAccessFlags, VkAccessFlags dstAccessFlags,
+						   VkPipelineStageFlags srcStage, VkPipelineStageFlags dstStage);
+
+		/** 
+		 * Submits the command buffer on the queue. 
+		 * 
+		 *	@param[in]	wait	If true, the caller thread will wait until all device operations on the command buffer's
+		 *						queue complete.	
+		 */
+		void flush(bool wait);
+
 		/** Returns the internal command buffer. */
 		VulkanCmdBuffer* getCB() const { return mCB; }
 	private:
 		friend class VulkanCommandBufferManager;
 
+		/** Allocates a new internal command buffer. */
+		void allocate();
+
+		VulkanDevice* mDevice;
+		GpuQueueType mType;
+		UINT32 mQueueIdx;
+		VulkanQueue* mQueue;
+		UINT32 mQueueMask;
+
 		VulkanCmdBuffer* mCB;
 		UINT32 mSyncMask;
-		UINT32 mQueueIdx;
 	};
 
 	/** 
@@ -78,7 +104,7 @@ namespace BansheeEngine
 		 * Transfer buffers are automatically flushed (submitted) whenever a new (normal) command buffer is about to
 		 * execute.
 		 */
-		VulkanTransferBufferInfo* getTransferBuffer(UINT32 deviceIdx, GpuQueueType type, UINT32 queueIdx);
+		VulkanTransferBuffer* getTransferBuffer(UINT32 deviceIdx, GpuQueueType type, UINT32 queueIdx);
 
 		/** Submits all transfer command buffers, ensuring all queued transfer operations get executed. */
 		void flushTransferBuffers(UINT32 deviceIdx);
@@ -88,7 +114,7 @@ namespace BansheeEngine
 		struct PerDeviceData
 		{
 			VulkanCmdBuffer* activeBuffers[BS_MAX_UNIQUE_QUEUES];
-			VulkanTransferBufferInfo transferBuffers[BS_MAX_UNIQUE_QUEUES];
+			VulkanTransferBuffer transferBuffers[GQT_COUNT][BS_MAX_QUEUES_PER_TYPE];
 		};
 
 		const VulkanRenderAPI& mRapi;
@@ -97,5 +123,8 @@ namespace BansheeEngine
 		UINT32 mNumDevices;
 	};
 
+	/**	Provides easy access to the VulkanCommandBufferManager. */
+	VulkanCommandBufferManager& gVulkanCBManager();
+
 	/** @} */
 }
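To show how the pieces above are meant to fit together, here is a sketch of the transfer-buffer flow the mapping code later in this commit uses before a CPU read: wait on every queue that still has writes pending, make those writes visible to the host, then submit and block. All names come from headers in this commit; the wrapping function itself is illustrative only.

// Sketch only: the call sequence used before mapping a GPU-writable buffer for CPU access.
// Assumes BsVulkanCommandBufferManager.h, BsVulkanHardwareBuffer.h and BsVulkanResource.h.
void waitForGpuWrites(VulkanBuffer* buffer, UINT32 deviceIdx, GpuQueueType type, UINT32 localQueueIdx)
{
	VulkanTransferBuffer* transferCB = gVulkanCBManager().getTransferBuffer(deviceIdx, type, localQueueIdx);

	// Make flush() wait on every queue that currently has pending writes to the buffer.
	UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
	transferCB->appendMask(writeUseMask);

	// Make the GPU writes visible to host reads (read-after-write hazard).
	transferCB->memoryBarrier(buffer->getHandle(),
							  VK_ACCESS_SHADER_WRITE_BIT,
							  VK_ACCESS_HOST_READ_BIT,
							  VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
							  VK_PIPELINE_STAGE_HOST_BIT);

	// Submit and block until the queue is idle; command buffer states are refreshed by flush().
	transferCB->flush(true);
}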

+ 35 - 3
Source/BansheeVulkanRenderAPI/Include/BsVulkanHardwareBuffer.h

@@ -25,6 +25,22 @@ namespace BansheeEngine
 		/** Returns a buffer view that covers the entire buffer. */
 		VkBufferView getView() const { return mView; }
 
+		/** 
+		 * Returns a pointer to internal buffer memory. Must be followed by unmap(). Caller must ensure the buffer was
+		 * created in CPU-readable memory, and that the buffer isn't currently being written to by the GPU.
+		 */
+		UINT8* map(VkDeviceSize offset, VkDeviceSize length) const;
+
+		/** Unmaps a buffer previously mapped with map(). */
+		void unmap();
+
+		/** 
+		 * Queues a command on the provided command buffer. The command copies the contents of the current buffer to
+		 * the destination buffer. Caller must ensure the provided offsets and lengths are within valid bounds of
+		 * both buffers.
+		 */
+		void copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize offset, VkDeviceSize length);
+
 	private:
 		VkBuffer mBuffer;
 		VkBufferView mView;
@@ -48,8 +64,6 @@ namespace BansheeEngine
 			BT_GENERIC = 0x8,
 			/** Generic read/write GPU buffer containing formatted data. */
 			BT_STORAGE = 0x10,
-			/** Helper buffer that can be written to on the CPU. Used for copy operations. */
-			BT_STAGING = 0x20,
 		};
 
 		VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage, UINT32 size,
@@ -80,8 +94,26 @@ namespace BansheeEngine
 		/** @copydoc HardwareBuffer::unmap */
 		void unmap() override;
 
+		/** Creates a new buffer for the specified device, matching the current buffer properties. */
+		VulkanBuffer* createBuffer(VulkanDevice& device, bool staging, bool readable);
+
 		VulkanBuffer* mBuffers[BS_MAX_DEVICES];
-		bool mStaging;
+
+		VulkanBuffer* mStagingBuffer;
+		UINT32 mMappedDeviceIdx;
+		UINT32 mMappedGlobalQueueIdx;
+		UINT32 mMappedOffset;
+		UINT32 mMappedSize;
+		GpuLockOptions mMappedLockOptions;
+
+		VkBufferCreateInfo mBufferCI;
+		VkBufferViewCreateInfo mViewCI;
+		VkBufferUsageFlags mUsageFlags;
+		bool mDirectlyMappable : 1;
+		bool mSupportsGPUWrites : 1;
+		bool mRequiresView : 1;
+		bool mReadable : 1;
+		bool mIsMapped : 1;
 	};
 
 	/** @} */
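The map()/unmap()/copy() primitives on VulkanBuffer are what the staging read-back path builds on. A hedged sketch of that path, assuming stagingBuffer was created host-visible and large enough (e.g. via the createBuffer() helper declared above) and that transferCB came from getTransferBuffer(); the wrapping function is illustrative:

// Sketch: read `length` bytes at `offset` out of a device-local buffer through a staging buffer.
// Assumes the engine headers above plus <cstring> for memcpy.
void readBack(VulkanBuffer* gpuBuffer, VulkanBuffer* stagingBuffer, VulkanTransferBuffer* transferCB,
			  VkDeviceSize offset, VkDeviceSize length, UINT8* out)
{
	// Record a GPU-side copy into the staging buffer.
	gpuBuffer->copy(transferCB, stagingBuffer, offset, length);

	// Make the transfer writes visible to the host before mapping the staging memory.
	transferCB->memoryBarrier(stagingBuffer->getHandle(),
							  VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
							  VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT);

	// Submit and wait so the copy has completed before the CPU touches the memory.
	transferCB->flush(true);

	UINT8* src = stagingBuffer->map(offset, length);
	memcpy(out, src, (size_t)length);
	stagingBuffer->unmap();
}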

+ 1 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanPrerequisites.h

@@ -56,6 +56,7 @@ namespace BansheeEngine
 	class VulkanImage;
 	class VulkanDescriptorPool;
 	class VulkanGpuParams;
+	class VulkanTransferBuffer;
 
 	VkAllocationCallbacks* gVulkanAllocator = nullptr;
 

+ 3 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -38,6 +38,9 @@ namespace BansheeEngine
 		/** Submits the provided command buffer on the queue. */
 		void submit(VulkanCmdBuffer* cmdBuffer, VkSemaphore* waitSemaphores, UINT32 semaphoresCount);
 
+		/** Blocks the calling thread until all operations on the queue finish. */
+		void waitIdle() const;
+
 	protected:
 		VulkanDevice& mDevice;
 		VkQueue mQueue;

+ 3 - 3
Source/BansheeVulkanRenderAPI/Include/BsVulkanResource.h

@@ -105,13 +105,13 @@ namespace BansheeEngine
 		UINT32 getQueueFamily() const { Lock(mMutex); return mQueueFamily; }
 
 		/** 
-		 * Returns a mask that has bits set for every queue that the resource is currently used by.
+		 * Returns a mask that has bits set for every queue that the resource is currently used (read or written) by.
 		 *
-		 * @param[out]	useFlags	Output parameter that notifies the caller in what way is the resource being used.
+		 * @param[in]	useFlags	Flags for which to check use information (e.g. read only, write only, or both).
 		 * @return					Bitmask of which queues is the resource used on. This has the same format as sync mask
 		 *							created by CommandSyncMask.
 		 */
-		UINT32 getUseInfo(VulkanUseFlags& useFlags) const;
+		UINT32 getUseInfo(VulkanUseFlags useFlags) const;
 
 		/** Returns true if the resource is only allowed to be used by a single queue family at once. */
 		bool isExclusive() const { Lock(mMutex); return mState != State::Shared; }

+ 6 - 6
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -330,8 +330,8 @@ namespace BansheeEngine
 			UINT32 numBufferBarriers = (UINT32)barriers.bufferBarriers.size();
 
 			vkCmdPipelineBarrier(vkCmdBuffer,
-								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, // Note: VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT might be more correct here, according to the spec
+								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, //       The main idea is that the barrier executes before the semaphore triggers, no actual stage dependencies are needed.
 								 0, 0, nullptr,
 								 numBufferBarriers, barriers.bufferBarriers.data(),
 								 numImgBarriers, barriers.imageBarriers.data());
@@ -406,8 +406,8 @@ namespace BansheeEngine
 			UINT32 numBufferBarriers = (UINT32)barriers.bufferBarriers.size();
 
 			vkCmdPipelineBarrier(vkCmdBuffer,
-								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, // Note: VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT might be more correct here, according to the spec
+								 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 
 								 0, 0, nullptr,
 								 numBufferBarriers, barriers.bufferBarriers.data(),
 								 numImgBarriers, barriers.imageBarriers.data());
@@ -448,8 +448,6 @@ namespace BansheeEngine
 			entry.first->notifyUsed(mGlobalQueueIdx, mQueueFamily, useHandle.flags);
 		}
 
-		cbm.refreshStates(deviceIdx);
-
 		// Note: Uncommented for debugging only, prevents any device concurrency issues.
 		// vkQueueWaitIdle(queue->getHandle());
 
@@ -655,6 +653,8 @@ namespace BansheeEngine
 		syncMask &= ~mIdMask;
 
 		mBuffer->submit(mQueue, mQueueIdx, syncMask);
+
+		gVulkanCBManager().refreshStates(mDeviceIdx);
 		acquireNewBuffer();
 	}
 }

+ 93 - 41
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp

@@ -4,21 +4,99 @@
 #include "BsVulkanCommandBuffer.h"
 #include "BsVulkanRenderAPI.h"
 #include "BsVulkanDevice.h"
+#include "BsVulkanQueue.h"
 
 namespace BansheeEngine
 {
-	VulkanTransferBufferInfo::VulkanTransferBufferInfo(UINT32 queueIdx)
-		:mCB(nullptr), mSyncMask(0), mQueueIdx(queueIdx)
+	VulkanTransferBuffer::VulkanTransferBuffer()
+		:mDevice(nullptr), mType(GQT_GRAPHICS), mQueueIdx(0), mQueue(nullptr), mCB(nullptr), mSyncMask(0), mQueueMask(0)
 	{ }
 
+	VulkanTransferBuffer::VulkanTransferBuffer(VulkanDevice* device, GpuQueueType type, UINT32 queueIdx)
+		:mDevice(device), mType(type), mQueueIdx(queueIdx), mQueue(nullptr), mCB(nullptr), mSyncMask(0), mQueueMask(0)
+	{
+		UINT32 numQueues = device->getNumQueues(mType);
+		if (numQueues == 0)
+		{
+			mType = GQT_GRAPHICS;
+			numQueues = device->getNumQueues(GQT_GRAPHICS);
+		}
+
+		UINT32 physicalQueueIdx = queueIdx % numQueues;
+		mQueue = device->getQueue(mType, physicalQueueIdx);
+		mQueueMask = device->getQueueMask(mType, queueIdx);
+	}
+
+	VulkanTransferBuffer::~VulkanTransferBuffer()
+	{
+		if (mCB != nullptr)
+			mCB->end();
+	}
+
+	void VulkanTransferBuffer::allocate()
+	{
+		if (mCB != nullptr)
+			return;
+
+		UINT32 queueFamily = mDevice->getQueueFamily(mType);
+		mCB = mDevice->getCmdBufferPool().getBuffer(queueFamily, false);
+	}
+
+	void VulkanTransferBuffer::memoryBarrier(VkBuffer buffer, VkAccessFlags srcAccessFlags, VkAccessFlags dstAccessFlags,
+					   VkPipelineStageFlags srcStage, VkPipelineStageFlags dstStage)
+	{
+		VkBufferMemoryBarrier barrier;
+		barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+		barrier.pNext = nullptr;
+		barrier.srcAccessMask = srcAccessFlags;
+		barrier.dstAccessMask = dstAccessFlags;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.buffer = buffer;
+		barrier.offset = 0;
+		barrier.size = VK_WHOLE_SIZE;
+
+		vkCmdPipelineBarrier(mCB->getHandle(),
+							 srcStage,
+							 dstStage,
+							 0, 0, nullptr,
+							 1, &barrier,
+							 0, nullptr);
+	}
+
+	void VulkanTransferBuffer::flush(bool wait)
+	{
+		UINT32 syncMask = mSyncMask & ~mQueueMask; // Don't sync with itself
+
+		mCB->end();
+		mCB->submit(mQueue, mQueueIdx, syncMask);
+
+		if (wait)
+		{
+			mQueue->waitIdle();
+			gVulkanCBManager().refreshStates(mDevice->getIndex());
+		}
+
+		mCB = nullptr;
+	}
+
 	VulkanCommandBufferManager::VulkanCommandBufferManager(const VulkanRenderAPI& rapi)
 		:mRapi(rapi), mDeviceData(nullptr), mNumDevices(rapi.getNumDevices())
 	{
 		mDeviceData = bs_newN<PerDeviceData>(mNumDevices);
 		for (UINT32 i = 0; i < mNumDevices; i++)
 		{
+			SPtr<VulkanDevice> device = rapi._getDevice(i);
+
 			bs_zero_out(mDeviceData[i].activeBuffers);
-			bs_zero_out(mDeviceData[i].transferBuffers);
+
+			for (UINT32 j = 0; j < GQT_COUNT; j++)
+			{
+				GpuQueueType queueType = (GpuQueueType)j;
+
+				for (UINT32 k = 0; k < BS_MAX_QUEUES_PER_TYPE; k++)
+					mDeviceData[i].transferBuffers[j][k] = VulkanTransferBuffer(device.get(), queueType, k);
+			}
 		}
 	}
 
@@ -97,58 +175,32 @@ namespace BansheeEngine
 		}
 	}
 
-	VulkanTransferBufferInfo* VulkanCommandBufferManager::getTransferBuffer(UINT32 deviceIdx, GpuQueueType type, 
+	VulkanTransferBuffer* VulkanCommandBufferManager::getTransferBuffer(UINT32 deviceIdx, GpuQueueType type,
 		UINT32 queueIdx)
 	{
 		assert(deviceIdx < mNumDevices);
 
-		UINT32 globalQueueIdx = CommandSyncMask::getGlobalQueueIdx(type, queueIdx);
-		assert(globalQueueIdx < BS_MAX_UNIQUE_QUEUES);
-
 		PerDeviceData& deviceData = mDeviceData[deviceIdx];
-		if (deviceData.transferBuffers[globalQueueIdx].mCB == nullptr)
-		{
-			SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
-
-			UINT32 queueFamily = device->getQueueFamily(type);
-			deviceData.transferBuffers[globalQueueIdx].mCB = device->getCmdBufferPool().getBuffer(queueFamily, false);
-		}
 
-		return &deviceData.transferBuffers[globalQueueIdx];
+		VulkanTransferBuffer* transferBuffer = &deviceData.transferBuffers[type][queueIdx];
+		transferBuffer->allocate();
+		return transferBuffer;
 	}
 
 	void VulkanCommandBufferManager::flushTransferBuffers(UINT32 deviceIdx)
 	{
 		assert(deviceIdx < mNumDevices);
 
-		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
 		PerDeviceData& deviceData = mDeviceData[deviceIdx];
-
-		UINT32 transferBufferIdx = 0;
-		for(UINT32 i = 0; i < GQT_COUNT; i++)
+		for (UINT32 i = 0; i < GQT_COUNT; i++)
 		{
-			GpuQueueType queueType = (GpuQueueType)i;
-			UINT32 numQueues = device->getNumQueues(queueType);
-			if (numQueues == 0)
-			{
-				queueType = GQT_GRAPHICS;
-				numQueues = device->getNumQueues(GQT_GRAPHICS);
-			}
-
-			for(UINT32 j = 0; j < BS_MAX_QUEUES_PER_TYPE; j++)
-			{
-				VulkanTransferBufferInfo& bufferInfo = deviceData.transferBuffers[transferBufferIdx];
-				if (bufferInfo.mCB == nullptr)
-					continue;
-
-				UINT32 physicalQueueIdx = j % numQueues;
-				VulkanQueue* queue = device->getQueue(queueType, physicalQueueIdx);
-
-				bufferInfo.mCB->submit(queue, bufferInfo.mQueueIdx, bufferInfo.mSyncMask);
-				bufferInfo.mCB = nullptr;
-
-				transferBufferIdx++;
-			}
+			for (UINT32 j = 0; j < BS_MAX_QUEUES_PER_TYPE; j++)
+				deviceData.transferBuffers[i][j].flush(false);
 		}
 	}
+
+	VulkanCommandBufferManager& gVulkanCBManager()
+	{
+		return static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
+	}
 }

+ 259 - 104
Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp

@@ -4,7 +4,7 @@
 #include "BsVulkanRenderAPI.h"
 #include "BsVulkanDevice.h"
 #include "BsVulkanUtility.h"
-#include "BsException.h"
+#include "BsVulkanCommandBufferManager.h"
 
 namespace BansheeEngine
 {
@@ -25,16 +25,42 @@ namespace BansheeEngine
 		device.freeMemory(mMemory);
 	}
 
-	VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage, 
-		UINT32 size, GpuDeviceFlags deviceMask)
-		: HardwareBuffer(size), mBuffers(), mStaging(type == BT_STAGING)
+	UINT8* VulkanBuffer::map(VkDeviceSize offset, VkDeviceSize length) const
 	{
-		bool needsView = false;
+		VulkanDevice& device = mOwner->getDevice();
 
-		VkMemoryPropertyFlags flags = mStaging ?
-			(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached uncoherent memory
-			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+		UINT8* data;
+		VkResult result = vkMapMemory(device.getLogical(), mMemory, offset, length, 0, (void**)&data);
+		assert(result == VK_SUCCESS);
+
+		return data;
+	}
+
+	void VulkanBuffer::unmap()
+	{
+		VulkanDevice& device = mOwner->getDevice();
+
+		vkUnmapMemory(device.getLogical(), mMemory);
+	}
+
+	void VulkanBuffer::copy(VulkanTransferBuffer* cb, VulkanBuffer* destination, VkDeviceSize offset, VkDeviceSize length)
+	{
+		VkBufferCopy region;
+		region.size = length;
+		region.srcOffset = offset;
+		region.dstOffset = offset;
+
+		vkCmdCopyBuffer(cb->getCB()->getHandle(), mBuffer, destination->getHandle(), 1, &region);
+	}
 
+	VulkanHardwareBuffer::VulkanHardwareBuffer(BufferType type, GpuBufferFormat format, GpuBufferUsage usage, 
+		UINT32 size, GpuDeviceFlags deviceMask)
+		: HardwareBuffer(size), mBuffers(), mStagingBuffer(nullptr), mMappedDeviceIdx(-1), mMappedGlobalQueueIdx(-1)
+		, mMappedOffset(0), mMappedSize(0), mMappedLockOptions(GBL_WRITE_ONLY)
+		, mDirectlyMappable((usage & GBU_DYNAMIC) != 0)
+		, mSupportsGPUWrites(type == BT_STORAGE), mRequiresView(false), mReadable((usage & GBU_READABLE) != 0)
+		, mIsMapped(false)
+	{
 		VkBufferUsageFlags usageFlags = 0;
 		switch(type)
 		{
@@ -49,17 +75,30 @@ namespace BansheeEngine
 			break;
 		case BT_GENERIC:
 			usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
-			needsView = true;
+			mRequiresView = true;
 			break;
 		case BT_STORAGE:
 			usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
-			needsView = true;
-			break;
-		case BT_STAGING:
-			usageFlags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+			mRequiresView = true;
 			break;
 		}
 
+		mBufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+		mBufferCI.pNext = nullptr;
+		mBufferCI.flags = 0;
+		mBufferCI.size = size;
+		mBufferCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+		mBufferCI.usage = usageFlags;
+		mBufferCI.queueFamilyIndexCount = 0;
+		mBufferCI.pQueueFamilyIndices = nullptr;
+
+		mViewCI.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
+		mViewCI.pNext = nullptr;
+		mViewCI.flags = 0;
+		mViewCI.format = VulkanUtility::getBufferFormat(format);
+		mViewCI.offset = 0;
+		mViewCI.range = VK_WHOLE_SIZE;
+
 		VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
 		VulkanDevice* devices[BS_MAX_DEVICES];
 		VulkanUtility::getDevices(rapi, deviceMask, devices);
@@ -70,48 +109,7 @@ namespace BansheeEngine
 			if (devices[i] == nullptr)
 				continue;
 
-			VkBufferCreateInfo bufferCI;
-			bufferCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-			bufferCI.pNext = nullptr;
-			bufferCI.flags = 0; 
-			bufferCI.size = size;
-			bufferCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-			bufferCI.usage = usageFlags;
-			bufferCI.queueFamilyIndexCount = 0;
-			bufferCI.pQueueFamilyIndices = nullptr;
-
-			VkDevice device = devices[i]->getLogical();
-
-			VkBuffer buffer;
-			VkResult result = vkCreateBuffer(device, &bufferCI, gVulkanAllocator, &buffer);
-			assert(result == VK_SUCCESS);
-
-			VkMemoryRequirements memReqs;
-			vkGetBufferMemoryRequirements(device, buffer, &memReqs);
-
-			VkDeviceMemory memory = devices[i]->allocateMemory(memReqs, flags);
-			result = vkBindBufferMemory(device, buffer, memory, 0);
-			assert(result == VK_SUCCESS);
-
-			VkBufferView view;
-			if (needsView)
-			{
-				VkBufferViewCreateInfo bufferViewCI;
-				bufferViewCI.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
-				bufferViewCI.pNext = nullptr;
-				bufferViewCI.flags = 0;
-				bufferViewCI.buffer = buffer;
-				bufferViewCI.format = VulkanUtility::getBufferFormat(format);
-				bufferViewCI.offset = 0;
-				bufferViewCI.range = VK_WHOLE_SIZE;
-
-				result = vkCreateBufferView(device, &bufferViewCI, gVulkanAllocator, &view);
-				assert(result == VK_SUCCESS);
-			}
-			else
-				view = VK_NULL_HANDLE;
-
-			mBuffers[i] = devices[i]->getResourceManager().create<VulkanBuffer>(buffer, view, memory);
+			mBuffers[i] = createBuffer(*devices[i], false, mReadable);
 		}
 	}
 
@@ -124,83 +122,240 @@ namespace BansheeEngine
 
 			mBuffers[i]->destroy();
 		}
+
+		assert(mStagingBuffer == nullptr);
 	}
 
-	void* VulkanHardwareBuffer::map(UINT32 offset, UINT32 length, GpuLockOptions options, UINT32 deviceIdx, UINT32 queueIdx)
+	VulkanBuffer* VulkanHardwareBuffer::createBuffer(VulkanDevice& device, bool staging, bool readable)
 	{
-		if ((offset + length) > mSize)
+		VkBufferUsageFlags usage = mBufferCI.usage;
+		if (staging)
 		{
-			LOGERR("Provided offset(" + toString(offset) + ") + length(" + toString(length) + ") "
-				   "is larger than the buffer " + toString(mSize) + ".");
+			mBufferCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
 
-			return nullptr;
+			// Staging buffers are used as a destination for reads
+			if (readable)
+				mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
 		}
+		else if(readable) // Non-staging readable
+			mBufferCI.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
 
-		VulkanBuffer* buffer = mBuffers[deviceIdx];
+		VkMemoryPropertyFlags flags = (mDirectlyMappable || staging) ?
+			(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached memory
+			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 
-		if (buffer == nullptr)
-			return;
+		VkDevice vkDevice = device.getLogical();
 
-		bool directMap = mStaging && !buffer->isUsed();
+		VkBuffer buffer;
+		VkResult result = vkCreateBuffer(vkDevice, &mBufferCI, gVulkanAllocator, &buffer);
+		assert(result == VK_SUCCESS);
 
-		// If memory is host visible and buffer isn't used on the GPU, map directly (no need for pipeline barriers
-		// with access modifiers since we're sure the buffer isn't used on the GPU)
-		if (directMap)
-			return buffer->map();
+		VkMemoryRequirements memReqs;
+		vkGetBufferMemoryRequirements(vkDevice, buffer, &memReqs);
 
-		// TODO - Allocate staging buffer
+		VkDeviceMemory memory = device.allocateMemory(memReqs, flags);
+		result = vkBindBufferMemory(vkDevice, buffer, memory, 0);
+		assert(result == VK_SUCCESS);
 
-		bool needRead = options == GBL_READ_WRITE || options == GBL_READ_ONLY;
-		if(needRead)
+		VkBufferView view;
+		if (mRequiresView && !staging)
 		{
-			// TODO - Get command buffer on wanted queue (getTransferBuffer(deviceIdx, queueIdx))
-			//      - Generate sync mask depending on where the resource is used on (VulkanResource::getUseInfo())
-			//      - Register this buffer and staging buffer with the transfer buffer, updating the transfer buffer's sync mask
-			//      - Flush the transfer buffer, wait for it to complete, and refresh CB states
-			//      - Proceed below
+			mViewCI.buffer = buffer;
+
+			result = vkCreateBufferView(vkDevice, &mViewCI, gVulkanAllocator, &view);
+			assert(result == VK_SUCCESS);
 		}
+		else
+			view = VK_NULL_HANDLE;
 
-		// TODO - Return staging buffer->map()
-		//      - Set mRequiresUpload field to true
-		//      - Remember lock mode
-		//      - Remember staging buffer
-		//      - Remember lock queue and device
+		mBufferCI.usage = usage; // Restore original usage
+		return device.getResourceManager().create<VulkanBuffer>(buffer, view, memory);
+	}
 
-		switch (options)
+	void* VulkanHardwareBuffer::map(UINT32 offset, UINT32 length, GpuLockOptions options, UINT32 deviceIdx, UINT32 queueIdx)
+	{
+		if ((offset + length) > mSize)
 		{
-		case GBL_WRITE_ONLY_DISCARD:
+			LOGERR("Provided offset(" + toString(offset) + ") + length(" + toString(length) + ") "
+				   "is larger than the buffer " + toString(mSize) + ".");
 
-			break;
-		case GBL_WRITE_ONLY_NO_OVERWRITE:
+			return nullptr;
+		}
 
-			break;
-		case GBL_WRITE_ONLY:
+		VulkanBuffer* buffer = mBuffers[deviceIdx];
 
-			break;
-		case GBL_READ_WRITE:
+		if (buffer == nullptr)
+			return nullptr;
 
-			break;
-		case GBL_READ_ONLY:
+		mIsMapped = true;
+		mMappedDeviceIdx = deviceIdx;
+		mMappedGlobalQueueIdx = queueIdx;
+		mMappedOffset = offset;
+		mMappedSize = length;
+		mMappedLockOptions = options;
 
-			break;
+		VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
+		VulkanDevice& device = *rapi._getDevice(deviceIdx);
+
+		VulkanCommandBufferManager& cbManager = gVulkanCBManager();
+		GpuQueueType queueType;
+		UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);
+
+		VkAccessFlags accessFlags;
+		if (options == GBL_READ_ONLY)
+			accessFlags = VK_ACCESS_HOST_READ_BIT;
+		else if (options == GBL_READ_WRITE)
+			accessFlags = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
+		else
+			accessFlags = VK_ACCESS_HOST_WRITE_BIT;
+
+		// If memory is host visible try mapping it directly
+		if(mDirectlyMappable)
+		{
+			// If GPU has the ability to write to the buffer we must issue a pipeline barrier to prevent any memory hazards
+			//  - Additionally it might be possible the GPU is /currently/ writing to the buffer, in which case we need to
+			//    wait for those writes to finish before continuing
+			if(mSupportsGPUWrites) // Note: It might be tempting to do this step only if the buffer is currently being 
+								   // written to, but that doesn't guarantee memory visibility if it was written to recently
+			{
+				// First try to avoid the expensive wait operation and barrier
+				if(options == GBL_WRITE_ONLY_NO_OVERWRITE) // Caller guarantees he won't touch the same data as the GPU, so just map
+					return buffer->map(offset, length);
+
+				if(options == GBL_WRITE_ONLY_DISCARD) // Caller doesn't care about buffer contents, so just discard the 
+				{									  // existing buffer and create a new one
+					buffer->destroy();
+
+					buffer = createBuffer(device, false, mReadable);
+					mBuffers[deviceIdx] = buffer;
+
+					return buffer->map(offset, length);
+				}
+
+				// Otherwise we need to wait until (potential) GPU write completes, and issue a barrier so:
+				//  - If reading: the device makes the written memory available for read (read-after-write hazard)
+				//  - If writing: ensures our writes properly overlap with GPU writes (write-after-write hazard)
+				VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);
+
+				// Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
+				UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
+				transferCB->appendMask(writeUseMask); 
+
+				// Issue barrier to avoid memory hazards
+				transferCB->memoryBarrier(buffer->getHandle(),
+										  VK_ACCESS_SHADER_WRITE_BIT,
+										  accessFlags,
+										  // Last stages that could have written to the buffer:
+										  VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 
+										  VK_PIPELINE_STAGE_HOST_BIT
+				);
+
+				// Submit the command buffer and wait until it finishes
+				transferCB->flush(true);
+				assert(!buffer->isUsed());
+			}
+
+			return buffer->map(offset, length);
 		}
+		else // Otherwise we use a staging buffer
+		{
+			bool needRead = options == GBL_READ_WRITE || options == GBL_READ_ONLY;
+
+			// Allocate a staging buffer
+			mStagingBuffer = createBuffer(device, true, needRead);
 
-		return nullptr;
+			if (needRead) // If reading, we need to copy the current contents of the buffer to the staging buffer
+			{
+				VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);
+				
+				// Similar to above, if buffer supports GPU writes, we need to wait on any potential writes to complete
+				if(mSupportsGPUWrites)
+				{
+					// Ensure flush() will wait for all queues currently writing to the buffer (if any) to finish
+					UINT32 writeUseMask = buffer->getUseInfo(VulkanUseFlag::Write);
+					transferCB->appendMask(writeUseMask);
+				}
+
+				// Queue copy command
+				buffer->copy(transferCB, mStagingBuffer, offset, length);
+
+				// Ensure data written to the staging buffer is visible
+				transferCB->memoryBarrier(buffer->getHandle(),
+										  VK_ACCESS_TRANSFER_WRITE_BIT,
+										  accessFlags,
+										  VK_PIPELINE_STAGE_TRANSFER_BIT,
+										  VK_PIPELINE_STAGE_HOST_BIT
+				);
+
+				// Submit the command buffer and wait until it finishes
+				transferCB->flush(true);
+				assert(!buffer->isUsed());
+			}
+
+			return mStagingBuffer->map(offset, length);
+		}
 	}
 
 	void VulkanHardwareBuffer::unmap()
 	{
-		// TODO - If direct map (mRequiresUpload == false), simply unmap
-		// TODO - If mRequiresUpload is true
-		//      - Get command buffer on locked queue and device
-		//      - If lock was discard
-		//        - Create a brand new internal buffer
-		//        - Call destroy on the old one
-		//        - Issue copy on the CB without a sync mask (register both resources on CB)
-		//      - If lock was no overwrite
-		//        - Issue copy on the CB without a sync mask (register both resources on CB)
-		//      - Otherwise issue copy with a sync mask depending on current use flags
-		//      - Destroy staging buffer
+		// Possibly map() failed with some error
+		if (!mIsMapped)
+			return;
+
+		// Note: If we did any writes they need to be made visible to the GPU. However there is no need to execute 
+		// a pipeline barrier because (as per spec) host writes are implicitly visible to the device.
+
+		if(mDirectlyMappable)
+			mBuffers[mMappedDeviceIdx]->unmap();
+		else
+		{
+			bool isWrite = mMappedLockOptions != GBL_READ_ONLY;
+
+			// If the caller wrote anything to the staging buffer, we need to upload it back to the main buffer
+			if(isWrite)
+			{
+				VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPICore::instance());
+				VulkanDevice& device = *rapi._getDevice(mMappedDeviceIdx);
+
+				VulkanCommandBufferManager& cbManager = gVulkanCBManager();
+				GpuQueueType queueType;
+				UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(mMappedGlobalQueueIdx, queueType);
+
+				VulkanBuffer* buffer = mBuffers[mMappedDeviceIdx];
+				VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(mMappedDeviceIdx, queueType, localQueueIdx);
+
+				// If the buffer is used in any way on the GPU, we need to wait for that use to finish before
+				// we issue our copy
+				UINT32 useMask = buffer->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);
+				if(useMask != 0) // Buffer is currently used on the GPU
+				{
+					// Try to avoid the wait
+					if (mMappedLockOptions == GBL_WRITE_ONLY_NO_OVERWRITE) // Caller guarantees he won't touch the same data as the GPU, so just copy
+					{
+						// Fall through to copy()
+					}
+					else if (mMappedLockOptions == GBL_WRITE_ONLY_DISCARD) // Caller doesn't care about buffer contents, so just discard the 
+					{													   // existing buffer and create a new one
+						buffer->destroy();
+
+						buffer = createBuffer(device, false, mReadable);
+						mBuffers[mMappedDeviceIdx] = buffer;
+					} 
+					else // Otherwise we have no choice but to issue a dependency between the queues
+						transferCB->appendMask(useMask);
+				}
+				
+				// Queue copy command
+				mStagingBuffer->copy(transferCB, buffer, mMappedOffset, mMappedSize);
+			}
+
+			mStagingBuffer->unmap();
+
+			mStagingBuffer->destroy();
+			mStagingBuffer = nullptr;
+		}
+
+		mIsMapped = false;
 	}
 
 	void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,
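The branching in map() and unmap() above boils down to a small decision: if the GPU can write to the buffer, avoid the expensive wait whenever the lock options allow it. Condensed into a sketch for reference; the MapAction names and the helper are illustrative, only GpuLockOptions and its values come from the engine:

// Sketch of the directly-mappable decision logic in map(); assumes BsCommonTypes.h for GpuLockOptions.
enum class MapAction { MapDirectly, DiscardAndRecreate, WaitForGpuThenMap };

MapAction chooseDirectMapAction(GpuLockOptions options, bool supportsGPUWrites)
{
	if (!supportsGPUWrites)
		return MapAction::MapDirectly; // No GPU writer, so no hazard regardless of lock options

	switch (options)
	{
	case GBL_WRITE_ONLY_NO_OVERWRITE: // Caller promises not to touch data the GPU is using
		return MapAction::MapDirectly;
	case GBL_WRITE_ONLY_DISCARD:      // Contents are irrelevant, swap in a fresh buffer instead of waiting
		return MapAction::DiscardAndRecreate;
	default:                          // Read or overlapping write: sync with writers and issue a barrier
		return MapAction::WaitForGpuThenMap;
	}
}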

+ 8 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -36,8 +36,15 @@ namespace BansheeEngine
 		else
 			submitInfo.pWaitSemaphores = nullptr;
 
-		vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
+		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
+		assert(result == VK_SUCCESS);
 
 		mLastCommandBuffer = cmdBuffer;
 	}
+
+	void VulkanQueue::waitIdle() const
+	{
+		VkResult result = vkQueueWaitIdle(mQueue);
+		assert(result == VK_SUCCESS);
+	}
 }

+ 12 - 10
Source/BansheeVulkanRenderAPI/Source/BsVulkanResource.cpp

@@ -97,23 +97,25 @@ namespace BansheeEngine
 			mOwner->destroy(this);
 	}
 
-	UINT32 VulkanResource::getUseInfo(VulkanUseFlags& useFlags) const
+	UINT32 VulkanResource::getUseInfo(VulkanUseFlags useFlags) const
 	{
-		useFlags = VulkanUseFlag::None;
-
 		UINT32 mask = 0;
-		for(UINT32 i = 0; i < MAX_UNIQUE_QUEUES; i++)
+
+		if(useFlags.isSet(VulkanUseFlag::Read))
 		{
-			if (mReadUses[i] > 0)
+			for (UINT32 i = 0; i < MAX_UNIQUE_QUEUES; i++)
 			{
-				mask |= 1 << i;
-				useFlags |= VulkanUseFlag::Read;
+				if (mReadUses[i] > 0)
+					mask |= 1 << i;
 			}
+		}
 
-			if (mWriteUses[i] > 0)
+		if (useFlags.isSet(VulkanUseFlag::Write))
+		{
+			for (UINT32 i = 0; i < MAX_UNIQUE_QUEUES; i++)
 			{
-				mask |= 1 << i;
-				useFlags |= VulkanUseFlag::Write;
+				if (mWriteUses[i] > 0)
+					mask |= 1 << i;
 			}
 		}