Browse Source

Vulkan command buffer submission and inter-queue synchronization

BearishSun 9 years ago
parent
commit
dc9e96ae22
22 changed files with 302 additions and 64 deletions
  1. 2 1
      Source/BansheeCore/Include/BsCommandBuffer.h
  2. 2 3
      Source/BansheeCore/Include/BsCommandBufferManager.h
  3. 6 0
      Source/BansheeCore/Include/BsCorePrerequisites.h
  4. 3 3
      Source/BansheeCore/Source/BsCommandBuffer.cpp
  5. 9 5
      Source/BansheeCore/Source/BsCommandBufferManager.cpp
  6. 0 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBuffer.h
  7. 1 1
      Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBuffer.cpp
  8. 0 1
      Source/BansheeGLRenderAPI/Include/BsGLCommandBuffer.h
  9. 1 1
      Source/BansheeGLRenderAPI/Source/BsGLCommandBuffer.cpp
  10. 2 0
      Source/BansheeVulkanRenderAPI/CMakeSources.cmake
  11. 33 17
      Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h
  12. 19 0
      Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h
  13. 2 2
      Source/BansheeVulkanRenderAPI/Include/BsVulkanDevice.h
  14. 1 1
      Source/BansheeVulkanRenderAPI/Include/BsVulkanHardwareBuffer.h
  15. 4 7
      Source/BansheeVulkanRenderAPI/Include/BsVulkanPrerequisites.h
  16. 41 0
      Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h
  17. 85 15
      Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp
  18. 42 1
      Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp
  19. 16 3
      Source/BansheeVulkanRenderAPI/Source/BsVulkanDevice.cpp
  20. 2 2
      Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp
  21. 17 0
      Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp
  22. 14 0
      Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

+ 2 - 1
Source/BansheeCore/Include/BsCommandBuffer.h

@@ -68,10 +68,11 @@ namespace BansheeEngine
 
 		/** @} */
 	protected:
-		CommandBuffer(UINT32 id, CommandBufferType type, UINT32 queueIdx, bool secondary);
+		CommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
 		UINT32 mId;
 		CommandBufferType mType;
+		UINT32 mDeviceIdx;
 		UINT32 mQueueIdx;
 		bool mIsSecondary;
 	};

+ 2 - 3
Source/BansheeCore/Include/BsCommandBufferManager.h

@@ -33,10 +33,9 @@ namespace BansheeEngine
 			UINT32 queueIdx = 0, bool secondary = false) = 0;
 
 		/** Called by a command buffer just before it is destroyed. */
-		void notifyCommandBufferDestroyed(UINT32 id);
+		void notifyCommandBufferDestroyed(UINT32 deviceIdx, UINT32 id);
 
-	private:
-		bool mActiveCommandBuffers[BS_MAX_COMMAND_BUFFERS];
+		CommandBuffer* mActiveCommandBuffers[BS_MAX_DEVICES][BS_MAX_COMMAND_BUFFERS];
 	};
 
 	/** @} */

+ 6 - 0
Source/BansheeCore/Include/BsCorePrerequisites.h

@@ -176,6 +176,12 @@
 /** Maximum number of individual GPU queues, per type. */
 #define BS_MAX_QUEUES_PER_TYPE 8
 
+/** Maximum number of hardware devices usable at once. */
+#define BS_MAX_DEVICES 5U
+
+/** Maximum number of devices one resource can exist on at the same time. */
+#define BS_MAX_LINKED_DEVICES 4U
+
 // Windows Settings
 #if BS_PLATFORM == BS_PLATFORM_WIN32
 

+ 3 - 3
Source/BansheeCore/Source/BsCommandBuffer.cpp

@@ -13,15 +13,15 @@ namespace BansheeEngine
 		mMask |= 1 << buffer->_getId();
 	}
 
-	CommandBuffer::CommandBuffer(UINT32 id, CommandBufferType type, UINT32 queueIdx, bool secondary)
-		:mId(id), mType(type), mQueueIdx(queueIdx), mIsSecondary(secondary)
+	CommandBuffer::CommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
+		:mId(id), mType(type), mDeviceIdx(deviceIdx), mQueueIdx(queueIdx), mIsSecondary(secondary)
 	{
 
 	}
 
 	CommandBuffer::~CommandBuffer()
 	{
-		CommandBufferManager::instance().notifyCommandBufferDestroyed(mId);
+		CommandBufferManager::instance().notifyCommandBufferDestroyed(mDeviceIdx, mId);
 	}
 
 	SPtr<CommandBuffer> CommandBuffer::create(CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx,

+ 9 - 5
Source/BansheeCore/Source/BsCommandBufferManager.cpp

@@ -7,10 +7,12 @@ namespace BansheeEngine
 	SPtr<CommandBuffer> CommandBufferManager::create(CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx,
 		bool secondary)
 	{
+		assert(deviceIdx < BS_MAX_DEVICES);
+
 		UINT32 id = -1;
 		for(UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
 		{
-			if (!mActiveCommandBuffers[i])
+			if (!mActiveCommandBuffers[deviceIdx][i])
 			{
 				id = i;
 				break;
@@ -23,12 +25,14 @@ namespace BansheeEngine
 			return nullptr;
 		}
 
-		mActiveCommandBuffers[id] = true;
-		return createInternal(id, type, deviceIdx, queueIdx, secondary);
+		SPtr<CommandBuffer> cmdBuffer = createInternal(id, type, deviceIdx, queueIdx, secondary);;
+		mActiveCommandBuffers[deviceIdx][id] = cmdBuffer.get();
+
+		return cmdBuffer;
 	}
 
-	void CommandBufferManager::notifyCommandBufferDestroyed(UINT32 id)
+	void CommandBufferManager::notifyCommandBufferDestroyed(UINT32 deviceIdx, UINT32 id)
 	{
-		mActiveCommandBuffers[id] = false;
+		mActiveCommandBuffers[deviceIdx][id] = nullptr;
 	}
 }

+ 0 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBuffer.h

@@ -37,7 +37,6 @@ namespace BansheeEngine
 
 		D3D11CommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
-		UINT32 mDeviceIdx;
 		Vector<std::function<void()>> mCommands;
 
 		DrawOperationType mActiveDrawOp;

+ 1 - 1
Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBuffer.cpp

@@ -5,7 +5,7 @@
 namespace BansheeEngine
 {
 	D3D11CommandBuffer::D3D11CommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, queueIdx, secondary), mDeviceIdx(deviceIdx), mActiveDrawOp(DOT_TRIANGLE_LIST)
+		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mActiveDrawOp(DOT_TRIANGLE_LIST)
 	{
 		if (deviceIdx != 0)
 			BS_EXCEPT(InvalidParametersException, "Only a single device supported on DX11.");

+ 0 - 1
Source/BansheeGLRenderAPI/Include/BsGLCommandBuffer.h

@@ -37,7 +37,6 @@ namespace BansheeEngine
 
 		GLCommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
-		UINT32 mDeviceIdx;
 		Vector<std::function<void()>> mCommands;
 
 		DrawOperationType mCurrentDrawOperation;

+ 1 - 1
Source/BansheeGLRenderAPI/Source/BsGLCommandBuffer.cpp

@@ -5,7 +5,7 @@
 namespace BansheeEngine
 {
 	GLCommandBuffer::GLCommandBuffer(UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, queueIdx, secondary), mDeviceIdx(deviceIdx), mCurrentDrawOperation(DOT_TRIANGLE_LIST)
+		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mCurrentDrawOperation(DOT_TRIANGLE_LIST)
 	{
 		if (deviceIdx != 0)
 			BS_EXCEPT(InvalidParametersException, "Only a single device supported on DX11.");

+ 2 - 0
Source/BansheeVulkanRenderAPI/CMakeSources.cmake

@@ -25,6 +25,7 @@ set(BS_BANSHEEVULKANRENDERAPI_INC_NOFILTER
 	"Include/BsVulkanDescriptorPool.h"
 	"Include/BsVulkanDescriptorLayout.h"
 	"Include/BsVulkanResource.h"
+	"Include/BsVulkanQueue.h"
 )
 
 set(BS_BANSHEEVULKANRENDERAPI_INC_MANAGERS
@@ -67,6 +68,7 @@ set(BS_BANSHEEVULKANRENDERAPI_SRC_NOFILTER
 	"Source/BsVulkanDescriptorPool.cpp"
 	"Source/BsVulkanDescriptorLayout.cpp"
 	"Source/BsVulkanResource.cpp"
+	"Source/BsVulkanQueue.cpp"
 )
 
 set(BS_BANSHEEVULKANRENDERAPI_SRC_MANAGERS

+ 33 - 17
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -12,7 +12,7 @@ namespace BansheeEngine
 	 *  @{
 	 */
 
-	class LVulkanCommandBuffer;
+	class VulkanCmdBuffer;
 
 #define BS_MAX_VULKAN_COMMAND_BUFFERS_PER_QUEUE 32
 
@@ -24,11 +24,11 @@ namespace BansheeEngine
 		~VulkanCmdBufferPool();
 
 		/** Attempts to find a free command buffer, or creates a new one if not found. */
-		LVulkanCommandBuffer* getBuffer(CommandBufferType type, UINT32 queueIdx, bool secondary);
+		VulkanCmdBuffer* getBuffer(CommandBufferType type, UINT32 queueIdx, bool secondary);
 
 	private:
 		/** Creates a new command buffer. */
-		LVulkanCommandBuffer* createBuffer(VulkanQueueType type, bool secondary);
+		VulkanCmdBuffer* createBuffer(VulkanQueueType type, bool secondary);
 
 		/** Returns a Vulkan command pool for the specified queue type. */
 		VkCommandPool getPool(VulkanQueueType type);
@@ -36,14 +36,14 @@ namespace BansheeEngine
 		VulkanDevice& mDevice;
 		VkCommandPool mPools[VQT_COUNT];
 
-		LVulkanCommandBuffer* mBuffers[VQT_COUNT][BS_MAX_QUEUES_PER_TYPE][BS_MAX_VULKAN_COMMAND_BUFFERS_PER_QUEUE];
+		VulkanCmdBuffer* mBuffers[VQT_COUNT][BS_MAX_QUEUES_PER_TYPE][BS_MAX_VULKAN_COMMAND_BUFFERS_PER_QUEUE];
 	};
 
 	/** 
 	 * Represents a direct wrapper over an internal Vulkan command buffer. This is unlike VulkanCommandBuffer which is a
 	 * higher level class, and it allows for re-use by internally using multiple low-level command buffers.
 	 */
-	class LVulkanCommandBuffer
+	class VulkanCmdBuffer
 	{
 		/** Possible states a command buffer can be in. */
 		enum class State
@@ -61,8 +61,8 @@ namespace BansheeEngine
 		};
 
 	public:
-		LVulkanCommandBuffer(VulkanDevice& device, VkCommandPool pool, bool secondary);
-		~LVulkanCommandBuffer();
+		VulkanCmdBuffer(VulkanDevice& device, VkCommandPool pool, bool secondary);
+		~VulkanCmdBuffer();
 
 		/** Makes the command buffer ready to start recording commands. */
 		void begin();
@@ -88,6 +88,12 @@ namespace BansheeEngine
 		 */
 		VkSemaphore getSemaphore() const { return mSemaphore; }
 
+		/** Returns true if the command buffer is currently being processed by the device. */
+		bool isSubmitted() const { return mState == State::Submitted; }
+
+		/** Returns true if the command buffer is ready to be submitted to a queue. */
+		bool isReadyForSubmit() const { return mState == State::RecordingDone; }
+
 		/** Returns a counter that gets incremented whenever the command buffer is done executing. */
 		UINT32 getFenceCounter() const { return mFenceCounter; }
 
@@ -96,6 +102,7 @@ namespace BansheeEngine
 
 	private:
 		friend class VulkanCmdBufferPool;
+		friend class VulkanCommandBuffer;
 
 		State mState;
 		VulkanDevice& mDevice;
@@ -111,25 +118,34 @@ namespace BansheeEngine
 	{
 	public:
 		/** 
-		 * Returns the handle to the internal command buffer. This is a lower-level command buffer that more directly
-		 * maps to Vulkan's command buffers.
+		 * Submits the command buffer for execution. 
+		 * 
+		 * @param[in]	syncMask	Mask that controls which other command buffers this command buffer depends upon
+		 *							(if any). See description of @p syncMask parameter in RenderAPICore::executeCommands().
 		 */
-		LVulkanCommandBuffer& getBuffer() const { return *mBuffer; }
+		void submit(UINT32 syncMask);
 
-		/** 
-		 * Tasks the command buffer to find a new internal command buffer. Call this after the command buffer has been
-		 * submitted to a queue (it's not allowed to be used until the queue is done with it).
-		 */
-		void acquireNewBuffer();
+		/** Checks if the submitted buffer finished executing, and updates state if it has. */
+		void refreshSubmitStatus();
 
 	private:
 		friend class VulkanCommandBufferManager;
 
-		VulkanCommandBuffer(VulkanDevice& device, UINT32 id, CommandBufferType type, UINT32 queueIdx, 
+		VulkanCommandBuffer(VulkanDevice& device, UINT32 id, CommandBufferType type, UINT32 deviceIdx, UINT32 queueIdx, 
 			bool secondary);
 
-		LVulkanCommandBuffer* mBuffer;
+		/** 
+		 * Tasks the command buffer to find a new internal command buffer. Call this after the command buffer has been
+		 * submitted to a queue (it's not allowed to be used until the queue is done with it).
+		 */
+		void acquireNewBuffer();
+
+		VulkanCmdBuffer* mBuffer;
+		VulkanCmdBuffer* mSubmittedBuffer;
 		VulkanDevice& mDevice;
+		VulkanQueue* mQueue;
+
+		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
 	};
 
 	/** @} */

+ 19 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -26,6 +26,25 @@ namespace BansheeEngine
 		SPtr<CommandBuffer> createInternal(UINT32 id, CommandBufferType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
 			bool secondary = false) override;
 
+		/** 
+		 * Returns a set of command buffer semaphores depending on the provided sync mask. 
+		 *
+		 * @param[in]	deviceIdx	Index of the device to get the semaphores for.
+		 * @param[in]	syncMask	Mask that has a bit enabled for each command buffer to retrieve the semaphore for.
+		 *							If the command buffer is not currently executing, semaphore won't be returned.
+		 * @param[out]	semaphores	List containing all the required semaphores. Semaphores are tightly packed at the
+		 *							beginning of the array.
+		 * @param[out]	count		Number of semaphores provided in the @p semaphores array.
+		 */
+		void getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VkSemaphore(&semaphores)[BS_MAX_COMMAND_BUFFERS], 
+			UINT32& count);
+
+		/** 
+		 * Checks if any of the active command buffers finished executing on the device and updates their states 
+		 * accordingly. 
+		 */
+		void refreshStates(UINT32 deviceIdx);
+
 	private:
 		const VulkanRenderAPI& mRapi;
 	};

+ 2 - 2
Source/BansheeVulkanRenderAPI/Include/BsVulkanDevice.h

@@ -38,7 +38,7 @@ namespace BansheeEngine
 		UINT32 getNumQueues(VulkanQueueType type) const { return (UINT32)mQueueInfos[(int)type].queues.size(); }
 
 		/** Returns queue of the specified type at the specified index. Index must be in range [0, getNumQueues()). */
-		VkQueue getQueue(VulkanQueueType type, UINT32 idx) const { return mQueueInfos[(int)type].queues[idx]; }
+		VulkanQueue* getQueue(VulkanQueueType type, UINT32 idx) const { return mQueueInfos[(int)type].queues[idx]; }
 
 		/** 
 		 * Returns index of the queue family for the specified queue type. Returns -1 if no queues for the specified type 
@@ -91,7 +91,7 @@ namespace BansheeEngine
 		struct QueueInfo
 		{
 			UINT32 familyIdx;
-			Vector<VkQueue> queues;
+			Vector<VulkanQueue*> queues;
 		};
 
 		QueueInfo mQueueInfos[VQT_COUNT];

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanHardwareBuffer.h

@@ -44,7 +44,7 @@ namespace BansheeEngine
 		/** @copydoc HardwareBuffer::unmap */
 		void unmap() override;
 
-		MemoryInfo mAllocations[BS_MAX_VULKAN_DEVICES];
+		MemoryInfo mAllocations[BS_MAX_DEVICES];
 	};
 
 	/** @} */

+ 4 - 7
Source/BansheeVulkanRenderAPI/Include/BsVulkanPrerequisites.h

@@ -40,6 +40,9 @@ namespace BansheeEngine
 	class VulkanDescriptorLayout;
 	class VulkanDescriptorManager;
 	class VulkanCmdBufferPool;
+	class VulkanCmdBuffer;
+	class VulkanCommandBuffer;
+	class VulkanQueue;
 
 	VkAllocationCallbacks* gVulkanAllocator = nullptr;
 
@@ -71,10 +74,4 @@ namespace BansheeEngine
 
 /** Macro to get a procedure address based on a Vulkan device. */
 #define GET_DEVICE_PROC_ADDR(device, name) \
-	vk##name = reinterpret_cast<PFN_vk##name>(vkGetDeviceProcAddr(device, "vk"#name));
-
-/** Maximum number of hardware devices usable at once. */
-#define BS_MAX_VULKAN_DEVICES 5U
-
-/** Maximum number of devices one resource can exist at the same time. */
-#define BS_MAX_LINKED_DEVICES 4U
+	vk##name = reinterpret_cast<PFN_vk##name>(vkGetDeviceProcAddr(device, "vk"#name));

+ 41 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -0,0 +1,41 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#pragma once
+
+#include "BsVulkanPrerequisites.h"
+
+namespace BansheeEngine
+{
+	/** @addtogroup Vulkan
+	 *  @{
+	 */
+
+	/** 
+	 * Wrapper for the Vulkan device queue. Besides holding the queue handle, it records which command buffer was
+	 * reported to it last through notifySubmit().
+	 */
+	class VulkanQueue
+	{
+	public:
+		VulkanQueue(VkQueue queue);
+
+		/** Returns the internal handle to the Vulkan queue object. */
+		VkQueue getHandle() const { return mQueue; }
+		
+		/** 
+		 * Notifies the queue that a command buffer was submitted. 
+		 *
+		 * @param[in]	cmdBuffer		Command buffer that was submitted.
+		 * @param[in]	fenceCounter	Fence counter of the command buffer at time of submission. This counter gets
+		 *								incremented whenever a command buffer is done executing on the device. This allows
+		 *								us to know when the queue is done with a command buffer.
+		 */
+		void notifySubmit(const VulkanCommandBuffer& cmdBuffer, UINT32 fenceCounter);
+
+	protected:
+		/** Wrapped Vulkan queue handle, as provided to the constructor. */
+		VkQueue mQueue;
+		/** Scratch semaphore storage. NOTE(review): nothing visible in this change reads or writes it - confirm it is needed. */
+		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
+
+		/** Fence counter passed to the most recent notifySubmit() call. */
+		UINT32 mFenceCounter;
+		/** Id of the command buffer passed to the most recent notifySubmit() call. */
+		UINT32 mLastCommandBufferId;
+	};
+
+	/** @} */
+}

+ 85 - 15
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -1,8 +1,10 @@
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsVulkanCommandBuffer.h"
+#include "BsVulkanCommandBufferManager.h"
 #include "BsVulkanUtility.h"
 #include "BsVulkanDevice.h"
+#include "BsVulkanQueue.h"
 
 namespace BansheeEngine
 {
@@ -32,7 +34,7 @@ namespace BansheeEngine
 		{
 			for(UINT32 j = 0; j < BS_MAX_QUEUES_PER_TYPE; j++)
 			{
-				LVulkanCommandBuffer** buffers = mBuffers[i][j];
+				VulkanCmdBuffer** buffers = mBuffers[i][j];
 				for(UINT32 k = 0; k < BS_MAX_VULKAN_COMMAND_BUFFERS_PER_QUEUE; k++)
 				{
 					if (buffers[k] == nullptr)
@@ -52,12 +54,12 @@ namespace BansheeEngine
 		}
 	}
 
-	LVulkanCommandBuffer* VulkanCmdBufferPool::getBuffer(CommandBufferType type, UINT32 queueIdx, bool secondary)
+	VulkanCmdBuffer* VulkanCmdBufferPool::getBuffer(CommandBufferType type, UINT32 queueIdx, bool secondary)
 	{
 		assert(queueIdx < BS_MAX_QUEUES_PER_TYPE);
 
 		VulkanQueueType queueType = VulkanUtility::getQueueType(type);
-		LVulkanCommandBuffer** buffers = mBuffers[queueType][queueIdx];
+		VulkanCmdBuffer** buffers = mBuffers[queueType][queueIdx];
 
 		UINT32 i = 0;
 		for(; i < BS_MAX_VULKAN_COMMAND_BUFFERS_PER_QUEUE; i++)
@@ -65,7 +67,7 @@ namespace BansheeEngine
 			if (buffers[i] == nullptr)
 				break;
 
-			if(buffers[i]->mState == LVulkanCommandBuffer::State::Ready)
+			if(buffers[i]->mState == VulkanCmdBuffer::State::Ready)
 			{
 				buffers[i]->begin();
 				return buffers[i];
@@ -81,11 +83,11 @@ namespace BansheeEngine
 		return buffers[i];
 	}
 
-	LVulkanCommandBuffer* VulkanCmdBufferPool::createBuffer(VulkanQueueType type, bool secondary)
+	VulkanCmdBuffer* VulkanCmdBufferPool::createBuffer(VulkanQueueType type, bool secondary)
 	{
 		VkCommandPool pool = getPool(type);
 
-		return bs_new<LVulkanCommandBuffer>(mDevice, pool, secondary);
+		return bs_new<VulkanCmdBuffer>(mDevice, pool, secondary);
 	}
 
 	VkCommandPool VulkanCmdBufferPool::getPool(VulkanQueueType type)
@@ -97,7 +99,7 @@ namespace BansheeEngine
 		return pool;
 	}
 
-	LVulkanCommandBuffer::LVulkanCommandBuffer(VulkanDevice& device, VkCommandPool pool, bool secondary)
+	VulkanCmdBuffer::VulkanCmdBuffer(VulkanDevice& device, VkCommandPool pool, bool secondary)
 		:mState(State::Ready), mDevice(device), mPool(pool)
 	{
 		VkCommandBufferAllocateInfo cmdBufferAllocInfo;
@@ -127,7 +129,7 @@ namespace BansheeEngine
 		assert(result == VK_SUCCESS);
 	}
 
-	LVulkanCommandBuffer::~LVulkanCommandBuffer()
+	VulkanCmdBuffer::~VulkanCmdBuffer()
 	{
 		VkDevice device = mDevice.getLogical();
 
@@ -147,7 +149,7 @@ namespace BansheeEngine
 		vkFreeCommandBuffers(device, mPool, 1, &mCmdBuffer);
 	}
 
-	void LVulkanCommandBuffer::begin()
+	void VulkanCmdBuffer::begin()
 	{
 		assert(mState == State::Ready);
 
@@ -163,7 +165,7 @@ namespace BansheeEngine
 		mState = State::Recording;
 	}
 
-	void LVulkanCommandBuffer::end()
+	void VulkanCmdBuffer::end()
 	{
 		assert(mState == State::Recording);
 
@@ -173,7 +175,7 @@ namespace BansheeEngine
 		mState = State::RecordingDone;
 	}
 
-	void LVulkanCommandBuffer::beginRenderPass()
+	void VulkanCmdBuffer::beginRenderPass()
 	{
 		assert(mState == State::Recording);
 
@@ -183,7 +185,7 @@ namespace BansheeEngine
 		mState = State::RecordingRenderPass;
 	}
 
-	void LVulkanCommandBuffer::endRenderPass()
+	void VulkanCmdBuffer::endRenderPass()
 	{
 		assert(mState == State::RecordingRenderPass);
 
@@ -192,7 +194,7 @@ namespace BansheeEngine
 		mState = State::Recording;
 	}
 
-	void LVulkanCommandBuffer::refreshFenceStatus()
+	void VulkanCmdBuffer::refreshFenceStatus()
 	{
 		VkResult result = vkGetFenceStatus(mDevice.getLogical(), mFence);
 		assert(result == VK_SUCCESS || result == VK_NOT_READY);
@@ -217,17 +219,85 @@ namespace BansheeEngine
 
 	}
 
-	VulkanCommandBuffer::VulkanCommandBuffer(VulkanDevice& device, UINT32 id, CommandBufferType type, 
+	VulkanCommandBuffer::VulkanCommandBuffer(VulkanDevice& device, UINT32 id, CommandBufferType type, UINT32 deviceIdx,
 		UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, queueIdx, secondary), mBuffer(nullptr), mDevice(device)
+		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mBuffer(nullptr), mSubmittedBuffer(nullptr)
+		, mDevice(device), mQueue(nullptr)
 	{
+		VulkanQueueType queueType =  VulkanUtility::getQueueType(mType);
+
+		UINT32 numQueues = device.getNumQueues(queueType);
+		if (numQueues > 0)
+			mQueue = device.getQueue(queueType, mQueueIdx % numQueues);
+		else // Fallback to graphics queue
+		{
+			numQueues = device.getNumQueues(VQT_GRAPHICS);
+			mQueue = device.getQueue(VQT_GRAPHICS, mQueueIdx % numQueues);
+		}
+
 		acquireNewBuffer();
 	}
 
+	/** 
+	 * Refreshes the fence status of the tracked submitted internal buffer, and stops tracking that buffer once it is no
+	 * longer in the submitted state. Does nothing if no submission is being tracked.
+	 */
+	void VulkanCommandBuffer::refreshSubmitStatus()
+	{
+		// Only poll when a submission is actually being tracked
+		if (mSubmittedBuffer != nullptr)
+		{
+			mSubmittedBuffer->refreshFenceStatus();
+
+			const bool stillSubmitted = mSubmittedBuffer->isSubmitted();
+			if (!stillSubmitted)
+				mSubmittedBuffer = nullptr;
+		}
+	}
+
 	void VulkanCommandBuffer::acquireNewBuffer()
 	{
 		VulkanCmdBufferPool& pool = mDevice.getCmdBufferPool();
 
+		if (mBuffer != nullptr)
+			assert(mBuffer->isSubmitted());
+
+		mSubmittedBuffer = mBuffer;
 		mBuffer = pool.getBuffer(mType, mQueueIdx, mIsSecondary);
 	}
+
+	/** 
+	 * Submits the current internal command buffer to the queue this command buffer is bound to, then acquires a new
+	 * internal buffer. The submitted buffer signals its own semaphore and fence.
+	 *
+	 * @param[in]	syncMask	Mask with one bit per command buffer id. The submission waits on the semaphore of every
+	 *							selected command buffer that still has a submitted internal buffer. The bit belonging
+	 *							to this command buffer is ignored.
+	 */
+	void VulkanCommandBuffer::submit(UINT32 syncMask)
+	{
+		assert(mBuffer != nullptr && mBuffer->isReadyForSubmit());
+
+		VkCommandBuffer cmdBuffer = mBuffer->getHandle();
+		VkSemaphore signalSemaphore = mBuffer->getSemaphore();
+
+		// Ignore myself. The mask holds one bit per command buffer id, so clear the bit at index mId (not the bits of
+		// the id value itself)
+		syncMask &= ~(1U << mId);
+
+		UINT32 numWaitSemaphores = 0;
+		VulkanCommandBufferManager& cmdBufManager = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
+		cmdBufManager.getSyncSemaphores(mDeviceIdx, syncMask, mSemaphoresTemp, numWaitSemaphores);
+
+		// Vulkan requires one destination stage mask per wait semaphore. Block all stages, which is the most
+		// conservative choice.
+		VkPipelineStageFlags waitStages[BS_MAX_COMMAND_BUFFERS];
+		for (UINT32 i = 0; i < numWaitSemaphores; i++)
+			waitStages[i] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+		VkSubmitInfo submitInfo;
+		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+		submitInfo.pNext = nullptr;
+		submitInfo.waitSemaphoreCount = numWaitSemaphores;
+		submitInfo.pWaitSemaphores = numWaitSemaphores > 0 ? mSemaphoresTemp : nullptr;
+		submitInfo.pWaitDstStageMask = numWaitSemaphores > 0 ? waitStages : nullptr;
+		submitInfo.commandBufferCount = 1;
+		submitInfo.pCommandBuffers = &cmdBuffer;
+		submitInfo.signalSemaphoreCount = 1;
+		submitInfo.pSignalSemaphores = &signalSemaphore;
+
+		VkQueue queue = mQueue->getHandle();
+		VkFence fence = mBuffer->getFence();
+
+		VkResult result = vkQueueSubmit(queue, 1, &submitInfo, fence);
+		assert(result == VK_SUCCESS);
+
+		cmdBufManager.refreshStates(mDeviceIdx);
+
+		mQueue->notifySubmit(*this, mBuffer->getFenceCounter());
+
+		// Note: Uncomment for debugging only, prevents any device concurrency issues.
+		// vkQueueWaitIdle(queue);
+
+		// Must be marked as submitted before acquireNewBuffer(), which asserts on that state
+		mBuffer->mState = VulkanCmdBuffer::State::Submitted;
+		acquireNewBuffer();
+	}
 }

+ 42 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp

@@ -30,8 +30,49 @@ namespace BansheeEngine
 		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
 
 		CommandBuffer* buffer = 
-			new (bs_alloc<VulkanCommandBuffer>()) VulkanCommandBuffer(*device, id, type, queueIdx, secondary);
+			new (bs_alloc<VulkanCommandBuffer>()) VulkanCommandBuffer(*device, id, type, deviceIdx, queueIdx, secondary);
 
 		return bs_shared_ptr(buffer);
 	}
+
+	/** 
+	 * Gathers the semaphores of all command buffers on the given device that are selected by @p syncMask and still
+	 * have a submitted internal buffer. Semaphores are written tightly packed to the start of @p semaphores, and
+	 * @p count receives how many were written.
+	 */
+	void VulkanCommandBufferManager::getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, 
+		VkSemaphore(&semaphores)[BS_MAX_COMMAND_BUFFERS], UINT32& count)
+	{
+		assert(deviceIdx < BS_MAX_DEVICES);
+
+		UINT32 semaphoreIdx = 0;
+		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
+		{
+			// Unsigned literal keeps the shift well defined for the highest bit of the 32-bit mask
+			if ((syncMask & (1U << i)) == 0) // We don't care about the command buffer
+				continue;
+
+			if (mActiveCommandBuffers[deviceIdx][i] == nullptr) // Command buffer doesn't exist
+				continue;
+
+			VulkanCommandBuffer* cmdBuffer = static_cast<VulkanCommandBuffer*>(mActiveCommandBuffers[deviceIdx][i]);
+			VulkanCmdBuffer* lowLevelCmdBuffer = cmdBuffer->mSubmittedBuffer;
+
+			if (lowLevelCmdBuffer == nullptr || !lowLevelCmdBuffer->isSubmitted()) // If not submitted, no need to sync with it
+				continue;
+
+			semaphores[semaphoreIdx++] = lowLevelCmdBuffer->getSemaphore();
+		}
+
+		count = semaphoreIdx;
+	}
+
+	/** 
+	 * Asks every active command buffer on the given device to refresh its submit status, so buffers whose submitted
+	 * work is no longer in the submitted state stop being tracked.
+	 */
+	void VulkanCommandBufferManager::refreshStates(UINT32 deviceIdx)
+	{
+		assert(deviceIdx < BS_MAX_DEVICES);
+
+		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
+		{
+			if (mActiveCommandBuffers[deviceIdx][i] == nullptr) // Command buffer doesn't exist
+				continue;
+
+			VulkanCommandBuffer* cmdBuffer = static_cast<VulkanCommandBuffer*>(mActiveCommandBuffers[deviceIdx][i]);
+			cmdBuffer->refreshSubmitStatus();
+		}
+	}
 }

+ 16 - 3
Source/BansheeVulkanRenderAPI/Source/BsVulkanDevice.cpp

@@ -1,6 +1,7 @@
 //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsVulkanDevice.h"
+#include "BsVulkanQueue.h"
 #include "BsVulkanCommandBuffer.h"
 #include "BsVulkanDescriptorManager.h"
 
@@ -40,7 +41,7 @@ namespace BansheeEngine
 			createInfo.pQueuePriorities = defaultQueuePriorities;
 
 			mQueueInfos[type].familyIdx = familyIdx;
-			mQueueInfos[type].queues.resize(createInfo.queueCount);
+			mQueueInfos[type].queues.resize(createInfo.queueCount, nullptr);
 		};
 
 		// Look for dedicated compute queues
@@ -98,8 +99,13 @@ namespace BansheeEngine
 		for(UINT32 i = 0; i < VQT_COUNT; i++)
 		{
 			UINT32 numQueues = (UINT32)mQueueInfos[i].queues.size();
-			for(UINT32 j = 0; j < numQueues; j++)
-				vkGetDeviceQueue(mLogicalDevice, mQueueInfos[i].familyIdx, j, &mQueueInfos[i].queues[j]);
+			for (UINT32 j = 0; j < numQueues; j++)
+			{
+				VkQueue queue;
+				vkGetDeviceQueue(mLogicalDevice, mQueueInfos[i].familyIdx, j, &queue);
+
+				mQueueInfos[i].queues[j] = bs_new<VulkanQueue>(queue);
+			}
 		}
 
 		// Create pools/managers
@@ -111,6 +117,13 @@ namespace BansheeEngine
 	{
 		vkDeviceWaitIdle(mLogicalDevice);
 
+		for (UINT32 i = 0; i < VQT_COUNT; i++)
+		{
+			UINT32 numQueues = (UINT32)mQueueInfos[i].queues.size();
+			for (UINT32 j = 0; j < numQueues; j++)
+				bs_delete(mQueueInfos[i].queues[j]);
+		}
+
 		bs_delete(mDescriptorManager);
 		bs_delete(mCommandBufferPool);
 		vkDestroyDevice(mLogicalDevice, gVulkanAllocator);

+ 2 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp

@@ -22,7 +22,7 @@ namespace BansheeEngine
 		}
 		else
 		{
-			for(UINT32 i = 0; i < BS_MAX_VULKAN_DEVICES; i++)
+			for(UINT32 i = 0; i < BS_MAX_DEVICES; i++)
 			{
 				if ((1 << i) & deviceMask)
 					mAllocations[deviceIdx++].device = rapi._getDevice(i);
@@ -33,7 +33,7 @@ namespace BansheeEngine
 			(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : // Note: Try using cached uncoherent memory
 			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 
-		for (UINT32 i = 0; i < BS_MAX_VULKAN_DEVICES; i++)
+		for (UINT32 i = 0; i < BS_MAX_DEVICES; i++)
 		{
 			if (mAllocations[i].device == nullptr)
 				break;

+ 17 - 0
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -0,0 +1,17 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#include "BsVulkanQueue.h"
+#include "BsVulkanCommandBuffer.h"
+
+namespace BansheeEngine
+{
+	/** 
+	 * Wraps the provided queue handle. The fence counter starts at zero, and the last command buffer id starts at -1,
+	 * which wraps to the largest UINT32 value (presumably meaning "nothing submitted yet" - confirm with callers).
+	 */
+	VulkanQueue::VulkanQueue(VkQueue queue)
+		: mQueue(queue)
+		, mFenceCounter(0)
+		, mLastCommandBufferId(-1)
+	{ }
+
+	/** Records the fence counter and the id of the command buffer that was just submitted to this queue. */
+	void VulkanQueue::notifySubmit(const VulkanCommandBuffer& cmdBuffer, UINT32 fenceCounter)
+	{
+		const UINT32 submittedId = cmdBuffer._getId();
+
+		mFenceCounter = fenceCounter;
+		mLastCommandBufferId = submittedId;
+	}
+}

+ 14 - 0
Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

@@ -13,6 +13,7 @@
 #include "BsVulkanQueryManager.h"
 #include "BsVulkanGLSLProgramFactory.h"
 #include "BsVulkanCommandBufferManager.h"
+#include "BsVulkanCommandBuffer.h"
 #include "BsVulkanVertexInputManager.h"
 #include "Win32/BsWin32VideoModeInfo.h"
 
@@ -391,6 +392,13 @@ namespace BansheeEngine
 
 	void VulkanRenderAPI::swapBuffers(const SPtr<RenderTargetCore>& target, const SPtr<CommandBuffer>& commandBuffer)
 	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		// TODO - Actually swap buffers
+
+		VulkanCommandBuffer& cmdBuffer = static_cast<VulkanCommandBuffer&>(*commandBuffer);
+		cmdBuffer.refreshSubmitStatus();
+
 		BS_INC_RENDER_STAT(NumPresents);
 	}
 
@@ -401,7 +409,13 @@ namespace BansheeEngine
 
 	void VulkanRenderAPI::executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
 	{
+		THROW_IF_NOT_CORE_THREAD;
+
+		if (commandBuffer == nullptr)
+			return;
 
+		VulkanCommandBuffer& cmdBuffer = static_cast<VulkanCommandBuffer&>(*commandBuffer);
+		cmdBuffer.submit(syncMask);
 	}
 	
 	void VulkanRenderAPI::convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest)