Просмотр исходного кода

Refactored command buffer submission and sync in order to resolve some design issues with the previous sync approach, as well as with command buffer IDs

BearishSun 9 лет назад
Родитель
Commit
ebed609b8e
21 измененных файлов с 173 добавлено и 183 удалено
  1. 2 11
      Source/BansheeCore/Include/BsCommandBuffer.h
  2. 1 6
      Source/BansheeCore/Include/BsCommandBufferManager.h
  3. 1 1
      Source/BansheeCore/Include/BsCorePrerequisites.h
  4. 2 7
      Source/BansheeCore/Source/BsCommandBuffer.cpp
  5. 1 25
      Source/BansheeCore/Source/BsCommandBufferManager.cpp
  6. 1 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBuffer.h
  7. 1 1
      Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBufferManager.h
  8. 2 2
      Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBuffer.cpp
  9. 2 2
      Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBufferManager.cpp
  10. 1 1
      Source/BansheeGLRenderAPI/Include/BsGLCommandBuffer.h
  11. 1 1
      Source/BansheeGLRenderAPI/Include/BsGLCommandBufferManager.h
  12. 2 2
      Source/BansheeGLRenderAPI/Source/BsGLCommandBuffer.cpp
  13. 2 2
      Source/BansheeGLRenderAPI/Source/BsGLCommandBufferManager.cpp
  14. 16 17
      Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h
  15. 15 2
      Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h
  16. 20 10
      Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h
  17. 35 64
      Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp
  18. 33 21
      Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp
  19. 1 1
      Source/BansheeVulkanRenderAPI/Source/BsVulkanDevice.cpp
  20. 31 5
      Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp
  21. 3 1
      Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

+ 2 - 11
Source/BansheeCore/Include/BsCommandBuffer.h

@@ -39,7 +39,7 @@ namespace BansheeEngine
 	class BS_CORE_EXPORT CommandBuffer
 	{
 	public:
-		virtual ~CommandBuffer();
+		virtual ~CommandBuffer() { }
 
 		/**
 		 * Creates a new CommandBuffer.
@@ -71,18 +71,9 @@ namespace BansheeEngine
 		/** Returns the device index this buffer will execute on. */
 		UINT32 getDeviceIdx() const { return mDeviceIdx; }
 
-		/** @name Internal
-		 *  @{
-		 */
-
-		/** Returns a unique ID of this command buffer. */
-		UINT32 _getId() const { return mId; }
-
-		/** @} */
 	protected:
-		CommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
+		CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
-		UINT32 mId;
 		GpuQueueType mType;
 		UINT32 mDeviceIdx;
 		UINT32 mQueueIdx;

+ 1 - 6
Source/BansheeCore/Include/BsCommandBufferManager.h

@@ -29,13 +29,8 @@ namespace BansheeEngine
 		friend CommandBuffer;
 
 		/** Creates a command buffer for the specified queue type, device and queue index. See create(). */
-		virtual SPtr<CommandBuffer> createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx = 0,
+		virtual SPtr<CommandBuffer> createInternal(GpuQueueType type, UINT32 deviceIdx = 0,
 			UINT32 queueIdx = 0, bool secondary = false) = 0;
-
-		/** Called by a command buffer just before it is destroyed. */
-		void notifyCommandBufferDestroyed(UINT32 deviceIdx, UINT32 id);
-
-		CommandBuffer* mActiveCommandBuffers[BS_MAX_DEVICES][BS_MAX_COMMAND_BUFFERS];
 	};
 
 	/** @} */

+ 1 - 1
Source/BansheeCore/Include/BsCorePrerequisites.h

@@ -171,7 +171,7 @@
 #define BS_FORCE_SINGLETHREADED_RENDERING 0
 
 /** Maximum number of CommandBuffer%s that may exist at once. */
-#define BS_MAX_COMMAND_BUFFERS 32
+#define BS_MAX_COMMAND_BUFFERS 32 // Should be higher than BS_MAX_QUEUES_PER_TYPE * VQT_COUNT, and fit within 4 bytes
 
 /** Maximum number of individual GPU queues, per type. */
 #define BS_MAX_QUEUES_PER_TYPE 8

+ 2 - 7
Source/BansheeCore/Source/BsCommandBuffer.cpp

@@ -33,17 +33,12 @@ namespace BansheeEngine
 		return (1 << queueIdx) << bitShift;
 	}
 
-	CommandBuffer::CommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
-		:mId(id), mType(type), mDeviceIdx(deviceIdx), mQueueIdx(queueIdx), mIsSecondary(secondary)
+	CommandBuffer::CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
+		:mType(type), mDeviceIdx(deviceIdx), mQueueIdx(queueIdx), mIsSecondary(secondary)
 	{
 
 	}
 
-	CommandBuffer::~CommandBuffer()
-	{
-		CommandBufferManager::instance().notifyCommandBufferDestroyed(mDeviceIdx, mId);
-	}
-
 	SPtr<CommandBuffer> CommandBuffer::create(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx,
 		bool secondary)
 	{

+ 1 - 25
Source/BansheeCore/Source/BsCommandBufferManager.cpp

@@ -9,30 +9,6 @@ namespace BansheeEngine
 	{
 		assert(deviceIdx < BS_MAX_DEVICES);
 
-		UINT32 id = -1;
-		for(UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
-		{
-			if (!mActiveCommandBuffers[deviceIdx][i])
-			{
-				id = i;
-				break;
-			}
-		}
-
-		if(id == -1)
-		{
-			LOGERR("Attempting to allocate more than 32 command buffers. This is not supported. ");
-			return nullptr;
-		}
-
-		SPtr<CommandBuffer> cmdBuffer = createInternal(id, type, deviceIdx, queueIdx, secondary);;
-		mActiveCommandBuffers[deviceIdx][id] = cmdBuffer.get();
-
-		return cmdBuffer;
-	}
-
-	void CommandBufferManager::notifyCommandBufferDestroyed(UINT32 deviceIdx, UINT32 id)
-	{
-		mActiveCommandBuffers[deviceIdx][id] = nullptr;
+		return createInternal(type, deviceIdx, queueIdx, secondary);
 	}
 }

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBuffer.h

@@ -35,7 +35,7 @@ namespace BansheeEngine
 		friend class D3D11CommandBufferManager;
 		friend class D3D11RenderAPI;
 
-		D3D11CommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
+		D3D11CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
 		Vector<std::function<void()>> mCommands;
 

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11CommandBufferManager.h

@@ -20,7 +20,7 @@ namespace BansheeEngine
 	{
 	public:
 		/** @copydoc CommandBufferManager::createInternal() */
-		SPtr<CommandBuffer> createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
+		SPtr<CommandBuffer> createInternal(GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
 			bool secondary = false) override;
 	};
 

+ 2 - 2
Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBuffer.cpp

@@ -4,8 +4,8 @@
 
 namespace BansheeEngine
 {
-	D3D11CommandBuffer::D3D11CommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mActiveDrawOp(DOT_TRIANGLE_LIST)
+	D3D11CommandBuffer::D3D11CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
+		: CommandBuffer(type, deviceIdx, queueIdx, secondary), mActiveDrawOp(DOT_TRIANGLE_LIST)
 	{
 		if (deviceIdx != 0)
 			BS_EXCEPT(InvalidParametersException, "Only a single device supported on DX11.");

+ 2 - 2
Source/BansheeD3D11RenderAPI/Source/BsD3D11CommandBufferManager.cpp

@@ -5,10 +5,10 @@
 
 namespace BansheeEngine
 {
-	SPtr<CommandBuffer> D3D11CommandBufferManager::createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx,
+	SPtr<CommandBuffer> D3D11CommandBufferManager::createInternal(GpuQueueType type, UINT32 deviceIdx,
 		UINT32 queueIdx, bool secondary)
 	{
-		CommandBuffer* buffer = new (bs_alloc<D3D11CommandBuffer>()) D3D11CommandBuffer(id, type, deviceIdx, queueIdx, secondary);
+		CommandBuffer* buffer = new (bs_alloc<D3D11CommandBuffer>()) D3D11CommandBuffer(type, deviceIdx, queueIdx, secondary);
 		return bs_shared_ptr(buffer);
 	}
 }

+ 1 - 1
Source/BansheeGLRenderAPI/Include/BsGLCommandBuffer.h

@@ -35,7 +35,7 @@ namespace BansheeEngine
 		friend class GLCommandBufferManager;
 		friend class GLRenderAPI;
 
-		GLCommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
+		GLCommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary);
 
 		Vector<std::function<void()>> mCommands;
 

+ 1 - 1
Source/BansheeGLRenderAPI/Include/BsGLCommandBufferManager.h

@@ -20,7 +20,7 @@ namespace BansheeEngine
 	{
 	public:
 		/** @copydoc CommandBufferManager::createInternal() */
-		SPtr<CommandBuffer> createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
+		SPtr<CommandBuffer> createInternal(GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
 			bool secondary = false) override;
 	};
 

+ 2 - 2
Source/BansheeGLRenderAPI/Source/BsGLCommandBuffer.cpp

@@ -4,8 +4,8 @@
 
 namespace BansheeEngine
 {
-	GLCommandBuffer::GLCommandBuffer(UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mCurrentDrawOperation(DOT_TRIANGLE_LIST)
+	GLCommandBuffer::GLCommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
+		: CommandBuffer(type, deviceIdx, queueIdx, secondary), mCurrentDrawOperation(DOT_TRIANGLE_LIST)
 	{
 		if (deviceIdx != 0)
 			BS_EXCEPT(InvalidParametersException, "Only a single device supported on DX11.");

+ 2 - 2
Source/BansheeGLRenderAPI/Source/BsGLCommandBufferManager.cpp

@@ -5,10 +5,10 @@
 
 namespace BansheeEngine
 {
-	SPtr<CommandBuffer> GLCommandBufferManager::createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx,
+	SPtr<CommandBuffer> GLCommandBufferManager::createInternal(GpuQueueType type, UINT32 deviceIdx,
 		UINT32 queueIdx, bool secondary)
 	{
-		CommandBuffer* buffer = new (bs_alloc<GLCommandBuffer>()) GLCommandBuffer(id, type, deviceIdx, queueIdx, secondary);
+		CommandBuffer* buffer = new (bs_alloc<GLCommandBuffer>()) GLCommandBuffer(type, deviceIdx, queueIdx, secondary);
 		return bs_shared_ptr(buffer);
 	}
 }

+ 16 - 17
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -93,6 +93,18 @@ namespace BansheeEngine
 		/** Ends render pass recording (as started with beginRenderPass()). */
 		void endRenderPass();
 
+		/** 
+		 * Submits the command buffer for execution. 
+		 * 
+		 * @param[in]	queue		Queue to submit the command buffer on.
+		 * @param[in]	queueIdx	Index of the queue the command buffer was submitted on. Note that this may be different
+		 *							from the actual VulkanQueue index since multiple command buffer queue indices can map
+		 *							to the same queue.
+		 * @param[in]	syncMask	Mask that controls which other command buffers this command buffer depends upon
+		 *							(if any). See description of @p syncMask parameter in RenderAPICore::executeCommands().
+		 */
+		void submit(VulkanQueue* queue, UINT32 queueIdx, UINT32 syncMask);
+
 		/** Returns the handle to the internal Vulkan command buffer wrapped by this object. */
 		VkCommandBuffer getHandle() const { return mCmdBuffer; }
 
@@ -139,18 +151,6 @@ namespace BansheeEngine
 			VulkanUseFlags flags;
 		};
 
-		/** 
-		 * Called just before the buffer has been submitted to the queue.
-		 * 
-		 *  @param[out]	transitionInfo	Contains barriers that transition resources to appropriate queues families
-		 *								and/or transition image layouts. Caller should issue relevant pipeline barriers
-		 *								according to this structure, before submitting the command buffer.
-		 */
-		void prepareForSubmit(UnorderedMap<UINT32, TransitionInfo>& transitionInfo);
-
-		/** Called after the buffer has been submitted to the queue. */
-		void notifySubmit();
-
 		UINT32 mId;
 		UINT32 mQueueFamily;
 		State mState;
@@ -163,6 +163,9 @@ namespace BansheeEngine
 
 		UnorderedMap<VulkanResource*, ResourceInfo> mResources;
 		UnorderedSet<SPtr<VulkanGpuParams>> mBoundParams;
+
+		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
+		UnorderedMap<UINT32, TransitionInfo> mTransitionInfoTemp;
 	};
 
 	/** CommandBuffer implementation for Vulkan. */
@@ -177,9 +180,6 @@ namespace BansheeEngine
 		 */
 		void submit(UINT32 syncMask);
 
-		/** Checks if the submitted buffer finished executing, and updates state if it has. */
-		void refreshSubmitStatus();
-
 		/** 
 		 * Returns the internal command buffer. 
 		 * 
@@ -190,7 +190,7 @@ namespace BansheeEngine
 	private:
 		friend class VulkanCommandBufferManager;
 
-		VulkanCommandBuffer(VulkanDevice& device, UINT32 id, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx,
+		VulkanCommandBuffer(VulkanDevice& device, GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx,
 			bool secondary);
 
 		/** 
@@ -200,7 +200,6 @@ namespace BansheeEngine
 		void acquireNewBuffer();
 
 		VulkanCmdBuffer* mBuffer;
-		VulkanCmdBuffer* mSubmittedBuffer;
 		VulkanDevice& mDevice;
 		VulkanQueue* mQueue;
 		UINT32 mIdMask;

+ 15 - 2
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -4,6 +4,7 @@
 
 #include "BsVulkanPrerequisites.h"
 #include "BsCommandBufferManager.h"
+#include "BsVulkanCommandBuffer.h"
 
 namespace BansheeEngine
 {
@@ -23,9 +24,12 @@ namespace BansheeEngine
 		~VulkanCommandBufferManager();
 
 		/** @copydoc CommandBufferManager::createInternal() */
-		SPtr<CommandBuffer> createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
+		SPtr<CommandBuffer> createInternal(GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
 			bool secondary = false) override;
 
+		/** Notifies the manager that this buffer was just submitted to the queue for execution. */
+		void setActiveBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, VulkanCmdBuffer* buffer);
+
 		/** 
 		 * Returns a set of command buffer semaphores depending on the provided sync mask. 
 		 *
@@ -46,8 +50,17 @@ namespace BansheeEngine
 		void refreshStates(UINT32 deviceIdx);
 
 	private:
+		/** Contains command buffers specific to one device. */
+		struct PerDeviceData
+		{
+			VulkanCmdBuffer* buffers[BS_MAX_COMMAND_BUFFERS];
+		};
+
 		const VulkanRenderAPI& mRapi;
+
+		PerDeviceData* mDeviceData;
+		UINT32 mNumDevices;
 	};
 
 	/** @} */
-}
+}

+ 20 - 10
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -14,27 +14,37 @@ namespace BansheeEngine
 	class VulkanQueue
 	{
 	public:
-		VulkanQueue(VkQueue queue);
+		VulkanQueue(VulkanDevice& device, VkQueue queue, GpuQueueType type, UINT32 index);
 
 		/** Returns the internal handle to the Vulkan queue object. */
 		VkQueue getHandle() const { return mQueue; }
 		
+		/** Returns the device that owns the queue. */
+		VulkanDevice& getDevice() const { return mDevice; }
+
+		/** Returns the type of the queue. */
+		GpuQueueType getType() const { return mType; }
+
+		/** Returns the unique index of the queue, for its type. */
+		UINT32 getIndex() const { return mIndex; }
+
 		/** 
-		 * Notifies the queue that a command buffer was submitted. 
+		 * Checks if anything is currently executing on this queue. 
 		 *
-		 * @param[in]	cmdBuffer		Command buffer that was submitted.
-		 * @param[in]	fenceCounter	Fence counter of the command buffer at time of submission. This counter gets
-		 *								incremented whenever a command buffer is done executing on the device. This allow
-		 *								us to know when the queue is done with a command buffer.
+		 * @note	This status is only updated after a VulkanCommandBufferManager::refreshStates() call.
 		 */
-		void notifySubmit(const VulkanCommandBuffer& cmdBuffer, UINT32 fenceCounter);
+		bool isExecuting() const;
+
+		/** Submits the provided command buffer on the queue. */
+		void submit(VulkanCmdBuffer* cmdBuffer, VkSemaphore* waitSemaphores, UINT32 semaphoresCount);
 
 	protected:
+		VulkanDevice& mDevice;
 		VkQueue mQueue;
-		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
+		GpuQueueType mType;
+		UINT32 mIndex;
 
-		UINT32 mFenceCounter;
-		UINT32 mLastCommandBufferId;
+		VulkanCmdBuffer* mLastCommandBuffer;
 	};
 
 	/** @} */

+ 35 - 64
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -35,6 +35,9 @@ namespace BansheeEngine
 
 	VulkanCmdBufferPool::~VulkanCmdBufferPool()
 	{
+		// Note: Shutdown should be the only place command buffers are destroyed at, as the system relies on the fact that
+		// they won't be destroyed during normal operation.
+
 		for(auto& entry : mPools)
 		{
 			PoolInfo& poolInfo = entry.second;
@@ -232,21 +235,9 @@ namespace BansheeEngine
 		mBoundParams.insert(params);
 	}
 
-	void VulkanCmdBuffer::prepareForSubmit(UnorderedMap<UINT32, TransitionInfo>& transitionInfo)
-	{
-		for (auto& entry : mBoundParams)
-			entry->prepareForSubmit(this, transitionInfo);
-	}
-
-	void VulkanCmdBuffer::notifySubmit()
-	{
-		for (auto& entry : mResources)
-			entry.first->notifyUsed(this, entry.second.flags);
-	}
-
-	VulkanCommandBuffer::VulkanCommandBuffer(VulkanDevice& device, UINT32 id, GpuQueueType type, UINT32 deviceIdx,
+	VulkanCommandBuffer::VulkanCommandBuffer(VulkanDevice& device, GpuQueueType type, UINT32 deviceIdx,
 		UINT32 queueIdx, bool secondary)
-		: CommandBuffer(id, type, deviceIdx, queueIdx, secondary), mBuffer(nullptr), mSubmittedBuffer(nullptr)
+		: CommandBuffer(type, deviceIdx, queueIdx, secondary), mBuffer(nullptr)
 		, mDevice(device), mQueue(nullptr), mIdMask(0)
 	{
 		UINT32 numQueues = device.getNumQueues(mType);
@@ -269,16 +260,6 @@ namespace BansheeEngine
 		acquireNewBuffer();
 	}
 
-	void VulkanCommandBuffer::refreshSubmitStatus()
-	{
-		if (mSubmittedBuffer == nullptr) // Nothing was submitted
-			return;
-
-		mSubmittedBuffer->refreshFenceStatus();
-		if (!mSubmittedBuffer->isSubmitted())
-			mSubmittedBuffer = nullptr;
-	}
-
 	void VulkanCommandBuffer::acquireNewBuffer()
 	{
 		VulkanCmdBufferPool& pool = mDevice.getCmdBufferPool();
@@ -286,40 +267,23 @@ namespace BansheeEngine
 		if (mBuffer != nullptr)
 			assert(mBuffer->isSubmitted());
 
-		mSubmittedBuffer = mBuffer;
 		UINT32 queueFamily = mDevice.getQueueFamily(mType);
 		mBuffer = pool.getBuffer(queueFamily, mIsSecondary);
 	}
 
-	void VulkanCommandBuffer::submit(UINT32 syncMask)
+	void VulkanCmdBuffer::submit(VulkanQueue* queue, UINT32 queueIdx, UINT32 syncMask)
 	{
-		assert(mBuffer != nullptr && mBuffer->isReadyForSubmit());
-
-		VkCommandBuffer cmdBuffer = mBuffer->getHandle();
-		VkSemaphore signalSemaphore = mBuffer->getSemaphore();
-
-		VkSubmitInfo submitInfo;
-		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-		submitInfo.pNext = nullptr;
-		submitInfo.pWaitDstStageMask = 0;
-		submitInfo.commandBufferCount = 1;
-		submitInfo.pCommandBuffers = &cmdBuffer;
-		submitInfo.signalSemaphoreCount = 1;
-		submitInfo.pSignalSemaphores = &signalSemaphore;
-
-		// Ignore myself
-		syncMask &= ~mIdMask;
-
 		// Issue pipeline barriers for queue transitions (need to happen on original queue first, then on new queue)
-		mBuffer->prepareForSubmit(mTransitionInfoTemp);
+		for (auto& entry : mBoundParams)
+			entry->prepareForSubmit(this, mTransitionInfoTemp);
 
-		UINT32 queueFamily = mDevice.getQueueFamily(mType);
-		for(auto& entry : mTransitionInfoTemp)
+		VulkanDevice& device = queue->getDevice();
+		for (auto& entry : mTransitionInfoTemp)
 		{
 			UINT32 entryQueueFamily = entry.first;
 
 			// No queue transition needed for entries on this queue (this entry is most likely an image layout transition)
-			if (entryQueueFamily == queueFamily)
+			if (entryQueueFamily == mQueueFamily)
 				continue;
 
 			VkCommandBuffer cmdBuffer; // TODO - Get the command buffer on entryQueueFamily
@@ -361,35 +325,42 @@ namespace BansheeEngine
 								 numImgBarriers, barriers.imageBarriers.data());
 		}
 
-		VulkanCommandBufferManager& cmdBufManager = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
-		cmdBufManager.getSyncSemaphores(mDeviceIdx, syncMask, mSemaphoresTemp, submitInfo.waitSemaphoreCount);
-
-		if (submitInfo.waitSemaphoreCount > 0)
-			submitInfo.pWaitSemaphores = mSemaphoresTemp;
-		else
-			submitInfo.pWaitSemaphores = nullptr;
+		UINT32 deviceIdx = device.getIndex();
+		VulkanCommandBufferManager& cbm = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
+		
+		UINT32 numSemaphores;
+		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
 
-		VkQueue queue = mQueue->getHandle();
-		VkFence fence = mBuffer->getFence();
-		vkQueueSubmit(queue, 1, &submitInfo, fence);
+		queue->submit(this, mSemaphoresTemp, numSemaphores);
 
-		cmdBufManager.refreshStates(mDeviceIdx);
+		for (auto& entry : mResources)
+			entry.first->notifyUsed(this, entry.second.flags);
 
-		mBuffer->notifySubmit();
-		mQueue->notifySubmit(*this, mBuffer->getFenceCounter());
+		cbm.refreshStates(deviceIdx);
 
 		// Note: Uncomment for debugging only; prevents any device concurrency issues.
-		// vkQueueWaitIdle(mQueue);
+		// vkQueueWaitIdle(queue->getHandle());
 
-		mBuffer->mState = VulkanCmdBuffer::State::Submitted;
-		acquireNewBuffer();
+		mState = State::Submitted;
+		cbm.setActiveBuffer(queue->getType(), deviceIdx, queueIdx, this);
 
 		// Clear vectors but don't clear the actual map, as we want to re-use the memory since we expect queue family
 		// indices to be the same
-		for(auto& entry : mTransitionInfoTemp)
+		for (auto& entry : mTransitionInfoTemp)
 		{
 			entry.second.imageBarriers.clear();
 			entry.second.bufferBarriers.clear();
 		}
 	}
+
+	void VulkanCommandBuffer::submit(UINT32 syncMask)
+	{
+		assert(mBuffer != nullptr && mBuffer->isReadyForSubmit());
+
+		// Ignore myself
+		syncMask &= ~mIdMask;
+
+		mBuffer->submit(mQueue, mQueueIdx, syncMask);
+		acquireNewBuffer();
+	}
 }

+ 33 - 21
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp

@@ -3,19 +3,24 @@
 #include "BsVulkanCommandBufferManager.h"
 #include "BsVulkanCommandBuffer.h"
 #include "BsVulkanRenderAPI.h"
+#include "BsVulkanDevice.h"
 
 namespace BansheeEngine
 {
 	VulkanCommandBufferManager::VulkanCommandBufferManager(const VulkanRenderAPI& rapi)
-		:mRapi(rapi)
-	{ }
+		:mRapi(rapi), mDeviceData(nullptr), mNumDevices(rapi.getNumDevices())
+	{
+		mDeviceData = bs_newN<PerDeviceData>(mNumDevices);
+		for(UINT32 i = 0; i < mNumDevices; i++)
+			memset(mDeviceData[i].buffers, 0, BS_MAX_COMMAND_BUFFERS * sizeof(VulkanCmdBuffer*));
+	}
 
 	VulkanCommandBufferManager::~VulkanCommandBufferManager()
 	{
-		
+		bs_deleteN(mDeviceData, mNumDevices);
 	}
 
-	SPtr<CommandBuffer> VulkanCommandBufferManager::createInternal(UINT32 id, GpuQueueType type, UINT32 deviceIdx,
+	SPtr<CommandBuffer> VulkanCommandBufferManager::createInternal(GpuQueueType type, UINT32 deviceIdx,
 		UINT32 queueIdx, bool secondary)
 	{
 		UINT32 numDevices = mRapi._getNumDevices();
@@ -30,34 +35,38 @@ namespace BansheeEngine
 		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
 
 		CommandBuffer* buffer = 
-			new (bs_alloc<VulkanCommandBuffer>()) VulkanCommandBuffer(*device, id, type, deviceIdx, queueIdx, secondary);
+			new (bs_alloc<VulkanCommandBuffer>()) VulkanCommandBuffer(*device, type, deviceIdx, queueIdx, secondary);
 
 		return bs_shared_ptr(buffer);
 	}
 
+	void VulkanCommandBufferManager::setActiveBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, 
+		VulkanCmdBuffer* buffer)
+	{
+		assert(deviceIdx < mNumDevices);
+		assert(buffer->isSubmitted());
+
+		UINT32 idx = CommandSyncMask::getGlobalQueueIdx(type, queueIdx);
+		mDeviceData[deviceIdx].buffers[idx] = buffer;
+	}
+
 	void VulkanCommandBufferManager::getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, 
 		VkSemaphore(&semaphores)[BS_MAX_COMMAND_BUFFERS], UINT32& count)
 	{
-		assert(deviceIdx < BS_MAX_DEVICES);
+		assert(deviceIdx < mNumDevices);
+		const PerDeviceData& deviceData = mDeviceData[deviceIdx];
 
 		UINT32 semaphoreIdx = 0;
 		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
 		{
-			if (mActiveCommandBuffers[deviceIdx][i] == nullptr) // Command buffer doesn't exist
-				continue;
-
-			VulkanCommandBuffer* cmdBuffer = static_cast<VulkanCommandBuffer*>(mActiveCommandBuffers[deviceIdx][i]);
-			UINT32 globalQueueIdx = CommandSyncMask::getGlobalQueueIdx(cmdBuffer->getType(), cmdBuffer->getQueueIdx());
-
-			if ((syncMask & (1 << globalQueueIdx)) == 0) // We don't care about the command buffer
+			if (deviceData.buffers[i] == nullptr)
 				continue;
-			
-			VulkanCmdBuffer* lowLevelCmdBuffer = cmdBuffer->mSubmittedBuffer;
 
-			if (lowLevelCmdBuffer == nullptr || !lowLevelCmdBuffer->isSubmitted()) // If not submitted, no need to sync with it
+			if ((syncMask & (1 << i)) == 0) // We don't care about the command buffer
 				continue;
 
-			semaphores[semaphoreIdx++] = lowLevelCmdBuffer->getSemaphore();
+			assert(deviceData.buffers[i]->isSubmitted()); // It shouldn't be here if it wasn't submitted
+			semaphores[semaphoreIdx++] = deviceData.buffers[i]->getSemaphore();
 		}
 
 		count = semaphoreIdx;
@@ -65,16 +74,19 @@ namespace BansheeEngine
 
 	void VulkanCommandBufferManager::refreshStates(UINT32 deviceIdx)
 	{
-		assert(deviceIdx < BS_MAX_DEVICES);
+		assert(deviceIdx < mNumDevices);
+		PerDeviceData& deviceData = mDeviceData[deviceIdx];
 
 		UINT32 semaphoreIdx = 0;
 		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
 		{
-			if (mActiveCommandBuffers[deviceIdx][i] == nullptr) // Command buffer doesn't exist
+			if (deviceData.buffers[i] == nullptr)
 				continue;
 
-			VulkanCommandBuffer* cmdBuffer = static_cast<VulkanCommandBuffer*>(mActiveCommandBuffers[deviceIdx][i]);
-			cmdBuffer->refreshSubmitStatus();
+			VulkanCmdBuffer* cmdBuffer = deviceData.buffers[i];
+			cmdBuffer->refreshFenceStatus();
+			if (!cmdBuffer->isSubmitted())
+				deviceData.buffers[i] = nullptr;
 		}
 	}
 }

+ 1 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanDevice.cpp

@@ -104,7 +104,7 @@ namespace BansheeEngine
 				VkQueue queue;
 				vkGetDeviceQueue(mLogicalDevice, mQueueInfos[i].familyIdx, j, &queue);
 
-				mQueueInfos[i].queues[j] = bs_new<VulkanQueue>(queue);
+				mQueueInfos[i].queues[j] = bs_new<VulkanQueue>(*this, queue, (GpuQueueType)i, j);
 			}
 		}
 

+ 31 - 5
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -5,13 +5,39 @@
 
 namespace BansheeEngine
 {
-	VulkanQueue::VulkanQueue(VkQueue queue)
-		:mQueue(queue), mFenceCounter(0), mLastCommandBufferId(-1)
+	VulkanQueue::VulkanQueue(VulkanDevice& device, VkQueue queue, GpuQueueType type, UINT32 index)
+		:mDevice(device), mQueue(queue), mType(type), mIndex(index), mLastCommandBuffer(nullptr)
 	{ }
 
-	void VulkanQueue::notifySubmit(const VulkanCommandBuffer& cmdBuffer, UINT32 fenceCounter)
+	bool VulkanQueue::isExecuting() const
 	{
-		mLastCommandBufferId = cmdBuffer._getId();
-		mFenceCounter = fenceCounter;
+		if (mLastCommandBuffer == nullptr)
+			return false;
+
+		return mLastCommandBuffer->isSubmitted();
+	}
+
+	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VkSemaphore* waitSemaphores, UINT32 semaphoresCount)
+	{
+		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
+		VkSemaphore semaphore = cmdBuffer->getSemaphore();
+
+		VkSubmitInfo submitInfo;
+		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+		submitInfo.pNext = nullptr;
+		submitInfo.pWaitDstStageMask = 0;
+		submitInfo.commandBufferCount = 1;
+		submitInfo.pCommandBuffers = &vkCmdBuffer;
+		submitInfo.signalSemaphoreCount = 1;
+		submitInfo.pSignalSemaphores = &semaphore;
+
+		if (semaphoresCount > 0)
+			submitInfo.pWaitSemaphores = waitSemaphores;
+		else
+			submitInfo.pWaitSemaphores = nullptr;
+
+		vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
+
+		mLastCommandBuffer = cmdBuffer;
 	}
 }

+ 3 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

@@ -422,7 +422,9 @@ namespace BansheeEngine
 
 		// TODO - Actually swap buffers
 
-		cb->refreshSubmitStatus();
+		// See if any command buffers finished executing
+		VulkanCommandBufferManager& cbm = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
+		cbm.refreshStates(cb->getDeviceIdx());
 
 		BS_INC_RENDER_STAT(NumPresents);
 	}