Quellcode durchsuchen

Refactored Vulkan semaphores so that a command buffer generates a new semaphore on each submission

BearishSun vor 9 Jahren
Ursprung
Commit
b58a3fd3ad

+ 1 - 1
Source/BansheeCore/Include/BsRenderAPI.h

@@ -543,7 +543,7 @@ namespace bs
 		 *
 		 * @note	Core thread only.
 		 */
-		virtual void executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) = 0;
+		virtual void submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) = 0;
 
 		/**
 		 * Gets the capabilities of a specific GPU.

+ 1 - 1
Source/BansheeD3D11RenderAPI/Include/BsD3D11RenderAPI.h

@@ -99,7 +99,7 @@ namespace bs
 		void addCommands(const SPtr<CommandBuffer>& commandBuffer, const SPtr<CommandBuffer>& secondary) override;
 
 		/** @copydoc RenderAPICore::executeCommands() */
-		void executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
+		void submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
 
 		/** @copydoc RenderAPICore::convertProjectionMatrix */
 		void convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest) override;

+ 1 - 1
Source/BansheeD3D11RenderAPI/Source/BsD3D11RenderAPI.cpp

@@ -1086,7 +1086,7 @@ namespace bs
 		cb->appendSecondary(secondaryCb);
 	}
 
-	void D3D11RenderAPI::executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
+	void D3D11RenderAPI::submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
 	{
 		SPtr<D3D11CommandBuffer> cb = std::static_pointer_cast<D3D11CommandBuffer>(commandBuffer);
 		cb->executeCommands();

+ 1 - 1
Source/BansheeGLRenderAPI/Include/BsGLRenderAPI.h

@@ -101,7 +101,7 @@ namespace bs
 		void addCommands(const SPtr<CommandBuffer>& commandBuffer, const SPtr<CommandBuffer>& secondary) override;
 
 		/** @copydoc RenderAPICore::executeCommands() */
-		void executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
+		void submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
 
 		/** @copydoc RenderAPICore::convertProjectionMatrix() */
 		void convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest) override;

+ 1 - 1
Source/BansheeGLRenderAPI/Source/BsGLRenderAPI.cpp

@@ -1145,7 +1145,7 @@ namespace bs
 		cb->appendSecondary(secondaryCb);
 	}
 
-	void GLRenderAPI::executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
+	void GLRenderAPI::submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
 	{
 		SPtr<GLCommandBuffer> cb = std::static_pointer_cast<GLCommandBuffer>(commandBuffer);
 		cb->executeCommands();

+ 31 - 6
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -15,10 +15,24 @@ namespace bs
 	 *  @{
 	 */
 
-	class VulkanCmdBuffer;
-
 #define BS_MAX_VULKAN_CB_PER_QUEUE_FAMILY BS_MAX_QUEUES_PER_TYPE * 32
 
+	/** 
+	 * Wrapper around a Vulkan semaphore object that manages its usage and lifetime. The underlying VkSemaphore is
+	 * created when the wrapper is constructed and destroyed when the wrapper is destructed.
+	 */
+	class VulkanSemaphore : public VulkanResource
+	{
+	public:
+		VulkanSemaphore(VulkanResourceManager* owner);
+		~VulkanSemaphore();
+
+		/** Returns the internal handle to the Vulkan object. */
+		VkSemaphore getHandle() const { return mSemaphore; }
+
+	private:
+		VkSemaphore mSemaphore; // Raw Vulkan handle wrapped by this object
+	};
+
+	class VulkanCmdBuffer;
+
 	/** Pool that allocates and distributes Vulkan command buffers. */
 	class VulkanCmdBufferPool
 	{
@@ -128,7 +142,14 @@ namespace bs
 		 * Returns a semaphore that may be used for synchronizing execution between command buffers executing on different 
 		 * queues. 
 		 */
-		VkSemaphore getSemaphore() const { return mSemaphore; }
+		VulkanSemaphore* getSemaphore() const { return mSemaphore; }
+
+		/** 
+		 * Allocates a new semaphore that may be used for synchronizing execution between command buffers executing on different 
+		 * queues. Releases the previously allocated semaphore, if one exists. Use getSemaphore() to retrieve the latest
+		 * allocated semaphore.
+		 */
+		VulkanSemaphore* allocateSemaphore();
 
 		/** Returns true if the command buffer is currently being processed by the device. */
 		bool isSubmitted() const { return mState == State::Submitted; }
@@ -235,6 +256,7 @@ namespace bs
 	private:
 		friend class VulkanCmdBufferPool;
 		friend class VulkanCommandBuffer;
+		friend class VulkanQueue;
 
 		/** Contains information about a single Vulkan resource bound/used on this command buffer. */
 		struct ResourceUseHandle
@@ -269,6 +291,9 @@ namespace bs
 		/** Checks if all the prerequisites for rendering have been made (e.g. render target and pipeline state are set. */
 		bool isReadyForRender();
 
+		/** Marks the command buffer as submitted on a queue. */
+		void setIsSubmitted() { mState = State::Submitted; }
+
 		/** Binds the current graphics pipeline to the command buffer. Returns true if bind was successful. */
 		bool bindGraphicsPipeline();
 
@@ -289,11 +314,11 @@ namespace bs
 		VkCommandPool mPool;
 		VkCommandBuffer mCmdBuffer;
 		VkFence mFence;
-		VkSemaphore mSemaphore;
+		VulkanSemaphore* mSemaphore;
 		UINT32 mFenceCounter;
 
 		VulkanFramebuffer* mFramebuffer;
-		VkSemaphore mPresentSemaphore;
+		VulkanSemaphore* mPresentSemaphore;
 		UINT32 mRenderTargetWidth;
 		UINT32 mRenderTargetHeight;
 		bool mRenderTargetDepthReadOnly;
@@ -320,7 +345,7 @@ namespace bs
 		bool mScissorRequiresBind : 1;
 		DescriptorSetBindFlags mDescriptorSetsBindState;
 
-		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
+		VulkanSemaphore* mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
 		VkBuffer mVertexBuffersTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkDeviceSize mVertexBufferOffsetsTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkDescriptorSet* mDescriptorSetsTemp;

+ 1 - 6
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -85,9 +85,6 @@ namespace bs
 		SPtr<CommandBuffer> createInternal(GpuQueueType type, UINT32 deviceIdx = 0, UINT32 queueIdx = 0,
 			bool secondary = false) override;
 
-		/** Notifies the manager that this buffer was just submitted to the queue for execution. */
-		void setActiveBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, VulkanCmdBuffer* buffer);
-
 		/** 
 		 * Returns a set of command buffer semaphores depending on the provided sync mask. 
 		 *
@@ -98,7 +95,7 @@ namespace bs
 		 *							beginning of the array. Must be able to hold at least BS_MAX_COMMAND_BUFFERS entries.
 		 * @param[out]	count		Number of semaphores provided in the @p semaphores array.
 		 */
-		void getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VkSemaphore* semaphores, UINT32& count);
+		void getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VulkanSemaphore** semaphores, UINT32& count);
 
 		/** 
 		 * Checks if any of the active command buffers finished executing on the device and updates their states 
@@ -120,8 +117,6 @@ namespace bs
 		/** Contains command buffers specific to one device. */
 		struct PerDeviceData
 		{
-			List<VulkanCmdBuffer*> activeBuffers;
-			VulkanCmdBuffer* lastActiveBuffer[BS_MAX_UNIQUE_QUEUES];
 			VulkanTransferBuffer transferBuffers[GQT_COUNT][BS_MAX_QUEUES_PER_TYPE];
 		};
 

+ 1 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanPrerequisites.h

@@ -74,6 +74,7 @@ namespace bs
 	class VulkanQuery;
 	class VulkanQueryPool;
 	class VulkanVertexInput;
+	class VulkanSemaphore;
 
 	extern VkAllocationCallbacks* gVulkanAllocator;
 

+ 36 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -36,19 +36,54 @@ namespace bs
 		bool isExecuting() const;
 
 		/** Submits the provided command buffer on the queue. */
-		void submit(VulkanCmdBuffer* cmdBuffer, VkSemaphore* waitSemaphores, UINT32 semaphoresCount);
+		void submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount);
+
+		/** 
+		 * Presents the back buffer of the provided swap chain. 
+		 *
+		 * @param[in]	swapChain			Swap chain whose back buffer to present.
+		 * @param[in]	waitSemaphores		Optional semaphores to wait on before presenting the queue.
+		 * @param[in]	semaphoresCount		Number of semaphores in the @p waitSemaphores array.
+		 */
+		void present(VulkanSwapChain* swapChain, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount);
 
 		/** Blocks the calling thread until all operations on the queue finish. */
 		void waitIdle() const;
 
+		/** 
+		 * Checks if any of the active command buffers finished executing on the queue and updates their states 
+		 * accordingly. 
+		 */
+		void refreshStates();
+
+		/** Returns the last command buffer that was submitted on this queue. */
+		VulkanCmdBuffer* getLastCommandBuffer() const { return mLastCommandBuffer; }
+
 	protected:
+		/** 
+		 * Information about a single submission on the queue (a command buffer submit, or a present operation in which
+		 * case @p cmdBuffer is null).
+		 *
+		 * The wait semaphore pointers are copied into the structure rather than referenced. Callers hand in scratch
+		 * arrays that get overwritten by their next submit/present, while this entry must remain valid until the
+		 * submission is detected as finished.
+		 */
+		struct SubmitInfo
+		{
+			/**
+			 * @param[in]	cmdBuffer		Command buffer that was submitted, or null for a present operation.
+			 * @param[in]	submitIdx		Sequential index of the submission on the owning queue.
+			 * @param[in]	semaphores		Semaphores the submission waits on. Only the pointers are copied.
+			 * @param[in]	numSemaphores	Number of entries in @p semaphores. Must not exceed
+			 *								BS_MAX_UNIQUE_QUEUES + 1.
+			 */
+			SubmitInfo(VulkanCmdBuffer* cmdBuffer, UINT32 submitIdx, VulkanSemaphore** semaphores, UINT32 numSemaphores)
+				:cmdBuffer(cmdBuffer), submitIdx(submitIdx), semaphores(), numSemaphores(numSemaphores)
+			{
+				assert(numSemaphores <= BS_MAX_UNIQUE_QUEUES + 1);
+
+				// Parameter names shadow the members here, so the member array is accessed through this
+				for (UINT32 i = 0; i < numSemaphores; i++)
+					this->semaphores[i] = semaphores[i];
+			}
+
+			VulkanCmdBuffer* cmdBuffer;
+			UINT32 submitIdx;
+			VulkanSemaphore* semaphores[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
+			UINT32 numSemaphores;
+		};
+
 		VulkanDevice& mDevice;
 		VkQueue mQueue;
 		GpuQueueType mType;
 		UINT32 mIndex;
 		VkPipelineStageFlags mSubmitDstWaitMask[BS_MAX_UNIQUE_QUEUES];
 
+		List<SubmitInfo> mActiveBuffers;
 		VulkanCmdBuffer* mLastCommandBuffer;
+		UINT32 mNextSubmitIdx;
+
+		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
 	};
 
 	/** @} */

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanRenderAPI.h

@@ -99,7 +99,7 @@ namespace bs
 		void addCommands(const SPtr<CommandBuffer>& commandBuffer, const SPtr<CommandBuffer>& secondary) override;
 
 		/** @copydoc RenderAPICore::executeCommands() */
-		void executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
+		void submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask = 0xFFFFFFFF) override;
 
 		/** @copydoc RenderAPICore::convertProjectionMatrix */
 		void convertProjectionMatrix(const Matrix4& matrix, Matrix4& dest) override;

+ 10 - 10
Source/BansheeVulkanRenderAPI/Include/BsVulkanSwapChain.h

@@ -16,7 +16,7 @@ namespace bs
 	{
 		VulkanImage* image;
 		VkImageView view;
-		VkSemaphore sync;
+		VulkanSemaphore* sync;
 		bool acquired;
 
 		VulkanFramebuffer* framebuffer;
@@ -48,15 +48,6 @@ namespace bs
 		 */
 		UINT32 getHeight() const { return mHeight; }
 
-		/** 
-		 * Presents the back buffer to the output device, swapping the buffers. 
-		 *
-		 * @param[in]	queue			Queue on which to queue the present operation. Must support present operations.
-		 * @param[in]	semaphores		Optional semaphores to wait on before presenting the queue.
-		 * @param[in]	numSemaphores	Number of semaphores in the @p semaphores array.
-		 */
-		void present(VkQueue queue, VkSemaphore* semaphores, UINT32 numSemaphores);
-
 		/**
 		 * Acquires a new back buffer image. Caller can retrieve the surface by calling getBackBuffer(). Caller must wait
 		 * on the semaphore provided by the surface before rendering to it.
@@ -65,6 +56,12 @@ namespace bs
 		 */
 		void acquireBackBuffer();
 
+		/** 
+		 * Prepares the swap chain for the present operation. Returns the index of the image representing the current
+		 * back buffer.
+		 */
+		UINT32 prepareForPresent();
+
 		/** Returns information describing the current back buffer. */
 		const SwapChainSurface& getBackBuffer() { return mSurfaces[mCurrentBackBufferIdx]; }
 
@@ -76,6 +73,9 @@ namespace bs
 
 		/** Returns an image view representing the depth-stencil buffer, if any. */
 		VkImageView getDepthStencilView() const { return mDepthStencilView; }
+
+		/** Returns the internal swap chain handle. */
+		VkSwapchainKHR getHandle() const { return mSwapChain; }
 	private:
 		/** Destroys current swap chain and depth stencil image (if any). */
 		void clear(VkSwapchainKHR swapChain);

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/Win32/BsWin32RenderWindow.h

@@ -116,7 +116,7 @@ namespace bs
 		VkFormat mDepthFormat;
 		UINT32 mPresentQueueFamily;
 		SPtr<VulkanSwapChain> mSwapChain;
-		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES];
+		VulkanSemaphore* mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES];
 		bool mRequiresNewBackBuffer;
 
 		Win32RenderWindowProperties mProperties;

+ 35 - 22
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -21,6 +21,23 @@ static_assert(false, "Other platforms go here");
 
 namespace bs
 {
+	VulkanSemaphore::VulkanSemaphore(VulkanResourceManager* owner)
+		:VulkanResource(owner, true)
+	{
+		// Only the structure header is filled in, no extension chain or flags are used
+		VkSemaphoreCreateInfo createInfo;
+		createInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+		createInfo.pNext = nullptr;
+		createInfo.flags = 0;
+
+		VkDevice logicalDevice = owner->getDevice().getLogical();
+		VkResult result = vkCreateSemaphore(logicalDevice, &createInfo, gVulkanAllocator, &mSemaphore);
+		assert(result == VK_SUCCESS);
+	}
+
+	VulkanSemaphore::~VulkanSemaphore()
+	{
+		// Release the native semaphore handle held by this wrapper
+		VkDevice logicalDevice = mOwner->getDevice().getLogical();
+		vkDestroySemaphore(logicalDevice, mSemaphore, gVulkanAllocator);
+	}
+
 	VulkanCmdBufferPool::VulkanCmdBufferPool(VulkanDevice& device)
 		:mDevice(device), mNextId(1)
 	{
@@ -108,9 +125,9 @@ namespace bs
 	}
 
 	VulkanCmdBuffer::VulkanCmdBuffer(VulkanDevice& device, UINT32 id, VkCommandPool pool, UINT32 queueFamily, bool secondary)
-		: mId(id), mQueueFamily(queueFamily), mState(State::Ready), mDevice(device), mPool(pool), mFenceCounter(0)
-		, mFramebuffer(nullptr), mPresentSemaphore(VK_NULL_HANDLE), mRenderTargetWidth(0), mRenderTargetHeight(0)
-		, mRenderTargetDepthReadOnly(false), mRenderTargetLoadMask(RT_NONE), mGlobalQueueIdx(-1)
+		: mId(id), mQueueFamily(queueFamily), mState(State::Ready), mDevice(device), mPool(pool), mSemaphore(nullptr)
+		, mFenceCounter(0), mFramebuffer(nullptr), mPresentSemaphore(nullptr), mRenderTargetWidth(0)
+		, mRenderTargetHeight(0), mRenderTargetDepthReadOnly(false), mRenderTargetLoadMask(RT_NONE), mGlobalQueueIdx(-1)
 		, mViewport(0.0f, 0.0f, 1.0f, 1.0f), mScissor(0, 0, 0, 0), mStencilRef(0), mDrawOp(DOT_TRIANGLE_LIST)
 		, mNumBoundDescriptorSets(0), mGfxPipelineRequiresBind(true), mCmpPipelineRequiresBind(true)
 		, mViewportRequiresBind(true), mStencilRefRequiresBind(true), mScissorRequiresBind(true), mVertexBuffersTemp()
@@ -136,14 +153,6 @@ namespace bs
 
 		result = vkCreateFence(mDevice.getLogical(), &fenceCI, gVulkanAllocator, &mFence);
 		assert(result == VK_SUCCESS);
-
-		VkSemaphoreCreateInfo semaphoreCI;
-		semaphoreCI.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
-		semaphoreCI.pNext = nullptr;
-		semaphoreCI.flags = 0;
-
-		result = vkCreateSemaphore(mDevice.getLogical(), &semaphoreCI, gVulkanAllocator, &mSemaphore);
-		assert(result == VK_SUCCESS);
 	}
 
 	VulkanCmdBuffer::~VulkanCmdBuffer()
@@ -193,9 +202,11 @@ namespace bs
 				entry.first->notifyUnbound();
 			}
 		}
+
+		if (mSemaphore != nullptr)
+			mSemaphore->destroy();
 		
 		vkDestroyFence(device, mFence, gVulkanAllocator);
-		vkDestroySemaphore(device, mSemaphore, gVulkanAllocator);
 		vkFreeCommandBuffers(device, mPool, 1, &mCmdBuffer);
 
 		bs_free(mDescriptorSetsTemp);
@@ -336,6 +347,15 @@ namespace bs
 		mState = State::Recording;
 	}
 
+	VulkanSemaphore* VulkanCmdBuffer::allocateSemaphore()
+	{
+		// Let go of the semaphore from the previous allocation (if any) before replacing it
+		VulkanSemaphore* previous = mSemaphore;
+		if (previous != nullptr)
+			previous->destroy();
+
+		// The new semaphore becomes the one reported by getSemaphore()
+		mSemaphore = mDevice.getResourceManager().create<VulkanSemaphore>();
+
+		return mSemaphore;
+	}
+
 	void VulkanCmdBuffer::submit(VulkanQueue* queue, UINT32 queueIdx, UINT32 syncMask)
 	{
 		assert(isReadyForSubmit());
@@ -474,7 +494,7 @@ namespace bs
 		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
 
 		// Wait on present (i.e. until the back buffer becomes available), if we're rendering to a window
-		if (mPresentSemaphore != VK_NULL_HANDLE)
+		if (mPresentSemaphore != nullptr)
 		{
 			mSemaphoresTemp[numSemaphores] = mPresentSemaphore;
 			numSemaphores++;
@@ -502,12 +522,8 @@ namespace bs
 								 numImgBarriers, barriers.imageBarriers.data());
 
 			cmdBuffer->end();
-
 			queue->submit(cmdBuffer, mSemaphoresTemp, numSemaphores);
 
-			cmdBuffer->mState = State::Submitted;
-			cbm.setActiveBuffer(queue->getType(), deviceIdx, queueIdx, cmdBuffer);
-
 			numSemaphores = 0; // Semaphores are only needed the first time, since we're adding the buffers on the same queue
 		}
 
@@ -547,9 +563,6 @@ namespace bs
 		// Note: Uncommented for debugging only, prevents any device concurrency issues.
 		// vkQueueWaitIdle(queue->getHandle());
 
-		mState = State::Submitted;
-		cbm.setActiveBuffer(queue->getType(), deviceIdx, queueIdx, this);
-
 		// Clear vectors but don't clear the actual map, as we want to re-use the memory since we expect queue family
 		// indices to be the same
 		for (auto& entry : mTransitionInfoTemp)
@@ -633,7 +646,7 @@ namespace bs
 		if(rt == nullptr)
 		{
 			mFramebuffer = nullptr;
-			mPresentSemaphore = VK_NULL_HANDLE;
+			mPresentSemaphore = nullptr;
 			mRenderTargetWidth = 0;
 			mRenderTargetHeight = 0;
 			mRenderTargetDepthReadOnly = false;
@@ -652,7 +665,7 @@ namespace bs
 			else
 			{
 				rt->getCustomAttribute("FB", &mFramebuffer);
-				mPresentSemaphore = VK_NULL_HANDLE;
+				mPresentSemaphore = nullptr;
 			}
 
 			mRenderTargetWidth = rt->getProperties().getWidth();

+ 28 - 44
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp

@@ -111,8 +111,6 @@ namespace bs
 		{
 			SPtr<VulkanDevice> device = rapi._getDevice(i);
 
-			bs_zero_out(mDeviceData[i].lastActiveBuffer);
-
 			for (UINT32 j = 0; j < GQT_COUNT; j++)
 			{
 				GpuQueueType queueType = (GpuQueueType)j;
@@ -148,34 +146,33 @@ namespace bs
 		return bs_shared_ptr(buffer);
 	}
 
-	void VulkanCommandBufferManager::setActiveBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, 
-		VulkanCmdBuffer* buffer)
-	{
-		assert(deviceIdx < mNumDevices);
-		assert(buffer->isSubmitted());
-
-		UINT32 idx = CommandSyncMask::getGlobalQueueIdx(type, queueIdx);
-		mDeviceData[deviceIdx].lastActiveBuffer[idx] = buffer;
-		mDeviceData[deviceIdx].activeBuffers.push_back(buffer);
-	}
-
-	void VulkanCommandBufferManager::getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VkSemaphore* semaphores, 
+	void VulkanCommandBufferManager::getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VulkanSemaphore** semaphores,
 		UINT32& count)
 	{
-		assert(deviceIdx < mNumDevices);
-		const PerDeviceData& deviceData = mDeviceData[deviceIdx];
+		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
 
 		UINT32 semaphoreIdx = 0;
-		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
+		for(UINT32 i = 0; i < GQT_COUNT; i++)
 		{
-			if (deviceData.lastActiveBuffer[i] == nullptr)
-				continue;
+			GpuQueueType queueType = (GpuQueueType)i;
+
+			UINT32 numQueues = device->getNumQueues(queueType);
+			for(UINT32 j = 0; j < numQueues; j++)
+			{
+				VulkanQueue* queue = device->getQueue(queueType, j);
+				VulkanCmdBuffer* lastCB = queue->getLastCommandBuffer();
+
+				// Check if a buffer is currently executing on the queue
+				if (lastCB == nullptr || !lastCB->isSubmitted())
+					continue;
 
-			if ((syncMask & (1 << i)) == 0) // We don't care about the command buffer
-				continue;
+				// Check if we care about this specific queue
+				UINT32 queueMask = device->getQueueMask(queueType, j);
+				if ((syncMask & queueMask) == 0)
+					continue;
 
-			assert(deviceData.lastActiveBuffer[i]->isSubmitted()); // It shouldn't be here if it wasn't submitted
-			semaphores[semaphoreIdx++] = deviceData.lastActiveBuffer[i]->getSemaphore();
+				semaphores[semaphoreIdx++] = lastCB->getSemaphore();
+			}
 		}
 
 		count = semaphoreIdx;
@@ -183,29 +180,16 @@ namespace bs
 
 	void VulkanCommandBufferManager::refreshStates(UINT32 deviceIdx)
 	{
-		assert(deviceIdx < mNumDevices);
-		PerDeviceData& deviceData = mDeviceData[deviceIdx];
-
-		auto iter = deviceData.activeBuffers.begin();
-		while(iter != deviceData.activeBuffers.end())
-		{
-			VulkanCmdBuffer* cmdBuffer = *iter;
-
-			cmdBuffer->refreshFenceStatus();
-			if (!cmdBuffer->isSubmitted())
-				iter = deviceData.activeBuffers.erase(iter);
-			else
-				++iter;
-		}
+		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
 
-		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
+		for (UINT32 i = 0; i < GQT_COUNT; i++)
 		{
-			if (deviceData.lastActiveBuffer[i] == nullptr)
-				continue;
-
-			VulkanCmdBuffer* cmdBuffer = deviceData.lastActiveBuffer[i];
-			if (!cmdBuffer->isSubmitted())
-				deviceData.lastActiveBuffer[i] = nullptr;
+			UINT32 numQueues = device->getNumQueues((GpuQueueType)i);
+			for (UINT32 j = 0; j < numQueues; j++)
+			{
+				VulkanQueue* queue = device->getQueue((GpuQueueType)i, j);
+				queue->refreshStates();
+			}
 		}
 	}
 

+ 92 - 5
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -2,11 +2,12 @@
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsVulkanQueue.h"
 #include "BsVulkanCommandBuffer.h"
+#include "BsVulkanSwapChain.h"
 
 namespace bs
 {
 	VulkanQueue::VulkanQueue(VulkanDevice& device, VkQueue queue, GpuQueueType type, UINT32 index)
-		:mDevice(device), mQueue(queue), mType(type), mIndex(index), mLastCommandBuffer(nullptr)
+		:mDevice(device), mQueue(queue), mType(type), mIndex(index), mLastCommandBuffer(nullptr), mNextSubmitIdx(1)
 	{
 		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
 			mSubmitDstWaitMask[i] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
@@ -20,10 +21,12 @@ namespace bs
 		return mLastCommandBuffer->isSubmitted();
 	}
 
-	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VkSemaphore* waitSemaphores, UINT32 semaphoresCount)
+	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
 	{
 		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
-		VkSemaphore semaphore = cmdBuffer->getSemaphore();
+		VulkanSemaphore* semaphore = cmdBuffer->allocateSemaphore();
+
+		VkSemaphore vkSemaphore = semaphore->getHandle();
 
 		VkSubmitInfo submitInfo;
 		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
@@ -31,12 +34,15 @@ namespace bs
 		submitInfo.commandBufferCount = 1;
 		submitInfo.pCommandBuffers = &vkCmdBuffer;
 		submitInfo.signalSemaphoreCount = 1;
-		submitInfo.pSignalSemaphores = &semaphore;
+		submitInfo.pSignalSemaphores = &vkSemaphore;
 		submitInfo.waitSemaphoreCount = semaphoresCount;
 
 		if (semaphoresCount > 0)
 		{
-			submitInfo.pWaitSemaphores = waitSemaphores;
+			for (UINT32 i = 0; i < semaphoresCount; i++)
+				mSemaphoresTemp[i] = waitSemaphores[i]->getHandle();
+
+			submitInfo.pWaitSemaphores = mSemaphoresTemp;
 			submitInfo.pWaitDstStageMask = mSubmitDstWaitMask;
 		}
 		else
@@ -48,7 +54,56 @@ namespace bs
 		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
 		assert(result == VK_SUCCESS);
 
+		cmdBuffer->setIsSubmitted();
 		mLastCommandBuffer = cmdBuffer;
+
+		for (UINT32 i = 0; i < semaphoresCount; i++)
+		{
+			waitSemaphores[i]->notifyBound();
+			waitSemaphores[i]->notifyUsed(0, 0, VulkanUseFlag::Read | VulkanUseFlag::Write);
+		}
+
+		mActiveBuffers.push_back(SubmitInfo(cmdBuffer, mNextSubmitIdx++, waitSemaphores, semaphoresCount));
+	}
+
+	void VulkanQueue::present(VulkanSwapChain* swapChain, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
+	{
+		// Marks the current back buffer as no longer acquired and yields the index of the image to present
+		UINT32 imageIdx = swapChain->prepareForPresent();
+		VkSwapchainKHR swapChainHandle = swapChain->getHandle();
+
+		// Vulkan expects raw handles, so unwrap the semaphores into the scratch array (no-op when there are none)
+		for (UINT32 i = 0; i < semaphoresCount; i++)
+			mSemaphoresTemp[i] = waitSemaphores[i]->getHandle();
+
+		VkPresentInfoKHR presentInfo;
+		presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+		presentInfo.pNext = nullptr;
+		presentInfo.waitSemaphoreCount = semaphoresCount;
+		presentInfo.pWaitSemaphores = (semaphoresCount > 0) ? mSemaphoresTemp : nullptr;
+		presentInfo.swapchainCount = 1;
+		presentInfo.pSwapchains = &swapChainHandle;
+		presentInfo.pImageIndices = &imageIdx;
+		presentInfo.pResults = nullptr;
+
+		VkResult result = vkQueuePresentKHR(mQueue, &presentInfo);
+		assert(result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR);
+
+		// Register the queue's use of every semaphore that was waited on
+		for (UINT32 i = 0; i < semaphoresCount; i++)
+		{
+			VulkanSemaphore* waited = waitSemaphores[i];
+
+			waited->notifyBound();
+			waited->notifyUsed(0, 0, VulkanUseFlag::Read | VulkanUseFlag::Write);
+		}
+
+		// A present operation has no command buffer of its own, hence the null entry
+		mActiveBuffers.push_back(SubmitInfo(nullptr, mNextSubmitIdx++, waitSemaphores, semaphoresCount));
+	}
 
 	void VulkanQueue::waitIdle() const
@@ -56,4 +111,36 @@ namespace bs
 		VkResult result = vkQueueWaitIdle(mQueue);
 		assert(result == VK_SUCCESS);
 	}
+
+	void VulkanQueue::refreshStates()
+	{
+		// Find the most recent submission whose command buffer is no longer marked as submitted
+		UINT32 lastFinishedSubmission = 0;
+		for (auto iter = mActiveBuffers.begin(); iter != mActiveBuffers.end(); ++iter)
+		{
+			// Present operations are tracked without a command buffer, step over them (the iterator must still advance)
+			VulkanCmdBuffer* cmdBuffer = iter->cmdBuffer;
+			if (cmdBuffer == nullptr)
+				continue;
+
+			cmdBuffer->refreshFenceStatus();
+			if (cmdBuffer->isSubmitted())
+				break; // No chance of any later CBs of being done either
+
+			lastFinishedSubmission = iter->submitIdx;
+		}
+
+		// Release the wait semaphores of everything up to (and including) that submission and stop tracking it
+		auto iter = mActiveBuffers.begin();
+		while (iter != mActiveBuffers.end())
+		{
+			if (iter->submitIdx > lastFinishedSubmission)
+				break;
+
+			for(UINT32 i = 0; i < iter->numSemaphores; i++)
+				iter->semaphores[i]->notifyDone(0, VulkanUseFlag::Read | VulkanUseFlag::Write);
+
+			iter = mActiveBuffers.erase(iter);
+		}
+	}
 }

+ 2 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

@@ -507,7 +507,7 @@ namespace bs
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		executeCommands(mMainCommandBuffer, syncMask);
+		submitCommandBuffer(mMainCommandBuffer, syncMask);
 		target->swapBuffers(syncMask);
 
 		// See if any command buffers finished executing
@@ -524,7 +524,7 @@ namespace bs
 		BS_EXCEPT(NotImplementedException, "Secondary command buffers not implemented");
 	}
 
-	void VulkanRenderAPI::executeCommands(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
+	void VulkanRenderAPI::submitCommandBuffer(const SPtr<CommandBuffer>& commandBuffer, UINT32 syncMask)
 	{
 		THROW_IF_NOT_CORE_THREAD;
 

+ 13 - 39
Source/BansheeVulkanRenderAPI/Source/BsVulkanSwapChain.cpp

@@ -4,6 +4,7 @@
 #include "BsVulkanTexture.h"
 #include "BsVulkanRenderAPI.h"
 #include "BsVulkanDevice.h"
+#include "BsVulkanCommandBufferManager.h"
 
 namespace bs
 {
@@ -145,14 +146,7 @@ namespace bs
 			mSurfaces[i].acquired = false;
 			mSurfaces[i].image = resManager.create<VulkanImage>(imageDesc, false);
 			mSurfaces[i].view = mSurfaces[i].image->getView();
-
-			VkSemaphoreCreateInfo semaphoreCI;
-			semaphoreCI.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
-			semaphoreCI.pNext = nullptr;
-			semaphoreCI.flags = 0;
-
-			result = vkCreateSemaphore(logicalDevice, &semaphoreCI, gVulkanAllocator, &mSurfaces[i].sync);
-			assert(result == VK_SUCCESS);
+			mSurfaces[i].sync = resManager.create<VulkanSemaphore>();
 		}
 
 		bs_stack_free(images);
@@ -219,41 +213,12 @@ namespace bs
 		}
 	}
 
-	void VulkanSwapChain::present(VkQueue queue, VkSemaphore* semaphores, UINT32 numSemaphores)
-	{
-		assert(mSurfaces[mCurrentBackBufferIdx].acquired && "Attempting to present an unacquired back buffer.");
-		mSurfaces[mCurrentBackBufferIdx].acquired = false;
-
-		VkPresentInfoKHR presentInfo;
-		presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-		presentInfo.pNext = nullptr;
-		presentInfo.swapchainCount = 1;
-		presentInfo.pSwapchains = &mSwapChain;
-		presentInfo.pImageIndices = &mCurrentBackBufferIdx;
-		presentInfo.pResults = nullptr;
-
-		// Wait before presenting, if required
-		if (numSemaphores > 0)
-		{
-			presentInfo.pWaitSemaphores = semaphores;
-			presentInfo.waitSemaphoreCount = numSemaphores;
-		}
-		else
-		{
-			presentInfo.pWaitSemaphores = nullptr;
-			presentInfo.waitSemaphoreCount = 0;
-		}
-
-		VkResult result = vkQueuePresentKHR(queue, &presentInfo);
-		assert(result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR);
-	}
-
 	void VulkanSwapChain::acquireBackBuffer()
 	{
 		uint32_t imageIndex;
 
 		VkResult result = vkAcquireNextImageKHR(mDevice->getLogical(), mSwapChain, UINT64_MAX,
-			mSurfaces[mCurrentSemaphoreIdx].sync, VK_NULL_HANDLE, &imageIndex);
+			mSurfaces[mCurrentSemaphoreIdx].sync->getHandle(), VK_NULL_HANDLE, &imageIndex);
 		assert(result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR);
 
 		// In case surfaces aren't being distributed in round-robin fashion the image and semaphore indices might not match,
@@ -269,6 +234,14 @@ namespace bs
 		mCurrentBackBufferIdx = imageIndex;
 	}
 
+	UINT32 VulkanSwapChain::prepareForPresent()
+	{
+		// Presenting hands the image back, so the current back buffer must be acquired now and is not afterwards
+		SwapChainSurface& backBuffer = mSurfaces[mCurrentBackBufferIdx];
+		assert(backBuffer.acquired && "Attempting to present an unacquired back buffer.");
+		backBuffer.acquired = false;
+
+		return mCurrentBackBufferIdx;
+	}
+
 	void VulkanSwapChain::clear(VkSwapchainKHR swapChain)
 	{
 		VkDevice logicalDevice = mDevice->getLogical();
@@ -282,7 +255,8 @@ namespace bs
 				surface.image->destroy();
 				surface.image = nullptr;
 
-				vkDestroySemaphore(logicalDevice, surface.sync, gVulkanAllocator);
+				surface.sync->destroy();
+				surface.sync = nullptr;
 			}
 
 			vkDestroySwapchainKHR(logicalDevice, swapChain, gVulkanAllocator);

+ 2 - 2
Source/BansheeVulkanRenderAPI/Source/Win32/BsWin32RenderWindow.cpp

@@ -311,7 +311,7 @@ namespace bs
 		UINT32 numSemaphores;
 		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
 
-		mSwapChain->present(queue->getHandle(), mSemaphoresTemp, numSemaphores);
+		queue->present(mSwapChain.get(), mSemaphoresTemp, numSemaphores);
 		mRequiresNewBackBuffer = true;
 	}
 
@@ -527,7 +527,7 @@ namespace bs
 
 		if(name == "PS")
 		{
-			VkSemaphore* presentSemaphore = (VkSemaphore*)data;
+			VulkanSemaphore** presentSemaphore = (VulkanSemaphore**)data;
 			*presentSemaphore = mSwapChain->getBackBuffer().sync;
 			return;
 		}