Преглед изворног кода

Vulkan: Inter queue semaphores are now properly signaled on submit
Vulkan: Present semaphore is now properly waited on before rendering to the back buffer

BearishSun пре 9 година
родитељ
комит
df25d147ad

+ 6 - 2
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -159,8 +159,11 @@ namespace bs { namespace ct
 		 * Allocates a new set of semaphores that may be used for synchronizing execution between different command buffers.
 		 * Allocates a new set of semaphores that may be used for synchronizing execution between different command buffers.
 		 * Releases the previously allocated semaphores, if they exist. Use getIntraQueueSemaphore() & 
 		 * Releases the previously allocated semaphores, if they exist. Use getIntraQueueSemaphore() & 
 		 * requestInterQueueSemaphore() to retrieve latest allocated semaphores.
 		 * requestInterQueueSemaphore() to retrieve latest allocated semaphores.
+		 * 
+		 * @param[out]	semaphores	Output array to place all allocated semaphores in. The array must be of size
+		 *							(BS_MAX_VULKAN_CB_DEPENDENCIES + 1).
 		 */
 		 */
-		void allocateSemaphores();
+		void allocateSemaphores(VkSemaphore* semaphores);
 
 
 		/** Returns true if the command buffer is currently being processed by the device. */
 		/** Returns true if the command buffer is currently being processed by the device. */
 		bool isSubmitted() const { return mState == State::Submitted; }
 		bool isSubmitted() const { return mState == State::Submitted; }
@@ -415,7 +418,7 @@ namespace bs { namespace ct
 		ClearMask mClearMask;
 		ClearMask mClearMask;
 		Rect2I mClearArea;
 		Rect2I mClearArea;
 
 
-		VulkanSemaphore* mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES];
+		Vector<VulkanSemaphore*> mSemaphoresTemp;
 		VkBuffer mVertexBuffersTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkBuffer mVertexBuffersTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkDeviceSize mVertexBufferOffsetsTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkDeviceSize mVertexBufferOffsetsTemp[BS_MAX_BOUND_VERTEX_BUFFERS];
 		VkDescriptorSet* mDescriptorSetsTemp;
 		VkDescriptorSet* mDescriptorSetsTemp;
@@ -424,6 +427,7 @@ namespace bs { namespace ct
 		UnorderedMap<VulkanImage*, UINT32> mQueuedLayoutTransitions;
 		UnorderedMap<VulkanImage*, UINT32> mQueuedLayoutTransitions;
 		Vector<VulkanEvent*> mQueuedEvents;
 		Vector<VulkanEvent*> mQueuedEvents;
 		Vector<VulkanQuery*> mQueuedQueryResets;
 		Vector<VulkanQuery*> mQueuedQueryResets;
+		UnorderedSet<VulkanSwapChain*> mSwapChains;
 	};
 	};
 
 
 	/** CommandBuffer implementation for Vulkan. */
 	/** CommandBuffer implementation for Vulkan. */

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -92,7 +92,7 @@ namespace bs { namespace ct
 		 * @param[in]	syncMask	Mask that has a bit enabled for each command buffer to retrieve the semaphore for.
 		 * @param[in]	syncMask	Mask that has a bit enabled for each command buffer to retrieve the semaphore for.
 		 *							If the command buffer is not currently executing, semaphore won't be returned.
 		 *							If the command buffer is not currently executing, semaphore won't be returned.
 		 * @param[out]	semaphores	List containing all the required semaphores. Semaphores are tightly packed at the
 		 * @param[out]	semaphores	List containing all the required semaphores. Semaphores are tightly packed at the
-		 *							beginning of the array. Must be able to hold at least BS_MAX_COMMAND_BUFFERS entries.
+		 *							beginning of the array. Must be able to hold at least BS_MAX_UNIQUE_QUEUES entries.
 		 * @param[out]	count		Number of semaphores provided in the @p semaphores array.
 		 * @param[out]	count		Number of semaphores provided in the @p semaphores array.
 		 */
 		 */
 		void getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VulkanSemaphore** semaphores, UINT32& count);
 		void getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VulkanSemaphore** semaphores, UINT32& count);

+ 3 - 3
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -77,8 +77,8 @@ namespace bs { namespace ct
 		 * Generates a submit-info structure that can be used for submitting the command buffer to the queue, but doesn't
 		 * Generates a submit-info structure that can be used for submitting the command buffer to the queue, but doesn't
 		 * perform the actual submit.
 		 * perform the actual submit.
 		 */
 		 */
-		void getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphore, VkSemaphore* waitSemaphores, 
-			UINT32 semaphoresCount, VkSubmitInfo& submitInfo);
+		void getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphores, UINT32 numSignalSemaphores,
+						   VkSemaphore* waitSemaphores, UINT32 numWaitSemaphores, VkSubmitInfo& submitInfo);
 
 
 		/** 
 		/** 
 		 * Prepares a list of semaphores that can be provided to submit or present calls.
 		 * Prepares a list of semaphores that can be provided to submit or present calls.
@@ -120,7 +120,7 @@ namespace bs { namespace ct
 		bool mLastCBSemaphoreUsed;
 		bool mLastCBSemaphoreUsed;
 		UINT32 mNextSubmitIdx;
 		UINT32 mNextSubmitIdx;
 
 
-		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 2]; // +1 for present semaphore, +1 for self-semaphore
+		Vector<VkSemaphore> mSemaphoresTemp;
 	};
 	};
 
 
 	/** @} */
 	/** @} */

+ 4 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanSwapChain.h

@@ -18,6 +18,7 @@ namespace bs { namespace ct
 		VkImageView view;
 		VkImageView view;
 		VulkanSemaphore* sync;
 		VulkanSemaphore* sync;
 		bool acquired;
 		bool acquired;
+		bool needsWait;
 
 
 		VulkanFramebuffer* framebuffer;
 		VulkanFramebuffer* framebuffer;
 		VULKAN_FRAMEBUFFER_DESC framebufferDesc;
 		VULKAN_FRAMEBUFFER_DESC framebufferDesc;
@@ -64,6 +65,9 @@ namespace bs { namespace ct
 		 */
 		 */
 		bool prepareForPresent(UINT32& backBufferIdx);
 		bool prepareForPresent(UINT32& backBufferIdx);
 
 
+		/** Notifies the swap chain that a wait on the semaphore guarding back buffer availability has been issued. */
+		void notifyBackBufferWaitIssued();
+
 		/** Returns information describing the current back buffer. */
 		/** Returns information describing the current back buffer. */
 		const SwapChainSurface& getBackBuffer() { return mSurfaces[mCurrentBackBufferIdx]; }
 		const SwapChainSurface& getBackBuffer() { return mSurfaces[mCurrentBackBufferIdx]; }
 
 

+ 61 - 26
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -14,6 +14,7 @@
 #include "BsVulkanVertexInputManager.h"
 #include "BsVulkanVertexInputManager.h"
 #include "BsVulkanEventQuery.h"
 #include "BsVulkanEventQuery.h"
 #include "BsVulkanQueryManager.h"
 #include "BsVulkanQueryManager.h"
+#include "BsVulkanSwapChain.h"
 
 
 #if BS_PLATFORM == BS_PLATFORM_WIN32
 #if BS_PLATFORM == BS_PLATFORM_WIN32
 #include "Win32/BsWin32RenderWindow.h"
 #include "Win32/BsWin32RenderWindow.h"
@@ -134,7 +135,8 @@ namespace bs { namespace ct
 		, mViewport(0.0f, 0.0f, 1.0f, 1.0f), mScissor(0, 0, 0, 0), mStencilRef(0), mDrawOp(DOT_TRIANGLE_LIST)
 		, mViewport(0.0f, 0.0f, 1.0f, 1.0f), mScissor(0, 0, 0, 0), mStencilRef(0), mDrawOp(DOT_TRIANGLE_LIST)
 		, mNumBoundDescriptorSets(0), mGfxPipelineRequiresBind(true), mCmpPipelineRequiresBind(true)
 		, mNumBoundDescriptorSets(0), mGfxPipelineRequiresBind(true), mCmpPipelineRequiresBind(true)
 		, mViewportRequiresBind(true), mStencilRefRequiresBind(true), mScissorRequiresBind(true), mBoundParamsDirty(false)
 		, mViewportRequiresBind(true), mStencilRefRequiresBind(true), mScissorRequiresBind(true), mBoundParamsDirty(false)
-		, mClearValues(), mClearMask(), mVertexBuffersTemp(), mVertexBufferOffsetsTemp()
+		, mClearValues(), mClearMask(), mSemaphoresTemp(BS_MAX_UNIQUE_QUEUES), mVertexBuffersTemp()
+		, mVertexBufferOffsetsTemp()
 	{
 	{
 		UINT32 maxBoundDescriptorSets = device.getDeviceProperties().limits.maxBoundDescriptorSets;
 		UINT32 maxBoundDescriptorSets = device.getDeviceProperties().limits.maxBoundDescriptorSets;
 		mDescriptorSetsTemp = (VkDescriptorSet*)bs_alloc(sizeof(VkDescriptorSet) * maxBoundDescriptorSets);
 		mDescriptorSetsTemp = (VkDescriptorSet*)bs_alloc(sizeof(VkDescriptorSet) * maxBoundDescriptorSets);
@@ -366,19 +368,21 @@ namespace bs { namespace ct
 		mState = State::Recording;
 		mState = State::Recording;
 	}
 	}
 
 
-	void VulkanCmdBuffer::allocateSemaphores()
+	void VulkanCmdBuffer::allocateSemaphores(VkSemaphore* semaphores)
 	{
 	{
 		if (mIntraQueueSemaphore != nullptr)
 		if (mIntraQueueSemaphore != nullptr)
 			mIntraQueueSemaphore->destroy();
 			mIntraQueueSemaphore->destroy();
 
 
 		mIntraQueueSemaphore = mDevice.getResourceManager().create<VulkanSemaphore>();
 		mIntraQueueSemaphore = mDevice.getResourceManager().create<VulkanSemaphore>();
-		
+		semaphores[0] = mIntraQueueSemaphore->getHandle();
+
 		for (UINT32 i = 0; i < BS_MAX_VULKAN_CB_DEPENDENCIES; i++)
 		for (UINT32 i = 0; i < BS_MAX_VULKAN_CB_DEPENDENCIES; i++)
 		{
 		{
 			if (mInterQueueSemaphores[i] != nullptr)
 			if (mInterQueueSemaphores[i] != nullptr)
 				mInterQueueSemaphores[i]->destroy();
 				mInterQueueSemaphores[i]->destroy();
 
 
 			mInterQueueSemaphores[i] = mDevice.getResourceManager().create<VulkanSemaphore>();
 			mInterQueueSemaphores[i] = mDevice.getResourceManager().create<VulkanSemaphore>();
+			semaphores[i + 1] = mInterQueueSemaphores[i]->getHandle();
 		}
 		}
 
 
 		mNumUsedInterQueueSemaphores = 0;
 		mNumUsedInterQueueSemaphores = 0;
@@ -453,11 +457,29 @@ namespace bs { namespace ct
 			bool queueMismatch = resource->isExclusive() && currentQueueFamily != -1 && currentQueueFamily != mQueueFamily;
 			bool queueMismatch = resource->isExclusive() && currentQueueFamily != -1 && currentQueueFamily != mQueueFamily;
 
 
 			VkImageLayout currentLayout = resource->getLayout();
 			VkImageLayout currentLayout = resource->getLayout();
-			VkImageLayout initialLayout = imageInfo.initialLayout;
-			if (queueMismatch || (currentLayout != initialLayout && initialLayout != VK_IMAGE_LAYOUT_UNDEFINED))
+			if (queueMismatch)
 			{
 			{
 				Vector<VkImageMemoryBarrier>& barriers = mTransitionInfoTemp[currentQueueFamily].imageBarriers;
 				Vector<VkImageMemoryBarrier>& barriers = mTransitionInfoTemp[currentQueueFamily].imageBarriers;
 
 
+				barriers.push_back(VkImageMemoryBarrier());
+				VkImageMemoryBarrier& barrier = barriers.back();
+				barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+				barrier.pNext = nullptr;
+				barrier.srcAccessMask = resource->getAccessFlags(currentLayout);
+				barrier.dstAccessMask = resource->getAccessFlags(currentLayout);
+				barrier.oldLayout = currentLayout;
+				barrier.newLayout = currentLayout;
+				barrier.image = resource->getHandle();
+				barrier.subresourceRange = imageInfo.range;
+				barrier.srcQueueFamilyIndex = currentQueueFamily;
+				barrier.dstQueueFamilyIndex = mQueueFamily;
+			}
+
+			VkImageLayout initialLayout = imageInfo.initialLayout;
+			if(currentLayout != initialLayout && initialLayout != VK_IMAGE_LAYOUT_UNDEFINED)
+			{
+				Vector<VkImageMemoryBarrier>& barriers = mTransitionInfoTemp[mQueueFamily].imageBarriers;
+
 				if (initialLayout == VK_IMAGE_LAYOUT_UNDEFINED)
 				if (initialLayout == VK_IMAGE_LAYOUT_UNDEFINED)
 					initialLayout = currentLayout;
 					initialLayout = currentLayout;
 
 
@@ -471,20 +493,8 @@ namespace bs { namespace ct
 				barrier.newLayout = initialLayout;
 				barrier.newLayout = initialLayout;
 				barrier.image = resource->getHandle();
 				barrier.image = resource->getHandle();
 				barrier.subresourceRange = imageInfo.range;
 				barrier.subresourceRange = imageInfo.range;
-				barrier.srcQueueFamilyIndex = currentQueueFamily;
-				barrier.dstQueueFamilyIndex = mQueueFamily;
-
-				// Check if queue transition needed
-				if (queueMismatch)
-				{
-					barrier.srcQueueFamilyIndex = currentQueueFamily;
-					barrier.dstQueueFamilyIndex = mQueueFamily;
-				}
-				else
-				{
-					barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-					barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-				}
+				barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+				barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
 			}
 			}
 
 
 			resource->setLayout(imageInfo.finalLayout);
 			resource->setLayout(imageInfo.finalLayout);
@@ -551,18 +561,35 @@ namespace bs { namespace ct
 			syncMask |= CommandSyncMask::getGlobalQueueMask(otherQueueType, otherQueueIdx);
 			syncMask |= CommandSyncMask::getGlobalQueueMask(otherQueueType, otherQueueIdx);
 
 
 			cmdBuffer->end();
 			cmdBuffer->end();
-			otherQueue->submit(cmdBuffer, nullptr, 0);
 
 
-			// If there are any layout transitions, reset them as we don't need them for the second pipeline barrier
-			for (auto& barrierEntry : barriers.imageBarriers)
-				barrierEntry.oldLayout = barrierEntry.newLayout;
+			// Note: If I switch back to doing layout transitions here, I need to wait on the present semaphore
+			otherQueue->submit(cmdBuffer, nullptr, 0);
 		}
 		}
 
 
 		UINT32 deviceIdx = device.getIndex();
 		UINT32 deviceIdx = device.getIndex();
 		VulkanCommandBufferManager& cbm = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
 		VulkanCommandBufferManager& cbm = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
 
 
 		UINT32 numSemaphores;
 		UINT32 numSemaphores;
-		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
+		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp.data(), numSemaphores);
+
+		// Wait on present (i.e. until the back buffer becomes available) for any swap chains
+		for(auto& entry : mSwapChains)
+		{
+			const SwapChainSurface& surface = entry->getBackBuffer();
+			if (surface.needsWait)
+			{
+				VulkanSemaphore* semaphore = entry->getBackBuffer().sync;
+
+				if (numSemaphores >= (UINT32)mSemaphoresTemp.size())
+					mSemaphoresTemp.push_back(semaphore);
+				else
+					mSemaphoresTemp[numSemaphores] = semaphore;
+
+				numSemaphores++;
+
+				entry->notifyBackBufferWaitIssued();
+			}
+		}
 
 
 		// Issue second part of transition pipeline barriers (on this queue)
 		// Issue second part of transition pipeline barriers (on this queue)
 		for (auto& entry : mTransitionInfoTemp)
 		for (auto& entry : mTransitionInfoTemp)
@@ -586,12 +613,12 @@ namespace bs { namespace ct
 								 numImgBarriers, barriers.imageBarriers.data());
 								 numImgBarriers, barriers.imageBarriers.data());
 
 
 			cmdBuffer->end();
 			cmdBuffer->end();
-			queue->queueSubmit(cmdBuffer, mSemaphoresTemp, numSemaphores);
+			queue->queueSubmit(cmdBuffer, mSemaphoresTemp.data(), numSemaphores);
 
 
 			numSemaphores = 0; // Semaphores are only needed the first time, since we're adding the buffers on the same queue
 			numSemaphores = 0; // Semaphores are only needed the first time, since we're adding the buffers on the same queue
 		}
 		}
 
 
-		queue->queueSubmit(this, mSemaphoresTemp, numSemaphores);
+		queue->queueSubmit(this, mSemaphoresTemp.data(), numSemaphores);
 		queue->submitQueued();
 		queue->submitQueued();
 
 
 		mGlobalQueueIdx = CommandSyncMask::getGlobalQueueIdx(queue->getType(), queueIdx);
 		mGlobalQueueIdx = CommandSyncMask::getGlobalQueueIdx(queue->getType(), queueIdx);
@@ -645,6 +672,7 @@ namespace bs { namespace ct
 		mQueuedLayoutTransitions.clear();
 		mQueuedLayoutTransitions.clear();
 		mBoundParams = nullptr;
 		mBoundParams = nullptr;
 		mBoundParams = false;
 		mBoundParams = false;
+		mSwapChains.clear();
 	}
 	}
 
 
 	bool VulkanCmdBuffer::checkFenceStatus() const
 	bool VulkanCmdBuffer::checkFenceStatus() const
@@ -721,12 +749,19 @@ namespace bs { namespace ct
 			{
 			{
 				Win32RenderWindow* window = static_cast<Win32RenderWindow*>(rt.get());
 				Win32RenderWindow* window = static_cast<Win32RenderWindow*>(rt.get());
 				window->acquireBackBuffer();
 				window->acquireBackBuffer();
+
+				VulkanSwapChain* swapChain;
+				rt->getCustomAttribute("SC", &swapChain);
+
+				mSwapChains.insert(swapChain);
 			}
 			}
 
 
 			rt->getCustomAttribute("FB", &newFB);
 			rt->getCustomAttribute("FB", &newFB);
 		}
 		}
 		else
 		else
+		{
 			newFB = nullptr;
 			newFB = nullptr;
+		}
 
 
 		if (mFramebuffer == newFB && mRenderTargetDepthReadOnly == readOnlyDepthStencil && mRenderTargetLoadMask == loadMask)
 		if (mFramebuffer == newFB && mRenderTargetDepthReadOnly == readOnlyDepthStencil && mRenderTargetLoadMask == loadMask)
 			return;
 			return;

+ 3 - 0
Source/BansheeVulkanRenderAPI/Source/BsVulkanGpuParams.cpp

@@ -69,6 +69,9 @@ namespace bs { namespace ct
 		UINT32 numSets = vkParamInfo.getNumSets();
 		UINT32 numSets = vkParamInfo.getNumSets();
 		UINT32 numBindings = vkParamInfo.getNumElements();
 		UINT32 numBindings = vkParamInfo.getNumElements();
 
 
+		if (numSets == 0)
+			return;
+
 		// Note: I'm assuming a single WriteInfo per binding, but if array sizes larger than 1 are eventually supported
 		// Note: I'm assuming a single WriteInfo per binding, but if array sizes larger than 1 are eventually supported
 		// I'll need to adjust the code.
 		// I'll need to adjust the code.
 		mAlloc.reserve<bool>(numSets)
 		mAlloc.reserve<bool>(numSets)

+ 31 - 26
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -24,16 +24,17 @@ namespace bs { namespace ct
 
 
 	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
 	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
 	{
 	{
-		cmdBuffer->allocateSemaphores();
-		VulkanSemaphore* signalSemaphore = cmdBuffer->getIntraQueueSemaphore();
+		VkSemaphore signalSemaphores[BS_MAX_VULKAN_CB_DEPENDENCIES + 1];
+		cmdBuffer->allocateSemaphores(signalSemaphores);
 
 
 		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
 		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
-		VkSemaphore vkSemaphore = signalSemaphore->getHandle();
 
 
-		prepareSemaphores(waitSemaphores, mSemaphoresTemp, semaphoresCount);
+		mSemaphoresTemp.resize(semaphoresCount + 1); // +1 for self semaphore
+		prepareSemaphores(waitSemaphores, mSemaphoresTemp.data(), semaphoresCount);
 		
 		
 		VkSubmitInfo submitInfo;
 		VkSubmitInfo submitInfo;
-		getSubmitInfo(&vkCmdBuffer, &vkSemaphore, mSemaphoresTemp, semaphoresCount, submitInfo);
+		getSubmitInfo(&vkCmdBuffer, signalSemaphores, BS_MAX_VULKAN_CB_DEPENDENCIES + 1, 
+					  mSemaphoresTemp.data(), semaphoresCount, submitInfo);
 
 
 		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
 		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
 		assert(result == VK_SUCCESS);
 		assert(result == VK_SUCCESS);
@@ -61,9 +62,10 @@ namespace bs { namespace ct
 			return;
 			return;
 
 
 		UINT32 totalNumWaitSemaphores = (UINT32)mQueuedSemaphores.size() + numCBs;
 		UINT32 totalNumWaitSemaphores = (UINT32)mQueuedSemaphores.size() + numCBs;
+		UINT32 signalSemaphoresPerCB = (BS_MAX_VULKAN_CB_DEPENDENCIES + 1);
 
 
-		UINT8* data = (UINT8*)bs_stack_alloc((sizeof(VkSubmitInfo) + sizeof(VkCommandBuffer) + sizeof(VkSemaphore)) * 
-			numCBs + sizeof(VkSemaphore) * totalNumWaitSemaphores);
+		UINT8* data = (UINT8*)bs_stack_alloc((sizeof(VkSubmitInfo) + sizeof(VkCommandBuffer)) * 
+			numCBs + sizeof(VkSemaphore) * signalSemaphoresPerCB * numCBs + sizeof(VkSemaphore) * totalNumWaitSemaphores);
 		UINT8* dataPtr = data;
 		UINT8* dataPtr = data;
 
 
 		VkSubmitInfo* submitInfos = (VkSubmitInfo*)dataPtr;
 		VkSubmitInfo* submitInfos = (VkSubmitInfo*)dataPtr;
@@ -73,38 +75,40 @@ namespace bs { namespace ct
 		dataPtr += sizeof(VkCommandBuffer) * numCBs;
 		dataPtr += sizeof(VkCommandBuffer) * numCBs;
 
 
 		VkSemaphore* signalSemaphores = (VkSemaphore*)dataPtr;
 		VkSemaphore* signalSemaphores = (VkSemaphore*)dataPtr;
-		dataPtr += sizeof(VkSemaphore) * numCBs;
+		dataPtr += sizeof(VkSemaphore) * signalSemaphoresPerCB * numCBs;
 
 
 		VkSemaphore* waitSemaphores = (VkSemaphore*)dataPtr;
 		VkSemaphore* waitSemaphores = (VkSemaphore*)dataPtr;
 		dataPtr += sizeof(VkSemaphore) * totalNumWaitSemaphores;
 		dataPtr += sizeof(VkSemaphore) * totalNumWaitSemaphores;
 
 
-		UINT32 semaphoreIdx = 0;
+		UINT32 readSemaphoreIdx = 0;
+		UINT32 writeSemaphoreIdx = 0;
+		UINT32 signalSemaphoreIdx = 0;
 		for(UINT32 i = 0; i < numCBs; i++)
 		for(UINT32 i = 0; i < numCBs; i++)
 		{
 		{
 			const SubmitInfo& entry = mQueuedBuffers[i];
 			const SubmitInfo& entry = mQueuedBuffers[i];
 
 
-			entry.cmdBuffer->allocateSemaphores();
-			VulkanSemaphore* signalSemaphore = entry.cmdBuffer->getIntraQueueSemaphore();
-
 			commandBuffers[i] = entry.cmdBuffer->getHandle();
 			commandBuffers[i] = entry.cmdBuffer->getHandle();
-			signalSemaphores[i] = signalSemaphore->getHandle();
+			entry.cmdBuffer->allocateSemaphores(&signalSemaphores[signalSemaphoreIdx]);
 
 
 			UINT32 semaphoresCount = entry.numSemaphores;
 			UINT32 semaphoresCount = entry.numSemaphores;
-			prepareSemaphores(mQueuedSemaphores.data() + semaphoreIdx, &waitSemaphores[semaphoreIdx], semaphoresCount);
+			prepareSemaphores(mQueuedSemaphores.data() + readSemaphoreIdx, &waitSemaphores[writeSemaphoreIdx], semaphoresCount);
 
 
-			getSubmitInfo(&commandBuffers[i], &signalSemaphores[i], &waitSemaphores[semaphoreIdx], semaphoresCount, 
-				submitInfos[i]);
+			getSubmitInfo(&commandBuffers[i], &signalSemaphores[signalSemaphoreIdx], signalSemaphoresPerCB, 
+						  &waitSemaphores[writeSemaphoreIdx], semaphoresCount, submitInfos[i]);
 
 
 			entry.cmdBuffer->setIsSubmitted();
 			entry.cmdBuffer->setIsSubmitted();
 			mLastCommandBuffer = entry.cmdBuffer; // Needs to be set because getSubmitInfo depends on it
 			mLastCommandBuffer = entry.cmdBuffer; // Needs to be set because getSubmitInfo depends on it
 			mLastCBSemaphoreUsed = false;
 			mLastCBSemaphoreUsed = false;
 
 
 			mActiveBuffers.push(entry.cmdBuffer);
 			mActiveBuffers.push(entry.cmdBuffer);
-			semaphoreIdx += semaphoresCount;
+
+			readSemaphoreIdx += entry.numSemaphores;
+			writeSemaphoreIdx += semaphoresCount;
+			signalSemaphoreIdx += signalSemaphoresPerCB;
 		}
 		}
 
 
 		VulkanCmdBuffer* lastCB = mQueuedBuffers[numCBs - 1].cmdBuffer;
 		VulkanCmdBuffer* lastCB = mQueuedBuffers[numCBs - 1].cmdBuffer;
-		UINT32 totalNumSemaphores = semaphoreIdx;
+		UINT32 totalNumSemaphores = writeSemaphoreIdx;
 		mActiveSubmissions.push_back(SubmitInfo(lastCB, mNextSubmitIdx++, totalNumSemaphores, numCBs));
 		mActiveSubmissions.push_back(SubmitInfo(lastCB, mNextSubmitIdx++, totalNumSemaphores, numCBs));
 
 
 		VkResult result = vkQueueSubmit(mQueue, numCBs, submitInfos, mLastCommandBuffer->getFence());
 		VkResult result = vkQueueSubmit(mQueue, numCBs, submitInfos, mLastCommandBuffer->getFence());
@@ -116,18 +120,18 @@ namespace bs { namespace ct
 		bs_stack_free(data);
 		bs_stack_free(data);
 	}
 	}
 
 
-	void VulkanQueue::getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphore, VkSemaphore* waitSemaphores,
-									UINT32 semaphoresCount, VkSubmitInfo& submitInfo)
+	void VulkanQueue::getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphores, UINT32 numSignalSemaphores,
+									VkSemaphore* waitSemaphores, UINT32 numWaitSemaphores, VkSubmitInfo& submitInfo)
 	{
 	{
 		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
 		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
 		submitInfo.pNext = nullptr;
 		submitInfo.pNext = nullptr;
 		submitInfo.commandBufferCount = 1;
 		submitInfo.commandBufferCount = 1;
 		submitInfo.pCommandBuffers = cmdBuffer;
 		submitInfo.pCommandBuffers = cmdBuffer;
-		submitInfo.signalSemaphoreCount = 1;
-		submitInfo.pSignalSemaphores = signalSemaphore;
-		submitInfo.waitSemaphoreCount = semaphoresCount;
+		submitInfo.signalSemaphoreCount = numSignalSemaphores;
+		submitInfo.pSignalSemaphores = signalSemaphores;
+		submitInfo.waitSemaphoreCount = numWaitSemaphores;
 
 
-		if (semaphoresCount > 0)
+		if (numWaitSemaphores > 0)
 		{
 		{
 			submitInfo.pWaitSemaphores = waitSemaphores;
 			submitInfo.pWaitSemaphores = waitSemaphores;
 			submitInfo.pWaitDstStageMask = mSubmitDstWaitMask;
 			submitInfo.pWaitDstStageMask = mSubmitDstWaitMask;
@@ -145,7 +149,8 @@ namespace bs { namespace ct
 		if (!swapChain->prepareForPresent(backBufferIdx))
 		if (!swapChain->prepareForPresent(backBufferIdx))
 			return; // Nothing to present (back buffer wasn't even acquired)
 			return; // Nothing to present (back buffer wasn't even acquired)
 
 
-		prepareSemaphores(waitSemaphores, mSemaphoresTemp, semaphoresCount);
+		mSemaphoresTemp.resize(semaphoresCount + 1); // +1 for self semaphore
+		prepareSemaphores(waitSemaphores, mSemaphoresTemp.data(), semaphoresCount);
 
 
 		VkSwapchainKHR vkSwapChain = swapChain->getHandle();
 		VkSwapchainKHR vkSwapChain = swapChain->getHandle();
 		VkPresentInfoKHR presentInfo;
 		VkPresentInfoKHR presentInfo;
@@ -159,7 +164,7 @@ namespace bs { namespace ct
 		// Wait before presenting, if required
 		// Wait before presenting, if required
 		if (semaphoresCount > 0)
 		if (semaphoresCount > 0)
 		{
 		{
-			presentInfo.pWaitSemaphores = mSemaphoresTemp;
+			presentInfo.pWaitSemaphores = mSemaphoresTemp.data();
 			presentInfo.waitSemaphoreCount = semaphoresCount;
 			presentInfo.waitSemaphoreCount = semaphoresCount;
 		}
 		}
 		else
 		else

+ 10 - 0
Source/BansheeVulkanRenderAPI/Source/BsVulkanSwapChain.cpp

@@ -143,6 +143,7 @@ namespace bs { namespace ct
 			imageDesc.image = images[i];
 			imageDesc.image = images[i];
 
 
 			mSurfaces[i].acquired = false;
 			mSurfaces[i].acquired = false;
+			mSurfaces[i].needsWait = false;
 			mSurfaces[i].image = resManager.create<VulkanImage>(imageDesc, false);
 			mSurfaces[i].image = resManager.create<VulkanImage>(imageDesc, false);
 			mSurfaces[i].view = mSurfaces[i].image->getView(true);
 			mSurfaces[i].view = mSurfaces[i].image->getView(true);
 			mSurfaces[i].sync = resManager.create<VulkanSemaphore>();
 			mSurfaces[i].sync = resManager.create<VulkanSemaphore>();
@@ -228,6 +229,7 @@ namespace bs { namespace ct
 
 
 		assert(!mSurfaces[imageIndex].acquired && "Same swap chain surface being acquired twice in a row without present().");
 		assert(!mSurfaces[imageIndex].acquired && "Same swap chain surface being acquired twice in a row without present().");
 		mSurfaces[imageIndex].acquired = true;
 		mSurfaces[imageIndex].acquired = true;
+		mSurfaces[imageIndex].needsWait = true;
 
 
 		mCurrentBackBufferIdx = imageIndex;
 		mCurrentBackBufferIdx = imageIndex;
 	}
 	}
@@ -244,6 +246,14 @@ namespace bs { namespace ct
 		return true;
 		return true;
 	}
 	}
 
 
+	void VulkanSwapChain::notifyBackBufferWaitIssued() // Clears the pending-wait flag on the current back buffer surface
+	{
+		if (!mSurfaces[mCurrentBackBufferIdx].acquired) // Current back buffer isn't acquired, so there is nothing to update
+			return;
+
+		mSurfaces[mCurrentBackBufferIdx].needsWait = false; // Callers check needsWait before adding the surface's sync semaphore to their wait list
+	}
+
 	void VulkanSwapChain::clear(VkSwapchainKHR swapChain)
 	void VulkanSwapChain::clear(VkSwapchainKHR swapChain)
 	{
 	{
 		VkDevice logicalDevice = mDevice->getLogical();
 		VkDevice logicalDevice = mDevice->getLogical();

+ 16 - 3
Source/BansheeVulkanRenderAPI/Source/Win32/BsWin32RenderWindow.cpp

@@ -367,9 +367,15 @@ namespace bs
 		UINT32 numSemaphores;
 		UINT32 numSemaphores;
 		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
 		cbm.getSyncSemaphores(deviceIdx, syncMask, mSemaphoresTemp, numSemaphores);
 
 
-		// Wait on present (i.e. until the back buffer becomes available), if we're rendering to a window
-		mSemaphoresTemp[numSemaphores] = mSwapChain->getBackBuffer().sync;
-		numSemaphores++;
+		// Wait on present (i.e. until the back buffer becomes available), if we haven't already done so
+		const SwapChainSurface& surface = mSwapChain->getBackBuffer();
+		if(surface.needsWait)
+		{
+			mSemaphoresTemp[numSemaphores] = mSwapChain->getBackBuffer().sync;
+			numSemaphores++;
+
+			mSwapChain->notifyBackBufferWaitIssued();
+		}
 
 
 		queue->present(mSwapChain.get(), mSemaphoresTemp, numSemaphores);
 		queue->present(mSwapChain.get(), mSemaphoresTemp, numSemaphores);
 		mRequiresNewBackBuffer = true;
 		mRequiresNewBackBuffer = true;
@@ -589,6 +595,13 @@ namespace bs
 			return;
 			return;
 		}
 		}
 
 
+		if(name == "SC")
+		{
+			VulkanSwapChain** sc = (VulkanSwapChain**)data;
+			*sc = mSwapChain.get();
+			return;
+		}
+
 		if(name == "WINDOW")
 		if(name == "WINDOW")
 		{
 		{
 			UINT64 *pWnd = (UINT64*)data;
 			UINT64 *pWnd = (UINT64*)data;