浏览代码

Vulkan: Handle query resets when issued within render-pass
Vulkan: Issue semaphores to serve as intra-queue dependencies between different command buffers

BearishSun 9 年之前
父节点
当前提交
442b8af92b

+ 7 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -259,6 +259,12 @@ namespace bs
 		 */
 		void setEvent(VulkanEvent* event);
 
+		/** 
+		 * Registers a command that resets the query. The command will be delayed until the next submit() if a render
+		 * pass is currently in progress, but is guaranteed to execute before this command buffer is submitted.
+		 */
+		void resetQuery(VulkanQuery* query);
+
 	private:
 		friend class VulkanCmdBufferPool;
 		friend class VulkanCommandBuffer;
@@ -368,6 +374,7 @@ namespace bs
 		Vector<VkImageMemoryBarrier> mLayoutTransitionBarriersTemp;
 		UnorderedMap<VulkanImage*, UINT32> mQueuedLayoutTransitions;
 		Vector<VulkanEvent*> mQueuedEvents;
+		Vector<VulkanQuery*> mQueuedQueryResets;
 	};
 
 	/** CommandBuffer implementation for Vulkan. */

+ 5 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueryManager.h

@@ -117,12 +117,16 @@ namespace bs
 		 */
 		bool getResult(UINT64& result) const;
 
+		/** Queues a command for the query reset, on the provided command buffer. */
+		void reset(VkCommandBuffer cmdBuf);
+
 	private:
 		friend class VulkanQueryPool;
 
 		VkQueryPool mPool;
 		UINT32 mQueryIdx;
-		bool mFree;		
+		bool mFree;
+		bool mNeedsReset;
 	};
 
 	/** @} */

+ 32 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanQueue.h

@@ -38,6 +38,16 @@ namespace bs
 		/** Submits the provided command buffer on the queue. */
 		void submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount);
 
+		/** 
+		 * Stores information about a submit internally, but doesn't actually execute it. The intended use is to queue
+		 * multiple submits and execute them all at once using submitQueued(), ensuring better performance than submitting
+		 * them all individually.
+		 */
+		void queueSubmit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount);
+
+		/** Submits all previously queued command buffers, as recorded by queueSubmit(). */
+		void submitQueued();
+
 		/** 
 		 * Presents the back buffer of the provided swap chain. 
 		 *
@@ -63,6 +73,23 @@ namespace bs
 		VulkanCmdBuffer* getLastCommandBuffer() const { return mLastCommandBuffer; }
 
 	protected:
+		/** 
+		 * Generates a submit-info structure that can be used for submitting the command buffer to the queue, but doesn't
+		 * perform the actual submit.
+		 */
+		void getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphore, VkSemaphore* waitSemaphores, 
+			UINT32 semaphoresCount, VkSubmitInfo& submitInfo);
+
+		/** 
+		 * Prepares a list of semaphores that can be provided to submit or present calls.
+		 * 
+		 * @param[in]		inSemaphores	External wait semaphores that need to be waited on.
+		 * @param[out]		outSemaphores	All semaphores (external ones, and possibly additional ones), as Vulkan handles.
+		 * @param[in, out]	semaphoresCount	Number of semaphores in @p inSemaphores when calling. When method returns this
+		 *									will contain number of semaphores in @p outSemaphores.
+		 */
+		void prepareSemaphores(VulkanSemaphore** inSemaphores, VkSemaphore* outSemaphores, UINT32& semaphoresCount);
+
 		/** Information about a single submitted command buffer. */
 		struct SubmitInfo
 		{
@@ -81,12 +108,16 @@ namespace bs
 		UINT32 mIndex;
 		VkPipelineStageFlags mSubmitDstWaitMask[BS_MAX_UNIQUE_QUEUES];
 
+		Vector<SubmitInfo> mQueuedBuffers;
+		Vector<VulkanSemaphore*> mQueuedSemaphores;
+
 		List<SubmitInfo> mActiveBuffers;
 		Queue<VulkanSemaphore*> mActiveSemaphores;
 		VulkanCmdBuffer* mLastCommandBuffer;
+		bool mLastCBSemaphoreUsed;
 		UINT32 mNextSubmitIdx;
 
-		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
+		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 2]; // +1 for present semaphore, +1 for self-semaphore
 	};
 
 	/** @} */

+ 26 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -13,6 +13,7 @@
 #include "BsVulkanFramebuffer.h"
 #include "BsVulkanVertexInputManager.h"
 #include "BsVulkanEventQuery.h"
+#include "BsVulkanQueryManager.h"
 
 #if BS_PLATFORM == BS_PLATFORM_WIN32
 #include "Win32/BsWin32RenderWindow.h"
@@ -345,6 +346,22 @@ namespace bs
 	{
 		assert(isReadyForSubmit());
 
+		// If there are any query resets needed, execute those first
+		VulkanDevice& device = queue->getDevice();
+		if(!mQueuedQueryResets.empty())
+		{
+			VulkanCmdBuffer* cmdBuffer = device.getCmdBufferPool().getBuffer(mQueueFamily, false);
+			VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
+
+			for (auto& entry : mQueuedQueryResets)
+				entry->reset(vkCmdBuffer);
+
+			cmdBuffer->end();
+			queue->submit(cmdBuffer, nullptr, 0);
+
+			mQueuedQueryResets.clear();
+		}
+
 		// Issue pipeline barriers for queue transitions (need to happen on original queue first, then on new queue)
 		for (auto& entry : mBuffers)
 		{
@@ -413,7 +430,6 @@ namespace bs
 			resource->setLayout(imageInfo.finalLayout);
 		}
 
-		VulkanDevice& device = queue->getDevice();
 		for (auto& entry : mTransitionInfoTemp)
 		{
 			bool empty = entry.second.imageBarriers.size() == 0 && entry.second.bufferBarriers.size() == 0;
@@ -475,7 +491,7 @@ namespace bs
 			syncMask |= CommandSyncMask::getGlobalQueueMask(otherQueueType, otherQueueIdx);
 
 			cmdBuffer->end();
-			cmdBuffer->submit(otherQueue, otherQueueIdx, 0);
+			otherQueue->submit(cmdBuffer, nullptr, 0);
 
 			// If there are any layout transitions, reset them as we don't need them for the second pipeline barrier
 			for (auto& barrierEntry : barriers.imageBarriers)
@@ -1286,6 +1302,14 @@ namespace bs
 			vkCmdSetEvent(mCmdBuffer, event->getHandle(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 	}
 
+	/**
+	 * Resets the provided query, either immediately or deferred.
+	 *
+	 * Outside of a render pass the reset command is recorded straight into this command buffer. While a render pass is
+	 * active the query is only remembered in mQueuedQueryResets, to be handled later.
+	 */
+	void VulkanCmdBuffer::resetQuery(VulkanQuery* query)
+	{
+		// No render pass active, the reset can be recorded right away
+		if (!isInRenderPass())
+		{
+			query->reset(mCmdBuffer);
+			return;
+		}
+
+		// Query pool resets may not be recorded inside a render pass instance, so remember it for later
+		mQueuedQueryResets.push_back(query);
+	}
+
 	void VulkanCmdBuffer::registerResource(VulkanResource* res, VulkanUseFlags flags)
 	{
 		auto insertResult = mResources.insert(std::make_pair(res, ResourceUseHandle()));

+ 8 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueryManager.cpp

@@ -108,7 +108,7 @@ namespace bs
 		query->mFree = false;
 
 		VkCommandBuffer vkCmdBuf = cb->getHandle();
-		vkCmdResetQueryPool(vkCmdBuf, query->mPool, query->mQueryIdx, 1);
+		cb->resetQuery(query);
 		vkCmdWriteTimestamp(vkCmdBuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, query->mPool, query->mQueryIdx);
 
 		// Note: Must happen only here because we need to check VulkanResource::isBound under the same mutex
@@ -125,7 +125,7 @@ namespace bs
 		query->mFree = false;
 
 		VkCommandBuffer vkCmdBuf = cb->getHandle();
-		vkCmdResetQueryPool(vkCmdBuf, query->mPool, query->mQueryIdx, 1);
+		cb->resetQuery(query);
 		vkCmdBeginQuery(vkCmdBuf, query->mPool, query->mQueryIdx, precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
 
 		// Note: Must happen only here because we need to check VulkanResource::isBound under the same mutex
@@ -147,6 +147,7 @@ namespace bs
 		Lock(mMutex);
 
 		query->mFree = true;
+		query->mNeedsReset = true;
 	}
 
 	VulkanQueryManager::VulkanQueryManager(VulkanRenderAPI& rapi)
@@ -209,4 +210,9 @@ namespace bs
 
 		return vkResult == VK_SUCCESS;
 	}
+
+	/** Records a command on @p cmdBuf that resets this query's single slot (mQueryIdx) in its pool. */
+	void VulkanQuery::reset(VkCommandBuffer cmdBuf)
+	{
+		const UINT32 numQueries = 1; // A VulkanQuery maps to exactly one entry in the pool
+		vkCmdResetQueryPool(cmdBuf, mPool, mQueryIdx, numQueries);
+	}
 }

+ 125 - 38
Source/BansheeVulkanRenderAPI/Source/BsVulkanQueue.cpp

@@ -7,7 +7,8 @@
 namespace bs
 {
 	VulkanQueue::VulkanQueue(VulkanDevice& device, VkQueue queue, GpuQueueType type, UINT32 index)
-		:mDevice(device), mQueue(queue), mType(type), mIndex(index), mLastCommandBuffer(nullptr), mNextSubmitIdx(1)
+		: mDevice(device), mQueue(queue), mType(type), mIndex(index), mLastCommandBuffer(nullptr)
+		, mLastCBSemaphoreUsed(false), mNextSubmitIdx(1)
 	{
 		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
 			mSubmitDstWaitMask[i] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
@@ -23,26 +24,104 @@ namespace bs
 
 	void VulkanQueue::submit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
 	{
-		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
-		VulkanSemaphore* semaphore = cmdBuffer->allocateSemaphore();
+		VulkanSemaphore* signalSemaphore = cmdBuffer->allocateSemaphore();
 
-		VkSemaphore vkSemaphore = semaphore->getHandle();
+		VkCommandBuffer vkCmdBuffer = cmdBuffer->getHandle();
+		VkSemaphore vkSemaphore = signalSemaphore->getHandle();
 
+		prepareSemaphores(waitSemaphores, mSemaphoresTemp, semaphoresCount);
+		
 		VkSubmitInfo submitInfo;
+		getSubmitInfo(&vkCmdBuffer, &vkSemaphore, mSemaphoresTemp, semaphoresCount, submitInfo);
+
+		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
+		assert(result == VK_SUCCESS);
+
+		cmdBuffer->setIsSubmitted();
+		mLastCommandBuffer = cmdBuffer;
+		mLastCBSemaphoreUsed = false;
+
+		mActiveBuffers.push_back(SubmitInfo(cmdBuffer, mNextSubmitIdx++, semaphoresCount));
+	}
+
+	/**
+	 * Records a command buffer and its wait semaphores for a later batched submit. Nothing is sent to the Vulkan
+	 * queue here.
+	 */
+	void VulkanQueue::queueSubmit(VulkanCmdBuffer* cmdBuffer, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
+	{
+		// Submit index is stored as zero for now; submitQueued() assigns a real index when it registers the buffer
+		mQueuedBuffers.push_back(SubmitInfo(cmdBuffer, 0, semaphoresCount));
+
+		// Semaphores of all queued submits live back-to-back in one list, in queue order
+		mQueuedSemaphores.insert(mQueuedSemaphores.end(), waitSemaphores, waitSemaphores + semaphoresCount);
+	}
+
+	/**
+	 * Submits every command buffer recorded by queueSubmit() using a single vkQueueSubmit call, then clears the queued
+	 * lists.
+	 *
+	 * One VkSubmitInfo is built per queued command buffer. Wait semaphores are produced by prepareSemaphores(), which
+	 * may append one extra semaphore (that of the previously submitted command buffer) per entry, so the scratch
+	 * allocation reserves one additional wait slot per submit.
+	 *
+	 * @note	Only the fence of the last command buffer in the batch is passed to vkQueueSubmit.
+	 */
+	void VulkanQueue::submitQueued()
+	{
+		const UINT32 numSubmits = (UINT32)mQueuedBuffers.size();
+		if (numSubmits == 0)
+			return;
+
+		// External wait semaphores, plus room for one self-semaphore per submit
+		const UINT32 totalNumWaitSemaphores = (UINT32)mQueuedSemaphores.size() + numSubmits;
+
+		// Single scratch allocation, laid out as: submit infos | command buffers | signal semaphores | wait semaphores
+		UINT8* data = (UINT8*)bs_stack_alloc((sizeof(VkSubmitInfo) + sizeof(VkCommandBuffer) + sizeof(VkSemaphore)) * 
+			numSubmits + sizeof(VkSemaphore) * totalNumWaitSemaphores);
+		UINT8* dataPtr = data;
+
+		VkSubmitInfo* submitInfos = (VkSubmitInfo*)dataPtr;
+		dataPtr += sizeof(VkSubmitInfo) * numSubmits;
+
+		VkCommandBuffer* commandBuffers = (VkCommandBuffer*)dataPtr;
+		dataPtr += sizeof(VkCommandBuffer) * numSubmits;
+
+		VkSemaphore* signalSemaphores = (VkSemaphore*)dataPtr;
+		dataPtr += sizeof(VkSemaphore) * numSubmits;
+
+		VkSemaphore* waitSemaphores = (VkSemaphore*)dataPtr;
+		dataPtr += sizeof(VkSemaphore) * totalNumWaitSemaphores;
+
+		// The two offsets advance independently: prepareSemaphores() can output more semaphores than it was given
+		UINT32 readSemaphoreIdx = 0; // Offset into mQueuedSemaphores (external semaphores only)
+		UINT32 writeSemaphoreIdx = 0; // Offset into waitSemaphores (external + self semaphores)
+		for(UINT32 i = 0; i < numSubmits; i++)
+		{
+			const SubmitInfo& entry = mQueuedBuffers[i];
+
+			// NOTE(review): submit() obtains the signal semaphore through allocateSemaphore(); confirm getSemaphore()
+			// returns a valid semaphore for a command buffer that hasn't been submitted yet.
+			VulkanSemaphore* signalSemaphore = entry.cmdBuffer->getSemaphore();
+			commandBuffers[i] = entry.cmdBuffer->getHandle();
+			signalSemaphores[i] = signalSemaphore->getHandle();
+
+			// Pointer arithmetic on data() avoids indexing past the end when no external semaphores remain
+			UINT32 semaphoresCount = entry.numSemaphores;
+			prepareSemaphores(mQueuedSemaphores.data() + readSemaphoreIdx, &waitSemaphores[writeSemaphoreIdx], 
+				semaphoresCount);
+
+			getSubmitInfo(&commandBuffers[i], &signalSemaphores[i], &waitSemaphores[writeSemaphoreIdx], semaphoresCount, 
+				submitInfos[i]);
+
+			entry.cmdBuffer->setIsSubmitted();
+			mLastCommandBuffer = entry.cmdBuffer; // Needs to be set because prepareSemaphores depends on it
+			mLastCBSemaphoreUsed = false;
+
+			mActiveBuffers.push_back(SubmitInfo(entry.cmdBuffer, mNextSubmitIdx++, semaphoresCount));
+
+			readSemaphoreIdx += entry.numSemaphores;
+			writeSemaphoreIdx += semaphoresCount;
+		}
+
+		// Submit the whole batch, not just the first entry
+		VkResult result = vkQueueSubmit(mQueue, numSubmits, submitInfos, mLastCommandBuffer->getFence());
+		assert(result == VK_SUCCESS);
+
+		mQueuedBuffers.clear();
+		mQueuedSemaphores.clear();
+
+		bs_stack_free(data);
+	}
+
+	void VulkanQueue::getSubmitInfo(VkCommandBuffer* cmdBuffer, VkSemaphore* signalSemaphore, VkSemaphore* waitSemaphores,
+									UINT32 semaphoresCount, VkSubmitInfo& submitInfo)
+	{
 		submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
 		submitInfo.pNext = nullptr;
 		submitInfo.commandBufferCount = 1;
-		submitInfo.pCommandBuffers = &vkCmdBuffer;
+		submitInfo.pCommandBuffers = cmdBuffer;
 		submitInfo.signalSemaphoreCount = 1;
-		submitInfo.pSignalSemaphores = &vkSemaphore;
+		submitInfo.pSignalSemaphores = signalSemaphore;
 		submitInfo.waitSemaphoreCount = semaphoresCount;
 
 		if (semaphoresCount > 0)
 		{
-			for (UINT32 i = 0; i < semaphoresCount; i++)
-				mSemaphoresTemp[i] = waitSemaphores[i]->getHandle();
-
-			submitInfo.pWaitSemaphores = mSemaphoresTemp;
+			submitInfo.pWaitSemaphores = waitSemaphores;
 			submitInfo.pWaitDstStageMask = mSubmitDstWaitMask;
 		}
 		else
@@ -50,22 +129,6 @@ namespace bs
 			submitInfo.pWaitSemaphores = nullptr;
 			submitInfo.pWaitDstStageMask = nullptr;
 		}
-
-		VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdBuffer->getFence());
-		assert(result == VK_SUCCESS);
-
-		cmdBuffer->setIsSubmitted();
-		mLastCommandBuffer = cmdBuffer;
-
-		for (UINT32 i = 0; i < semaphoresCount; i++)
-		{
-			waitSemaphores[i]->notifyBound();
-			waitSemaphores[i]->notifyUsed(0, 0, VulkanUseFlag::Read | VulkanUseFlag::Write);
-
-			mActiveSemaphores.push(waitSemaphores[i]);
-		}
-
-		mActiveBuffers.push_back(SubmitInfo(cmdBuffer, mNextSubmitIdx++, semaphoresCount));
 	}
 
 	void VulkanQueue::present(VulkanSwapChain* swapChain, VulkanSemaphore** waitSemaphores, UINT32 semaphoresCount)
@@ -74,8 +137,9 @@ namespace bs
 		if (!swapChain->prepareForPresent(backBufferIdx))
 			return; // Nothing to present (back buffer wasn't even acquired)
 
-		VkSwapchainKHR vkSwapChain = swapChain->getHandle();
+		prepareSemaphores(waitSemaphores, mSemaphoresTemp, semaphoresCount);
 
+		VkSwapchainKHR vkSwapChain = swapChain->getHandle();
 		VkPresentInfoKHR presentInfo;
 		presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
 		presentInfo.pNext = nullptr;
@@ -87,9 +151,6 @@ namespace bs
 		// Wait before presenting, if required
 		if (semaphoresCount > 0)
 		{
-			for (UINT32 i = 0; i < semaphoresCount; i++)
-				mSemaphoresTemp[i] = waitSemaphores[i]->getHandle();
-
 			presentInfo.pWaitSemaphores = mSemaphoresTemp;
 			presentInfo.waitSemaphoreCount = semaphoresCount;
 		}
@@ -102,14 +163,6 @@ namespace bs
 		VkResult result = vkQueuePresentKHR(mQueue, &presentInfo);
 		assert(result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR);
 
-		for (UINT32 i = 0; i < semaphoresCount; i++)
-		{
-			waitSemaphores[i]->notifyBound();
-			waitSemaphores[i]->notifyUsed(0, 0, VulkanUseFlag::Read | VulkanUseFlag::Write);
-
-			mActiveSemaphores.push(waitSemaphores[i]);
-		}
-
 		mActiveBuffers.push_back(SubmitInfo(nullptr, mNextSubmitIdx++, semaphoresCount));
 	}
 
@@ -164,4 +217,38 @@ namespace bs
 			iter = mActiveBuffers.erase(iter);
 		}
 	}
+
+	/**
+	 * Converts the provided wait semaphores into raw Vulkan handles, marks each as bound and in use, and registers it
+	 * with mActiveSemaphores. If the last command buffer on this queue has been submitted and its semaphore wasn't
+	 * waited on yet, that semaphore is appended as well so execution proceeds in submission order.
+	 *
+	 * On return @p semaphoresCount holds the number of handles written to @p outSemaphores, which can be one more than
+	 * the number passed in.
+	 *
+	 * NOTE(review): callers must size @p outSemaphores (and any array indexed in parallel with it, such as a wait stage
+	 * mask) for that extra entry - confirm at call sites.
+	 */
+	void VulkanQueue::prepareSemaphores(VulkanSemaphore** inSemaphores, VkSemaphore* outSemaphores, UINT32& semaphoresCount)
+	{
+		UINT32 numWritten = 0;
+
+		// Shared bookkeeping for every semaphore that ends up in the output list
+		auto appendSemaphore = [&](VulkanSemaphore* semaphore)
+		{
+			semaphore->notifyBound();
+			semaphore->notifyUsed(0, 0, VulkanUseFlag::Read | VulkanUseFlag::Write);
+
+			outSemaphores[numWritten++] = semaphore->getHandle();
+			mActiveSemaphores.push(semaphore);
+		};
+
+		const UINT32 numExternal = semaphoresCount;
+		for (UINT32 i = 0; i < numExternal; i++)
+			appendSemaphore(inSemaphores[i]);
+
+		// Wait on the previous command buffer too, so that work on this queue executes in order
+		const bool waitOnPrevious = mLastCommandBuffer != nullptr && mLastCommandBuffer->isSubmitted() 
+			&& !mLastCBSemaphoreUsed;
+
+		if (waitOnPrevious)
+		{
+			appendSemaphore(mLastCommandBuffer->getSemaphore());
+
+			// Marking the semaphore as used stops command buffers submitted after a present() call from waiting on it
+			// a second time. As a consequence there is no intra-queue dependency between submissions on opposite sides
+			// of a present call (meaning those submissions could execute concurrently).
+			mLastCBSemaphoreUsed = true;
+		}
+
+		semaphoresCount = numWritten;
+	}
 }