Jelajahi Sumber

Added special Vulkan command buffers for transfer operations

BearishSun 9 tahun lalu
induk
melakukan
59db12dbc5

+ 6 - 0
Source/BansheeCore/Include/BsCommandBuffer.h

@@ -23,9 +23,15 @@ namespace BansheeEngine
 		/** Returns a combined mask that contains all the required dependencies. */
 		UINT32 getMask() const { return mMask; }
 
+		/** Uses the queue type and index to generate a mask with a bit set for that queue's global index. */
+		static UINT32 getGlobalQueueMask(GpuQueueType type, UINT32 queueIdx);
+
 		/** Uses the queue type and index to generate a global queue index. */
 		static UINT32 getGlobalQueueIdx(GpuQueueType type, UINT32 queueIdx);
 
+		/** Uses the global queue index to retrieve the local queue index and queue type. */
+		static UINT32 getQueueIdxAndType(UINT32 globalQueueIdx, GpuQueueType& type);
+
 	private:
 		UINT32 mMask = 0;
 	};

+ 0 - 3
Source/BansheeCore/Include/BsCorePrerequisites.h

@@ -170,9 +170,6 @@
 #define BS_MAX_MULTIPLE_RENDER_TARGETS 8
 #define BS_FORCE_SINGLETHREADED_RENDERING 0
 
-/** Maximum number of CommandBuffer%s that may exist at once. */
-#define BS_MAX_COMMAND_BUFFERS 32 // Should be higher than BS_MAX_QUEUES_PER_TYPE * VQT_COUNT, and fit within 4 bytes
-
 /** Maximum number of individual GPU queues, per type. */
 #define BS_MAX_QUEUES_PER_TYPE 8
 

+ 33 - 2
Source/BansheeCore/Source/BsCommandBuffer.cpp

@@ -10,10 +10,10 @@ namespace BansheeEngine
 		if (buffer == nullptr)
 			return;
 
-		mMask |= getGlobalQueueIdx(buffer->getType(), buffer->getQueueIdx());
+		mMask |= getGlobalQueueMask(buffer->getType(), buffer->getQueueIdx());
 	}
 
-	UINT32 CommandSyncMask::getGlobalQueueIdx(GpuQueueType type, UINT32 queueIdx)
+	UINT32 CommandSyncMask::getGlobalQueueMask(GpuQueueType type, UINT32 queueIdx)
 	{
 		UINT32 bitShift = 0;
 		switch (type)
@@ -33,6 +33,37 @@ namespace BansheeEngine
 		return (1 << queueIdx) << bitShift;
 	}
 
+	UINT32 CommandSyncMask::getGlobalQueueIdx(GpuQueueType type, UINT32 queueIdx)
+	{
+		// Queue types are laid out in consecutive groups of 8 slots:
+		// graphics first, then compute, then upload.
+		if (type == GQT_COMPUTE)
+			return 8 + queueIdx;
+
+		if (type == GQT_UPLOAD)
+			return 16 + queueIdx;
+
+		// GQT_GRAPHICS (and any other type) occupies the first group.
+		return queueIdx;
+	}
+
+	UINT32 CommandSyncMask::getQueueIdxAndType(UINT32 globalQueueIdx, GpuQueueType& type)
+	{
+		// Inverse of getGlobalQueueIdx(): each group of 8 global indices maps back to
+		// one queue type, and the offset within the group is the local queue index.
+		if (globalQueueIdx < 8)
+		{
+			type = GQT_GRAPHICS;
+			return globalQueueIdx;
+		}
+
+		if (globalQueueIdx < 16)
+		{
+			type = GQT_COMPUTE;
+			return globalQueueIdx - 8;
+		}
+
+		type = GQT_UPLOAD;
+		return globalQueueIdx - 16;
+	}
+
 	CommandBuffer::CommandBuffer(GpuQueueType type, UINT32 deviceIdx, UINT32 queueIdx, bool secondary)
 		:mType(type), mDeviceIdx(deviceIdx), mQueueIdx(queueIdx), mIsSecondary(secondary)
 	{

+ 2 - 2
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBuffer.h

@@ -209,7 +209,7 @@ namespace BansheeEngine
 		UnorderedMap<VulkanResource*, BufferInfo> mBuffers;
 		UINT32 mGlobalQueueIdx;
 
-		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS + 1]; // +1 for present semaphore
+		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES + 1]; // +1 for present semaphore
 		UnorderedMap<UINT32, TransitionInfo> mTransitionInfoTemp;
 	};
 
@@ -249,7 +249,7 @@ namespace BansheeEngine
 		VulkanQueue* mQueue;
 		UINT32 mIdMask;
 
-		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
+		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES];
 		UnorderedMap<UINT32, TransitionInfo> mTransitionInfoTemp;
 	};
 

+ 37 - 1
Source/BansheeVulkanRenderAPI/Include/BsVulkanCommandBufferManager.h

@@ -12,6 +12,31 @@ namespace BansheeEngine
 	 *  @{
 	 */
 
+	/** Wrapper around a command buffer used specifically for transfer operations. */
+	class VulkanTransferBufferInfo
+	{
+	public:
+		VulkanTransferBufferInfo(UINT32 queueIdx);
+
+		/** 
+		 * OR's the provided sync mask with the internal sync mask. The sync mask determines which queues the
+		 * buffer should wait on before executing. See CommandSyncMask.
+		 */
+		void appendMask(UINT32 syncMask) { mSyncMask |= syncMask; }
+
+		/** Resets the sync mask. */
+		void clearMask() { mSyncMask = 0; }
+
+		/** Returns the internal command buffer. */
+		VulkanCmdBuffer* getCB() const { return mCB; }
+	private:
+		friend class VulkanCommandBufferManager;
+
+		VulkanCmdBuffer* mCB; // Internal command buffer; null when no transfer operations are queued
+		UINT32 mSyncMask; // Queues to wait on before executing, see CommandSyncMask
+		UINT32 mQueueIdx; // Local (per-type) index of the queue this buffer executes on
+	};
+
 	/** 
 	 * Handles creation of Vulkan command buffers. See CommandBuffer. 
 	 *
@@ -48,11 +73,22 @@ namespace BansheeEngine
 		 */
 		void refreshStates(UINT32 deviceIdx);
 
+		/** 
+		 * Returns a command buffer that can be used for executing transfer operations on the specified queue. 
+		 * Transfer buffers are automatically flushed (submitted) whenever a new (normal) command buffer is about to
+		 * execute.
+		 */
+		VulkanTransferBufferInfo* getTransferBuffer(UINT32 deviceIdx, GpuQueueType type, UINT32 queueIdx);
+
+		/** Submits all transfer command buffers, ensuring all queued transfer operations get executed. */
+		void flushTransferBuffers(UINT32 deviceIdx);
+
 	private:
 		/** Contains command buffers specific to one device. */
 		struct PerDeviceData
 		{
-			VulkanCmdBuffer* buffers[BS_MAX_COMMAND_BUFFERS];
+			VulkanCmdBuffer* activeBuffers[BS_MAX_UNIQUE_QUEUES];
+			VulkanTransferBufferInfo transferBuffers[BS_MAX_UNIQUE_QUEUES];
 		};
 
 		const VulkanRenderAPI& mRapi;

+ 3 - 0
Source/BansheeVulkanRenderAPI/Include/BsVulkanPrerequisites.h

@@ -15,6 +15,9 @@
 
 #define BS_NUM_BACK_BUFFERS 1
 
+/** Maximum number of GPU queues that may exist at once. */
+#define BS_MAX_UNIQUE_QUEUES (BS_MAX_QUEUES_PER_TYPE * BansheeEngine::GQT_COUNT) // Must fit within 4 bytes
+
 #include "vulkan/vulkan.h"
 
 /** @addtogroup Plugins

+ 1 - 1
Source/BansheeVulkanRenderAPI/Include/Win32/BsWin32RenderWindow.h

@@ -116,7 +116,7 @@ namespace BansheeEngine
 		VkFormat mDepthFormat;
 		UINT32 mPresentQueueFamily;
 		SPtr<VulkanSwapChain> mSwapChain;
-		VkSemaphore mSemaphoresTemp[BS_MAX_COMMAND_BUFFERS];
+		VkSemaphore mSemaphoresTemp[BS_MAX_UNIQUE_QUEUES];
 		bool mRequiresNewBackBuffer;
 
 		Win32RenderWindowProperties mProperties;

+ 1 - 2
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBuffer.cpp

@@ -368,8 +368,7 @@ namespace BansheeEngine
 				break;
 			}
 
-			UINT32 otherGlobalQueueIdx = CommandSyncMask::getGlobalQueueIdx(otherQueueType, otherQueueIdx);
-			syncMask |= otherGlobalQueueIdx;
+			syncMask |= CommandSyncMask::getGlobalQueueMask(otherQueueType, otherQueueIdx);
 
 			cmdBuffer->end();
 			cmdBuffer->submit(otherQueue, otherQueueIdx, 0);

+ 73 - 11
Source/BansheeVulkanRenderAPI/Source/BsVulkanCommandBufferManager.cpp

@@ -7,12 +7,19 @@
 
 namespace BansheeEngine
 {
+	/** Starts empty; the internal command buffer is assigned lazily on first use. */
+	VulkanTransferBufferInfo::VulkanTransferBufferInfo(UINT32 queueIdx)
+		:mCB(nullptr), mSyncMask(0), mQueueIdx(queueIdx)
+	{ }
+
 	VulkanCommandBufferManager::VulkanCommandBufferManager(const VulkanRenderAPI& rapi)
 		:mRapi(rapi), mDeviceData(nullptr), mNumDevices(rapi.getNumDevices())
 	{
 		mDeviceData = bs_newN<PerDeviceData>(mNumDevices);
-		for(UINT32 i = 0; i < mNumDevices; i++)
-			memset(mDeviceData[i].buffers, 0, BS_MAX_COMMAND_BUFFERS * sizeof(VulkanCmdBuffer*));
+		// Zero-initialize the per-device tracking arrays; entries are filled in lazily
+		// as command buffers are created and submitted.
+		for (UINT32 i = 0; i < mNumDevices; i++)
+		{
+			bs_zero_out(mDeviceData[i].activeBuffers);
+			bs_zero_out(mDeviceData[i].transferBuffers);
+		}
 	}
 
 	VulkanCommandBufferManager::~VulkanCommandBufferManager()
@@ -47,7 +54,7 @@ namespace BansheeEngine
 		assert(buffer->isSubmitted());
 
 		UINT32 idx = CommandSyncMask::getGlobalQueueIdx(type, queueIdx);
-		mDeviceData[deviceIdx].buffers[idx] = buffer;
+		mDeviceData[deviceIdx].activeBuffers[idx] = buffer;
 	}
 
 	void VulkanCommandBufferManager::getSyncSemaphores(UINT32 deviceIdx, UINT32 syncMask, VkSemaphore* semaphores, 
@@ -57,16 +64,16 @@ namespace BansheeEngine
 		const PerDeviceData& deviceData = mDeviceData[deviceIdx];
 
 		UINT32 semaphoreIdx = 0;
-		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
+		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
 		{
-			if (deviceData.buffers[i] == nullptr)
+			if (deviceData.activeBuffers[i] == nullptr)
 				continue;
 
 			if ((syncMask & (1 << i)) == 0) // We don't care about the command buffer
 				continue;
 
-			assert(deviceData.buffers[i]->isSubmitted()); // It shouldn't be here if it wasn't submitted
-			semaphores[semaphoreIdx++] = deviceData.buffers[i]->getSemaphore();
+			assert(deviceData.activeBuffers[i]->isSubmitted()); // It shouldn't be here if it wasn't submitted
+			semaphores[semaphoreIdx++] = deviceData.activeBuffers[i]->getSemaphore();
 		}
 
 		count = semaphoreIdx;
@@ -78,15 +85,70 @@ namespace BansheeEngine
 		PerDeviceData& deviceData = mDeviceData[deviceIdx];
 
 		UINT32 semaphoreIdx = 0;
-		for (UINT32 i = 0; i < BS_MAX_COMMAND_BUFFERS; i++)
+		for (UINT32 i = 0; i < BS_MAX_UNIQUE_QUEUES; i++)
 		{
-			if (deviceData.buffers[i] == nullptr)
+			if (deviceData.activeBuffers[i] == nullptr)
 				continue;
 
-			VulkanCmdBuffer* cmdBuffer = deviceData.buffers[i];
+			VulkanCmdBuffer* cmdBuffer = deviceData.activeBuffers[i];
 			cmdBuffer->refreshFenceStatus();
 			if (!cmdBuffer->isSubmitted())
-				deviceData.buffers[i] = nullptr;
+				deviceData.activeBuffers[i] = nullptr;
+		}
+	}
+
+	VulkanTransferBufferInfo* VulkanCommandBufferManager::getTransferBuffer(UINT32 deviceIdx, GpuQueueType type, 
+		UINT32 queueIdx)
+	{
+		assert(deviceIdx < mNumDevices);
+
+		// Transfer buffers are stored per global queue index.
+		UINT32 globalQueueIdx = CommandSyncMask::getGlobalQueueIdx(type, queueIdx);
+		assert(globalQueueIdx < BS_MAX_UNIQUE_QUEUES);
+
+		VulkanTransferBufferInfo& bufferInfo = mDeviceData[deviceIdx].transferBuffers[globalQueueIdx];
+
+		// Allocate the command buffer lazily, on first use after a flush.
+		if (bufferInfo.mCB == nullptr)
+		{
+			SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
+			UINT32 queueFamily = device->getQueueFamily(type);
+
+			bufferInfo.mCB = device->getCmdBufferPool().getBuffer(queueFamily, false);
+		}
+
+		return &bufferInfo;
+	}
+
+	void VulkanCommandBufferManager::flushTransferBuffers(UINT32 deviceIdx)
+	{
+		assert(deviceIdx < mNumDevices);
+
+		SPtr<VulkanDevice> device = mRapi._getDevice(deviceIdx);
+		PerDeviceData& deviceData = mDeviceData[deviceIdx];
+
+		// Transfer buffers are laid out per global queue index: GQT_COUNT groups of
+		// BS_MAX_QUEUES_PER_TYPE slots, matching CommandSyncMask::getGlobalQueueIdx().
+		UINT32 transferBufferIdx = 0;
+		for(UINT32 i = 0; i < GQT_COUNT; i++)
+		{
+			GpuQueueType queueType = (GpuQueueType)i;
+
+			// If the device exposes no queues of this type, fall back to the graphics queues.
+			UINT32 numQueues = device->getNumQueues(queueType);
+			if (numQueues == 0)
+			{
+				queueType = GQT_GRAPHICS;
+				numQueues = device->getNumQueues(GQT_GRAPHICS);
+			}
+
+			for(UINT32 j = 0; j < BS_MAX_QUEUES_PER_TYPE; j++)
+			{
+				VulkanTransferBufferInfo& bufferInfo = deviceData.transferBuffers[transferBufferIdx];
+
+				// Advance the slot index unconditionally: skipping it on empty slots would
+				// desynchronize the slot <-> (type, queue) mapping for all later entries.
+				transferBufferIdx++;
+
+				if (bufferInfo.mCB == nullptr)
+					continue;
+
+				// Multiple virtual queues of the same type may share one physical queue.
+				UINT32 physicalQueueIdx = j % numQueues;
+				VulkanQueue* queue = device->getQueue(queueType, physicalQueueIdx);
+
+				bufferInfo.mCB->submit(queue, bufferInfo.mQueueIdx, bufferInfo.mSyncMask);
+
+				// Clear the slot so the next transfer on this queue gets a fresh buffer
+				// and doesn't inherit a stale sync mask.
+				bufferInfo.mCB = nullptr;
+				bufferInfo.mSyncMask = 0;
+			}
+		}
+	}
 }

+ 1 - 1
Source/BansheeVulkanRenderAPI/Source/BsVulkanDevice.cpp

@@ -148,7 +148,7 @@ namespace BansheeEngine
 		UINT32 curIdx = queueIdx;
 		while (curIdx < BS_MAX_QUEUES_PER_TYPE)
 		{
-			idMask |= CommandSyncMask::getGlobalQueueIdx(type, curIdx);
+			idMask |= CommandSyncMask::getGlobalQueueMask(type, curIdx);
 			curIdx += numQueues;
 		}
 

+ 17 - 14
Source/BansheeVulkanRenderAPI/Source/BsVulkanHardwareBuffer.cpp

@@ -153,21 +153,18 @@ namespace BansheeEngine
 		bool needRead = options == GBL_READ_WRITE || options == GBL_READ_ONLY;
 		if(needRead)
 		{
-			// TODO - Get command buffer on wanted queue
-			//      - Generate sync mask depending on where the resource is used on
-			//      - Issue copy from source buffer to staging buffer, with sync mask semaphores
-			//      - Wait for queue to complete, refresh CB states
+			// TODO - Get command buffer on wanted queue (getTransferBuffer(deviceIdx, queueIdx))
+			//      - Generate sync mask depending on where the resource is used on (VulkanResource::getUseInfo())
+			//      - Register this buffer and staging buffer with the transfer buffer, updating the transfer buffer's sync mask
+			//      - Flush the transfer buffer, wait for it to complete, and refresh CB states
 			//      - Proceed below
 		}
 
 		// TODO - Return staging buffer->map()
 		//      - Set mRequiresUpload field to true
-
-
-		// TODO - Special
-		//      - Keep a list of upload command buffers per queue to avoid allocating them
-		//        - Submit and clear all upload command buffers whenever new command buffer is submitted
-		//      - Can I easily determine sync mask of which buffers a resource is used on from VulkanResource?
+		//      - Remember lock mode
+		//      - Remember staging buffer
+		//      - Remember lock queue and device
 
 		switch (options)
 		{
@@ -194,10 +191,16 @@ namespace BansheeEngine
 	void VulkanHardwareBuffer::unmap()
 	{
 		// TODO - If direct map (mRequiresUpload == false), simply unmap
-		// TODO - If mRequiresUpload is true, queue copyBuffer command
-		//      - If lock was discard, don't issue any write semaphores and instead create a brand new internal buffer (destroy old one)
-		//      - If lock was no overwrite, don't issue any write semaphores but write to the buffer
-		//      - Otherwise, wait until resource is not used before issuing write
+		// TODO - If mRequiresUpload is true
+		//      - Get command buffer on locked queue and device
+		//      - If lock was discard
+		//        - Create a brand new internal buffer
+		//        - Call destroy on the old one
+		//        - Issue copy on the CB without a sync mask (register both resources on CB)
+		//      - If lock was no overwrite
+		//        - Issue copy on the CB without a sync mask (register both resources on CB)
+		//      - Otherwise issue copy with a sync mask depending on current use flags
+		//      - Destroy staging buffer
 	}
 
 	void VulkanHardwareBuffer::copyData(HardwareBuffer& srcBuffer, UINT32 srcOffset,

+ 5 - 0
Source/BansheeVulkanRenderAPI/Source/BsVulkanRenderAPI.cpp

@@ -474,7 +474,12 @@ namespace BansheeEngine
 		if (commandBuffer == nullptr)
 			return;
 
+		// Submit all transfer buffers first
 		VulkanCommandBuffer& cmdBuffer = static_cast<VulkanCommandBuffer&>(*commandBuffer);
+
+		VulkanCommandBufferManager& cbm = static_cast<VulkanCommandBufferManager&>(CommandBufferManager::instance());
+		cbm.flushTransferBuffers(cmdBuffer.getDeviceIdx());
+
 		cmdBuffer.submit(syncMask);
 	}