Explorar el Código

Add support for async compute in the graphics backend

Panagiotis Christopoulos Charitos hace 4 años
padre
commit
e47c97577b

+ 4 - 7
AnKi/Gr/CommandBuffer.h

@@ -94,14 +94,11 @@ enum class CommandBufferFlag : U8
 	/// It will contain a handfull of commands.
 	SMALL_BATCH = 1 << 3,
 
-	/// Will contain graphics work.
-	GRAPHICS_WORK = 1 << 4,
+	/// Will contain graphics, compute and transfer work.
+	GENERAL_WORK = 1 << 4,
 
-	/// Will contain transfer commands.
-	TRANSFER_WORK = 1 << 5,
-
-	/// Will contain compute work.
-	COMPUTE_WORK = 1 << 6,
+	/// Will contain only compute work. It binds to async compute queues.
+	COMPUTE_WORK = 1 << 5,
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(CommandBufferFlag)
 

+ 2 - 2
AnKi/Gr/RenderGraph.cpp

@@ -844,7 +844,7 @@ void RenderGraph::initBatches()
 		if(m_ctx->m_graphicsCmdbs.isEmpty() || drawsToPresentable)
 		{
 			CommandBufferInitInfo cmdbInit;
-			cmdbInit.m_flags = CommandBufferFlag::COMPUTE_WORK | CommandBufferFlag::GRAPHICS_WORK;
+			cmdbInit.m_flags = CommandBufferFlag::GENERAL_WORK;
 			CommandBufferPtr cmdb = getManager().newCommandBuffer(cmdbInit);
 
 			m_ctx->m_graphicsCmdbs.emplaceBack(m_ctx->m_alloc, cmdb);
@@ -923,7 +923,7 @@ void RenderGraph::initGraphicsPasses(const RenderGraphDescription& descr, StackA
 				{
 					outPass.m_secondLevelCmdbs.create(alloc, inPass.m_secondLevelCmdbsCount);
 					CommandBufferInitInfo& cmdbInit = outPass.m_secondLevelCmdbInitInfo;
-					cmdbInit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SECOND_LEVEL;
+					cmdbInit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SECOND_LEVEL;
 					ANKI_ASSERT(cmdbInit.m_framebuffer.isCreated());
 					cmdbInit.m_colorAttachmentUsages = outPass.m_colorUsages;
 					cmdbInit.m_depthStencilAttachmentUsage = outPass.m_dsUsage;

+ 3 - 4
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -49,10 +49,9 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	{
 		ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
 	}
-	ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-	ci.queueFamilyIndexCount = 1;
-	const U32 queueIdx = getGrManagerImpl().getGraphicsQueueFamily();
-	ci.pQueueFamilyIndices = &queueIdx;
+	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
+	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
+	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
 	ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
 	getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, m_handle);
 

+ 30 - 21
AnKi/Gr/Vulkan/CommandBufferFactory.cpp

@@ -15,7 +15,8 @@ void MicroCommandBuffer::destroy()
 
 	if(m_handle)
 	{
-		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev, m_threadAlloc->m_pool, 1, &m_handle);
+		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev,
+							 m_threadAlloc->m_pools[getQueueTypeFromCommandBufferFlags(m_flags)], 1, &m_handle);
 		m_handle = {};
 	}
 }
@@ -39,12 +40,15 @@ void MicroCommandBuffer::reset()
 
 Error CommandBufferThreadAllocator::init()
 {
-	VkCommandPoolCreateInfo ci = {};
-	ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-	ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
-	ci.queueFamilyIndex = m_factory->m_queueFamily;
+	for(QueueType qtype : EnumIterable<QueueType>()) // Create one command pool per queue type
+	{
+		VkCommandPoolCreateInfo ci = {};
+		ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+		ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; // Allows resetting individual command buffers
+		ci.queueFamilyIndex = m_factory->m_queueFamilies[qtype]; // Queue family the factory recorded for this type
 
-	ANKI_VK_CHECK(vkCreateCommandPool(m_factory->m_dev, &ci, nullptr, &m_pool));
+		ANKI_VK_CHECK(vkCreateCommandPool(m_factory->m_dev, &ci, nullptr, &m_pools[qtype]));
+	}
 
 	return Error::NONE;
 }
@@ -68,21 +72,27 @@ void CommandBufferThreadAllocator::destroyLists()
 	{
 		for(U j = 0; j < 2; ++j)
 		{
-			CmdbType& type = m_types[i][j];
+			for(QueueType qtype : EnumIterable<QueueType>())
+			{
+				CmdbType& type = m_types[i][j][qtype];
 
-			destroyList(type.m_deletedCmdbs);
-			destroyList(type.m_readyCmdbs);
-			destroyList(type.m_inUseCmdbs);
+				destroyList(type.m_deletedCmdbs);
+				destroyList(type.m_readyCmdbs);
+				destroyList(type.m_inUseCmdbs);
+			}
 		}
 	}
 }
 
 void CommandBufferThreadAllocator::destroy()
 {
-	if(m_pool)
+	for(VkCommandPool& pool : m_pools) // By reference so the stored handle itself gets cleared below
 	{
-		vkDestroyCommandPool(m_factory->m_dev, m_pool, nullptr);
-		m_pool = {};
+		if(pool)
+		{
+			vkDestroyCommandPool(m_factory->m_dev, pool, nullptr);
+			pool = {}; // Null the entry in m_pools so a destroyed pool is never reused or destroyed twice
+		}
 	}
 
 	ANKI_ASSERT(m_createdCmdbs.load() == 0 && "Someone still holds references to command buffers");
@@ -91,12 +101,12 @@ void CommandBufferThreadAllocator::destroy()
 Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags, MicroCommandBufferPtr& outPtr,
 													 Bool& createdNew)
 {
-	cmdbFlags = cmdbFlags & (CommandBufferFlag::SECOND_LEVEL | CommandBufferFlag::SMALL_BATCH);
+	ANKI_ASSERT(!!(cmdbFlags & CommandBufferFlag::COMPUTE_WORK) ^ !!(cmdbFlags & CommandBufferFlag::GENERAL_WORK));
 	createdNew = false;
 
 	const Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::SMALL_BATCH);
-	CmdbType& type = m_types[secondLevel][smallBatch];
+	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(cmdbFlags)];
 
 	// Move the deleted to (possibly) in-use or ready
 	{
@@ -177,7 +187,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		VkCommandBufferAllocateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-		ci.commandPool = m_pool;
+		ci.commandPool = m_pools[getQueueTypeFromCommandBufferFlags(cmdbFlags)];
 		ci.level = (secondLevel) ? VK_COMMAND_BUFFER_LEVEL_SECONDARY : VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 		ci.commandBufferCount = 1;
 
@@ -208,8 +218,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 	}
 
 	ANKI_ASSERT(out && out->m_refcount.load() == 0);
-	ANKI_ASSERT(!!(out->m_flags & CommandBufferFlag::SECOND_LEVEL) == secondLevel);
-	ANKI_ASSERT(!!(out->m_flags & CommandBufferFlag::SMALL_BATCH) == smallBatch);
+	ANKI_ASSERT(out->m_flags == cmdbFlags);
 	outPtr.reset(out);
 	return Error::NONE;
 }
@@ -221,19 +230,19 @@ void CommandBufferThreadAllocator::deleteCommandBuffer(MicroCommandBuffer* ptr)
 	const Bool secondLevel = !!(ptr->m_flags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::SMALL_BATCH);
 
-	CmdbType& type = m_types[secondLevel][smallBatch];
+	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(ptr->m_flags)];
 
 	LockGuard<Mutex> lock(type.m_deletedMtx);
 	type.m_deletedCmdbs.pushBack(ptr);
 }
 
-Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, uint32_t queueFamily)
+Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies)
 {
 	ANKI_ASSERT(dev);
 
 	m_alloc = alloc;
 	m_dev = dev;
-	m_queueFamily = queueFamily;
+	m_queueFamilies = queueFamilies; // One queue family index per QueueType; read when the command pools are created
 	return Error::NONE;
 }
 

+ 15 - 4
AnKi/Gr/Vulkan/CommandBufferFactory.h

@@ -19,6 +19,12 @@ class CommandBufferThreadAllocator;
 /// @addtogroup vulkan
 /// @{
 
+inline QueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags) // Maps command buffer flags to a queue type
+{
+	ANKI_ASSERT(!!(flags & CommandBufferFlag::GENERAL_WORK) ^ !!(flags & CommandBufferFlag::COMPUTE_WORK)); // Exactly one of the two
+	return !!(flags & CommandBufferFlag::GENERAL_WORK) ? QueueType::GENERAL : QueueType::COMPUTE; // GENERAL_WORK wins, else COMPUTE
+}
+
 class MicroCommandBuffer : public IntrusiveListEnabled<MicroCommandBuffer>
 {
 	friend class CommandBufferThreadAllocator;
@@ -62,6 +68,11 @@ public:
 		m_fence = fence;
 	}
 
+	CommandBufferFlag getFlags() const
+	{
+		return m_flags; // The flags stored on this command buffer
+	}
+
 private:
 	static constexpr U32 MAX_REF_OBJECT_SEARCH = 16;
 
@@ -150,7 +161,7 @@ public:
 private:
 	CommandBufferFactory* m_factory;
 	ThreadId m_tid;
-	VkCommandPool m_pool = VK_NULL_HANDLE;
+	Array<VkCommandPool, U(QueueType::COUNT)> m_pools = {};
 
 	class CmdbType
 	{
@@ -166,7 +177,7 @@ private:
 	Atomic<U32> m_createdCmdbs = {0};
 #endif
 
-	Array2d<CmdbType, 2, 2> m_types;
+	Array3d<CmdbType, 2, 2, U(QueueType::COUNT)> m_types;
 
 	void destroyList(IntrusiveList<MicroCommandBuffer>& list);
 	void destroyLists();
@@ -183,7 +194,7 @@ public:
 
 	~CommandBufferFactory() = default;
 
-	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, uint32_t queueFamily);
+	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies);
 
 	void destroy();
 
@@ -199,7 +210,7 @@ public:
 private:
 	GrAllocator<U8> m_alloc;
 	VkDevice m_dev = VK_NULL_HANDLE;
-	uint32_t m_queueFamily;
+	Array<U32, U(QueueType::COUNT)> m_queueFamilies;
 
 	DynamicArray<CommandBufferThreadAllocator*> m_threadAllocs;
 	RWMutex m_threadAllocMtx;

+ 0 - 1
AnKi/Gr/Vulkan/CommandBufferImpl.inl.h

@@ -335,7 +335,6 @@ inline void CommandBufferImpl::drawElementsIndirect(PrimitiveTopology topology,
 inline void CommandBufferImpl::dispatchCompute(U32 groupCountX, U32 groupCountY, U32 groupCountZ)
 {
 	ANKI_ASSERT(m_computeProg);
-	ANKI_ASSERT(!!(m_flags & CommandBufferFlag::COMPUTE_WORK));
 	ANKI_ASSERT(m_computeProg->getReflectionInfo().m_pushConstantsSize == m_setPushConstantsSize
 				&& "Forgot to set pushConstants");
 

+ 10 - 0
AnKi/Gr/Vulkan/Common.h

@@ -72,6 +72,16 @@ enum class VulkanExtensions : U16
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VulkanExtensions)
 
+enum class QueueType : U8
+{
+	GENERAL, // Queue family that supports graphics, compute and transfer
+	COMPUTE, // Queue family with compute but without graphics (async compute)
+
+	COUNT, // Number of queue types; used to size the per-queue arrays
+	FIRST = 0 // NOTE(review): presumably the start value EnumIterable needs - confirm
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(QueueType)
+
 /// @name Constants
 /// @{
 const U DESCRIPTOR_POOL_INITIAL_SIZE = 64;

+ 64 - 34
AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -29,11 +29,14 @@ GrManagerImpl::~GrManagerImpl()
 	}
 
 	// 2nd THING: wait for the GPU
-	if(m_queue)
+	for(VkQueue& queue : m_queues)
 	{
 		LockGuard<Mutex> lock(m_globalMtx);
-		vkQueueWaitIdle(m_queue);
-		m_queue = VK_NULL_HANDLE;
+		if(queue)
+		{
+			vkQueueWaitIdle(queue);
+			queue = VK_NULL_HANDLE;
+		}
 	}
 
 	m_cmdbFactory.destroy();
@@ -111,7 +114,11 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	ANKI_CHECK(initInstance(init));
 	ANKI_CHECK(initSurface(init));
 	ANKI_CHECK(initDevice(init));
-	vkGetDeviceQueue(m_device, m_queueIdx, 0, &m_queue);
+
+	for(QueueType qtype : EnumIterable<QueueType>())
+	{
+		vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
+	}
 
 	m_swapchainFactory.init(this, init.m_config->getBool("gr_vsync"));
 
@@ -121,7 +128,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 
 	ANKI_CHECK(initMemory(*init.m_config));
 
-	ANKI_CHECK(m_cmdbFactory.init(getAllocator(), m_device, m_queueIdx));
+	ANKI_CHECK(m_cmdbFactory.init(getAllocator(), m_device, m_queueFamilyIndices));
 
 	for(PerFrame& f : m_perFrame)
 	{
@@ -476,43 +483,54 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 	queueInfos.create(count);
 	vkGetPhysicalDeviceQueueFamilyProperties(m_physicalDevice, &count, &queueInfos[0]);
 
-	uint32_t desiredFamilyIdx = MAX_U32;
-	const VkQueueFlags DESITED_QUEUE_FLAGS = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
+	const VkQueueFlags GENERAL_QUEUE_FLAGS = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
 	for(U32 i = 0; i < count; ++i)
 	{
-		if((queueInfos[i].queueFlags & DESITED_QUEUE_FLAGS) == DESITED_QUEUE_FLAGS)
-		{
-			VkBool32 supportsPresent = false;
-			ANKI_VK_CHECK(vkGetPhysicalDeviceSurfaceSupportKHR(m_physicalDevice, i, m_surface, &supportsPresent));
+		VkBool32 supportsPresent = false;
+		ANKI_VK_CHECK(vkGetPhysicalDeviceSurfaceSupportKHR(m_physicalDevice, i, m_surface, &supportsPresent));
 
-			if(supportsPresent)
+		if(supportsPresent)
+		{
+			if((queueInfos[i].queueFlags & GENERAL_QUEUE_FLAGS) == GENERAL_QUEUE_FLAGS)
+			{
+				m_queueFamilyIndices[QueueType::GENERAL] = i;
+			}
+			else if((queueInfos[i].queueFlags & VK_QUEUE_COMPUTE_BIT)
+					&& !(queueInfos[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
 			{
-				desiredFamilyIdx = i;
-				break;
+				// This must be the async compute
+				m_queueFamilyIndices[QueueType::COMPUTE] = i;
 			}
 		}
 	}
 
-	if(desiredFamilyIdx == MAX_U32)
+	if(m_queueFamilyIndices[QueueType::GENERAL] == MAX_U32)
 	{
-		ANKI_VK_LOGE("Couldn't find a queue family with graphics+compute+transfer+present."
-					 "The assumption was wrong. The code needs to be reworked");
+		ANKI_VK_LOGE("Couldn't find a queue family with graphics+compute+transfer+present. "
+					 "Something is wrong");
 		return Error::FUNCTION_FAILED;
 	}
 
-	m_queueIdx = desiredFamilyIdx;
+	if(m_queueFamilyIndices[QueueType::COMPUTE] == MAX_U32)
+	{
+		ANKI_VK_LOGE("Couldn't find an async compute queue");
+		return Error::FUNCTION_FAILED;
+	}
 
-	F32 priority = 1.0;
-	VkDeviceQueueCreateInfo q = {};
-	q.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-	q.queueFamilyIndex = desiredFamilyIdx;
-	q.queueCount = 1;
-	q.pQueuePriorities = &priority;
+	const F32 priority = 1.0;
+	Array<VkDeviceQueueCreateInfo, U32(QueueType::COUNT)> q = {};
+	for(QueueType qtype : EnumIterable<QueueType>())
+	{
+		q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+		q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
+		q[qtype].queueCount = 1;
+		q[qtype].pQueuePriorities = &priority;
+	}
 
 	VkDeviceCreateInfo ci = {};
 	ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-	ci.queueCreateInfoCount = 1;
-	ci.pQueueCreateInfos = &q;
+	ci.queueCreateInfoCount = q.getSize();
+	ci.pQueueCreateInfos = &q[0];
 
 	// Extensions
 	U32 extCount = 0;
@@ -907,7 +925,10 @@ TexturePtr GrManagerImpl::acquireNextPresentableTexture()
 	if(res == VK_ERROR_OUT_OF_DATE_KHR)
 	{
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queue and create a new one");
-		vkQueueWaitIdle(m_queue);
+		for(VkQueue queue : m_queues)
+		{
+			vkQueueWaitIdle(queue);
+		}
 		m_crntSwapchain = m_swapchainFactory.newInstance();
 
 		// Can't fail a second time
@@ -933,7 +954,7 @@ void GrManagerImpl::endFrame()
 	PerFrame& frame = m_perFrame[m_frame % MAX_FRAMES_IN_FLIGHT];
 
 	// Wait for the fence of N-2 frame
-	U waitFrameIdx = (m_frame + 1) % MAX_FRAMES_IN_FLIGHT;
+	const U waitFrameIdx = (m_frame + 1) % MAX_FRAMES_IN_FLIGHT;
 	PerFrame& waitFrame = m_perFrame[waitFrameIdx];
 	if(waitFrame.m_presentFence)
 	{
@@ -955,15 +976,18 @@ void GrManagerImpl::endFrame()
 	present.pWaitSemaphores = (frame.m_renderSemaphore) ? &frame.m_renderSemaphore->getHandle() : nullptr;
 	present.swapchainCount = 1;
 	present.pSwapchains = &m_crntSwapchain->m_swapchain;
-	U32 idx = m_acquiredImageIdx;
+	const U32 idx = m_acquiredImageIdx;
 	present.pImageIndices = &idx;
 	present.pResults = &res;
 
-	VkResult res1 = vkQueuePresentKHR(m_queue, &present);
+	const VkResult res1 = vkQueuePresentKHR(m_queues[frame.m_queueWroteToSwapchainImage], &present);
 	if(res1 == VK_ERROR_OUT_OF_DATE_KHR)
 	{
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queue and create a new one");
-		vkQueueWaitIdle(m_queue);
+		for(VkQueue queue : m_queues)
+		{
+			vkQueueWaitIdle(queue);
+		}
 		m_crntSwapchain = m_swapchainFactory.newInstance();
 	}
 	else
@@ -1075,23 +1099,29 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 
 		// Update the swapchain's fence
 		m_crntSwapchain->setFence(fence);
+
+		frame.m_queueWroteToSwapchainImage = getQueueTypeFromCommandBufferFlags(cmdb->getFlags());
 	}
 
 	{
 		ANKI_TRACE_SCOPED_EVENT(VK_QUEUE_SUBMIT);
-		ANKI_VK_CHECKF(vkQueueSubmit(m_queue, 1, &submit, fence->getHandle()));
+		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())], 1, &submit,
+									 fence->getHandle()));
 	}
 
 	if(wait)
 	{
-		vkQueueWaitIdle(m_queue);
+		vkQueueWaitIdle(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())]);
 	}
 }
 
 void GrManagerImpl::finish()
 {
 	LockGuard<Mutex> lock(m_globalMtx);
-	vkQueueWaitIdle(m_queue);
+	for(VkQueue queue : m_queues) // Wait on every queue, not only the general one
+	{
+		vkQueueWaitIdle(queue);
+	}
 }
 
 void GrManagerImpl::trySetVulkanHandleName(CString name, VkDebugReportObjectTypeEXT type, U64 handle) const

+ 6 - 9
AnKi/Gr/Vulkan/GrManagerImpl.h

@@ -47,9 +47,9 @@ public:
 
 	ANKI_USE_RESULT Error init(const GrManagerInitInfo& cfg);
 
-	U32 getGraphicsQueueFamily() const
+	const Array<U32, U(QueueType::COUNT)>& getQueueFamilies() const
 	{
-		return m_queueIdx;
+		return m_queueFamilyIndices; // By reference: callers store &getQueueFamilies()[0] in Vulkan create infos
 	}
 
 	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
@@ -178,11 +178,6 @@ public:
 		return m_surface;
 	}
 
-	U32 getGraphicsQueueIndex() const
-	{
-		return m_queueIdx;
-	}
-
 	/// @name Debug report
 	/// @{
 	void beginMarker(VkCommandBuffer cmdb, CString name) const
@@ -244,8 +239,8 @@ private:
 	VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
 	VulkanExtensions m_extensions = VulkanExtensions::NONE;
 	VkDevice m_device = VK_NULL_HANDLE;
-	U32 m_queueIdx = MAX_U32;
-	VkQueue m_queue = VK_NULL_HANDLE;
+	Array<U32, U32(QueueType::COUNT)> m_queueFamilyIndices = {MAX_U32, MAX_U32};
+	Array<VkQueue, U32(QueueType::COUNT)> m_queues = {};
 	Mutex m_globalMtx;
 
 	VkPhysicalDeviceProperties2 m_devProps = {};
@@ -276,6 +271,8 @@ private:
 
 		/// Signaled by the submit that renders to the default FB. Present waits for it.
 		MicroSemaphorePtr m_renderSemaphore;
+
+		QueueType m_queueWroteToSwapchainImage = QueueType::COUNT;
 	};
 
 	VkSurfaceKHR m_surface = VK_NULL_HANDLE;

+ 3 - 4
AnKi/Gr/Vulkan/SwapchainFactory.cpp

@@ -146,10 +146,9 @@ Error MicroSwapchain::initInternal()
 		ci.imageExtent = surfaceProperties.currentExtent;
 		ci.imageArrayLayers = 1;
 		ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
-		ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
-		ci.queueFamilyIndexCount = 1;
-		U32 idx = m_factory->m_gr->getGraphicsQueueIndex();
-		ci.pQueueFamilyIndices = &idx;
+		ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
+		ci.queueFamilyIndexCount = m_factory->m_gr->getQueueFamilies().getSize();
+		ci.pQueueFamilyIndices = &m_factory->m_gr->getQueueFamilies()[0];
 		ci.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
 		ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 		ci.presentMode = presentMode;

+ 4 - 5
AnKi/Gr/Vulkan/TextureImpl.cpp

@@ -115,7 +115,7 @@ Error TextureImpl::initInternal(VkImage externalImage, const TextureInitInfo& in
 		ANKI_ASSERT(!(init.m_initialUsage & TextureUsageBit::GENERATE_MIPMAPS) && "That doesn't make any sense");
 
 		CommandBufferInitInfo cmdbinit;
-		cmdbinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cmdbinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = getManager().newCommandBuffer(cmdbinit);
 
 		VkImageSubresourceRange range;
@@ -299,10 +299,9 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 	ci.samples = VK_SAMPLE_COUNT_1_BIT;
 	ci.tiling = VK_IMAGE_TILING_OPTIMAL;
 	ci.usage = convertTextureUsage(init.m_usage, init.m_format);
-	ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-	ci.queueFamilyIndexCount = 1;
-	U32 queueIdx = getGrManagerImpl().getGraphicsQueueFamily();
-	ci.pQueueFamilyIndices = &queueIdx;
+	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
+	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
+	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
 	ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
 	ANKI_VK_CHECK(vkCreateImage(getDevice(), &ci, nullptr, &m_imageHandle));

+ 1 - 1
AnKi/Renderer/Renderer.cpp

@@ -440,7 +440,7 @@ TexturePtr Renderer::createAndClearRenderTarget(const TextureInitInfo& inf, cons
 
 	// Clear all surfaces
 	CommandBufferInitInfo cmdbinit;
-	cmdbinit.m_flags = (useCompute) ? CommandBufferFlag::COMPUTE_WORK : CommandBufferFlag::GRAPHICS_WORK;
+	cmdbinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 	if((inf.m_mipmapCount * faceCount * inf.m_layerCount * 4) < COMMAND_BUFFER_SMALL_BATCH_MAX_COMMANDS)
 	{
 		cmdbinit.m_flags |= CommandBufferFlag::SMALL_BATCH;

+ 1 - 1
AnKi/Renderer/Tonemapping.cpp

@@ -44,7 +44,7 @@ Error Tonemapping::initInternal(const ConfigSet& initializer)
 		BufferMapAccessBit::NONE, "AvgLum"));
 
 	CommandBufferInitInfo cmdbinit;
-	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::TRANSFER_WORK;
+	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = getGrManager().newCommandBuffer(cmdbinit);
 
 	TransferGpuAllocatorHandle handle;

+ 2 - 2
AnKi/Resource/MeshResource.cpp

@@ -164,7 +164,7 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 	if(async)
 	{
 		CommandBufferInitInfo cmdbinit;
-		cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH;
+		cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::GENERAL_WORK;
 		CommandBufferPtr cmdb = getManager().getGrManager().newCommandBuffer(cmdbinit);
 
 		cmdb->fillBuffer(m_vertexBuffer, 0, MAX_PTR_SIZE, 0);
@@ -261,7 +261,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 	Array<TransferGpuAllocatorHandle, 2> handles;
 
 	CommandBufferInitInfo cmdbinit;
-	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::TRANSFER_WORK;
+	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = gr.newCommandBuffer(cmdbinit);
 
 	// Set barriers

+ 1 - 1
AnKi/Resource/TextureResource.cpp

@@ -189,7 +189,7 @@ Error TextureResource::load(LoadingContext& ctx)
 		const U32 end = min(copyCount, b + MAX_COPIES_BEFORE_FLUSH);
 
 		CommandBufferInitInfo ci;
-		ci.m_flags = CommandBufferFlag::TRANSFER_WORK | CommandBufferFlag::SMALL_BATCH;
+		ci.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = ctx.m_gr->newCommandBuffer(ci);
 
 		// Set the barriers of the batch

+ 1 - 1
AnKi/Ui/Font.cpp

@@ -96,7 +96,7 @@ void Font::createTexture(const void* data, U32 width, U32 height)
 	// Do the copy
 	static const TextureSurfaceInfo surf(0, 0, 0, 0);
 	CommandBufferInitInfo cmdbInit;
-	cmdbInit.m_flags = CommandBufferFlag::TRANSFER_WORK | CommandBufferFlag::SMALL_BATCH;
+	cmdbInit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 	CommandBufferPtr cmdb = m_manager->getGrManager().newCommandBuffer(cmdbInit);
 	{
 		TextureViewInitInfo viewInit(m_tex, surf, DepthStencilAspectBit::NONE, "TempFont");

+ 20 - 21
Tests/Gr/Gr.cpp

@@ -466,7 +466,7 @@ ANKI_TEST(Gr, ClearScreen)
 		FramebufferPtr fb = createColorFb(*gr, presentTex);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		presentBarrierA(cmdb, presentTex);
@@ -505,7 +505,7 @@ ANKI_TEST(Gr, SimpleDrawcall)
 		FramebufferPtr fb = createColorFb(*gr, presentTex);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -566,7 +566,7 @@ ANKI_TEST(Gr, ViewportAndScissor)
 		gr->beginFrame();
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		U idx = (i / 30) % 4;
@@ -669,7 +669,7 @@ ANKI_TEST(Gr, ViewportAndScissorOffscreen)
 		if(i == 0)
 		{
 			CommandBufferInitInfo cinit;
-			cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+			cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 			CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 			cmdb->setViewport(0, 0, RT_WIDTH, RT_HEIGHT);
@@ -683,7 +683,7 @@ ANKI_TEST(Gr, ViewportAndScissorOffscreen)
 		}
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		// Draw offscreen
@@ -780,7 +780,7 @@ ANKI_TEST(Gr, DrawWithUniforms)
 		FramebufferPtr fb = createColorFb(*gr, presentTex);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -864,7 +864,7 @@ ANKI_TEST(Gr, DrawWithVertex)
 		FramebufferPtr fb = createColorFb(*gr, presentTex);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->bindVertexBuffer(0, b, 0, sizeof(Vert));
@@ -990,7 +990,7 @@ ANKI_TEST(Gr, DrawWithTexture)
 								   0,   128, 0,   128, 0,   128, 128, 128, 128, 128, 255, 128, 0,   0,   128, 255}};
 
 	CommandBufferInitInfo cmdbinit;
-	cmdbinit.m_flags = CommandBufferFlag::TRANSFER_WORK;
+	cmdbinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = gr->newCommandBuffer(cmdbinit);
 
 	// Set barriers
@@ -1054,7 +1054,7 @@ ANKI_TEST(Gr, DrawWithTexture)
 		FramebufferPtr fb = createColorFb(*gr, presentTex);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -1190,7 +1190,7 @@ static void drawOffscreen(GrManager& gr, Bool useSecondLevel)
 		timer.start();
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 		CommandBufferPtr cmdb = gr.newCommandBuffer(cinit);
 
 		cmdb->setPolygonOffset(0.0, 0.0);
@@ -1212,7 +1212,7 @@ static void drawOffscreen(GrManager& gr, Bool useSecondLevel)
 		else
 		{
 			CommandBufferInitInfo cinit;
-			cinit.m_flags = CommandBufferFlag::SECOND_LEVEL | CommandBufferFlag::GRAPHICS_WORK;
+			cinit.m_flags = CommandBufferFlag::SECOND_LEVEL | CommandBufferFlag::GENERAL_WORK;
 			cinit.m_framebuffer = fb;
 			CommandBufferPtr cmdb2 = gr.newCommandBuffer(cinit);
 
@@ -1347,7 +1347,7 @@ ANKI_TEST(Gr, ImageLoadStore)
 
 		CommandBufferInitInfo cinit;
 		cinit.m_flags =
-			CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::COMPUTE_WORK | CommandBufferFlag::SMALL_BATCH;
+			CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH; // GENERAL_WORK already covers compute
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		// Write image
@@ -1429,7 +1429,7 @@ ANKI_TEST(Gr, 3DTextures)
 	Array<U8, 4> mip1 = {{128, 128, 128, 0}};
 
 	CommandBufferInitInfo cmdbinit;
-	cmdbinit.m_flags = CommandBufferFlag::TRANSFER_WORK | CommandBufferFlag::SMALL_BATCH;
+	cmdbinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 	CommandBufferPtr cmdb = gr->newCommandBuffer(cmdbinit);
 
 	cmdb->setTextureVolumeBarrier(a, TextureUsageBit::NONE, TextureUsageBit::TRANSFER_DESTINATION,
@@ -1471,7 +1471,7 @@ ANKI_TEST(Gr, 3DTextures)
 		timer.start();
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -1807,8 +1807,7 @@ void main()
 
 	// Upload data and test them
 	CommandBufferInitInfo cmdbInit;
-	cmdbInit.m_flags =
-		CommandBufferFlag::TRANSFER_WORK | CommandBufferFlag::COMPUTE_WORK | CommandBufferFlag::SMALL_BATCH;
+	cmdbInit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 	CommandBufferPtr cmdb = gr->newCommandBuffer(cmdbInit);
 
 	TextureSubresourceInfo subresource;
@@ -1922,7 +1921,7 @@ void main()
 	// Draw
 
 	CommandBufferInitInfo cinit;
-	cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+	cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 	cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -2034,7 +2033,7 @@ void main()
 
 	// Draw
 	CommandBufferInitInfo cinit;
-	cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK;
+	cinit.m_flags = CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 	cmdb->fillBuffer(resultBuff, 0, resultBuff->getSize(), 0);
@@ -2555,7 +2554,7 @@ void main()
 	if(useRayTracing)
 	{
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setAccelerationStructureBarrier(blas, AccelerationStructureUsageBit::NONE,
@@ -2585,7 +2584,7 @@ void main()
 		const Mat4 projMat = Mat4::calculatePerspectiveProjectionMatrix(toRad(90.0f), toRad(90.0f), 0.01f, 1000.0f);
 
 		CommandBufferInitInfo cinit;
-		cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+		cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		cmdb->setViewport(0, 0, WIDTH, HEIGHT);
@@ -3407,7 +3406,7 @@ void main()
 
 		CommandBufferInitInfo cinit;
 		cinit.m_flags =
-			CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::COMPUTE_WORK | CommandBufferFlag::SMALL_BATCH;
+			CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH; // GENERAL_WORK already covers compute
 		CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 		if(i == 0)

+ 1 - 1
Tests/Ui/Ui.cpp

@@ -120,7 +120,7 @@ ANKI_TEST(Ui, Ui)
 			}
 
 			CommandBufferInitInfo cinit;
-			cinit.m_flags = CommandBufferFlag::GRAPHICS_WORK | CommandBufferFlag::SMALL_BATCH;
+			cinit.m_flags = CommandBufferFlag::GENERAL_WORK | CommandBufferFlag::SMALL_BATCH;
 			CommandBufferPtr cmdb = gr->newCommandBuffer(cinit);
 
 			cmdb->setTextureBarrier(presentTex, TextureUsageBit::NONE, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE,