
Add the ability to work without async compute

Panagiotis Christopoulos Charitos, 4 years ago
commit f4d52e75f9

+ 1 - 0   AnKi/Gr/ConfigDefs.h

@@ -15,3 +15,4 @@ ANKI_CONFIG_OPTION(gr_rayTracing, 0, 0, 1, "Try enabling ray tracing")
 ANKI_CONFIG_OPTION(gr_diskShaderCacheMaxSize, 128_MB, 1_MB, 1_GB)
 ANKI_CONFIG_OPTION(gr_vkminor, 1, 1, 1)
 ANKI_CONFIG_OPTION(gr_vkmajor, 1, 1, 1)
+ANKI_CONFIG_OPTION(gr_asyncCompute, 1, 0, 1, "Enable or disable async compute")
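
Note: the option defaults to 1 (enabled), with a 0 to 1 range. A minimal, compilable sketch of the decision it drives, using illustrative names (only the getBool("gr_asyncCompute") accessor and the MAX_U32 sentinel come from the hunks below):

    #include <cstdint>

    constexpr uint32_t NO_FAMILY = 0xFFFFFFFFu; // stands in for AnKi's MAX_U32

    // Async compute is used only when the option is on AND the device exposes
    // a dedicated compute-only queue family.
    bool useAsyncCompute(bool grAsyncCompute, uint32_t computeFamilyIndex)
    {
        return grAsyncCompute && computeFamilyIndex != NO_FAMILY;
    }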

+ 1 - 1   AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -49,9 +49,9 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	{
 		ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR;
 	}
-	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
 	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
 	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
+	ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 	ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
 	getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, m_handle);
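
The same two-line change appears in SwapchainFactory.cpp and TextureImpl.cpp further down: VK_SHARING_MODE_CONCURRENT is only legal with at least two queue families, so when async compute is off and only the general family is listed, the resource has to fall back to VK_SHARING_MODE_EXCLUSIVE. A standalone sketch of the rule with plain Vulkan types (the helper name is illustrative):

    #include <vulkan/vulkan.h>

    // CONCURRENT needs >= 2 queue families; with a single family the spec
    // requires EXCLUSIVE, and pQueueFamilyIndices is then ignored.
    void setQueueFamilies(VkBufferCreateInfo& ci, const uint32_t* families, uint32_t count)
    {
        ci.queueFamilyIndexCount = count;
        ci.pQueueFamilyIndices = families;
        ci.sharingMode = (count > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
    }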
 

+ 32 - 10   AnKi/Gr/Vulkan/CommandBufferFactory.cpp

@@ -9,14 +9,28 @@
 namespace anki
 {
 
+static VulkanQueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags,
+														  const VulkanQueueFamilies& queueFamilies)
+{
+	ANKI_ASSERT(!!(flags & CommandBufferFlag::GENERAL_WORK) ^ !!(flags & CommandBufferFlag::COMPUTE_WORK));
+	if(!(flags & CommandBufferFlag::GENERAL_WORK) && queueFamilies[VulkanQueueType::COMPUTE] != MAX_U32)
+	{
+		return VulkanQueueType::COMPUTE;
+	}
+	else
+	{
+		ANKI_ASSERT(queueFamilies[VulkanQueueType::GENERAL] != MAX_U32);
+		return VulkanQueueType::GENERAL;
+	}
+}
+
 void MicroCommandBuffer::destroy()
 {
 	reset();
 
 	if(m_handle)
 	{
-		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev,
-							 m_threadAlloc->m_pools[getQueueTypeFromCommandBufferFlags(m_flags)], 1, &m_handle);
+		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev, m_threadAlloc->m_pools[m_queue], 1, &m_handle);
 		m_handle = {};
 	}
 }
@@ -40,8 +54,13 @@ void MicroCommandBuffer::reset()
 
 Error CommandBufferThreadAllocator::init()
 {
-	for(QueueType qtype : EnumIterable<QueueType>())
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 	{
+		if(m_factory->m_queueFamilies[qtype] == MAX_U32)
+		{
+			continue;
+		}
+
 		VkCommandPoolCreateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
 		ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
@@ -72,7 +91,7 @@ void CommandBufferThreadAllocator::destroyLists()
 	{
 		for(U j = 0; j < 2; ++j)
 		{
-			for(QueueType qtype : EnumIterable<QueueType>())
+			for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 			{
 				CmdbType& type = m_types[i][j][qtype];
 
@@ -86,12 +105,12 @@ void CommandBufferThreadAllocator::destroyLists()
 
 void CommandBufferThreadAllocator::destroy()
 {
-	for(VkCommandPool pool : m_pools)
+	for(VkCommandPool& pool : m_pools)
 	{
 		if(pool)
 		{
 			vkDestroyCommandPool(m_factory->m_dev, pool, nullptr);
-			pool = {};
+			pool = VK_NULL_HANDLE;
 		}
 	}
 
@@ -106,7 +125,9 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 	const Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::SMALL_BATCH);
-	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(cmdbFlags)];
+	const VulkanQueueType queue = getQueueTypeFromCommandBufferFlags(cmdbFlags, m_factory->m_queueFamilies);
+
+	CmdbType& type = m_types[secondLevel][smallBatch][queue];
 
 	// Move the deleted to (possibly) in-use or ready
 	{
@@ -187,7 +208,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		VkCommandBufferAllocateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-		ci.commandPool = m_pools[getQueueTypeFromCommandBufferFlags(cmdbFlags)];
+		ci.commandPool = m_pools[queue];
 		ci.level = (secondLevel) ? VK_COMMAND_BUFFER_LEVEL_SECONDARY : VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 		ci.commandBufferCount = 1;
 
@@ -207,6 +228,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		newCmdb->m_handle = cmdb;
 		newCmdb->m_flags = cmdbFlags;
+		newCmdb->m_queue = queue;
 
 		out = newCmdb;
 
@@ -230,13 +252,13 @@ void CommandBufferThreadAllocator::deleteCommandBuffer(MicroCommandBuffer* ptr)
 	const Bool secondLevel = !!(ptr->m_flags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::SMALL_BATCH);
 
-	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(ptr->m_flags)];
+	CmdbType& type = m_types[secondLevel][smallBatch][ptr->m_queue];
 
 	LockGuard<Mutex> lock(type.m_deletedMtx);
 	type.m_deletedCmdbs.pushBack(ptr);
 }
 
-Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies)
+Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, const VulkanQueueFamilies& queueFamilies)
 {
 	ANKI_ASSERT(dev);
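
The new static helper replaces the inline one removed from CommandBufferFactory.h (next file) and adds the fallback: compute-flagged command buffers use the async compute queue only if that family actually exists, otherwise everything goes to the general queue. A standalone model of the rule, with illustrative names:

    #include <cassert>
    #include <cstdint>

    enum class Queue : uint8_t { GENERAL, COMPUTE };
    constexpr uint32_t NO_FAMILY = 0xFFFFFFFFu;

    // generalWork/computeWork mirror the two mutually exclusive command buffer flags.
    Queue pickQueue(bool generalWork, bool computeWork, const uint32_t families[2])
    {
        assert(generalWork != computeWork); // exactly one kind of work per command buffer
        if(computeWork && families[uint32_t(Queue::COMPUTE)] != NO_FAMILY)
        {
            return Queue::COMPUTE; // dedicated compute family is present
        }
        assert(families[uint32_t(Queue::GENERAL)] != NO_FAMILY);
        return Queue::GENERAL; // fallback: async compute disabled or unavailable
    }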
 

+ 11 - 10   AnKi/Gr/Vulkan/CommandBufferFactory.h

@@ -19,12 +19,6 @@ class CommandBufferThreadAllocator;
 /// @addtogroup vulkan
 /// @{
 
-inline QueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags)
-{
-	ANKI_ASSERT(!!(flags & CommandBufferFlag::GENERAL_WORK) ^ !!(flags & CommandBufferFlag::COMPUTE_WORK));
-	return !!(flags & CommandBufferFlag::GENERAL_WORK) ? QueueType::GENERAL : QueueType::COMPUTE;
-}
-
 class MicroCommandBuffer : public IntrusiveListEnabled<MicroCommandBuffer>
 {
 	friend class CommandBufferThreadAllocator;
@@ -73,6 +67,12 @@ public:
 		return m_flags;
 	}
 
+	VulkanQueueType getVulkanQueueType() const
+	{
+		ANKI_ASSERT(m_queue != VulkanQueueType::COUNT);
+		return m_queue;
+	}
+
 private:
 	static constexpr U32 MAX_REF_OBJECT_SEARCH = 16;
 
@@ -87,6 +87,7 @@ private:
 	CommandBufferThreadAllocator* m_threadAlloc;
 	Atomic<I32> m_refcount = {0};
 	CommandBufferFlag m_flags = CommandBufferFlag::NONE;
+	VulkanQueueType m_queue = VulkanQueueType::COUNT;
 
 	void destroy();
 	void reset();
@@ -161,7 +162,7 @@ public:
 private:
 	CommandBufferFactory* m_factory;
 	ThreadId m_tid;
-	Array<VkCommandPool, U(QueueType::COUNT)> m_pools = {};
+	Array<VkCommandPool, U(VulkanQueueType::COUNT)> m_pools = {};
 
 	class CmdbType
 	{
@@ -177,7 +178,7 @@ private:
 	Atomic<U32> m_createdCmdbs = {0};
 #endif
 
-	Array3d<CmdbType, 2, 2, U(QueueType::COUNT)> m_types;
+	Array3d<CmdbType, 2, 2, U(VulkanQueueType::COUNT)> m_types;
 
 	void destroyList(IntrusiveList<MicroCommandBuffer>& list);
 	void destroyLists();
@@ -198,7 +199,7 @@ public:
 
 	CommandBufferFactory& operator=(const CommandBufferFactory&) = delete; // Non-copyable
 
-	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies);
+	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, const VulkanQueueFamilies& queueFamilies);
 
 	void destroy();
 
@@ -214,7 +215,7 @@ public:
 private:
 	GrAllocator<U8> m_alloc;
 	VkDevice m_dev = VK_NULL_HANDLE;
-	Array<U32, U(QueueType::COUNT)> m_queueFamilies;
+	VulkanQueueFamilies m_queueFamilies;
 
 	DynamicArray<CommandBufferThreadAllocator*> m_threadAllocs;
 	RWMutex m_threadAllocMtx;

+ 4 - 2   AnKi/Gr/Vulkan/Common.h

@@ -83,7 +83,7 @@ enum class VulkanExtensions : U32
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VulkanExtensions)
 
-enum class QueueType : U8
+enum class VulkanQueueType : U8
 {
 	GENERAL,
 	COMPUTE,
@@ -91,7 +91,9 @@ enum class QueueType : U8
 	COUNT,
 	FIRST = 0
 };
-ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(QueueType)
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VulkanQueueType)
+
+using VulkanQueueFamilies = Array<U32, U32(VulkanQueueType::COUNT)>;
 
 /// @name Constants
 /// @{
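
A short sketch of how the new alias is used in the rest of this commit (AnKi's Array indexing and EnumIterable helpers are assumed from the surrounding code): one slot per VulkanQueueType, with MAX_U32 marking a family that was not created, e.g. compute when gr_asyncCompute is 0:

    VulkanQueueFamilies families = {MAX_U32, MAX_U32};
    families[VulkanQueueType::GENERAL] = 0; // illustrative: family 0 does graphics+compute

    for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
    {
        if(families[qtype] == MAX_U32)
        {
            continue; // skip queue types that don't exist on this device/config
        }
        // ... create a command pool / fetch the VkQueue for this family ...
    }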

+ 51 - 23   AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -115,9 +115,16 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	ANKI_CHECK(initSurface(init));
 	ANKI_CHECK(initDevice(init));
 
-	for(QueueType qtype : EnumIterable<QueueType>())
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 	{
-		vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
+		if(m_queueFamilyIndices[qtype] != MAX_U32)
+		{
+			vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
+		}
+		else
+		{
+			m_queues[qtype] = VK_NULL_HANDLE;
+		}
 	}
 
 	m_swapchainFactory.init(this, init.m_config->getBool("gr_vsync"));
@@ -499,45 +506,58 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		{
 			if((queueInfos[i].queueFlags & GENERAL_QUEUE_FLAGS) == GENERAL_QUEUE_FLAGS)
 			{
-				m_queueFamilyIndices[QueueType::GENERAL] = i;
+				m_queueFamilyIndices[VulkanQueueType::GENERAL] = i;
 			}
 			else if((queueInfos[i].queueFlags & VK_QUEUE_COMPUTE_BIT)
 					&& !(queueInfos[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
 			{
 				// This must be the async compute
-				m_queueFamilyIndices[QueueType::COMPUTE] = i;
+				m_queueFamilyIndices[VulkanQueueType::COMPUTE] = i;
 			}
 		}
 	}
 
-	if(m_queueFamilyIndices[QueueType::GENERAL] == MAX_U32)
+	if(m_queueFamilyIndices[VulkanQueueType::GENERAL] == MAX_U32)
 	{
 		ANKI_VK_LOGE("Couldn't find a queue family with graphics+compute+transfer+present. "
 					 "Something is wrong");
 		return Error::FUNCTION_FAILED;
 	}
 
-	if(m_queueFamilyIndices[QueueType::COMPUTE] == MAX_U32)
+	if(!init.m_config->getBool("gr_asyncCompute"))
 	{
-		ANKI_VK_LOGE("Couldn't find an async compute queue");
-		return Error::FUNCTION_FAILED;
+		m_queueFamilyIndices[VulkanQueueType::COMPUTE] = MAX_U32;
 	}
 
-	const F32 priority = 1.0;
-	Array<VkDeviceQueueCreateInfo, U32(QueueType::COUNT)> q = {};
-	for(QueueType qtype : EnumIterable<QueueType>())
+	if(m_queueFamilyIndices[VulkanQueueType::COMPUTE] == MAX_U32)
 	{
-		q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-		q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
-		q[qtype].queueCount = 1;
-		q[qtype].pQueuePriorities = &priority;
+		ANKI_VK_LOGW("Couldn't find an async compute queue. Will try to use the general queue instead");
 	}
+	else
+	{
+		ANKI_VK_LOGI("Async compute is enabled");
+	}
+
+	const F32 priority = 1.0f;
+	Array<VkDeviceQueueCreateInfo, U32(VulkanQueueType::COUNT)> q = {};
 
 	VkDeviceCreateInfo ci = {};
 	ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-	ci.queueCreateInfoCount = q.getSize();
 	ci.pQueueCreateInfos = &q[0];
 
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
+	{
+		if(m_queueFamilyIndices[qtype] != MAX_U32)
+		{
+			q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+			q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
+			q[qtype].queueCount = 1;
+			q[qtype].pQueuePriorities = &priority;
+
+			++ci.queueCreateInfoCount;
+		}
+	}
+
 	// Extensions
 	U32 extCount = 0;
 	vkEnumerateDeviceExtensionProperties(m_physicalDevice, nullptr, &extCount, nullptr);
@@ -1039,7 +1059,10 @@ TexturePtr GrManagerImpl::acquireNextPresentableTexture()
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queue and create a new one");
 		for(VkQueue queue : m_queues)
 		{
-			vkQueueWaitIdle(queue);
+			if(queue)
+			{
+				vkQueueWaitIdle(queue);
+			}
 		}
 		m_crntSwapchain.reset(nullptr);
 		m_crntSwapchain = m_swapchainFactory.newInstance();
@@ -1099,7 +1122,10 @@ void GrManagerImpl::endFrame()
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queues and create a new one");
 		for(VkQueue queue : m_queues)
 		{
-			vkQueueWaitIdle(queue);
+			if(queue)
+			{
+				vkQueueWaitIdle(queue);
+			}
 		}
 		vkDeviceWaitIdle(m_device);
 		m_crntSwapchain.reset(nullptr);
@@ -1218,19 +1244,18 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 		// Update the swapchain's fence
 		m_crntSwapchain->setFence(fence);
 
-		frame.m_queueWroteToSwapchainImage = getQueueTypeFromCommandBufferFlags(cmdb->getFlags());
+		frame.m_queueWroteToSwapchainImage = cmdb->getVulkanQueueType();
 	}
 
 	// Submit
 	{
 		ANKI_TRACE_SCOPED_EVENT(VK_QUEUE_SUBMIT);
-		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())], 1, &submit,
-									 fence->getHandle()));
+		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[cmdb->getVulkanQueueType()], 1, &submit, fence->getHandle()));
 	}
 
 	if(wait)
 	{
-		vkQueueWaitIdle(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())]);
+		vkQueueWaitIdle(m_queues[cmdb->getVulkanQueueType()]);
 	}
 }
 
@@ -1239,7 +1264,10 @@ void GrManagerImpl::finish()
 	LockGuard<Mutex> lock(m_globalMtx);
 	for(VkQueue queue : m_queues)
 	{
-		vkQueueWaitIdle(queue);
+		if(queue)
+		{
+			vkQueueWaitIdle(queue);
+		}
 	}
 }
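
Because the compute slot can now stay at MAX_U32, m_queues may contain VK_NULL_HANDLE, which is why every vkQueueWaitIdle() site above gained a null check. A standalone sketch of the guarded retrieval with plain Vulkan types (the function name is illustrative):

    #include <vulkan/vulkan.h>

    // Fetch one queue per slot only when its family exists; otherwise leave the
    // slot null so later wait-idle loops can simply skip it.
    void fetchQueues(VkDevice dev, const uint32_t* families, VkQueue* queues, uint32_t count)
    {
        constexpr uint32_t NO_FAMILY = 0xFFFFFFFFu;
        for(uint32_t i = 0; i < count; ++i)
        {
            queues[i] = VK_NULL_HANDLE;
            if(families[i] != NO_FAMILY)
            {
                vkGetDeviceQueue(dev, families[i], 0, &queues[i]);
            }
        }
    }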
 

+ 6 - 5   AnKi/Gr/Vulkan/GrManagerImpl.h

@@ -47,9 +47,10 @@ public:
 
 	ANKI_USE_RESULT Error init(const GrManagerInitInfo& cfg);
 
-	const Array<U32, U(QueueType::COUNT)>& getQueueFamilies() const
+	ConstWeakArray<U32> getQueueFamilies() const
 	{
-		return m_queueFamilyIndices;
+		const Bool hasAsyncCompute = m_queueFamilyIndices[VulkanQueueType::COMPUTE] != MAX_U32;
+		return (hasAsyncCompute) ? m_queueFamilyIndices : ConstWeakArray<U32>(&m_queueFamilyIndices[0], 1);
 	}
 
 	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
@@ -239,8 +240,8 @@ private:
 	VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
 	VulkanExtensions m_extensions = VulkanExtensions::NONE;
 	VkDevice m_device = VK_NULL_HANDLE;
-	Array<U32, U32(QueueType::COUNT)> m_queueFamilyIndices = {MAX_U32, MAX_U32};
-	Array<VkQueue, U32(QueueType::COUNT)> m_queues = {};
+	VulkanQueueFamilies m_queueFamilyIndices = {MAX_U32, MAX_U32};
+	Array<VkQueue, U32(VulkanQueueType::COUNT)> m_queues = {};
 	Mutex m_globalMtx;
 
 	VkPhysicalDeviceProperties2 m_devProps = {};
@@ -276,7 +277,7 @@ private:
 		/// Signaled by the submit that renders to the default FB. Present waits for it.
 		MicroSemaphorePtr m_renderSemaphore;
 
-		QueueType m_queueWroteToSwapchainImage = QueueType::COUNT;
+		VulkanQueueType m_queueWroteToSwapchainImage = VulkanQueueType::COUNT;
 	};
 
 	VkSurfaceKHR m_surface = VK_NULL_HANDLE;
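
Returning a one-element view when async compute is off is what makes the buffer, image, and swapchain creation sites in this commit pick VK_SHARING_MODE_EXCLUSIVE: their queueFamilyIndexCount comes straight from getSize(). Truncating to one entry is safe because GENERAL occupies slot 0. A short usage sketch based on those call sites:

    ConstWeakArray<U32> families = getGrManagerImpl().getQueueFamilies();
    const Bool concurrent = families.getSize() > 1; // false when async compute is disabled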

+ 1 - 1   AnKi/Gr/Vulkan/SwapchainFactory.cpp

@@ -146,9 +146,9 @@ Error MicroSwapchain::initInternal()
 		ci.imageExtent = surfaceProperties.currentExtent;
 		ci.imageArrayLayers = 1;
 		ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
-		ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
 		ci.queueFamilyIndexCount = m_factory->m_gr->getQueueFamilies().getSize();
 		ci.pQueueFamilyIndices = &m_factory->m_gr->getQueueFamilies()[0];
+		ci.imageSharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 		ci.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
 		ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 		ci.presentMode = presentMode;

+ 1 - 1   AnKi/Gr/Vulkan/TextureImpl.cpp

@@ -331,9 +331,9 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 	ci.samples = VK_SAMPLE_COUNT_1_BIT;
 	ci.tiling = VK_IMAGE_TILING_OPTIMAL;
 	ci.usage = convertTextureUsage(init.m_usage, init.m_format);
-	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
 	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
 	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
+	ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 	ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
 	ANKI_VK_CHECK(vkCreateImage(getDevice(), &ci, nullptr, &m_imageHandle));