
Add an async compute benchmark

Panagiotis Christopoulos Charitos, 10 months ago
Commit feb9722089

+ 3 - 1
AnKi/Gr/Common.h

@@ -63,7 +63,9 @@ inline BoolCVar g_dredCVar("Gr", "Dred", false, "Enable DRED");
 inline NumericCVar<PtrSize> g_diskShaderCacheMaxSizeCVar("Gr", "DiskShaderCacheMaxSize", 128_MB, 1_MB, 1_GB, "Max size of the pipeline cache file");
 inline BoolCVar g_debugPrintfCVar("Gr", "DebugPrintf", false, "Enable or not debug printf");
 inline BoolCVar g_samplerFilterMinMaxCVar("Gr", "SamplerFilterMinMax", true, "Enable or not min/max sample filtering");
-inline BoolCVar g_asyncComputeCVar("Gr", "AsyncCompute", true, "Enable or not async compute");
+inline NumericCVar<U8> g_asyncComputeCVar("Gr", "AsyncCompute", 0, 0, 2,
+										  "Control the async compute behaviour: 0: Try to use a separate queue family, 1: Use a lower priority queue in the "
+										  "general queue's family, 2: Use the general queue");
 inline StringCVar g_vkLayersCVar("Gr", "VkLayers", "", "VK layers to enable. Separated by :");
 #endif
 

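The CVar changes from an on/off Bool to a tri-state numeric. A minimal sketch of how the three values end up being interpreted, mirroring the selection logic in VkGrManager.cpp further down (the Bool names here are illustrative and not part of the commit):

	// 0 -> prefer a dedicated compute-only queue family (falls back if none exists)
	// 1 -> skip the dedicated family, use a 2nd lower-priority queue of the general family
	// 2 -> no async compute at all, everything runs on the single general queue
	const Bool preferDedicatedComputeFamily = (g_asyncComputeCVar == 0);
	const Bool allowLowPriorityQueueFallback = (g_asyncComputeCVar <= 1);
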
+ 35 - 28
AnKi/Gr/Vulkan/VkCommandBufferFactory.cpp

@@ -4,6 +4,7 @@
 // http://www.anki3d.org/LICENSE
 
 #include <AnKi/Gr/Vulkan/VkCommandBufferFactory.h>
+#include <AnKi/Gr/Vulkan/VkGrManager.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Core/StatsSet.h>
 
@@ -11,20 +12,6 @@ namespace anki {
 
 static StatCounter g_commandBufferCountStatVar(StatCategory::kMisc, "CommandBufferCount", StatFlag::kNone);
 
-static GpuQueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags, const VulkanQueueFamilies& queueFamilies)
-{
-	ANKI_ASSERT(!!(flags & CommandBufferFlag::kGeneralWork) ^ !!(flags & CommandBufferFlag::kComputeWork));
-	if(!(flags & CommandBufferFlag::kGeneralWork) && queueFamilies[GpuQueueType::kCompute] != kMaxU32)
-	{
-		return GpuQueueType::kCompute;
-	}
-	else
-	{
-		ANKI_ASSERT(queueFamilies[GpuQueueType::kGeneral] != kMaxU32);
-		return GpuQueueType::kGeneral;
-	}
-}
-
 void MicroCommandBufferPtrDeleter::operator()(MicroCommandBuffer* ptr)
 {
 	ANKI_ASSERT(ptr);
@@ -39,7 +26,10 @@ MicroCommandBuffer::~MicroCommandBuffer()
 
 	if(m_handle)
 	{
-		vkFreeCommandBuffers(getVkDevice(), m_threadAlloc->m_pools[m_queue], 1, &m_handle);
+		const U32 queueFamilyIdx =
+			(m_queue == GpuQueueType::kCompute && getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue) ? 0 : U32(m_queue);
+
+		vkFreeCommandBuffers(getVkDevice(), m_threadAlloc->m_pools[queueFamilyIdx], 1, &m_handle);
 		m_handle = {};
 
 		g_commandBufferCountStatVar.decrement(1_U64);
@@ -65,19 +55,15 @@ void MicroCommandBuffer::reset()
 
 Error CommandBufferThreadAllocator::init()
 {
-	for(GpuQueueType qtype : EnumIterable<GpuQueueType>())
+	ConstWeakArray<U32> families = getGrManagerImpl().getQueueFamilies();
+	for(U32 i = 0; i < families.getSize(); ++i)
 	{
-		if(CommandBufferFactory::getSingleton().m_queueFamilies[qtype] == kMaxU32)
-		{
-			continue;
-		}
-
 		VkCommandPoolCreateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
 		ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
-		ci.queueFamilyIndex = CommandBufferFactory::getSingleton().m_queueFamilies[qtype];
+		ci.queueFamilyIndex = families[i];
 
-		ANKI_VK_CHECK(vkCreateCommandPool(getVkDevice(), &ci, nullptr, &m_pools[qtype]));
+		ANKI_VK_CHECK(vkCreateCommandPool(getVkDevice(), &ci, nullptr, &m_pools[i]));
 	}
 
 	return Error::kNone;
@@ -108,9 +94,26 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 	ANKI_ASSERT(!!(cmdbFlags & CommandBufferFlag::kComputeWork) ^ !!(cmdbFlags & CommandBufferFlag::kGeneralWork));
 
 	const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::kSmallBatch);
-	const GpuQueueType queue = getQueueTypeFromCommandBufferFlags(cmdbFlags, CommandBufferFactory::getSingleton().m_queueFamilies);
 
-	MicroObjectRecycler<MicroCommandBuffer>& recycler = m_recyclers[smallBatch][queue];
+	GpuQueueType queue;
+	U32 queueFamilyIdx;
+	if(!!(cmdbFlags & CommandBufferFlag::kGeneralWork) || getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kDisabled)
+	{
+		queue = GpuQueueType::kGeneral;
+		queueFamilyIdx = 0;
+	}
+	else if(getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue)
+	{
+		queue = GpuQueueType::kCompute;
+		queueFamilyIdx = 0;
+	}
+	else
+	{
+		queue = GpuQueueType::kCompute;
+		queueFamilyIdx = 1;
+	}
+
+	MicroObjectRecycler<MicroCommandBuffer>& recycler = m_recyclers[smallBatch][queueFamilyIdx];
 
 	MicroCommandBuffer* out = recycler.findToReuse();
 
@@ -120,7 +123,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		VkCommandBufferAllocateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-		ci.commandPool = m_pools[queue];
+		ci.commandPool = m_pools[queueFamilyIdx];
 		ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 		ci.commandBufferCount = 1;
 
@@ -154,7 +157,6 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 	}
 
 	ANKI_ASSERT(out && out->m_refcount.load() == 0);
-	ANKI_ASSERT(out->m_flags == cmdbFlags);
 	outPtr.reset(out);
 	return Error::kNone;
 }
@@ -165,7 +167,12 @@ void CommandBufferThreadAllocator::deleteCommandBuffer(MicroCommandBuffer* ptr)
 
 	const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::kSmallBatch);
 
-	m_recyclers[smallBatch][ptr->m_queue].recycle(ptr);
+	const U32 queueFamilyIdx =
+		(ptr->m_queue == GpuQueueType::kCompute && getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue)
+			? 0
+			: U32(ptr->m_queue);
+
+	m_recyclers[smallBatch][queueFamilyIdx].recycle(ptr);
 }
 
 void CommandBufferFactory::destroy()
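The queue-type to command-pool-index mapping above now appears in three places (the MicroCommandBuffer destructor, newCommandBuffer() and deleteCommandBuffer()). A minimal consolidation sketch, assuming the surrounding AnKi Vulkan headers; the helper name is hypothetical and not part of the commit:

	// Hypothetical helper: compute work only gets its own pool (index 1) when a dedicated
	// compute queue family exists. The low-priority-queue fallback shares the general
	// family's pool (index 0); when async compute is disabled m_queue is never kCompute.
	static U32 commandPoolIndex(GpuQueueType queue)
	{
		const Bool sharesGeneralFamily =
			(queue == GpuQueueType::kCompute && getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kLowPriorityQueue);
		return sharesGeneralFamily ? 0 : U32(queue);
	}
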

+ 1 - 6
AnKi/Gr/Vulkan/VkCommandBufferFactory.h

@@ -196,10 +196,7 @@ class CommandBufferFactory : public MakeSingleton<CommandBufferFactory>
 	friend class MicroCommandBuffer;
 
 public:
-	CommandBufferFactory(const VulkanQueueFamilies& queueFamilies)
-		: m_queueFamilies(queueFamilies)
-	{
-	}
+	CommandBufferFactory() = default;
 
 	CommandBufferFactory(const CommandBufferFactory&) = delete; // Non-copyable
 
@@ -214,8 +211,6 @@ public:
 	Error newCommandBuffer(ThreadId tid, CommandBufferFlag cmdbFlags, MicroCommandBufferPtr& ptr);
 
 private:
-	VulkanQueueFamilies m_queueFamilies;
-
 	GrDynamicArray<CommandBufferThreadAllocator*> m_threadAllocs;
 	RWMutex m_threadAllocMtx;
 

+ 80 - 45
AnKi/Gr/Vulkan/VkGrManager.cpp

@@ -385,18 +385,6 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	ANKI_CHECK(initSurface());
 	ANKI_CHECK(initDevice());
 
-	for(GpuQueueType qtype : EnumIterable<GpuQueueType>())
-	{
-		if(m_queueFamilyIndices[qtype] != kMaxU32)
-		{
-			vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
-		}
-		else
-		{
-			m_queues[qtype] = VK_NULL_HANDLE;
-		}
-	}
-
 	SwapchainFactory::allocateSingleton(U32(g_vsyncCVar));
 	m_crntSwapchain = SwapchainFactory::getSingleton().newInstance();
 
@@ -405,7 +393,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 
 	ANKI_CHECK(initMemory());
 
-	CommandBufferFactory::allocateSingleton(m_queueFamilyIndices);
+	CommandBufferFactory::allocateSingleton();
 	FenceFactory::allocateSingleton();
 	SemaphoreFactory::allocateSingleton();
 	OcclusionQueryFactory::allocateSingleton();
@@ -808,24 +796,33 @@ Error GrManagerImpl::initDevice()
 	queueInfos.resize(count);
 	vkGetPhysicalDeviceQueueFamilyProperties(m_physicalDevice, &count, &queueInfos[0]);
 
-	const VkQueueFlags GENERAL_QUEUE_FLAGS = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
+	Bool generalQueueFamilySupportsMultipleQueues = false;
+
+	const VkQueueFlags generalQueueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
 	for(U32 i = 0; i < count; ++i)
 	{
 		VkBool32 supportsPresent = false;
 		ANKI_VK_CHECK(vkGetPhysicalDeviceSurfaceSupportKHR(m_physicalDevice, i, m_surface, &supportsPresent));
 
-		if(supportsPresent)
+		if(!supportsPresent)
 		{
-			if((queueInfos[i].queueFlags & GENERAL_QUEUE_FLAGS) == GENERAL_QUEUE_FLAGS)
-			{
-				m_queueFamilyIndices[GpuQueueType::kGeneral] = i;
-			}
-			else if((queueInfos[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && !(queueInfos[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
+			continue;
+		}
+
+		if((queueInfos[i].queueFlags & generalQueueFlags) == generalQueueFlags)
+		{
+			m_queueFamilyIndices[GpuQueueType::kGeneral] = i;
+
+			if(queueInfos[i].queueCount > 1)
 			{
-				// This must be the async compute
-				m_queueFamilyIndices[GpuQueueType::kCompute] = i;
+				generalQueueFamilySupportsMultipleQueues = true;
 			}
 		}
+		else if((queueInfos[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && !(queueInfos[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
+		{
+			// This must be the async compute
+			m_queueFamilyIndices[GpuQueueType::kCompute] = i;
+		}
 	}
 
 	if(m_queueFamilyIndices[GpuQueueType::kGeneral] == kMaxU32)
@@ -834,39 +831,58 @@ Error GrManagerImpl::initDevice()
 		return Error::kFunctionFailed;
 	}
 
-	if(!g_asyncComputeCVar)
-	{
-		m_queueFamilyIndices[GpuQueueType::kCompute] = kMaxU32;
-	}
-
-	if(m_queueFamilyIndices[GpuQueueType::kCompute] == kMaxU32)
-	{
-		ANKI_VK_LOGW("Couldn't find an async compute queue. Will try to use the general queue instead");
-	}
-	else
-	{
-		ANKI_VK_LOGI("Async compute is enabled");
-	}
+	const Bool pureAsyncCompute = m_queueFamilyIndices[GpuQueueType::kCompute] != kMaxU32 && g_asyncComputeCVar == 0;
+	const Bool lowPriorityQueueAsyncCompute = !pureAsyncCompute && generalQueueFamilySupportsMultipleQueues && g_asyncComputeCVar <= 1;
 
-	const F32 priority = 1.0f;
+	Array<F32, U32(GpuQueueType::kCount)> priorities = {1.0f, 0.5f};
 	Array<VkDeviceQueueCreateInfo, U32(GpuQueueType::kCount)> q = {};
+	q.fill({VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO});
 
 	VkDeviceCreateInfo ci = {};
 	ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
 	ci.pQueueCreateInfos = &q[0];
 
-	for(GpuQueueType qtype : EnumIterable<GpuQueueType>())
+	CString asyncComputeMsg;
+	if(pureAsyncCompute)
 	{
-		if(m_queueFamilyIndices[qtype] != kMaxU32)
-		{
-			q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-			q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
-			q[qtype].queueCount = 1;
-			q[qtype].pQueuePriorities = &priority;
+		asyncComputeMsg = "Using pure async compute queue";
 
-			++ci.queueCreateInfoCount;
-		}
+		q[GpuQueueType::kGeneral].queueFamilyIndex = m_queueFamilyIndices[GpuQueueType::kGeneral];
+		q[GpuQueueType::kGeneral].queueCount = 1;
+		q[GpuQueueType::kGeneral].pQueuePriorities = &priorities[0];
+
+		q[GpuQueueType::kCompute].queueFamilyIndex = m_queueFamilyIndices[GpuQueueType::kCompute];
+		q[GpuQueueType::kCompute].queueCount = 1;
+		q[GpuQueueType::kCompute].pQueuePriorities = &priorities[0];
+
+		ci.queueCreateInfoCount = 2;
 	}
+	else if(lowPriorityQueueAsyncCompute)
+	{
+		asyncComputeMsg = "Using low priority queue in same family as general queue (fallback #1)";
+
+		m_queueFamilyIndices[GpuQueueType::kCompute] = m_queueFamilyIndices[GpuQueueType::kGeneral];
+
+		q[0].queueFamilyIndex = m_queueFamilyIndices[GpuQueueType::kGeneral];
+		q[0].queueCount = 2;
+		q[0].pQueuePriorities = &priorities[0];
+
+		ci.queueCreateInfoCount = 1;
+	}
+	else
+	{
+		asyncComputeMsg = "Can't do much, using general queue (fallback #2)";
+
+		m_queueFamilyIndices[GpuQueueType::kCompute] = m_queueFamilyIndices[GpuQueueType::kGeneral];
+
+		q[0].queueFamilyIndex = m_queueFamilyIndices[GpuQueueType::kGeneral];
+		q[0].queueCount = 1;
+		q[0].pQueuePriorities = &priorities[0];
+
+		ci.queueCreateInfoCount = 1;
+	}
+
+	ANKI_VK_LOGI("Async compute: %s", asyncComputeMsg.cstr());
 
 	// Extensions
 	U32 extCount = 0;
@@ -1209,6 +1225,25 @@ Error GrManagerImpl::initDevice()
 
 	ANKI_VK_CHECK(vkCreateDevice(m_physicalDevice, &ci, nullptr, &m_device));
 
+	// Get the queues
+	vkGetDeviceQueue(m_device, m_queueFamilyIndices[GpuQueueType::kGeneral], 0, &m_queues[GpuQueueType::kGeneral]);
+	trySetVulkanHandleName("General", VK_OBJECT_TYPE_QUEUE, m_queues[GpuQueueType::kGeneral]);
+
+	if(pureAsyncCompute)
+	{
+		vkGetDeviceQueue(m_device, m_queueFamilyIndices[GpuQueueType::kCompute], 0, &m_queues[GpuQueueType::kCompute]);
+		trySetVulkanHandleName("AsyncCompute", VK_OBJECT_TYPE_QUEUE, m_queues[GpuQueueType::kCompute]);
+	}
+	else if(lowPriorityQueueAsyncCompute)
+	{
+		vkGetDeviceQueue(m_device, m_queueFamilyIndices[GpuQueueType::kGeneral], 1, &m_queues[GpuQueueType::kCompute]);
+		trySetVulkanHandleName("GeneralLowPriority", VK_OBJECT_TYPE_QUEUE, m_queues[GpuQueueType::kCompute]);
+	}
+	else
+	{
+		m_queues[GpuQueueType::kCompute] = nullptr;
+	}
+
 	return Error::kNone;
 }
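In short, initDevice() now picks one of three tiers. A condensed sketch of the decision, using the same booleans as the hunk above (a summary, not a drop-in replacement):

	// kProper:           a compute-only family exists and AsyncCompute == 0
	//                    -> one queue from each of the two families.
	// kLowPriorityQueue: otherwise, if the general family exposes more than one queue and AsyncCompute <= 1
	//                    -> two queues from the general family, the second at priority 0.5.
	// kDisabled:         otherwise -> a single general queue; m_queues[kCompute] stays null.
	const Bool pureAsyncCompute = m_queueFamilyIndices[GpuQueueType::kCompute] != kMaxU32 && g_asyncComputeCVar == 0;
	const Bool lowPriorityQueueAsyncCompute = !pureAsyncCompute && generalQueueFamilySupportsMultipleQueues && g_asyncComputeCVar <= 1;
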
 

+ 25 - 2
AnKi/Gr/Vulkan/VkGrManager.h

@@ -25,6 +25,13 @@ class MicroCommandBuffer;
 /// @addtogroup vulkan
 /// @{
 
+enum class AsyncComputeType
+{
+	kProper,
+	kLowPriorityQueue,
+	kDisabled
+};
+
 /// Vulkan implementation of GrManager.
 class GrManagerImpl : public GrManager
 {
@@ -41,10 +48,26 @@ public:
 
 	ConstWeakArray<U32> getQueueFamilies() const
 	{
-		const Bool hasAsyncCompute = m_queueFamilyIndices[GpuQueueType::kCompute] != kMaxU32;
+		const Bool hasAsyncCompute = m_queueFamilyIndices[GpuQueueType::kGeneral] != m_queueFamilyIndices[GpuQueueType::kCompute];
 		return (hasAsyncCompute) ? m_queueFamilyIndices : ConstWeakArray<U32>(&m_queueFamilyIndices[0], 1);
 	}
 
+	AsyncComputeType getAsyncComputeType() const
+	{
+		if(m_queues[GpuQueueType::kCompute] == nullptr)
+		{
+			return AsyncComputeType::kDisabled;
+		}
+		else if(m_queueFamilyIndices[GpuQueueType::kCompute] == m_queueFamilyIndices[GpuQueueType::kGeneral])
+		{
+			return AsyncComputeType::kLowPriorityQueue;
+		}
+		else
+		{
+			return AsyncComputeType::kProper;
+		}
+	}
+
 	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
 	{
 		return m_devProps.properties;
@@ -145,7 +168,7 @@ private:
 	VulkanExtensions m_extensions = VulkanExtensions::kNone;
 	VkDevice m_device = VK_NULL_HANDLE;
 	VulkanQueueFamilies m_queueFamilyIndices = {kMaxU32, kMaxU32};
-	Array<VkQueue, U32(GpuQueueType::kCount)> m_queues = {};
+	Array<VkQueue, U32(GpuQueueType::kCount)> m_queues = {nullptr, nullptr};
 	Mutex m_globalMtx;
 
 	VkPhysicalDeviceProperties2 m_devProps = {};
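Note that getQueueFamilies() now distinguishes the cases by comparing the two family indices instead of checking for kMaxU32. An illustrative assertion of the resulting contract (not in the commit):

	// Only the dedicated-family case (kProper) exposes two distinct queue families;
	// both fallbacks alias the compute family index to the general one.
	ConstWeakArray<U32> families = getGrManagerImpl().getQueueFamilies();
	ANKI_ASSERT(families.getSize() == ((getGrManagerImpl().getAsyncComputeType() == AsyncComputeType::kProper) ? 2u : 1u));
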

+ 3 - 3
AnKi/Physics2/Common.h

@@ -41,14 +41,14 @@ namespace v2 {
 #define ANKI_PHYSICS_COMMON_FRIENDS \
 	friend class PhysicsWorld; \
 	template<typename, typename> \
-	friend class IntrusivePtr; \
+	friend class anki::IntrusivePtr; \
 	template<typename, typename, typename> \
-	friend class BlockArray;
+	friend class anki::BlockArray;
 
 class PhysicsMemoryPool : public HeapMemoryPool, public MakeSingleton<PhysicsMemoryPool>
 {
 	template<typename>
-	friend class MakeSingleton;
+	friend class anki::MakeSingleton;
 
 private:
 	PhysicsMemoryPool(AllocAlignedCallback allocCb, void* allocCbUserData)

+ 1 - 1
AnKi/Physics2/PhysicsWorld.h

@@ -39,7 +39,7 @@ public:
 class PhysicsWorld : public MakeSingleton<PhysicsWorld>
 {
 	template<typename>
-	friend class MakeSingleton;
+	friend class anki::MakeSingleton;
 	friend class PhysicsCollisionShapePtrDeleter;
 	friend class PhysicsBodyPtrDeleter;
 	friend class PhysicsBody;

+ 6 - 1
Tests/CMakeLists.txt

@@ -4,4 +4,9 @@ include_directories("..")
 
 anki_new_executable(Tests ${sources})
 target_compile_definitions(Tests PRIVATE -DANKI_SOURCE_FILE)
-target_link_libraries(Tests AnKi AnKiShaderCompiler AnKiImporter)
+
+if(NOT ANDROID)
+	set(extra_libs "AnKiImporter")
+endif()
+
+target_link_libraries(Tests AnKi AnKiShaderCompiler ${extra_libs})

+ 4 - 0
Tests/Framework/Framework.cpp

@@ -150,6 +150,10 @@ Options:
 			}
 			testName = argv[i];
 		}
+		else
+		{
+			break;
+		}
 	}
 
 	// Sanity check

+ 404 - 0
Tests/Gr/GrAsyncCompute.cpp

@@ -0,0 +1,404 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <Tests/Framework/Framework.h>
+#include <Tests/Gr/GrCommon.h>
+#include <AnKi/Gr.h>
+#include <AnKi/Util/MemoryPool.h>
+#include <AnKi/Util/HighRezTimer.h>
+
+using namespace anki;
+
+static void generateSphere(DynamicArray<Vec3>& positions, DynamicArray<UVec3>& indices, U32 sliceCount, U32 stackCount)
+{
+	positions.emplaceBack(0.0f, 1.0f, 0.0f);
+	const U32 v0 = 0;
+
+	// generate vertices per stack / slice
+	for(U32 i = 0u; i < stackCount - 1; i++)
+	{
+		const F32 phi = kPi * (i + 1) / stackCount;
+		for(F32 j = 0u; j < sliceCount; j++)
+		{
+			const F32 theta = 2.0f * kPi * F32(j) / sliceCount;
+			const F32 x = sin(phi) * cos(theta);
+			const F32 y = cos(phi);
+			const F32 z = sin(phi) * sin(theta);
+			positions.emplaceBack(x, y, z);
+		}
+	}
+
+	// add bottom vertex
+	positions.emplaceBack(0.0f, -1.0f, 0.0f);
+	const U32 v1 = U32(positions.getSize() - 1);
+
+	// add top / bottom triangles
+	for(auto i = 0u; i < sliceCount; ++i)
+	{
+		auto i0 = i + 1;
+		auto i1 = (i + 1) % sliceCount + 1;
+		indices.emplaceBack(v0, i1, i0);
+		i0 = i + sliceCount * (stackCount - 2) + 1;
+		i1 = (i + 1) % sliceCount + sliceCount * (stackCount - 2) + 1;
+		indices.emplaceBack(v1, i0, i1);
+	}
+
+	// add quads per stack / slice
+	for(U32 j = 0u; j < stackCount - 2; j++)
+	{
+		const U32 j0 = j * sliceCount + 1;
+		const U32 j1 = (j + 1) * sliceCount + 1;
+		for(U32 i = 0u; i < sliceCount; i++)
+		{
+			const U32 i0 = j0 + i;
+			const U32 i1 = j0 + (i + 1) % sliceCount;
+			const U32 i2 = j1 + (i + 1) % sliceCount;
+			const U32 i3 = j1 + i;
+
+			indices.emplaceBack(i0, i1, i2);
+			indices.emplaceBack(i0, i2, i3);
+		}
+	}
+}
+
+ANKI_TEST(Gr, AsyncComputeBench)
+{
+	const Bool useAsyncQueue = true;
+	const Bool runConcurrently = true;
+	const U32 spheresToDrawPerDimension = 100;
+	const U32 windowSize = 512;
+
+	g_validationCVar.set(false); // TODO
+	g_debugMarkersCVar.set(false);
+	g_windowWidthCVar.set(windowSize);
+	g_windowHeightCVar.set(windowSize);
+	g_asyncComputeCVar.set(0);
+
+	DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
+	ShaderCompilerMemoryPool::allocateSingleton(allocAligned, nullptr);
+	initWindow();
+	initGrManager();
+	Input::allocateSingleton();
+
+	{
+		const CString computeShaderSrc = R"(
+RWTexture2D<float4> g_inTex : register(u0);
+RWTexture2D<float4> g_outTex : register(u1);
+
+[NumThreads(8, 8, 1)] void main(uint2 svDispatchThreadId : SV_DispatchThreadID)
+{
+	uint2 texSize;
+	g_inTex.GetDimensions(texSize.x, texSize.y);
+
+	float4 val = 0.0;
+	for(int x = -9; x <= 9; ++x)
+	{
+		for(int y = -9; y <= 9; ++y)
+		{
+			int2 coord = int2(svDispatchThreadId) + int2(x, y);
+			if(coord.x < 0 || coord.y < 0 || coord.x >= texSize.x || coord.y >= texSize.y)
+			{
+				continue;
+			}
+
+			val += g_inTex[coord];
+		}
+	}
+
+	g_outTex[svDispatchThreadId] = val;
+})";
+		const CString vertShaderSrc = R"(
+struct Consts
+{
+	float3 m_worldPosition;
+	float m_scale;
+
+	float4x4 m_viewProjMat;
+};
+
+#if defined(__spirv__)
+[[vk::push_constant]] ConstantBuffer<Consts> g_consts;
+#else
+ConstantBuffer<Consts> g_consts : register(b0, space3000);
+#endif
+
+float4 main(float3 svPosition : POSITION) : SV_Position
+{
+	return mul(g_consts.m_viewProjMat, float4(svPosition * g_consts.m_scale + g_consts.m_worldPosition, 1.0));
+})";
+
+		const CString pixelShaderSrc = R"(
+float4 main() : SV_Target0
+{
+	return float4(1.0, 0.0, 0.5, 0.0);
+})";
+
+		const CString blitVertShader = R"(
+struct VertOut
+{
+	float4 m_svPosition : SV_POSITION;
+	float2 m_uv : TEXCOORD;
+};
+
+VertOut main(uint vertId : SV_VERTEXID)
+{
+	const float2 coord = float2(vertId >> 1, vertId & 1);
+
+	VertOut output;
+	output.m_svPosition = float4(coord * float2(4.0, -4.0) + float2(-1.0, 1.0), 0.0, 1.0);
+	output.m_uv = coord * 2.0f;
+
+	return output;
+})";
+
+		const CString blitPixelShader = R"(
+struct VertOut
+{
+	float4 m_svPosition : SV_POSITION;
+	float2 m_uv : TEXCOORD;
+};
+
+Texture2D g_inTex : register(t0);
+SamplerState g_sampler : register(s0);
+
+float4 main(VertOut input) : SV_Target0
+{
+	return g_inTex.Sample(g_sampler, input.m_uv);
+})";
+
+		ShaderProgramPtr compProg = createComputeProg(computeShaderSrc);
+		ShaderProgramPtr graphicsProg = createVertFragProg(vertShaderSrc, pixelShaderSrc);
+		ShaderProgramPtr blitProg = createVertFragProg(blitVertShader, blitPixelShader);
+
+		DynamicArray<Vec3> positions;
+		DynamicArray<UVec3> indices;
+		generateSphere(positions, indices, 50, 50);
+
+		BufferPtr posBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(positions), "PosBuffer");
+		BufferPtr indexBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(indices), "IdxBuffer");
+
+		TextureInitInfo texInit("Tex");
+		texInit.m_width = texInit.m_height = 2048;
+		texInit.m_format = Format::kR32G32B32A32_Sfloat;
+		texInit.m_usage = TextureUsageBit::kUavCompute;
+		TexturePtr inTex = createTexture2d(texInit, Vec4(0.5f));
+		TexturePtr outTex = createTexture2d(texInit, Vec4(0.1f));
+
+		{
+			CommandBufferInitInfo cinit;
+			cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
+			CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cinit);
+
+			const TextureBarrierInfo barrier2 = {TextureView(inTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kCopyDestination,
+												 TextureUsageBit::kUavCompute};
+			cmdb->setPipelineBarrier({&barrier2, 1}, {}, {});
+			cmdb->endRecording();
+
+			FencePtr fence;
+			GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
+			fence->clientWait(kMaxSecond);
+		}
+
+		TextureInitInfo texInit2("RT");
+		texInit2.m_width = texInit2.m_height = windowSize;
+		texInit2.m_format = Format::kR32G32B32A32_Sfloat;
+		texInit2.m_usage = TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kSrvPixel;
+		TexturePtr rtTex = createTexture2d(texInit2, Vec4(0.5f));
+
+		SamplerInitInfo samplerInit("sampler");
+		SamplerPtr sampler = GrManager::getSingleton().newSampler(samplerInit);
+
+		Array<TimestampQueryPtr, 2> startTimestamps = {GrManager::getSingleton().newTimestampQuery(), GrManager::getSingleton().newTimestampQuery()};
+		TimestampQueryPtr endTimestamp = GrManager::getSingleton().newTimestampQuery();
+
+		FencePtr finalFence;
+
+		const U32 iterationCount = 1000;
+		for(U32 i = 0; i < iterationCount; ++i)
+		{
+			ANKI_TEST_EXPECT_NO_ERR(Input::getSingleton().handleEvents());
+			TexturePtr presentTex = GrManager::getSingleton().acquireNextPresentableTexture();
+
+			// Init command buffers
+			CommandBufferInitInfo cinit;
+			cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
+			CommandBufferPtr gfxCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
+
+			CommandBufferPtr compCmdb;
+			if(useAsyncQueue)
+			{
+				CommandBufferInitInfo cinit;
+				cinit.m_flags = CommandBufferFlag::kComputeWork | CommandBufferFlag::kSmallBatch;
+				compCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
+			}
+			else
+			{
+				compCmdb = gfxCmdb;
+			}
+
+			CommandBufferPtr blitCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
+
+			// Barriers
+			{
+				const TextureBarrierInfo rtBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
+													  TextureUsageBit::kRtvDsvWrite};
+				gfxCmdb->setPipelineBarrier({&rtBarrier, 1}, {}, {});
+
+				const TextureBarrierInfo uavBarrier = {TextureView(outTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
+													   TextureUsageBit::kUavCompute};
+				compCmdb->setPipelineBarrier({&uavBarrier, 1}, {}, {});
+
+				const TextureBarrierInfo blitBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
+														TextureUsageBit::kRtvDsvWrite};
+				blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
+			}
+
+			// Compute dispatch
+			{
+				if(i == 0)
+				{
+					compCmdb->writeTimestamp(startTimestamps[0].get());
+				}
+
+				compCmdb->bindShaderProgram(compProg.get());
+				compCmdb->bindUav(0, 0, TextureView(inTex.get(), TextureSubresourceDesc::all()));
+				compCmdb->bindUav(1, 0, TextureView(outTex.get(), TextureSubresourceDesc::all()));
+				compCmdb->dispatchCompute(inTex->getWidth() / 8, inTex->getHeight() / 8, 1);
+			}
+
+			// Draw spheres
+			{
+				if(i == 0)
+				{
+					compCmdb->writeTimestamp(startTimestamps[1].get());
+				}
+
+				RenderTarget rt;
+				rt.m_textureView = TextureView(rtTex.get(), TextureSubresourceDesc::all());
+				rt.m_loadOperation = RenderTargetLoadOperation::kClear;
+				rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
+				gfxCmdb->beginRenderPass({rt});
+
+				gfxCmdb->bindVertexBuffer(0, BufferView(posBuff.get()), sizeof(Vec3));
+				gfxCmdb->setVertexAttribute(VertexAttributeSemantic::kPosition, 0, Format::kR32G32B32_Sfloat, 0);
+				gfxCmdb->bindIndexBuffer(BufferView(indexBuff.get()), IndexType::kU32);
+				gfxCmdb->bindShaderProgram(graphicsProg.get());
+				gfxCmdb->setViewport(0, 0, windowSize, windowSize);
+
+				struct Consts
+				{
+					Vec3 m_worldPosition;
+					F32 m_scale;
+
+					Mat4 m_viewProjMat;
+				} consts;
+
+				constexpr F32 orthoHalfSize = 10.0f;
+				constexpr F32 orthoSize = orthoHalfSize * 2.0f;
+				const Mat4 viewMat = Mat4::getIdentity().getInverse();
+				const Mat4 projMat =
+					Mat4::calculateOrthographicProjectionMatrix(orthoHalfSize, -orthoHalfSize, orthoHalfSize, -orthoHalfSize, 0.1f, 200.0f);
+				consts.m_viewProjMat = projMat * viewMat;
+
+				consts.m_scale = 0.07f;
+
+				for(U32 x = 0; x < spheresToDrawPerDimension; ++x)
+				{
+					for(U32 y = 0; y < spheresToDrawPerDimension; ++y)
+					{
+						consts.m_worldPosition = Vec3(F32(x) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize,
+													  F32(y) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize, -1.0f);
+
+						gfxCmdb->setFastConstants(&consts, sizeof(consts));
+
+						gfxCmdb->drawIndexed(PrimitiveTopology::kTriangles, U32(indexBuff->getSize() / sizeof(U32)));
+					}
+				}
+
+				gfxCmdb->endRenderPass();
+			}
+
+			// Blit
+			{
+				const TextureBarrierInfo blitBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kRtvDsvWrite,
+														TextureUsageBit::kSrvPixel};
+				blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
+
+				RenderTarget rt;
+				rt.m_textureView = TextureView(presentTex.get(), TextureSubresourceDesc::all());
+				rt.m_loadOperation = RenderTargetLoadOperation::kDontCare;
+				rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
+				blitCmdb->beginRenderPass({rt});
+
+				blitCmdb->bindShaderProgram(blitProg.get());
+				blitCmdb->bindSrv(0, 0, TextureView(rtTex.get(), TextureSubresourceDesc::all()));
+				blitCmdb->bindSampler(0, 0, sampler.get());
+				blitCmdb->setViewport(0, 0, windowSize, windowSize);
+				blitCmdb->draw(PrimitiveTopology::kTriangles, 3);
+
+				blitCmdb->endRenderPass();
+
+				const TextureBarrierInfo presentBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()),
+														   TextureUsageBit::kRtvDsvWrite, TextureUsageBit::kPresent};
+				blitCmdb->setPipelineBarrier({&presentBarrier, 1}, {}, {});
+
+				if(i == iterationCount - 1)
+				{
+					compCmdb->writeTimestamp(endTimestamp.get());
+				}
+			}
+
+			gfxCmdb->endRecording();
+			blitCmdb->endRecording();
+			if(useAsyncQueue)
+			{
+				compCmdb->endRecording();
+			}
+
+			if(useAsyncQueue)
+			{
+				WeakArray<Fence*> firstWaveWaitFences;
+				Array<Fence*, 1> arr;
+				if(finalFence.isCreated())
+				{
+					arr = {finalFence.get()};
+					firstWaveWaitFences = {arr};
+				}
+
+				FencePtr fence2;
+				GrManager::getSingleton().submit(compCmdb.get(), firstWaveWaitFences, &fence2);
+
+				FencePtr fence1;
+				GrManager::getSingleton().submit(gfxCmdb.get(), firstWaveWaitFences, &fence1);
+
+				Array<Fence*, 2> waitFences = {{fence1.get(), fence2.get()}};
+				GrManager::getSingleton().submit(blitCmdb.get(), {waitFences}, &finalFence);
+			}
+			else
+			{
+				GrManager::getSingleton().submit(gfxCmdb.get());
+				GrManager::getSingleton().submit(blitCmdb.get(), {}, &finalFence);
+			}
+
+			GrManager::getSingleton().swapBuffers();
+		}
+
+		finalFence->clientWait(kMaxSecond);
+
+		Array<Second, 2> startTime;
+		ANKI_TEST_EXPECT_EQ(startTimestamps[0]->getResult(startTime[0]), TimestampQueryResult::kAvailable);
+		ANKI_TEST_EXPECT_EQ(startTimestamps[1]->getResult(startTime[1]), TimestampQueryResult::kAvailable);
+		Second endTime;
+		ANKI_TEST_EXPECT_EQ(endTimestamp->getResult(endTime), TimestampQueryResult::kAvailable);
+
+		ANKI_TEST_LOGI("GPU time %f\n", endTime - min(startTime[0], startTime[1]));
+	}
+
+	Input::freeSingleton();
+	GrManager::freeSingleton();
+	NativeWindow::freeSingleton();
+	ShaderCompilerMemoryPool::freeSingleton();
+	DefaultMemoryPool::freeSingleton();
+}
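The benchmark's per-frame submission pattern, condensed to its core (same GrManager calls as in the test above; the wait on the previous frame's finalFence is dropped for brevity and the fence names are illustrative):

	// Compute and graphics are recorded into separate command buffers and submitted
	// independently; the blit waits on both fences, so with a dedicated or low-priority
	// compute queue the two waves can overlap on the GPU.
	FencePtr computeDone, gfxDone;
	GrManager::getSingleton().submit(compCmdb.get(), {}, &computeDone);
	GrManager::getSingleton().submit(gfxCmdb.get(), {}, &gfxDone);
	Array<Fence*, 2> waitFences = {{gfxDone.get(), computeDone.get()}};
	GrManager::getSingleton().submit(blitCmdb.get(), {waitFences}, &finalFence);
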

+ 15 - 0
Tests/Gr/GrCommon.h

@@ -62,6 +62,18 @@ inline ShaderProgramPtr createVertFragProg(CString vert, CString frag, ConstWeak
 	return prog;
 }
 
+inline ShaderProgramPtr createComputeProg(CString src, ConstWeakArray<CString> extraCompilerArgs = {})
+{
+	ShaderPtr shader = createShader(src, ShaderType::kCompute, extraCompilerArgs);
+
+	ShaderProgramInitInfo init;
+	init.m_computeShader = shader.get();
+
+	ShaderProgramPtr prog = GrManager::getSingleton().newShaderProgram(init);
+
+	return prog;
+}
+
 inline ShaderPtr loadShader(CString filename, ShaderType type, ConstWeakArray<CString> extraCompilerArgs = {})
 {
 	File file;
@@ -166,6 +178,9 @@ inline TexturePtr createTexture2d(const TextureInitInfo texInit_, ConstWeakArray
 
 	CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
 
+	const TextureBarrierInfo barr = {TextureView(tex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
+									 TextureUsageBit::kCopyDestination};
+	cmdb->setPipelineBarrier({&barr, 1}, {}, {});
 	cmdb->copyBufferToTexture(BufferView(staging.get()), TextureView(tex.get(), TextureSubresourceDesc::all()));
 	cmdb->endRecording();
 

+ 0 - 2
Tests/Main.cpp

@@ -11,8 +11,6 @@ using namespace anki;
 ANKI_MAIN_FUNCTION(myMain)
 int myMain(int argc, char** argv)
 {
-	HeapAllocator<U8> alloc(allocAligned, nullptr);
-
 	int exitcode = getTesterSingleton().run(argc, argv);
 
 	deleteTesterSingleton();