
Improve the Vulkan memory allocation and its stats

Panagiotis Christopoulos Charitos 3 years ago
commit c99524e570

+ 1 - 5
AnKi/Core/App.cpp

@@ -494,15 +494,11 @@ Error App::mainLoop()
 				statsUi.setAllocatedCpuMemory(m_memStats.m_allocatedMem.load());
 				statsUi.setCpuAllocationCount(m_memStats.m_allocCount.load());
 				statsUi.setCpuFreeCount(m_memStats.m_freeCount.load());
-				GrManagerStats grStats = m_gr->getStats();
-				statsUi.setVkCpuMemory(grStats.m_cpuMemory);
-				statsUi.setVkGpuMemory(grStats.m_gpuMemory);
+				statsUi.setGrStats(m_gr->getStats());
 				BuddyAllocatorBuilderStats vertMemStats;
 				m_vertexMem->getMemoryStats(vertMemStats);
 				statsUi.setGlobalVertexMemoryPoolStats(vertMemStats);
 
-				statsUi.setVkCommandBufferCount(grStats.m_commandBufferCount);
-
 				statsUi.setDrawableCount(rqueue.countAllRenderables());
 			}
 

+ 11 - 4
AnKi/Core/StatsUi.cpp

@@ -97,18 +97,25 @@ void StatsUi::build(CanvasPtr canvas)
 		}
 
 		ImGui::Text("----");
-		ImGui::Text("Memory:");
+		ImGui::Text("CPU Memory:");
 		labelBytes(m_allocatedCpuMem, "Total CPU");
 		labelUint(m_allocCount, "Total allocations");
 		labelUint(m_freeCount, "Total frees");
-		labelBytes(m_vkCpuMem, "Vulkan CPU");
-		labelBytes(m_vkGpuMem, "Vulkan GPU");
+
+		ImGui::Text("----");
+		ImGui::Text("GPU Memory:");
+		labelBytes(m_grStats.m_hostMemoryAllocated, "Host");
+		labelBytes(m_grStats.m_hostMemoryInUse, "Host in use");
+		labelUint(m_grStats.m_hostMemoryAllocationCount, "Host allocations");
+		labelBytes(m_grStats.m_deviceMemoryAllocated, "Device");
+		labelBytes(m_grStats.m_deviceMemoryInUse, "Device in use");
+		labelUint(m_grStats.m_deviceMemoryAllocationCount, "Device allocations");
 		labelBytes(m_globalVertexPoolStats.m_userAllocatedSize, "Vertex/Index GPU memory");
 		labelBytes(m_globalVertexPoolStats.m_realAllocatedSize, "Actual Vertex/Index GPU memory");
 
 		ImGui::Text("----");
 		ImGui::Text("Vulkan:");
-		labelUint(m_vkCmdbCount, "Cmd buffers");
+		labelUint(m_grStats.m_commandBufferCount, "Cmd buffers");
 
 		ImGui::Text("----");
 		ImGui::Text("Other:");

+ 5 - 16
AnKi/Core/StatsUi.h

@@ -8,6 +8,7 @@
 #include <AnKi/Core/Common.h>
 #include <AnKi/Ui/UiImmediateModeBuilder.h>
 #include <AnKi/Util/BuddyAllocatorBuilder.h>
+#include <AnKi/Gr/GrManager.h>
 
 namespace anki {
 
@@ -89,19 +90,9 @@ public:
 		m_freeCount = v;
 	}
 
-	void setVkCpuMemory(PtrSize v)
+	void setGrStats(const GrManagerStats& stats)
 	{
-		m_vkCpuMem = v;
-	}
-
-	void setVkGpuMemory(PtrSize v)
-	{
-		m_vkGpuMem = v;
-	}
-
-	void setVkCommandBufferCount(U32 v)
-	{
-		m_vkCmdbCount = v;
+		m_grStats = stats;
 	}
 
 	void setDrawableCount(U64 v)
@@ -162,12 +153,10 @@ private:
 	PtrSize m_allocatedCpuMem = 0;
 	U64 m_allocCount = 0;
 	U64 m_freeCount = 0;
-	PtrSize m_vkCpuMem = 0;
-	PtrSize m_vkGpuMem = 0;
 	BuddyAllocatorBuilderStats m_globalVertexPoolStats = {};
 
-	// Vulkan
-	U32 m_vkCmdbCount = 0;
+	// GR
+	GrManagerStats m_grStats = {};
 
 	// Other
 	PtrSize m_drawableCount = 0;

+ 7 - 2
AnKi/Gr/GrManager.h

@@ -35,8 +35,13 @@ public:
 class GrManagerStats
 {
 public:
-	PtrSize m_cpuMemory = 0;
-	PtrSize m_gpuMemory = 0;
+	PtrSize m_deviceMemoryAllocated = 0;
+	PtrSize m_deviceMemoryInUse = 0;
+	U32 m_deviceMemoryAllocationCount = 0;
+	PtrSize m_hostMemoryAllocated = 0;
+	PtrSize m_hostMemoryInUse = 0;
+	U32 m_hostMemoryAllocationCount = 0;
+
 	U32 m_commandBufferCount = 0;
 };
 

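Note on the new stats fields: the old single m_cpuMemory/m_gpuMemory pair is split into allocated/in-use/allocation-count triplets for host and device memory, so a caller can see both how much VkDeviceMemory is reserved and how much of it is actually suballocated. A minimal consumer sketch (the helper below is hypothetical and not part of this commit; it assumes only the GrManagerStats fields shown above):

    #include <AnKi/Gr/GrManager.h>
    #include <cstdio>

    // Hypothetical logging helper for the new stats. PtrSize is cast for a portable printf.
    void printGrStats(const anki::GrManagerStats& s)
    {
    	printf("Device: %llu B allocated, %llu B in use, %u allocations\n",
    		   static_cast<unsigned long long>(s.m_deviceMemoryAllocated),
    		   static_cast<unsigned long long>(s.m_deviceMemoryInUse), s.m_deviceMemoryAllocationCount);
    	printf("Host:   %llu B allocated, %llu B in use, %u allocations\n",
    		   static_cast<unsigned long long>(s.m_hostMemoryAllocated),
    		   static_cast<unsigned long long>(s.m_hostMemoryInUse), s.m_hostMemoryAllocationCount);
    	printf("Command buffers: %u\n", s.m_commandBufferCount);
    }
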
+ 129 - 57
AnKi/Gr/Vulkan/GpuMemoryManager.cpp

@@ -7,13 +7,12 @@
 
 namespace anki {
 
-static constexpr Array<GpuMemoryManagerClassInfo, 8> CLASSES{{{256_B, 16_KB},
-															  {4_KB, 256_KB},
+static constexpr Array<GpuMemoryManagerClassInfo, 7> CLASSES{{{4_KB, 256_KB},
 															  {128_KB, 8_MB},
 															  {1_MB, 64_MB},
 															  {16_MB, 128_MB},
-															  {64_MB, 256_MB},
-															  {128_MB, 256_MB},
+															  {64_MB, 128_MB},
+															  {128_MB, 128_MB},
 															  {256_MB, 256_MB}}};
 
 /// Special classes for the ReBAR memory. Have that as a special case because it's so limited and needs special care.
@@ -91,6 +90,32 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>
 					 c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize);
 	}
 
+	// Image buffer granularity
+	{
+		VkPhysicalDeviceProperties props;
+		vkGetPhysicalDeviceProperties(pdev, &props);
+		m_bufferImageGranularity = U32(props.limits.bufferImageGranularity);
+		ANKI_ASSERT(m_bufferImageGranularity > 0 && isPowerOfTwo(m_bufferImageGranularity));
+
+		if(m_bufferImageGranularity > 4_KB)
+		{
+			ANKI_VK_LOGW(
+				"Buffer/image mem granularity is too high (%u). It will force high alignments and it will waste memory",
+				m_bufferImageGranularity);
+		}
+
+		for(const GpuMemoryManagerClassInfo& c : CLASSES)
+		{
+			if(!isAligned(m_bufferImageGranularity, c.m_suballocationSize))
+			{
+				ANKI_VK_LOGW("Memory class is not aligned to buffer/image granularity (%u). It won't be used in "
+							 "allocations: Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu",
+							 m_bufferImageGranularity, c.m_chunkSize, c.m_suballocationSize,
+							 c.m_chunkSize / c.m_suballocationSize);
+			}
+		}
+	}
+
 	vkGetPhysicalDeviceMemoryProperties(pdev, &m_memoryProperties);
 
 	m_alloc = alloc;
@@ -99,51 +124,49 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>
 	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount);
 	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
 	{
-		for(U32 linear = 0; linear < 2; ++linear)
+		GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
+		iface.m_parent = this;
+		iface.m_memTypeIdx = U8(memTypeIdx);
+		iface.m_exposesBufferGpuAddress = exposeBufferGpuAddress;
+
+		const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
+		iface.m_isDeviceMemory = !!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
+
+		// Find if it's ReBAR
+		const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
+		const VkMemoryPropertyFlags reBarProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+												 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+												 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+		const PtrSize heapSize =
+			m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size;
+		const Bool isReBar = props == reBarProps && heapSize <= 256_MB;
+
+		if(isReBar)
 		{
-			GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx][linear].getInterface();
-			iface.m_parent = this;
-			iface.m_memTypeIdx = U8(memTypeIdx);
-			iface.m_exposesBufferGpuAddress = (linear == 1) && exposeBufferGpuAddress;
-
-			const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
-			iface.m_isDeviceMemory =
-				!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
-
-			// Find if it's ReBAR
-			const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
-			const VkMemoryPropertyFlags reBarProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
-													 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-													 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-			const PtrSize heapSize =
-				m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size;
-			const Bool isReBar = props == reBarProps && heapSize <= 256_MB;
-
-			if(isReBar)
-			{
-				ANKI_VK_LOGV("Memory type %u is ReBAR", memTypeIdx);
-			}
-
-			// Choose different classes
-			if(!isReBar)
-			{
-				iface.m_classInfos = CLASSES;
-			}
-			else
-			{
-				iface.m_classInfos = REBAR_CLASSES;
-			}
+			ANKI_VK_LOGV("Memory type %u is ReBAR", memTypeIdx);
+		}
 
-			// The interface is initialized, init the builder
-			m_callocs[memTypeIdx][linear].init(m_alloc);
+		// Choose different classes
+		if(!isReBar)
+		{
+			iface.m_classInfos = CLASSES;
 		}
+		else
+		{
+			iface.m_classInfos = REBAR_CLASSES;
+		}
+
+		// The interface is initialized, init the builder
+		m_callocs[memTypeIdx].init(m_alloc);
 	}
 }
 
 void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource,
 									  GpuMemoryHandle& handle)
 {
-	ClassAllocator& calloc = m_callocs[memTypeIdx][linearResource];
+	ClassAllocator& calloc = m_callocs[memTypeIdx];
+
+	alignment = max(alignment, m_bufferImageGranularity);
 
 	GpuMemoryManagerChunk* chunk;
 	PtrSize offset;
@@ -154,15 +177,54 @@ void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignmen
 	handle.m_offset = offset;
 	handle.m_chunk = chunk;
 	handle.m_memTypeIdx = U8(memTypeIdx);
-	handle.m_linear = linearResource;
+	handle.m_size = size;
+}
+
+void GpuMemoryManager::allocateMemoryDedicated(U32 memTypeIdx, PtrSize size, VkImage image, GpuMemoryHandle& handle)
+{
+	VkMemoryDedicatedAllocateInfoKHR dedicatedInfo = {};
+	dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+	dedicatedInfo.image = image;
+
+	VkMemoryAllocateInfo memoryAllocateInfo = {};
+	memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+	memoryAllocateInfo.pNext = &dedicatedInfo;
+	memoryAllocateInfo.allocationSize = size;
+	memoryAllocateInfo.memoryTypeIndex = memTypeIdx;
+
+	VkDeviceMemory mem;
+	ANKI_VK_CHECKF(vkAllocateMemory(m_dev, &memoryAllocateInfo, nullptr, &mem));
+
+	handle.m_memory = mem;
+	handle.m_offset = 0;
+	handle.m_chunk = nullptr;
+	handle.m_memTypeIdx = U8(memTypeIdx);
+	handle.m_size = size;
+
+	m_dedicatedAllocatedMemory.fetchAdd(size);
+	m_dedicatedAllocationCount.fetchAdd(1);
 }
 
 void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
 {
 	ANKI_ASSERT(handle);
-	ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx][handle.m_linear];
 
-	calloc.free(handle.m_chunk, handle.m_offset);
+	if(handle.isDedicated())
+	{
+		vkFreeMemory(m_dev, handle.m_memory, nullptr);
+		const PtrSize prevSize = m_dedicatedAllocatedMemory.fetchSub(handle.m_size);
+		ANKI_ASSERT(prevSize >= handle.m_size);
+		(void)prevSize;
+
+		const U32 count = m_dedicatedAllocationCount.fetchSub(1);
+		ANKI_ASSERT(count > 0);
+		(void)count;
+	}
+	else
+	{
+		ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx];
+		calloc.free(handle.m_chunk, handle.m_offset);
+	}
 
 	handle = {};
 }
@@ -170,8 +232,9 @@ void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
 void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
 {
 	ANKI_ASSERT(handle);
+	ANKI_ASSERT(!handle.isDedicated());
 
-	LockGuard<SpinLock> lock(handle.m_chunk->m_m_mappedAddressMtx);
+	LockGuard<SpinLock> lock(handle.m_chunk->m_mappedAddressMtx);
 
 	if(handle.m_chunk->m_mappedAddress == nullptr)
 	{
@@ -224,26 +287,35 @@ U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFl
 	return prefered;
 }
 
-void GpuMemoryManager::getAllocatedMemory(PtrSize& gpuMemory, PtrSize& cpuMemory) const
+void GpuMemoryManager::getStats(GpuMemoryManagerStats& stats) const
 {
-	gpuMemory = 0;
-	cpuMemory = 0;
+	stats = {};
 
 	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
 	{
-		for(U32 linear = 0; linear < 2; ++linear)
+		const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
+		ClassAllocatorBuilderStats cstats;
+		m_callocs[memTypeIdx].getStats(cstats);
+
+		if(iface.m_isDeviceMemory)
 		{
-			const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx][linear].getInterface();
-			if(iface.m_isDeviceMemory)
-			{
-				gpuMemory += iface.m_allocatedMemory;
-			}
-			else
-			{
-				cpuMemory += iface.m_allocatedMemory;
-			}
+			stats.m_deviceMemoryAllocated += cstats.m_allocatedSize;
+			stats.m_deviceMemoryInUse += cstats.m_inUseSize;
+			stats.m_deviceMemoryAllocationCount += cstats.m_chunkCount;
+		}
+		else
+		{
+			stats.m_hostMemoryAllocated += cstats.m_allocatedSize;
+			stats.m_hostMemoryInUse += cstats.m_inUseSize;
+			stats.m_hostMemoryAllocationCount += cstats.m_chunkCount;
 		}
 	}
+
+	// Add dedicated stats
+	const PtrSize dedicatedAllocatedMemory = m_dedicatedAllocatedMemory.load();
+	stats.m_deviceMemoryAllocated += dedicatedAllocatedMemory;
+	stats.m_deviceMemoryInUse += dedicatedAllocatedMemory;
+	stats.m_deviceMemoryAllocationCount += m_dedicatedAllocationCount.load();
 }
 
 } // end namespace anki

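Note on the allocator change: the per-memory-type pair of class allocators (one for linear, one for non-linear resources) is collapsed into a single allocator, so a buffer and an image may now share a VkDeviceMemory chunk. Vulkan only permits that if the two resources do not fall into the same bufferImageGranularity page, which the diff guarantees by clamping every requested alignment up to that limit (and by warning about classes whose suballocation size is not granularity-aligned). A standalone sketch of the placement rule (illustrative names, assuming a power-of-two granularity as the added ANKI_ASSERT enforces):

    #include <algorithm>
    #include <cstdint>

    // Round an offset up to a power-of-two alignment.
    uint64_t alignUp(uint64_t offset, uint64_t alignment)
    {
    	return (offset + alignment - 1) & ~(alignment - 1);
    }

    // Effective rule after this change: never place a suballocation at a finer
    // granularity than the device's buffer/image granularity, so linear and
    // non-linear resources in one chunk can never alias within a granularity page.
    uint64_t placeSuballocation(uint64_t freeOffset, uint64_t requestedAlignment, uint64_t bufferImageGranularity)
    {
    	const uint64_t alignment = std::max(requestedAlignment, bufferImageGranularity);
    	return alignUp(freeOffset, alignment);
    }
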
+ 29 - 4
AnKi/Gr/Vulkan/GpuMemoryManager.h

@@ -33,7 +33,7 @@ public:
 	VkDeviceMemory m_handle = VK_NULL_HANDLE;
 
 	void* m_mappedAddress = nullptr;
-	SpinLock m_m_mappedAddressMtx;
+	SpinLock m_mappedAddressMtx;
 
 	PtrSize m_size = 0;
 
@@ -57,6 +57,7 @@ public:
 	Bool m_isDeviceMemory = false;
 
 	PtrSize m_allocatedMemory = 0;
+	PtrSize m_usedMemory = 0;
 
 	ConstWeakArray<GpuMemoryManagerClassInfo> m_classInfos;
 
@@ -94,8 +95,25 @@ public:
 
 private:
 	GpuMemoryManagerChunk* m_chunk = nullptr;
+	PtrSize m_size = MAX_PTR_SIZE;
 	U8 m_memTypeIdx = MAX_U8;
-	Bool m_linear = false;
+
+	Bool isDedicated() const
+	{
+		return m_chunk == nullptr;
+	}
+};
+
+/// @memberof GpuMemoryManager
+class GpuMemoryManagerStats
+{
+public:
+	PtrSize m_deviceMemoryAllocated;
+	PtrSize m_deviceMemoryInUse;
+	U32 m_deviceMemoryAllocationCount;
+	PtrSize m_hostMemoryAllocated;
+	PtrSize m_hostMemoryInUse;
+	U32 m_hostMemoryAllocationCount;
 };
 
 /// Dynamic GPU memory allocator for all types.
@@ -119,6 +137,8 @@ public:
 	/// Allocate memory.
 	void allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource, GpuMemoryHandle& handle);
 
+	void allocateMemoryDedicated(U32 memTypeIdx, PtrSize size, VkImage image, GpuMemoryHandle& handle);
 
 	/// Free memory.
 	void freeMemory(GpuMemoryHandle& handle);
 
 					   VkMemoryPropertyFlags avoidFlags) const;
 					   VkMemoryPropertyFlags avoidFlags) const;
 
 
 	/// Get some statistics.
 	/// Get some statistics.
-	void getAllocatedMemory(PtrSize& gpuMemory, PtrSize& cpuMemory) const;
+	void getStats(GpuMemoryManagerStats& stats) const;
 
 
 private:
 private:
 	using ClassAllocator = ClassAllocatorBuilder<GpuMemoryManagerChunk, GpuMemoryManagerInterface, Mutex>;
 	using ClassAllocator = ClassAllocatorBuilder<GpuMemoryManagerChunk, GpuMemoryManagerInterface, Mutex>;
@@ -139,9 +159,14 @@ private:
 
 
 	VkDevice m_dev = VK_NULL_HANDLE;
 	VkDevice m_dev = VK_NULL_HANDLE;
 
 
-	DynamicArray<Array<ClassAllocator, 2>> m_callocs;
+	DynamicArray<ClassAllocator> m_callocs;
 
 
 	VkPhysicalDeviceMemoryProperties m_memoryProperties;
 	VkPhysicalDeviceMemoryProperties m_memoryProperties;
+	U32 m_bufferImageGranularity = 0;
+
+	// Dedicated allocation stats
+	Atomic<PtrSize> m_dedicatedAllocatedMemory = {0};
+	Atomic<U32> m_dedicatedAllocationCount = {0};
 };
 };
 /// @}
 
+ 10 - 1
AnKi/Gr/Vulkan/GrManager.cpp

@@ -90,7 +90,16 @@ GrManagerStats GrManager::getStats() const
 	ANKI_VK_SELF_CONST(GrManagerImpl);
 	ANKI_VK_SELF_CONST(GrManagerImpl);
 	GrManagerStats out;
 
+	GpuMemoryManagerStats memStats;
+	self.getGpuMemoryManager().getStats(memStats);
+
+	out.m_deviceMemoryAllocated = memStats.m_deviceMemoryAllocated;
+	out.m_deviceMemoryInUse = memStats.m_deviceMemoryInUse;
+	out.m_deviceMemoryAllocationCount = memStats.m_deviceMemoryAllocationCount;
+	out.m_hostMemoryAllocated = memStats.m_hostMemoryAllocated;
+	out.m_hostMemoryInUse = memStats.m_hostMemoryInUse;
+	out.m_hostMemoryAllocationCount = memStats.m_hostMemoryAllocationCount;
+
 	out.m_commandBufferCount = self.getCommandBufferFactory().getCreatedCommandBufferCount();
 	out.m_commandBufferCount = self.getCommandBufferFactory().getCreatedCommandBufferCount();
 
 	return out;

+ 6 - 26
AnKi/Gr/Vulkan/TextureImpl.cpp

@@ -104,11 +104,6 @@ TextureImpl::~TextureImpl()
 	{
 		getGrManagerImpl().getGpuMemoryManager().freeMemory(m_memHandle);
 	}
-
-	if(m_dedicatedMem)
-	{
-		vkFreeMemory(getDevice(), m_dedicatedMem, nullptr);
-	}
 }
 
 Error TextureImpl::initInternal(VkImage externalImage, const TextureInitInfo& init_)
@@ -343,37 +338,22 @@ Error TextureImpl::initImage(const TextureInitInfo& init)
 
 	ANKI_ASSERT(memIdx != MAX_U32);
 
+	// Allocate
 	if(!dedicatedRequirements.prefersDedicatedAllocation)
 	{
-		// Allocate
 		getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, requirements.memoryRequirements.size,
 																U32(requirements.memoryRequirements.alignment), false,
 																m_memHandle);
-
-		// Bind mem to image
-		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
-		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_memHandle.m_memory, m_memHandle.m_offset));
 	}
 	else
 	{
-		VkMemoryDedicatedAllocateInfoKHR dedicatedInfo = {};
-		dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
-		dedicatedInfo.image = m_imageHandle;
-
-		VkMemoryAllocateInfo memoryAllocateInfo = {};
-		memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-		memoryAllocateInfo.pNext = &dedicatedInfo;
-		memoryAllocateInfo.allocationSize = requirements.memoryRequirements.size;
-		memoryAllocateInfo.memoryTypeIndex = memIdx;
-
-		ANKI_VK_CHECK(vkAllocateMemory(getDevice(), &memoryAllocateInfo, nullptr, &m_dedicatedMem));
-		getGrManagerImpl().trySetVulkanHandleName(init.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT,
-												  ptrToNumber(m_dedicatedMem));
-
-		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
-		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_dedicatedMem, 0));
+		getGrManagerImpl().getGpuMemoryManager().allocateMemoryDedicated(memIdx, requirements.memoryRequirements.size,
+																		 m_imageHandle, m_memHandle);
 	}
 
+	// Bind
+	ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_memHandle.m_memory, m_memHandle.m_offset));
+
 	return Error::NONE;
 }
 

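Note on the texture path: both branches now produce a GpuMemoryHandle and share a single vkBindImageMemory call (a dedicated handle simply carries offset 0 and a null chunk), and freeing is likewise unified through GpuMemoryManager::freeMemory. For context, prefersDedicatedAllocation comes from the dedicated-requirements query earlier in initImage; the snippet below is the standard Vulkan 1.1 way to obtain it and is an assumption about that code, not a quote of it:

    #include <vulkan/vulkan.h>

    // Standard query pattern: chain VkMemoryDedicatedRequirements into VkMemoryRequirements2.
    void queryImageMemoryRequirements(VkDevice device, VkImage image)
    {
    	VkImageMemoryRequirementsInfo2 reqInfo = {};
    	reqInfo.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
    	reqInfo.image = image;

    	VkMemoryDedicatedRequirements dedicatedRequirements = {};
    	dedicatedRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;

    	VkMemoryRequirements2 requirements = {};
    	requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
    	requirements.pNext = &dedicatedRequirements;

    	vkGetImageMemoryRequirements2(device, &reqInfo, &requirements);

    	// requirements.memoryRequirements.size/alignment feed allocateMemory()/allocateMemoryDedicated(),
    	// and dedicatedRequirements.prefersDedicatedAllocation selects the dedicated path above.
    }
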
+ 0 - 2
AnKi/Gr/Vulkan/TextureImpl.h

@@ -202,8 +202,6 @@ private:
 	/// the hashmap above.
 	MicroImageView m_singleSurfaceImageView;
 
-	VkDeviceMemory m_dedicatedMem = VK_NULL_HANDLE;
-
 #if ANKI_ENABLE_ASSERTIONS
 	mutable TextureUsageBit m_usedFor = TextureUsageBit::NONE;
 	mutable SpinLock m_usedForMtx;

+ 14 - 1
AnKi/Util/ClassAllocatorBuilder.h

@@ -13,6 +13,15 @@ namespace anki {
 /// @addtogroup util_memory
 /// @{
 
+/// @memberof ClassAllocatorBuilder
+class ClassAllocatorBuilderStats
+{
+public:
+	PtrSize m_allocatedSize;
+	PtrSize m_inUseSize;
+	U32 m_chunkCount; ///< Can be assosiated with the number of allocations.
+};
+
 /// This is a convenience class used to build class memory allocators.
 /// @tparam TChunk This is the type of the internally allocated chunks. This should be having the following members:
 ///                @code
@@ -79,6 +88,10 @@ public:
 		return m_interface;
 	}
 
+	/// Get some statistics.
+	/// @note It's thread-safe because it will lock. Don't overuse it.
+	void getStats(ClassAllocatorBuilderStats& stats) const;
+
 private:
 	/// A class of allocations. It's a list of memory chunks. Each chunk is dividied in suballocations.
 	class Class
@@ -94,7 +107,7 @@ private:
 		PtrSize m_suballocationSize = 0;
 
 		/// Lock.
-		TLock m_mtx;
+		mutable TLock m_mtx;
 	};
 
 	GenericMemoryPoolAllocator<U8> m_alloc;

+ 18 - 0
AnKi/Util/ClassAllocatorBuilder.inl.h

@@ -169,4 +169,22 @@ void ClassAllocatorBuilder<TChunk, TInterface, TLock>::free(TChunk* chunk, PtrSi
 	}
 }
 
+template<typename TChunk, typename TInterface, typename TLock>
+void ClassAllocatorBuilder<TChunk, TInterface, TLock>::getStats(ClassAllocatorBuilderStats& stats) const
+{
+	stats = {};
+
+	for(const Class& c : m_classes)
+	{
+		LockGuard<TLock> lock(c.m_mtx);
+
+		for(const TChunk& chunk : c.m_chunkList)
+		{
+			stats.m_allocatedSize += c.m_chunkSize;
+			stats.m_inUseSize += c.m_suballocationSize * chunk.m_suballocationCount;
+			++stats.m_chunkCount;
+		}
+	}
+}
+
 } // end namespace anki

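Note on the stats arithmetic: getStats counts every chunk in full as allocated memory, while in-use memory is the class suballocation size times the chunk's live suballocation count, so the gap between the two numbers is internal fragmentation; m_chunkCount ends up equal to the number of backing allocations (one VkDeviceMemory per chunk in the GPU memory manager). A worked example with illustrative values (it assumes, as the loop above implies, that each chunk tracks its live suballocations in m_suballocationCount):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
    	// One class: 8 MiB chunks split into 128 KiB suballocations.
    	const uint64_t chunkSize = 8ull * 1024 * 1024;
    	const uint64_t suballocationSize = 128ull * 1024;

    	// Two chunks exist, with 10 and 3 live suballocations respectively.
    	const uint32_t liveSuballocations[2] = {10, 3};

    	uint64_t allocated = 0, inUse = 0;
    	uint32_t chunkCount = 0;
    	for(uint32_t live : liveSuballocations)
    	{
    		allocated += chunkSize;            // the whole chunk counts as allocated
    		inUse += suballocationSize * live; // only live suballocations count as in use
    		++chunkCount;
    	}

    	// Prints: allocated=16777216 inUse=1703936 chunks=2
    	printf("allocated=%llu inUse=%llu chunks=%u\n", static_cast<unsigned long long>(allocated),
    		   static_cast<unsigned long long>(inUse), chunkCount);
    	return 0;
    }
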
+ 14 - 2
Tests/Util/ClassAllocatorBuilder.cpp

@@ -126,6 +126,14 @@ ANKI_TEST(Util, ClassAllocatorBuilder)
 	std::vector<std::pair<Chunk*, PtrSize>> allocations;
 	const U TEST_COUNT = 100;
 	const U ITERATIONS = 20;
+	const U maxAlignment = 256;
+
+	auto getRandAlignment = [&]() -> U {
+		U out = rand() % maxAlignment;
+		out = nextPowerOfTwo(out);
+		out = max<U>(1, out);
+		return out;
+	};
 
 	for(U tests = 0; tests < TEST_COUNT; ++tests)
 	{
@@ -137,12 +145,14 @@ ANKI_TEST(Util, ClassAllocatorBuilder)
 				const PtrSize size = nextAllocSize();
 				Chunk* chunk;
 				PtrSize offset;
+				const U alignment = getRandAlignment();
 
-				if(calloc.allocate(size, 1, chunk, offset))
+				if(calloc.allocate(size, alignment, chunk, offset))
 				{
 					break;
 				}
 
+				ANKI_TEST_EXPECT_EQ(isAligned(alignment, offset), true);
 				allocations.push_back({chunk, offset});
 			}
 
@@ -185,8 +195,10 @@ ANKI_TEST(Util, ClassAllocatorBuilder)
 		{
 			Chunk* chunk;
 			PtrSize offset;
-			while(calloc.allocate(baseFreeSize, 1, chunk, offset) == Error::NONE)
+			const U alignment = getRandAlignment();
+			while(calloc.allocate(baseFreeSize, alignment, chunk, offset) == Error::NONE)
 			{
+				ANKI_TEST_EXPECT_EQ(isAligned(alignment, offset), true);
 				score += (pow(POWER, (log2(F32(baseFreeSize / BASE_SIZE)) + BIAS)) + OFFSET) * F32(baseFreeSize);
 				allocations.push_back({chunk, offset});
 			}