Browse Source

Add alignment support in buddy allocator

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
d828f32a3d

+ 8 - 0
AnKi/Core/App.cpp

@@ -496,6 +496,14 @@ Error App::mainLoop()
 				GrManagerStats grStats = m_gr->getStats();
 				statsUi.setVkCpuMemory(grStats.m_cpuMemory);
 				statsUi.setVkGpuMemory(grStats.m_gpuMemory);
+				PtrSize userAllocatedSize;
+				PtrSize realAllocatedSize;
+				F64 externalFragmentation;
+				F64 internalFragmentation;
+				m_vertexMem->getMemoryInfo(userAllocatedSize, realAllocatedSize, externalFragmentation,
+										   internalFragmentation);
+				statsUi.setGlobalVertexMemoryPoolInfo(userAllocatedSize, realAllocatedSize, externalFragmentation,
+													  internalFragmentation);
 
 				statsUi.setVkCommandBufferCount(grStats.m_commandBufferCount);
 

+ 2 - 2
AnKi/Core/GpuMemoryPools.cpp

@@ -45,7 +45,7 @@ Error VertexGpuMemoryPool::init(GenericMemoryPoolAllocator<U8> alloc, GrManager*
 ANKI_USE_RESULT Error VertexGpuMemoryPool::allocate(PtrSize size, PtrSize& offset)
 {
 	U32 offset32;
-	const Bool success = m_buddyAllocator.allocate(size, offset32);
+	const Bool success = m_buddyAllocator.allocate(size, 4, offset32);
 	if(ANKI_UNLIKELY(!success))
 	{
 		ANKI_CORE_LOGE("Failed to allocate vertex memory of size: %zu", size);
@@ -59,7 +59,7 @@ ANKI_USE_RESULT Error VertexGpuMemoryPool::allocate(PtrSize size, PtrSize& offse
 
 void VertexGpuMemoryPool::free(PtrSize size, PtrSize offset)
 {
-	m_buddyAllocator.free(U32(offset), size);
+	m_buddyAllocator.free(U32(offset), size, 4);
 }
 
 StagingGpuMemoryPool::~StagingGpuMemoryPool()

+ 6 - 0
AnKi/Core/GpuMemoryPools.h

@@ -41,6 +41,12 @@ public:
 		return m_vertBuffer;
 	}
 
+	void getMemoryInfo(PtrSize& userAllocatedSize, PtrSize& realAllocatedSize, F64& externalFragmentation,
+					   F64& internalFragmentation) const
+	{ // Pure forwarder: the four references are handed unchanged to m_buddyAllocator.getInfo(), nothing is computed here.
+		m_buddyAllocator.getInfo(userAllocatedSize, realAllocatedSize, externalFragmentation, internalFragmentation);
+	}
+
 private:
 	GrManager* m_gr = nullptr;
 	BufferPtr m_vertBuffer;

+ 6 - 4
AnKi/Core/StatsUi.cpp

@@ -39,19 +39,19 @@ void StatsUi::labelBytes(PtrSize val, CString name) const
 	StringAuto timestamp(getAllocator());
 	if(gb)
 	{
-		timestamp.sprintf("%s: %4u,%04u,%04u,%04u", name.cstr(), gb, mb, kb, b);
+		timestamp.sprintf("%s: %u,%04u,%04u,%04u", name.cstr(), gb, mb, kb, b);
 	}
 	else if(mb)
 	{
-		timestamp.sprintf("%s: %4u,%04u,%04u", name.cstr(), mb, kb, b);
+		timestamp.sprintf("%s: %u,%04u,%04u", name.cstr(), mb, kb, b);
 	}
 	else if(kb)
 	{
-		timestamp.sprintf("%s: %4u,%04u", name.cstr(), kb, b);
+		timestamp.sprintf("%s: %u,%04u", name.cstr(), kb, b);
 	}
 	else
 	{
-		timestamp.sprintf("%s: %4u", name.cstr(), b);
+		timestamp.sprintf("%s: %u", name.cstr(), b);
 	}
 	ImGui::TextUnformatted(timestamp.cstr());
 }
@@ -103,6 +103,8 @@ void StatsUi::build(CanvasPtr canvas)
 		labelUint(m_freeCount, "Total frees");
 		labelBytes(m_vkCpuMem, "Vulkan CPU");
 		labelBytes(m_vkGpuMem, "Vulkan GPU");
+		labelBytes(m_globalVertexPool.m_userAllocatedSize, "Vertex/Index GPU memory");
+		labelBytes(m_globalVertexPool.m_realAllocatedSize, "Actual Vertex/Index GPU memory");
 
 		ImGui::Text("----");
 		ImGui::Text("Vulkan:");

+ 17 - 0
AnKi/Core/StatsUi.h

@@ -108,6 +108,15 @@ public:
 		m_drawableCount = v;
 	}
 
+	void setGlobalVertexMemoryPoolInfo(PtrSize userAllocatedSize, PtrSize realAllocatedSize, F64 externalFragmentation,
+									   F64 internalFragmentation)
+	{ // Stores the figures in m_globalVertexPool; both fragmentation values are narrowed from F64 to F32 first.
+		m_globalVertexPool.m_internalFragmentation = static_cast<F32>(internalFragmentation);
+		m_globalVertexPool.m_externalFragmentation = static_cast<F32>(externalFragmentation);
+		m_globalVertexPool.m_realAllocatedSize = realAllocatedSize;
+		m_globalVertexPool.m_userAllocatedSize = userAllocatedSize;
+	}
+
 private:
 	static constexpr U32 BUFFERED_FRAMES = 16;
 
@@ -158,6 +167,14 @@ private:
 	U64 m_freeCount = 0;
 	PtrSize m_vkCpuMem = 0;
 	PtrSize m_vkGpuMem = 0;
+	class
+	{
+	public:
+		PtrSize m_userAllocatedSize = 0; ///< Written by setGlobalVertexMemoryPoolInfo() from its userAllocatedSize argument.
+		PtrSize m_realAllocatedSize = 0; ///< Written by setGlobalVertexMemoryPoolInfo() from its realAllocatedSize argument.
+		F32 m_externalFragmentation = 0; ///< The setter narrows the incoming F64 to F32 before storing it.
+		F32 m_internalFragmentation = 0; ///< F32 like its sibling: the setter already narrows this value to F32.
+	} m_globalVertexPool;
 
 	// Vulkan
 	U32 m_vkCmdbCount = 0;

+ 4 - 2
AnKi/Util/BuddyAllocatorBuilder.h

@@ -51,14 +51,16 @@ public:
 
 	/// Allocate memory.
 	/// @param size The size of the allocation.
+	/// @param alignment The returned address should have this alignment.
 	/// @param[out] address The returned address if the allocation didn't fail. It will stay untouched if it failed.
 	/// @return True if the allocation succeeded.
-	ANKI_USE_RESULT Bool allocate(PtrSize size, Address& address);
+	ANKI_USE_RESULT Bool allocate(PtrSize size, PtrSize alignment, Address& address);
 
 	/// Free memory.
 	/// @param address The address to free.
+	/// @param alignment The alignment of the original allocation.
 	/// @param size The size of the allocation.
-	void free(Address address, PtrSize size);
+	void free(Address address, PtrSize size, PtrSize alignment);
 
 	/// Print a debug representation of the internal structures.
 	void debugPrint() const;

+ 42 - 7
AnKi/Util/BuddyAllocatorBuilder.inl.h

@@ -32,10 +32,28 @@ void BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::destroy()
 }
 
 template<U32 T_MAX_MEMORY_RANGE_LOG2, typename TLock>
-Bool BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::allocate(PtrSize size, Address& outAddress)
+Bool BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::allocate(PtrSize size, PtrSize alignment,
+																	 Address& outAddress)
 {
 	ANKI_ASSERT(size > 0 && size <= m_maxMemoryRange);
 
+	PtrSize alignedSize = nextPowerOfTwo(size);
+	U32 order = log2(alignedSize);
+	const PtrSize orderSize = pow2<PtrSize>(order);
+
+	// The alignment for the requested "size" is the "orderSize". If the "orderSize" doesn't satisfy the "alignment"
+	// parameter then we need to align the allocation address
+	const Bool needsPadding = !isAligned(alignment, orderSize);
+
+	if(needsPadding)
+	{
+		// We need more space to accommodate possible unaligned allocation address
+		alignedSize = nextPowerOfTwo(size + alignment);
+
+		// Re-calculate the order as well
+		order = log2(alignedSize);
+	}
+
 	LockGuard<TLock> lock(m_mutex);
 
 	// Lazy initialize
@@ -46,9 +64,6 @@ Bool BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::allocate(PtrSize siz
 	}
 
 	// Find the order to start the search
-	const PtrSize alignedSize = nextPowerOfTwo(size);
-	U32 order = log2(alignedSize);
-
 	while(m_freeLists[order].getSize() == 0)
 	{
 		++order;
@@ -79,7 +94,14 @@ Bool BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::allocate(PtrSize siz
 		--order;
 	}
 
-	ANKI_ASSERT(address + alignedSize <= m_maxMemoryRange);
+	// Align the returned address if needed
+	if(needsPadding)
+	{
+		alignRoundUp(alignment, address);
+	}
+
+	ANKI_ASSERT(address + size <= m_maxMemoryRange);
+	ANKI_ASSERT(isAligned(alignment, address));
 	m_userAllocatedSize += size;
 	m_realAllocatedSize += alignedSize;
 	ANKI_ASSERT(address <= getMaxNumericLimit<Address>());
@@ -88,9 +110,22 @@ Bool BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::allocate(PtrSize siz
 }
 
 template<U32 T_MAX_MEMORY_RANGE_LOG2, typename TLock>
-void BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::free(Address address, PtrSize size)
+void BuddyAllocatorBuilder<T_MAX_MEMORY_RANGE_LOG2, TLock>::free(Address address, PtrSize size, PtrSize alignment)
 {
-	const PtrSize alignedSize = nextPowerOfTwo(size);
+	PtrSize alignedSize = nextPowerOfTwo(size);
+	U32 order = log2(alignedSize);
+	const PtrSize orderSize = pow2<PtrSize>(order);
+
+	// See allocate()
+	const Bool needsPadding = !isAligned(alignment, orderSize);
+
+	if(needsPadding)
+	{
+		alignedSize = nextPowerOfTwo(size + alignment);
+
+		// Address was rounded up on allocate(), do the opposite
+		alignRoundDown(orderSize, address);
+	}
 
 	LockGuard<TLock> lock(m_mutex);
 

+ 8 - 0
AnKi/Util/Functions.h

@@ -173,6 +173,14 @@ inline constexpr Int nextPowerOfTwo(Int x)
 	return Int(res);
 }
 
+/// Round x down to a power of two: 130 gives 128, an exact power of two gives itself and 0 gives 0.
+template<typename Int, ANKI_ENABLE(std::is_integral<Int>::value)>
+inline constexpr Int previousPowerOfTwo(Int x)
+{
+	constexpr U32 topBit = U32(sizeof(U64) * 8 - 1); // Index of the most significant bit of a U64
+	return (x == 0) ? Int(0) : Int(1_U64 << (topBit - U32(__builtin_clzll(x)))); // The builtin is never called with 0
+}
+
 /// Get the aligned number rounded up.
 /// @param alignment The bytes of alignment
 /// @param value The value to align

+ 44 - 12
Tests/Util/BuddyAllocatorBuilder.cpp

@@ -5,35 +5,53 @@
 
 #include <Tests/Framework/Framework.h>
 #include <AnKi/Util/BuddyAllocatorBuilder.h>
+#include <tuple>
 
 namespace anki {
 
+/// Check if all memory has the same value. Returns 1 if every one of the "size" bytes equals "val" (or size is 0), else 0.
+static int memvcmp(const void* memory, U8 val, PtrSize size)
+{
+	const U8* mm = static_cast<const U8*>(memory);
+	return size == 0 || ((*mm == val) && memcmp(mm, mm + 1, size - 1) == 0); // Empty range is never read; else byte 0 is val and each byte equals the next
+}
+
 ANKI_TEST(Util, BuddyAllocatorBuilder)
 {
 	HeapAllocator<U8> alloc(allocAligned, nullptr);
 
 	// Simple
 	{
-		BuddyAllocatorBuilder<4, Mutex> buddy(alloc, 4);
+		BuddyAllocatorBuilder<32, Mutex> buddy(alloc, 32);
 
 		Array<U32, 2> addr;
-		Bool success = buddy.allocate(1, addr[0]);
-		success = buddy.allocate(3, addr[1]);
-		(void)success;
+		const Array<U32, 2> sizes = {58, 198010775};
+		const Array<U32, 2> alignments = {21, 17};
+		Bool success = buddy.allocate(sizes[0], alignments[0], addr[0]);
+		ANKI_TEST_EXPECT_EQ(success, true);
+		success = buddy.allocate(sizes[1], alignments[1], addr[1]);
+		ANKI_TEST_EXPECT_EQ(success, true);
 
 		// buddy.debugPrint();
 
-		buddy.free(addr[0], 1);
-		buddy.free(addr[1], 3);
+		buddy.free(addr[0], sizes[0], alignments[0]);
+		buddy.free(addr[1], sizes[1], alignments[1]);
 
 		// printf("\n");
 		// buddy.debugPrint();
 	}
 
-	// Fuzzy
+	// Fuzzy with alignment
 	{
 		BuddyAllocatorBuilder<32, Mutex> buddy(alloc, 32);
-		std::vector<std::pair<U32, U32>> allocations;
+		std::vector<std::tuple<U32, U32, U32, U8>> allocations;
+
+		U8* backingMemory = static_cast<U8*>(malloc(MAX_U32));
+		for(PtrSize i = 0; i < MAX_U32; ++i)
+		{
+			backingMemory[i] = i % MAX_U8;
+		}
+
 		for(U32 it = 0; it < 10000; ++it)
 		{
 			if((getRandom() % 2) == 0)
@@ -41,10 +59,14 @@ ANKI_TEST(Util, BuddyAllocatorBuilder)
 				// Do an allocation
 				U32 addr;
 				const U32 size = max<U32>(getRandom() % 256_MB, 1);
-				const Bool success = buddy.allocate(size, addr);
+				const U32 alignment = max<U32>(getRandom() % 24, 1);
+				const Bool success = buddy.allocate(size, alignment, addr);
+				// printf("al %u %u\n", size, alignment);
 				if(success)
 				{
-					allocations.push_back({addr, size});
+					const U8 bufferValue = getRandom() % MAX_U8;
+					memset(backingMemory + addr, bufferValue, size);
+					allocations.push_back({addr, size, alignment, bufferValue});
 				}
 			}
 			else
@@ -53,12 +75,22 @@ ANKI_TEST(Util, BuddyAllocatorBuilder)
 				if(allocations.size())
 				{
 					const PtrSize randPos = getRandom() % allocations.size();
-					buddy.free(allocations[randPos].first, allocations[randPos].second);
+
+					const U32 address = std::get<0>(allocations[randPos]);
+					const U32 size = std::get<1>(allocations[randPos]);
+					const U32 alignment = std::get<2>(allocations[randPos]);
+					const U8 bufferValue = std::get<3>(allocations[randPos]);
+
+					ANKI_TEST_EXPECT_EQ(memvcmp(backingMemory + address, bufferValue, size), 1);
+
+					// printf("fr %u %u\n", size, alignment);
+					buddy.free(address, size, alignment);
 
 					allocations.erase(allocations.begin() + randPos);
 				}
 			}
 		}
+		free(backingMemory);
 
 		// Get the fragmentation
 		PtrSize userAllocatedSize, realAllocatedSize;
@@ -71,7 +103,7 @@ ANKI_TEST(Util, BuddyAllocatorBuilder)
 		// Remove the remaining
 		for(const auto& pair : allocations)
 		{
-			buddy.free(pair.first, pair.second);
+			buddy.free(std::get<0>(pair), std::get<1>(pair), std::get<2>(pair));
 		}
 	}
 }