
Vulkan: Work on the GPU mem allocator and GPU buffer

Panagiotis Christopoulos Charitos 9 years ago
parent
commit
8ca1485c13

+ 32 - 1
include/anki/gr/vulkan/BufferImpl.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <anki/gr/vulkan/VulkanObject.h>
+#include <anki/gr/vulkan/GpuMemoryAllocator.h>
 
 namespace anki
 {
@@ -22,8 +23,38 @@ public:
 	{
 	}
 
-	~BufferImpl()
+	~BufferImpl();
+
+	ANKI_USE_RESULT Error init(
+		PtrSize size, BufferUsageBit usage, BufferAccessBit access);
+
+	ANKI_USE_RESULT void* map(
+		PtrSize offset, PtrSize range, BufferAccessBit access);
+
+	void unmap()
+	{
+		ANKI_ASSERT(isCreated());
+		ANKI_ASSERT(m_mapped);
+
+#if ANKI_ASSERTIONS
+		m_mapped = false;
+#endif
+	}
+
+private:
+	VkBuffer m_handle = VK_NULL_HANDLE;
+	GpuMemoryAllocationHandle m_memHandle;
+	U32 m_memIdx = 0;
+	BufferAccessBit m_access = BufferAccessBit::NONE;
+	U32 m_size = 0;
+
+#if ANKI_ASSERTIONS
+	Bool8 m_mapped = false;
+#endif
+
+	Bool isCreated() const
 	{
+		return m_handle != VK_NULL_HANDLE;
 	}
 };
 /// @}

+ 4 - 0
include/anki/gr/vulkan/Common.h

@@ -83,6 +83,10 @@ ANKI_USE_RESULT VkAttachmentLoadOp convertLoadOp(AttachmentLoadOperation ak);
 
 /// Convert store op.
 ANKI_USE_RESULT VkAttachmentStoreOp convertStoreOp(AttachmentStoreOperation ak);
+
+/// Convert buffer usage bitmask.
+ANKI_USE_RESULT VkBufferUsageFlags convertBufferUsageBit(
+	BufferUsageBit usageMask);
 /// @}
 
 } // end namespace anki

+ 25 - 25
include/anki/gr/vulkan/GpuMemoryAllocator.h

@@ -13,6 +13,7 @@ namespace anki
 
 // Forward
 class GpuMemoryAllocatorChunk;
+class GpuMemoryAllocatorClass;
 
 /// @addtogroup vulkan
 /// @{
@@ -23,14 +24,19 @@ class GpuMemoryAllocationHandle
 	friend class GpuMemoryAllocator;
 
 public:
-	VkDeviceMemory m_memory;
-	PtrSize m_offset;
+	VkDeviceMemory m_memory = VK_NULL_HANDLE;
+	PtrSize m_offset = MAX_PTR_SIZE;
+
+	Bool isEmpty() const
+	{
+		return m_memory == VK_NULL_HANDLE;
+	}
 
 private:
-	GpuMemoryAllocatorChunk* m_chunk;
+	GpuMemoryAllocatorChunk* m_chunk = nullptr;
 };
 
-/// GPU memory allocator.
+/// Dynamic GPU memory allocator.
 class GpuMemoryAllocator
 {
 public:
@@ -40,20 +46,20 @@ public:
 
 	~GpuMemoryAllocator();
 
-	void init(GenericMemoryPoolAllocator<U8> alloc,
-		VkDevice dev,
-		U memoryTypeIdx,
-		PtrSize chunkInitialSize,
-		F32 nextChunkScale,
-		PtrSize nextChunkBias);
+	void init(
+		GenericMemoryPoolAllocator<U8> alloc, VkDevice dev, U memoryTypeIdx);
 
 	/// Allocate GPU memory.
 	void allocate(PtrSize size, U alignment, GpuMemoryAllocationHandle& handle);
 
 	/// Free allocated memory.
-	void free(const GpuMemoryAllocationHandle& handle);
+	void free(GpuMemoryAllocationHandle& handle);
+
+	/// Get CPU visible address.
+	void* getMappedAddress(GpuMemoryAllocationHandle& handle);
 
 private:
+	using Class = GpuMemoryAllocatorClass;
 	using Chunk = GpuMemoryAllocatorChunk;
 
 	GenericMemoryPoolAllocator<U8> m_alloc;
@@ -61,24 +67,18 @@ private:
 	VkDevice m_dev = VK_NULL_HANDLE;
 	U32 m_memIdx;
 
-	IntrusiveList<Chunk> m_activeChunks;
-	Mutex m_mtx;
-
-	/// Size of the first chunk.
-	PtrSize m_initSize = 0;
+	/// The memory classes.
+	DynamicArray<Class> m_classes;
 
-	/// Chunk scale.
-	F32 m_scale = 2.0;
+	Class* findClass(PtrSize size, U32 alignment);
 
-	/// Chunk bias.
-	PtrSize m_bias = 0;
+	Chunk* findChunkWithUnusedSlot(Class& cl);
 
-	void createNewChunk();
+	/// Create or recycle chunk.
+	Chunk& createChunk(Class& cl);
 
-	Bool allocateFromChunk(PtrSize size,
-		U alignment,
-		Chunk& ch,
-		GpuMemoryAllocationHandle& handle);
+	/// Park the chunk.
+	void destroyChunk(Class& cl, Chunk& chunk);
 };
 /// @}
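
For reference, a minimal sketch of how this new interface is meant to be driven (the alloc/dev/memTypeIdx objects and the 256-byte request below are placeholders, not values from this commit):

	GpuMemoryAllocator gpuAlloc;
	gpuAlloc.init(alloc, dev, memTypeIdx); // one allocator per Vulkan memory type

	GpuMemoryAllocationHandle handle;
	gpuAlloc.allocate(256, 16, handle); // size and alignment; fills the handle
	void* cpuPtr = gpuAlloc.getMappedAddress(handle); // host-visible types only

	// handle.m_memory and handle.m_offset feed vkBindBufferMemory() and friends
	gpuAlloc.free(handle); // note the non-const reference: the handle is reset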
 

+ 39 - 5
include/anki/gr/vulkan/GrManagerImpl.h

@@ -35,6 +35,16 @@ public:
 
 	GrAllocator<U8> getAllocator() const;
 
+	U32 getGraphicsQueueFamily() const
+	{
+		return m_queueIdx;
+	}
+
+	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
+	{
+		return m_devProps;
+	}
+
 	void beginFrame();
 
 	void endFrame();
@@ -92,6 +102,33 @@ public:
 
 	void flushCommandBuffer(CommandBufferImpl& impl, CommandBufferPtr ptr);
 
+	/// @name Memory
+	/// @{
+	void allocateMemory(U memTypeIdx,
+		PtrSize size,
+		U alignment,
+		GpuMemoryAllocationHandle& handle)
+	{
+		m_gpuMemAllocs[memTypeIdx].allocate(size, alignment, handle);
+	}
+
+	void freeMemory(U memTypeIdx, GpuMemoryAllocationHandle& handle)
+	{
+		m_gpuMemAllocs[memTypeIdx].free(handle);
+	}
+
+	ANKI_USE_RESULT void* getMappedAddress(
+		U memTypeIdx, GpuMemoryAllocationHandle& handle)
+	{
+		return m_gpuMemAllocs[memTypeIdx].getMappedAddress(handle);
+	}
+
+	/// Find a suitable memory type.
+	U findMemoryType(U resourceMemTypeBits,
+		VkMemoryPropertyFlags preferFlags,
+		VkMemoryPropertyFlags avoidFlags) const;
+	/// @}
+
 private:
 	GrManager* m_manager = nullptr;
 
@@ -104,6 +141,8 @@ private:
 	VkQueue m_queue = VK_NULL_HANDLE;
 	Mutex m_queueSubmitMtx;
 
+	VkPhysicalDeviceProperties m_devProps = {};
+
 	/// @name Surface_related
 	/// @{
 	class PerFrame
@@ -192,11 +231,6 @@ private:
 	ANKI_USE_RESULT Error initGlobalPplineLayout();
 	void initMemory();
 
-	/// Find a suitable memory type.
-	U findMemoryType(U resourceMemTypeBits,
-		VkMemoryPropertyFlags preferFlags,
-		VkMemoryPropertyFlags avoidFlags) const;
-
 	static void* allocateCallback(void* userData,
 		size_t size,
 		size_t alignment,

+ 1 - 1
include/anki/util/BitSet.h

@@ -121,7 +121,7 @@ protected:
 	static const U CHUNK_COUNT = (N + (CHUNK_BIT_COUNT - 1)) / CHUNK_BIT_COUNT;
 
 	/// A mask for some stuff.
-	static const U MASK = 1 << (CHUNK_BIT_COUNT - 1);
+	static const ChunkType MASK = 1 << (CHUNK_BIT_COUNT - 1);
 
 	ChunkType m_chunks[CHUNK_COUNT];
 

+ 8 - 0
src/gr/vulkan/Buffer.cpp

@@ -23,16 +23,24 @@ Buffer::~Buffer()
 //==============================================================================
 void Buffer::init(PtrSize size, BufferUsageBit usage, BufferAccessBit access)
 {
+	m_impl.reset(getAllocator().newInstance<BufferImpl>(&getManager()));
+
+	if(m_impl->init(size, usage, access))
+	{
+		ANKI_LOGF("Cannot recover");
+	}
 }
 
 //==============================================================================
 void* Buffer::map(PtrSize offset, PtrSize range, BufferAccessBit access)
 {
+	return m_impl->map(offset, range, access);
 }
 
 //==============================================================================
 void Buffer::unmap()
 {
+	m_impl->unmap();
 }
 
 } // end namespace anki

+ 109 - 0
src/gr/vulkan/BufferImpl.cpp

@@ -4,3 +4,112 @@
 // http://www.anki3d.org/LICENSE
 
 #include <anki/gr/vulkan/BufferImpl.h>
+#include <anki/gr/vulkan/GrManagerImpl.h>
+
+namespace anki
+{
+
+//==============================================================================
+BufferImpl::~BufferImpl()
+{
+	ANKI_ASSERT(!m_mapped);
+
+	if(m_handle)
+	{
+		vkDestroyBuffer(getDevice(), m_handle, nullptr);
+	}
+
+	if(!m_memHandle.isEmpty())
+	{
+		getGrManagerImpl().freeMemory(m_memIdx, m_memHandle);
+	}
+}
+
+//==============================================================================
+Error BufferImpl::init(
+	PtrSize size, BufferUsageBit usage, BufferAccessBit access)
+{
+	ANKI_ASSERT(!isCreated());
+	ANKI_ASSERT(size > 0);
+	ANKI_ASSERT(usage != BufferUsageBit::NONE);
+
+	// Create the buffer
+	VkBufferCreateInfo ci = {};
+	ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+	ci.size = size;
+	ci.usage = convertBufferUsageBit(usage);
+	ci.queueFamilyIndexCount = 1;
+	U32 queueIdx = getGrManagerImpl().getGraphicsQueueFamily();
+	ci.pQueueFamilyIndices = &queueIdx;
+	ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
+
+	// Get mem requirements
+	VkMemoryRequirements req;
+	vkGetBufferMemoryRequirements(getDevice(), m_handle, &req);
+
+	if((access & (BufferAccessBit::CLIENT_MAP_READ
+					 | BufferAccessBit::CLIENT_MAP_WRITE))
+		!= BufferAccessBit::NONE)
+	{
+		m_memIdx = getGrManagerImpl().findMemoryType(req.memoryTypeBits,
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+				| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+				| VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+			0);
+
+		// Fallback
+		if(m_memIdx == MAX_U32)
+		{
+			m_memIdx = getGrManagerImpl().findMemoryType(req.memoryTypeBits,
+				VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+				0);
+		}
+	}
+	else
+	{
+		m_memIdx = getGrManagerImpl().findMemoryType(req.memoryTypeBits,
+			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+
+		// Fallback
+		if(m_memIdx == MAX_U32)
+		{
+			m_memIdx = getGrManagerImpl().findMemoryType(
+				req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
+		}
+	}
+
+	ANKI_ASSERT(m_memIdx != MAX_U32);
+
+	// Allocate
+	getGrManagerImpl().allocateMemory(
+		m_memIdx, req.size, req.alignment, m_memHandle);
+
+	// Bind mem to buffer
+	ANKI_VK_CHECK(vkBindBufferMemory(
+		getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
+
+	m_access = access;
+	m_size = size;
+	return ErrorCode::NONE;
+}
+
+//==============================================================================
+void* BufferImpl::map(PtrSize offset, PtrSize range, BufferAccessBit access)
+{
+	ANKI_ASSERT(isCreated());
+	ANKI_ASSERT((access & m_access) != BufferAccessBit::NONE);
+	ANKI_ASSERT(!m_mapped);
+	ANKI_ASSERT(offset + range <= m_size);
+
+	void* ptr = getGrManagerImpl().getMappedAddress(m_memIdx, m_memHandle);
+	ANKI_ASSERT(ptr);
+
+#if ANKI_ASSERTIONS
+	m_mapped = true;
+#endif
+
+	return static_cast<void*>(static_cast<U8*>(ptr) + offset);
+}
+
+} // end namespace anki

+ 35 - 0
src/gr/vulkan/Common.cpp

@@ -486,4 +486,39 @@ VkAttachmentStoreOp convertStoreOp(AttachmentStoreOperation ak)
 	return out;
 }
 
+//==============================================================================
+VkBufferUsageFlags convertBufferUsageBit(BufferUsageBit usageMask)
+{
+	VkBufferUsageFlags out = 0;
+
+	if((usageMask & BufferUsageBit::UNIFORM) != BufferUsageBit::NONE)
+	{
+		out |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+	}
+
+	if((usageMask & BufferUsageBit::STORAGE) != BufferUsageBit::NONE)
+	{
+		out |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	}
+
+	if((usageMask & BufferUsageBit::INDEX) != BufferUsageBit::NONE)
+	{
+		out |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+	}
+
+	if((usageMask & BufferUsageBit::VERTEX) != BufferUsageBit::NONE)
+	{
+		out |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+	}
+
+	if((usageMask & BufferUsageBit::INDIRECT) != BufferUsageBit::NONE)
+	{
+		out |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
+	}
+
+	ANKI_ASSERT(out);
+
+	return out;
+}
+
 } // end namespace anki

+ 316 - 66
src/gr/vulkan/GpuMemoryAllocator.cpp

@@ -4,10 +4,27 @@
 // http://www.anki3d.org/LICENSE
 
 #include <anki/gr/vulkan/GpuMemoryAllocator.h>
+#include <anki/util/BitSet.h>
 
 namespace anki
 {
 
+//==============================================================================
+// Misc                                                                        =
+//==============================================================================
+
+/// Define this type just to show what is what.
+using Page = U32;
+
+/// The size of the page. This is the minimum allocation size as well.
+const U PAGE_SIZE = 8 * 1024;
+
+/// Max number of sub allocations (aka slots) per chunk.
+const U MAX_SLOTS_PER_CHUNK = 128;
+
+#define ANKI_CHECK_HANDLE(handle_)                                             \
+	ANKI_ASSERT(handle_.m_memory && handle_.m_chunk && handle_.m_chunk->m_class)
+
 //==============================================================================
 // GpuMemoryAllocatorChunk                                                     =
 //==============================================================================
@@ -16,16 +33,47 @@ class GpuMemoryAllocatorChunk
 {
 public:
 	/// GPU mem.
-	VkDeviceMemory m_mem;
+	VkDeviceMemory m_mem = VK_NULL_HANDLE;
+
+	/// The in use slots mask.
+	BitSet<MAX_SLOTS_PER_CHUNK, U8> m_inUseSlots = {false};
+
+	/// The number of in-use slots.
+	U32 m_inUseSlotCount = 0;
+
+	/// The owner.
+	GpuMemoryAllocatorClass* m_class = nullptr;
+
+	/// It points to a CPU address if mapped.
+	U8* m_mappedAddress = nullptr;
+
+	/// Protect the m_mappedAddress. It's a SpinLock because we don't want a
+	/// whole mutex for every GpuMemoryAllocatorChunk.
+	SpinLock m_mtx;
+};
+
+//==============================================================================
+// GpuMemoryAllocatorClass                                                     =
+//==============================================================================
+class GpuMemoryAllocatorClass
+{
+public:
+	/// The active chunks.
+	IntrusiveList<GpuMemoryAllocatorChunk> m_inUseChunks;
 
-	/// Size of the allocation.
-	PtrSize m_size;
+	/// The empty chunks.
+	IntrusiveList<GpuMemoryAllocatorChunk> m_unusedChunks;
 
-	/// Current offset.
-	PtrSize m_offset;
+	/// The size of each chunk.
+	Page m_chunkPages = 0;
 
-	/// Number of allocations.
-	U32 m_allocationCount;
+	/// The max slot size for this class.
+	U32 m_maxSlotSize = 0;
+
+	/// The number of slots for a single chunk.
+	U32 m_slotsPerChunkCount = 0;
+
+	Mutex m_mtx;
 };
 
 //==============================================================================
@@ -35,75 +83,207 @@ public:
 //==============================================================================
 GpuMemoryAllocator::~GpuMemoryAllocator()
 {
+	for(Class& cl : m_classes)
+	{
+		if(!cl.m_inUseChunks.isEmpty())
+		{
+			ANKI_LOGW("Forgot to deallocate GPU memory");
+
+			while(!cl.m_inUseChunks.isEmpty())
+			{
+				Chunk* chunk = &cl.m_inUseChunks.getBack();
+				cl.m_inUseChunks.popBack();
+
+				// Unmap
+				if(chunk->m_mappedAddress)
+				{
+					vkUnmapMemory(m_dev, chunk->m_mem);
+				}
+
+				vkFreeMemory(m_dev, chunk->m_mem, nullptr);
+				m_alloc.deleteInstance(chunk);
+			}
+		}
+
+		while(!cl.m_unusedChunks.isEmpty())
+		{
+			Chunk* chunk = &cl.m_unusedChunks.getBack();
+			cl.m_unusedChunks.popBack();
+
+			// Unmap
+			if(chunk->m_mappedAddress)
+			{
+				vkUnmapMemory(m_dev, chunk->m_mem);
+			}
+
+			vkFreeMemory(m_dev, chunk->m_mem, nullptr);
+			m_alloc.deleteInstance(chunk);
+		}
+	}
+
+	m_classes.destroy(m_alloc);
 }
 
 //==============================================================================
-void GpuMemoryAllocator::init(GenericMemoryPoolAllocator<U8> alloc,
-	VkDevice dev,
-	U memoryTypeIdx,
-	PtrSize chunkInitialSize,
-	F32 nextChunkScale,
-	PtrSize nextChunkBias)
+void GpuMemoryAllocator::init(
+	GenericMemoryPoolAllocator<U8> alloc, VkDevice dev, U memoryTypeIdx)
 {
 	m_alloc = alloc;
 	m_dev = dev;
 	m_memIdx = memoryTypeIdx;
-	m_initSize = chunkInitialSize;
-	m_scale = nextChunkScale;
-	m_bias = nextChunkBias;
+
+	//
+	// Initialize the classes
+	//
+	const U CLASS_COUNT = 6;
+	m_classes.create(m_alloc, CLASS_COUNT);
+
+	// 1st class. From (0, 256] bytes
+	{
+		Class& c = m_classes[0];
+		c.m_chunkPages = 2;
+		c.m_maxSlotSize = 256;
+	}
+
+	// 2nd class. From (256, 4K] bytes
+	{
+		Class& c = m_classes[1];
+		c.m_chunkPages = 32;
+		c.m_maxSlotSize = 4 * 1024;
+	}
+
+	// 3rd class. From (4K, 128K] bytes
+	{
+		Class& c = m_classes[2];
+		c.m_chunkPages = 1024;
+		c.m_maxSlotSize = 128 * 1024;
+	}
+
+	// 4th class. From (128K, 1M] bytes
+	{
+		Class& c = m_classes[3];
+		c.m_chunkPages = 4 * 1024;
+		c.m_maxSlotSize = 1 * 1024 * 1024;
+	}
+
+	// 5th class. From (1M, 10M] bytes
+	{
+		Class& c = m_classes[4];
+		c.m_chunkPages = 10 * 1024;
+		c.m_maxSlotSize = 10 * 1024 * 1024;
+	}
+
+	// 6th class. From (10M, 80M] bytes
+	{
+		Class& c = m_classes[5];
+		c.m_chunkPages = 20 * 1024;
+		c.m_maxSlotSize = 80 * 1024 * 1024;
+	}
+
+	for(Class& c : m_classes)
+	{
+		ANKI_ASSERT(((c.m_chunkPages * PAGE_SIZE) % c.m_maxSlotSize) == 0);
+		c.m_slotsPerChunkCount = (c.m_chunkPages * PAGE_SIZE) / c.m_maxSlotSize;
+		ANKI_ASSERT(c.m_slotsPerChunkCount <= MAX_SLOTS_PER_CHUNK);
+	}
 }
 
 //==============================================================================
-Bool GpuMemoryAllocator::allocateFromChunk(
-	PtrSize size, U alignment, Chunk& ch, GpuMemoryAllocationHandle& handle)
+GpuMemoryAllocator::Class* GpuMemoryAllocator::findClass(
+	PtrSize size, U32 alignment)
 {
-	alignRoundUp(alignment, ch.m_offset);
-	if(ch.m_offset + size <= ch.m_size)
+	ANKI_ASSERT(size > 0 && alignment > 0);
+
+	PtrSize lowLimit = 0;
+	Class* it = m_classes.getBegin();
+	const Class* end = m_classes.getEnd();
+
+	while(it != end)
 	{
-		++ch.m_allocationCount;
+		PtrSize highLimit = it->m_maxSlotSize;
+
+		if(size > lowLimit && size <= highLimit)
+		{
+			if(alignment <= highLimit)
+			{
+				// Found the class
+				return it;
+			}
+			else
+			{
+				// The class found doesn't have the proper alignment. Need to
+				// go higher
 
-		handle.m_memory = ch.m_mem;
-		handle.m_offset = ch.m_offset;
-		handle.m_chunk = &ch;
-		return true;
+				while(++it != end)
+				{
+					if(alignment <= it->m_maxSlotSize)
+					{
+						// Now found something
+						return it;
+					}
+				}
+			}
+		}
+
+		lowLimit = highLimit;
+		++it;
 	}
-	else
+
+	return nullptr;
+}
+
+//==============================================================================
+GpuMemoryAllocator::Chunk* GpuMemoryAllocator::findChunkWithUnusedSlot(
+	Class& cl)
+{
+	auto it = cl.m_inUseChunks.getBegin();
+	const auto end = cl.m_inUseChunks.getEnd();
+	while(it != end)
 	{
-		return false;
+		if(it->m_inUseSlotCount < cl.m_slotsPerChunkCount)
+		{
+			return &(*it);
+		}
+
+		++it;
 	}
+
+	return nullptr;
 }
 
 //==============================================================================
-void GpuMemoryAllocator::createNewChunk()
+GpuMemoryAllocator::Chunk& GpuMemoryAllocator::createChunk(Class& cl)
 {
-	// Determine new chunk size
-	PtrSize newChunkSize;
-	if(!m_activeChunks.isEmpty())
+	Chunk* chunk = nullptr;
+
+	if(cl.m_unusedChunks.isEmpty())
 	{
-		newChunkSize = m_activeChunks.getBack().m_size * m_scale + m_bias;
+		// Create new
+
+		chunk = m_alloc.newInstance<Chunk>();
+		chunk->m_class = &cl;
+
+		VkMemoryAllocateInfo ci = {};
+		ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+		ci.allocationSize = cl.m_chunkPages * PAGE_SIZE;
+		ci.memoryTypeIndex = m_memIdx;
+		ANKI_VK_CHECKF(vkAllocateMemory(m_dev, &ci, nullptr, &chunk->m_mem));
 	}
 	else
 	{
-		newChunkSize = m_initSize;
-	}
+		// Recycle
 
-	Chunk* chunk = m_alloc.newInstance<Chunk>();
-	chunk->m_size = newChunkSize;
-	chunk->m_offset = 0;
-	chunk->m_allocationCount = 0;
+		chunk = &cl.m_unusedChunks.getFront();
+		cl.m_unusedChunks.popFront();
+	}
 
-	VkMemoryAllocateInfo inf;
-	inf.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-	inf.pNext = nullptr;
-	inf.allocationSize = newChunkSize;
-	inf.memoryTypeIndex = m_memIdx;
+	cl.m_inUseChunks.pushBack(chunk);
 
-	if(vkAllocateMemory(m_dev, &inf, nullptr, &chunk->m_mem))
-	{
-		ANKI_LOGF("Out of GPU memory");
-	}
+	ANKI_ASSERT(chunk->m_mem && chunk->m_class == &cl
+		&& chunk->m_inUseSlotCount == 0
+		&& !chunk->m_inUseSlots.getAny());
 
-	m_activeChunks.pushBack(chunk);
+	return *chunk;
 }
 
 //==============================================================================
@@ -112,39 +292,109 @@ void GpuMemoryAllocator::allocate(
 {
 	handle.m_memory = VK_NULL_HANDLE;
 
-	LockGuard<Mutex> lock(m_mtx);
+	// Find the class for the given size
+	Class* cl = findClass(size, alignment);
+	ANKI_ASSERT(cl && "Didn't find a suitable class");
+
+	LockGuard<Mutex> lock(cl->m_mtx);
+	Chunk* chunk = findChunkWithUnusedSlot(*cl);
 
-	if(m_activeChunks.isEmpty()
-		|| allocateFromChunk(size, alignment, m_activeChunks.getBack(), handle))
+	// Create a new chunk if needed
+	if(chunk == nullptr)
 	{
-		createNewChunk();
+		chunk = &createChunk(*cl);
 	}
 
-	if(handle.m_memory == VK_NULL_HANDLE)
+	// Allocate from chunk
+	U bitCount = cl->m_slotsPerChunkCount;
+	for(U i = 0; i < bitCount; ++i)
 	{
-		Bool success = allocateFromChunk(
-			size, alignment, m_activeChunks.getBack(), handle);
-		(void)success;
-		ANKI_ASSERT(success && "The chunk should have space");
+		if(!chunk->m_inUseSlots.get(i))
+		{
+			// Found an empty slot, allocate from it
+			chunk->m_inUseSlots.set(i);
+			++chunk->m_inUseSlotCount;
+
+			handle.m_memory = chunk->m_mem;
+			handle.m_offset = i * cl->m_maxSlotSize;
+			handle.m_chunk = chunk;
+
+			break;
+		}
 	}
+
+	ANKI_ASSERT(handle.m_memory && handle.m_chunk);
+	ANKI_ASSERT(isAligned(alignment, handle.m_offset));
 }
 
 //==============================================================================
-void GpuMemoryAllocator::free(const GpuMemoryAllocationHandle& handle)
+void GpuMemoryAllocator::destroyChunk(Class& cl, Chunk& chunk)
 {
-	ANKI_ASSERT(handle.m_memory && handle.m_chunk);
+	// Push the chunk to unused area
+	cl.m_inUseChunks.erase(&chunk);
+	cl.m_unusedChunks.pushBack(&chunk);
+
+	// Unmap. This may free some VA
+	if(chunk.m_mappedAddress)
+	{
+		vkUnmapMemory(m_dev, chunk.m_mem);
+		chunk.m_mappedAddress = nullptr;
+	}
+}
+
+//==============================================================================
+void GpuMemoryAllocator::free(GpuMemoryAllocationHandle& handle)
+{
+	ANKI_CHECK_HANDLE(handle);
+
+	Chunk& chunk = *handle.m_chunk;
+	Class& cl = *chunk.m_class;
 
-	LockGuard<Mutex> lock(m_mtx);
+	LockGuard<Mutex> lock(cl.m_mtx);
+	U slotIdx = handle.m_offset / cl.m_maxSlotSize;
 
-	ANKI_ASSERT(handle.m_chunk->m_allocationCount > 0);
+	ANKI_ASSERT(chunk.m_inUseSlots.get(slotIdx));
+	ANKI_ASSERT(chunk.m_inUseSlotCount > 0);
+	chunk.m_inUseSlots.unset(slotIdx);
+	--chunk.m_inUseSlotCount;
 
-	--handle.m_chunk->m_allocationCount;
-	if(handle.m_chunk->m_allocationCount == 0)
+	if(chunk.m_inUseSlotCount == 0)
 	{
-		m_activeChunks.erase(handle.m_chunk);
-		vkFreeMemory(m_dev, handle.m_chunk->m_mem, nullptr);
-		m_alloc.deleteInstance(handle.m_chunk);
+		destroyChunk(cl, chunk);
 	}
+
+	handle = {};
+}
+
+//==============================================================================
+void* GpuMemoryAllocator::getMappedAddress(GpuMemoryAllocationHandle& handle)
+{
+	ANKI_CHECK_HANDLE(handle);
+
+	Chunk& chunk = *handle.m_chunk;
+	U8* out = nullptr;
+
+	{
+		LockGuard<SpinLock> lock(chunk.m_mtx);
+		if(chunk.m_mappedAddress)
+		{
+			out = chunk.m_mappedAddress;
+		}
+		else
+		{
+			ANKI_VK_CHECKF(vkMapMemory(m_dev,
+				chunk.m_mem,
+				0,
+				chunk.m_class->m_chunkPages * PAGE_SIZE,
+				0,
+				reinterpret_cast<void**>(&out)));
+
+			chunk.m_mappedAddress = out;
+		}
+	}
+
+	ANKI_ASSERT(out);
+	return static_cast<void*>(out + handle.m_offset);
 }
 
 } // end namespace anki
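
For reference, the class setup in init() works out as follows (derived from the values above with PAGE_SIZE = 8 KB; the slot counts are computed, not taken from the source):

	class 0: slots up to 256 B,   chunk     2 pages =  16 KB -> 64 slots
	class 1: slots up to 4 KB,    chunk    32 pages = 256 KB -> 64 slots
	class 2: slots up to 128 KB,  chunk  1024 pages =   8 MB -> 64 slots
	class 3: slots up to 1 MB,    chunk  4096 pages =  32 MB -> 32 slots
	class 4: slots up to 10 MB,   chunk 10240 pages =  80 MB ->  8 slots
	class 5: slots up to 80 MB,   chunk 20480 pages = 160 MB ->  2 slots

Every count stays well under MAX_SLOTS_PER_CHUNK (128), which is what the assertions at the end of init() verify.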

+ 6 - 2
src/gr/vulkan/GrManagerImpl.cpp

@@ -145,6 +145,8 @@ GrManagerImpl::~GrManagerImpl()
 
 	m_perThread.destroy(getAllocator());
 
+	m_gpuMemAllocs.destroy(getAllocator());
+
 	if(m_swapchain)
 	{
 		vkDestroySwapchainKHR(m_device, m_swapchain, nullptr);
@@ -195,6 +197,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	vkGetDeviceQueue(m_device, m_queueIdx, 0, &m_queue);
 	ANKI_CHECK(initSwapchain(init));
 
+	initMemory();
 	ANKI_CHECK(initGlobalDsetLayout());
 	ANKI_CHECK(initGlobalPplineLayout());
 
@@ -278,6 +281,8 @@ Error GrManagerImpl::initInstance(const GrManagerInitInfo& init)
 	ANKI_VK_CHECK(
 		vkEnumeratePhysicalDevices(m_instance, &count, &m_physicalDevice));
 
+	vkGetPhysicalDeviceProperties(m_physicalDevice, &m_devProps);
+
 	return ErrorCode::NONE;
 }
 
@@ -650,7 +655,7 @@ void GrManagerImpl::initMemory()
 	U idx = 0;
 	for(GpuMemoryAllocator& alloc : m_gpuMemAllocs)
 	{
-		alloc.init(getAllocator(), m_device, idx++, 50 * 1024 * 1024, 1.0, 0);
+		alloc.init(getAllocator(), m_device, idx++);
 	}
 }
 
@@ -688,7 +693,6 @@ U GrManagerImpl::findMemoryType(U resourceMemTypeBits,
 	}
 	else
 	{
-		ANKI_ASSERT(preferedMed < MAX_U32);
 		return preferedMed;
 	}
 }
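
The body of findMemoryType() is not part of this diff (only its declaration moved to the public section), so here is a hypothetical, simplified sketch of the prefer/avoid contract that BufferImpl::init() relies on; the helper name is made up and the exact fallback behaviour (the removed preferedMed assertion hints at one) is not shown here:

	// Pick a memory type allowed by resourceMemTypeBits that has all of
	// preferFlags and none of avoidFlags. MAX_U32 means "nothing suitable";
	// callers such as BufferImpl::init() retry with relaxed flags.
	static U32 findMemoryTypeSketch(const VkPhysicalDeviceMemoryProperties& props,
		U32 resourceMemTypeBits,
		VkMemoryPropertyFlags preferFlags,
		VkMemoryPropertyFlags avoidFlags)
	{
		for(U32 i = 0; i < props.memoryTypeCount; ++i)
		{
			const Bool allowed = (resourceMemTypeBits & (1u << i)) != 0;
			const VkMemoryPropertyFlags flags = props.memoryTypes[i].propertyFlags;

			if(allowed && (flags & preferFlags) == preferFlags
				&& (flags & avoidFlags) == 0)
			{
				return i;
			}
		}

		return MAX_U32;
	}

props would come from vkGetPhysicalDeviceMemoryProperties() on the selected physical device.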

+ 1 - 1
src/script/lua_glue_gen.py

@@ -482,7 +482,7 @@ def constructor(constr_el, class_name):
 	wglue("")
 
 def destructor(class_name):
-	""" Create a destroctor """
+	""" Create a destructor """
 
 	global separator
 

+ 5 - 2
src/util/Memory.cpp

@@ -222,9 +222,12 @@ HeapMemoryPool::HeapMemoryPool()
 //==============================================================================
 HeapMemoryPool::~HeapMemoryPool()
 {
-	if(m_allocationsCount.load() != 0)
+	U count = m_allocationsCount.load();
+	if(count != 0)
 	{
-		ANKI_LOGW("Memory pool destroyed before all memory being released");
+		ANKI_LOGW("Memory pool destroyed before all memory was released "
+				  "(%u deallocations missed)",
+			count);
 	}
 }
 

+ 37 - 21
tests/gr/Gr.cpp

@@ -48,6 +48,15 @@ void main()
 	out_color = vec4(0.5);
 })";
 
+#define COMMON_BEGIN()                                                         \
+	NativeWindow* win = nullptr;                                               \
+	GrManager* gr = nullptr;                                                   \
+	createGrManager(win, gr)
+
+#define COMMON_END()                                                           \
+	delete gr;                                                                 \
+	delete win
+
 //==============================================================================
 static NativeWindow* createWindow()
 {
@@ -82,36 +91,27 @@ static void createGrManager(NativeWindow*& win, GrManager*& gr)
 //==============================================================================
 ANKI_TEST(Gr, GrManager)
 {
-	NativeWindow* win = nullptr;
-	GrManager* gr = nullptr;
-	createGrManager(win, gr);
-
-	delete gr;
-	delete win;
+	COMMON_BEGIN();
+	COMMON_END();
 }
 
 //==============================================================================
 ANKI_TEST(Gr, Shader)
 {
-	NativeWindow* win = nullptr;
-	GrManager* gr = nullptr;
-	createGrManager(win, gr);
+	COMMON_BEGIN();
 
 	{
 		ShaderPtr shader =
 			gr->newInstance<Shader>(ShaderType::VERTEX, VERT_SRC);
 	}
 
-	delete gr;
-	delete win;
+	COMMON_END();
 }
 
 //==============================================================================
 ANKI_TEST(Gr, Pipeline)
 {
-	NativeWindow* win = nullptr;
-	GrManager* gr = nullptr;
-	createGrManager(win, gr);
+	COMMON_BEGIN();
 
 	{
 		ShaderPtr vert = gr->newInstance<Shader>(ShaderType::VERTEX, VERT_SRC);
@@ -129,16 +129,13 @@ ANKI_TEST(Gr, Pipeline)
 		PipelinePtr ppline = gr->newInstance<Pipeline>(init);
 	}
 
-	delete gr;
-	delete win;
+	COMMON_END();
 }
 
 //==============================================================================
 ANKI_TEST(Gr, SimpleDrawcall)
 {
-	NativeWindow* win = nullptr;
-	GrManager* gr = nullptr;
-	createGrManager(win, gr);
+	COMMON_BEGIN();
 
 	{
 		ShaderPtr vert = gr->newInstance<Shader>(ShaderType::VERTEX, VERT_SRC);
@@ -192,8 +189,27 @@ ANKI_TEST(Gr, SimpleDrawcall)
 		}
 	}
 
-	delete gr;
-	delete win;
+	COMMON_END();
+}
+
+//==============================================================================
+ANKI_TEST(Gr, Buffer)
+{
+	COMMON_BEGIN();
+
+	{
+		BufferPtr a = gr->newInstance<Buffer>(
+			512, BufferUsageBit::UNIFORM, BufferAccessBit::NONE);
+
+		BufferPtr b = gr->newInstance<Buffer>(
+			64, BufferUsageBit::STORAGE, BufferAccessBit::CLIENT_MAP_WRITE);
+
+		void* ptr = b->map(0, 64, BufferAccessBit::CLIENT_MAP_WRITE);
+		ANKI_TEST_EXPECT_NEQ(ptr, nullptr);
+		b->unmap();
+	}
+
+	COMMON_END();
 }
 
 } // end namespace anki