Browse Source

Implemented crude memory allocator for Vulkan (#1429)

This is enough to stay well below the 4096 allocation limit
Jorrit Rouwe 7 months ago
parent
commit
636b080266

+ 3 - 17
TestFramework/Renderer/VK/BufferVK.h

@@ -10,26 +10,12 @@
 class BufferVK
 {
 public:
-	/// Free memory associated with a buffer
-	void						Free(VkDevice inDevice)
-	{
-		if (mBuffer != VK_NULL_HANDLE)
-		{
-			vkDestroyBuffer(inDevice, mBuffer, nullptr);
-			mBuffer = VK_NULL_HANDLE;
-		}
-
-		if (mMemory != VK_NULL_HANDLE)
-		{
-			vkFreeMemory(inDevice, mMemory, nullptr);
-			mMemory = VK_NULL_HANDLE;
-		}
-	}
-
 	VkBuffer					mBuffer = VK_NULL_HANDLE;
 	VkDeviceMemory				mMemory = VK_NULL_HANDLE;
+	VkDeviceSize				mOffset = 0;				///< Offset of this buffer inside mMemory, used when binding and mapping the memory
+	VkDeviceSize				mSize = 0;
 
 	VkBufferUsageFlags			mUsage;
 	VkMemoryPropertyFlags		mProperties;
-	VkDeviceSize				mSize = 0;
+	VkDeviceSize				mAllocatedSize = 0;			///< Size of the memory backing this buffer: the exact requirement for a direct allocation, the rounded up sub block size for a pooled one
 };

+ 1 - 1
TestFramework/Renderer/VK/ConstantBufferVK.cpp

@@ -22,7 +22,7 @@ ConstantBufferVK::~ConstantBufferVK()
 void *ConstantBufferVK::MapInternal()
 {
 	void *data = nullptr;
-	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mBuffer.mMemory, 0, mBuffer.mSize, 0, &data));
+	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mBuffer.mMemory, mBuffer.mOffset, mBuffer.mSize, 0, &data)); // Map only this buffer's range of the memory object. NOTE(review): mMemory may be shared with other sub allocated buffers and Vulkan does not allow mapping a VkDeviceMemory that is already mapped - confirm two buffers of the same block are never mapped at the same time
 	return data;
 }
 

+ 1 - 1
TestFramework/Renderer/VK/RenderInstancesVK.cpp

@@ -23,7 +23,7 @@ void RenderInstancesVK::CreateBuffer(int inNumInstances, int inInstanceSize)
 void *RenderInstancesVK::Lock()
 {
 	void *data;
-	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mInstancesBuffer.mMemory, 0, mInstancesBuffer.mSize, 0, &data));
+	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mInstancesBuffer.mMemory, mInstancesBuffer.mOffset, mInstancesBuffer.mSize, 0, &data)); // Map starting at mOffset: the buffer can be a sub range of a larger memory block
 	return data;
 }
 

+ 2 - 2
TestFramework/Renderer/VK/RenderPrimitiveVK.cpp

@@ -42,7 +42,7 @@ void *RenderPrimitiveVK::LockVertexBuffer()
 	JPH_ASSERT(!mVertexBufferDeviceLocal);
 
 	void *data;
-	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mVertexBuffer.mMemory, 0, VkDeviceSize(mNumVtx) * mVtxSize, 0, &data));
+	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mVertexBuffer.mMemory, mVertexBuffer.mOffset, VkDeviceSize(mNumVtx) * mVtxSize, 0, &data));
 	return data;
 }
 
@@ -70,7 +70,7 @@ uint32 *RenderPrimitiveVK::LockIndexBuffer()
 	JPH_ASSERT(!mIndexBufferDeviceLocal);
 
 	void *data;
-	vkMapMemory(mRenderer->GetDevice(), mIndexBuffer.mMemory, 0, VkDeviceSize(mNumIdx) * sizeof(uint32), 0, &data);
+	FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mIndexBuffer.mMemory, mIndexBuffer.mOffset, VkDeviceSize(mNumIdx) * sizeof(uint32), 0, &data)); // Check the result (as LockVertexBuffer does) so a failed map never returns the uninitialized pointer
 	return reinterpret_cast<uint32 *>(data);
 }
 

+ 98 - 18
TestFramework/Renderer/VK/RendererVK.cpp

@@ -39,6 +39,9 @@ RendererVK::~RendererVK()
 {
 	vkDeviceWaitIdle(mDevice);
 
+	// Trace allocation stats
+	Trace("VK: Max allocations: %u, max size: %u MB", mMaxNumAllocations, uint32(mMaxTotalAllocated >> 20));
+
 	// Destroy the shadow map
 	mShadowMap = nullptr;
 	vkDestroyFramebuffer(mDevice, mShadowFrameBuffer, nullptr);
@@ -50,16 +53,22 @@ RendererVK::~RendererVK()
 		cb = nullptr;
 	for (unique_ptr<ConstantBufferVK> &cb : mPixelShaderConstantBuffer)
 		cb = nullptr;
-
+	
 	// Free all buffers
 	for (BufferCache &bc : mFreedBuffers)
 		for (BufferCache::value_type &vt : bc)
 			for (BufferVK &bvk : vt.second)
-				bvk.Free(mDevice);
+				FreeBufferInternal(bvk);
 	for (BufferCache::value_type &vt : mBufferCache)
 		for (BufferVK &bvk : vt.second)
-			bvk.Free(mDevice);
+			FreeBufferInternal(bvk);
 
+	// Free all blocks in the memory cache
+	for (MemoryCache::value_type &mc : mMemoryCache)
+		for (Memory &m : mc.second)
+			if (m.mOffset == 0)
+				vkFreeMemory(mDevice, m.mMemory, nullptr); // Don't care about memory tracking anymore
+	
 	for (VkFence fence : mInFlightFences)
 		vkDestroyFence(mDevice, fence, nullptr);
 
@@ -735,8 +744,8 @@ void RendererVK::DestroySwapChain()
 	if (mDepthImageView != VK_NULL_HANDLE)
 	{
 		vkDestroyImageView(mDevice, mDepthImageView, nullptr);
-		vkDestroyImage(mDevice, mDepthImage, nullptr);
-		vkFreeMemory(mDevice, mDepthImageMemory, nullptr);
+
+		DestroyImage(mDepthImage, mDepthImageMemory);
 	}
 
 	for (VkFramebuffer frame_buffer : mSwapChainFramebuffers)
@@ -794,7 +803,7 @@ void RendererVK::BeginFrame(const CameraState &inCamera, float inWorldScale)
 	// Free buffers that weren't used this frame
 	for (BufferCache::value_type &vt : mBufferCache)
 		for (BufferVK &bvk : vt.second)
-			bvk.Free(mDevice);
+			FreeBufferInternal(bvk);
 	mBufferCache.clear();
 
 	// Recycle the buffers that were freed
@@ -986,6 +995,32 @@ uint32 RendererVK::FindMemoryType(uint32 inTypeFilter, VkMemoryPropertyFlags inP
 	FatalError("Failed to find memory type!");
 }
 
+void RendererVK::AllocateMemory(VkDeviceSize inSize, uint32 inMemoryTypeBits, VkMemoryPropertyFlags inProperties, VkDeviceMemory &outMemory)
+{
+	VkMemoryAllocateInfo alloc_info = {}; // Allocates inSize bytes of device memory into outMemory and updates the allocation statistics
+	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+	alloc_info.allocationSize = inSize;
+	alloc_info.memoryTypeIndex = FindMemoryType(inMemoryTypeBits, inProperties); // FindMemoryType raises a fatal error when no memory type is found
+	FatalErrorIfFailed(vkAllocateMemory(mDevice, &alloc_info, nullptr, &outMemory));
+
+	// Track allocation (number of live device memory allocations and their total size in bytes)
+	++mNumAllocations;
+	mTotalAllocated += inSize;
+
+	// Track max usage (high water marks that are traced when the renderer is destroyed)
+	mMaxTotalAllocated = max(mMaxTotalAllocated, mTotalAllocated);
+	mMaxNumAllocations = max(mMaxNumAllocations, mNumAllocations);
+}
+
+void RendererVK::FreeMemory(VkDeviceMemory inMemory, VkDeviceSize inSize)
+{
+	vkFreeMemory(mDevice, inMemory, nullptr);
+
+	// Track free (inSize has to be the size that was passed to AllocateMemory for this block, otherwise the counters drift)
+	--mNumAllocations;
+	mTotalAllocated -= inSize;
+}
+
 void RendererVK::CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, VkMemoryPropertyFlags inProperties, BufferVK &outBuffer)
 {
 	// Check the cache
@@ -1012,14 +1047,40 @@ void RendererVK::CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, V
 	VkMemoryRequirements mem_requirements;
 	vkGetBufferMemoryRequirements(mDevice, outBuffer.mBuffer, &mem_requirements);
 
-	VkMemoryAllocateInfo alloc_info = {};
-	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-	alloc_info.allocationSize = mem_requirements.size;
-	alloc_info.memoryTypeIndex = FindMemoryType(mem_requirements.memoryTypeBits, inProperties);
+	if (mem_requirements.size > cMaxAllocSize)
+	{
+		// Allocate block directly
+		AllocateMemory(mem_requirements.size, mem_requirements.memoryTypeBits, inProperties, outBuffer.mMemory);
+		outBuffer.mAllocatedSize = mem_requirements.size;
+		outBuffer.mOffset = 0;
+	}
+	else
+	{
+		// Round allocation to the next power of 2 so that we can use a simple block based allocator
+		outBuffer.mAllocatedSize = max(VkDeviceSize(GetNextPowerOf2(uint32(mem_requirements.size))), cMinAllocSize);
+
+		// Ensure that we have memory available from the right pool
+		Array<Memory> &mem_array = mMemoryCache[{ outBuffer.mAllocatedSize, outBuffer.mUsage, outBuffer.mProperties }];
+		if (mem_array.empty())
+		{
+			// Allocate a bigger block
+			VkDeviceMemory device_memory;
+			AllocateMemory(cBlockSize, mem_requirements.memoryTypeBits, inProperties, device_memory);
 
-	FatalErrorIfFailed(vkAllocateMemory(mDevice, &alloc_info, nullptr, &outBuffer.mMemory));
+			// Divide into sub blocks
+			for (VkDeviceSize offset = 0; offset < cBlockSize; offset += outBuffer.mAllocatedSize)
+				mem_array.push_back({ device_memory, offset });
+		}
 
-	vkBindBufferMemory(mDevice, outBuffer.mBuffer, outBuffer.mMemory, 0);
+		// Claim memory from the pool
+		Memory &memory = mem_array.back();
+		outBuffer.mMemory = memory.mMemory;
+		outBuffer.mOffset = memory.mOffset;
+		mem_array.pop_back();
+	}
+
+	// Bind the memory to the buffer
+	vkBindBufferMemory(mDevice, outBuffer.mBuffer, outBuffer.mMemory, outBuffer.mOffset);
 }
 
 VkCommandBuffer RendererVK::StartTempCommandBuffer()
@@ -1073,7 +1134,7 @@ void RendererVK::CreateDeviceLocalBuffer(const void *inData, VkDeviceSize inSize
 	CreateBuffer(inSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, staging_buffer);
 
 	void *data;
-	vkMapMemory(mDevice, staging_buffer.mMemory, 0, inSize, 0, &data);
+	vkMapMemory(mDevice, staging_buffer.mMemory, staging_buffer.mOffset, inSize, 0, &data);
 	memcpy(data, inData, (size_t)inSize);
 	vkUnmapMemory(mDevice, staging_buffer.mMemory);
 
@@ -1093,6 +1154,19 @@ void RendererVK::FreeBuffer(BufferVK &ioBuffer)
 	}
 }
 
+void RendererVK::FreeBufferInternal(BufferVK &ioBuffer)
+{
+	// Destroy the buffer object, its memory is either freed (dedicated allocation) or returned to the pool (sub block) below
+	vkDestroyBuffer(mDevice, ioBuffer.mBuffer, nullptr);
+	ioBuffer.mBuffer = VK_NULL_HANDLE;
+
+	if (ioBuffer.mMemory != VK_NULL_HANDLE && ioBuffer.mAllocatedSize > cMaxAllocSize) // Skip buffers without memory so that freeing twice is harmless
+		FreeMemory(ioBuffer.mMemory, ioBuffer.mAllocatedSize);
+	else if (ioBuffer.mMemory != VK_NULL_HANDLE) // Never put a VK_NULL_HANDLE entry in the pool, CreateBuffer would hand it out again
+		mMemoryCache[{ ioBuffer.mAllocatedSize, ioBuffer.mUsage, ioBuffer.mProperties }].push_back({ ioBuffer.mMemory, ioBuffer.mOffset });
+	ioBuffer.mMemory = VK_NULL_HANDLE;
+}
+
 unique_ptr<ConstantBufferVK> RendererVK::CreateConstantBuffer(VkDeviceSize inBufferSize)
 {
 	return make_unique<ConstantBufferVK>(this, inBufferSize);
@@ -1136,15 +1210,21 @@ void RendererVK::CreateImage(uint32 inWidth, uint32 inHeight, VkFormat inFormat,
 	VkMemoryRequirements mem_requirements;
 	vkGetImageMemoryRequirements(mDevice, outImage, &mem_requirements);
 
-	VkMemoryAllocateInfo alloc_info = {};
-	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-	alloc_info.allocationSize = mem_requirements.size;
-	alloc_info.memoryTypeIndex = FindMemoryType(mem_requirements.memoryTypeBits, inProperties);
-	FatalErrorIfFailed(vkAllocateMemory(mDevice, &alloc_info, nullptr, &outMemory));
+	AllocateMemory(mem_requirements.size, mem_requirements.memoryTypeBits, inProperties, outMemory);
 
 	vkBindImageMemory(mDevice, outImage, outMemory, 0);
 }
 
+void RendererVK::DestroyImage(VkImage inImage, VkDeviceMemory inMemory)
+{
+	VkMemoryRequirements mem_requirements;
+	vkGetImageMemoryRequirements(mDevice, inImage, &mem_requirements); // Queried while the image still exists, the size is only needed for the allocation tracking
+
+	vkDestroyImage(mDevice, inImage, nullptr);
+
+	FreeMemory(inMemory, mem_requirements.size); // Same size that CreateImage passed to AllocateMemory
+}
+
 void RendererVK::UpdateViewPortAndScissorRect(uint32 inWidth, uint32 inHeight)
 {
 	VkCommandBuffer command_buffer = GetCommandBuffer();

+ 28 - 1
TestFramework/Renderer/VK/RendererVK.h

@@ -45,17 +45,21 @@ public:
 	VkCommandBuffer					GetCommandBuffer()												{ JPH_ASSERT(mInFrame); return mCommandBuffers[mFrameIndex]; }
 	VkCommandBuffer					StartTempCommandBuffer();
 	void							EndTempCommandBuffer(VkCommandBuffer inCommandBuffer);
+	void							AllocateMemory(VkDeviceSize inSize, uint32 inMemoryTypeBits, VkMemoryPropertyFlags inProperties, VkDeviceMemory &outMemory);
+	void							FreeMemory(VkDeviceMemory inMemory, VkDeviceSize inSize);
 	void							CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, VkMemoryPropertyFlags inProperties, BufferVK &outBuffer);
 	void							CopyBuffer(VkBuffer inSrc, VkBuffer inDst, VkDeviceSize inSize);
 	void							CreateDeviceLocalBuffer(const void *inData, VkDeviceSize inSize, VkBufferUsageFlags inUsage, BufferVK &outBuffer);
 	void							FreeBuffer(BufferVK &ioBuffer);
 	unique_ptr<ConstantBufferVK>	CreateConstantBuffer(VkDeviceSize inBufferSize);
 	void							CreateImage(uint32 inWidth, uint32 inHeight, VkFormat inFormat, VkImageTiling inTiling, VkImageUsageFlags inUsage, VkMemoryPropertyFlags inProperties, VkImage &outImage, VkDeviceMemory &outMemory);
+	void							DestroyImage(VkImage inImage, VkDeviceMemory inMemory);
 	VkImageView						CreateImageView(VkImage inImage, VkFormat inFormat, VkImageAspectFlags inAspectFlags);
 	VkFormat						FindDepthFormat();
 
 private:
 	uint32							FindMemoryType(uint32 inTypeFilter, VkMemoryPropertyFlags inProperties);
+	void							FreeBufferInternal(BufferVK &ioBuffer);
 	VkSurfaceFormatKHR				SelectFormat(VkPhysicalDevice inDevice);
 	void							CreateSwapChain(VkPhysicalDevice inDevice);
 	void							DestroySwapChain();
@@ -103,7 +107,7 @@ private:
 	unique_ptr<ConstantBufferVK>	mVertexShaderConstantBufferProjection[cFrameCount];
 	unique_ptr<ConstantBufferVK>	mVertexShaderConstantBufferOrtho[cFrameCount];
 	unique_ptr<ConstantBufferVK>	mPixelShaderConstantBuffer[cFrameCount];
-		
+
 	struct Key
 	{
 		bool						operator == (const Key &inRHS) const
@@ -118,8 +122,31 @@ private:
 
 	JPH_MAKE_HASH_STRUCT(Key, KeyHasher, t.mSize, t.mUsage, t.mProperties)
 
+	// We try to recycle buffers from frame to frame
 	using BufferCache = UnorderedMap<Key, Array<BufferVK>, KeyHasher>;
 
 	BufferCache						mFreedBuffers[cFrameCount];
 	BufferCache						mBufferCache;
+
+	// Smaller allocations (from cMinAllocSize to cMaxAllocSize) will be done in blocks of cBlockSize bytes.
+	// We do this because there is a limit to the number of allocations that we can make in Vulkan.
+	static constexpr VkDeviceSize	cMinAllocSize = 512;
+	static constexpr VkDeviceSize	cMaxAllocSize = 65536;
+	static constexpr VkDeviceSize	cBlockSize = 524288;
+
+	JPH_MAKE_HASH_STRUCT(Key, MemKeyHasher, t.mUsage, t.mProperties, t.mSize)
+
+	struct Memory
+	{
+		VkDeviceMemory				mMemory;				///< Memory object that the sub block is part of
+		VkDeviceSize				mOffset;				///< Offset of the sub block inside mMemory
+	};
+
+	using MemoryCache = UnorderedMap<Key, Array<Memory>, MemKeyHasher>; // Use the hasher declared for this cache, it was defined above but never referenced
+
+	MemoryCache						mMemoryCache;			///< Free sub blocks, keyed by (sub block size, buffer usage, memory properties)
+	uint32							mNumAllocations = 0;
+	uint32							mMaxNumAllocations = 0;
+	VkDeviceSize					mTotalAllocated = 0;
+	VkDeviceSize					mMaxTotalAllocated = 0;
 };

+ 3 - 3
TestFramework/Renderer/VK/TextureVK.cpp

@@ -56,7 +56,7 @@ TextureVK::TextureVK(RendererVK *inRenderer, const Surface *inSurface) :
 	// Copy data to upload texture
 	surface->Lock(ESurfaceLockMode::Read);
 	void *data;
-	vkMapMemory(device, staging_buffer.mMemory, 0, image_size, 0, &data);
+	vkMapMemory(device, staging_buffer.mMemory, staging_buffer.mOffset, image_size, 0, &data);
 	for (int y = 0; y < mHeight; ++y)
 		memcpy(reinterpret_cast<uint8 *>(data) + y * mWidth * bpp, surface->GetData() + y * surface->GetStride(), mWidth * bpp);
 	vkUnmapMemory(device, staging_buffer.mMemory);
@@ -109,8 +109,8 @@ TextureVK::~TextureVK()
 		vkDeviceWaitIdle(device);
 
 		vkDestroyImageView(device, mImageView, nullptr);
-		vkDestroyImage(device, mImage, nullptr);
-		vkFreeMemory(device, mImageMemory, nullptr);
+
+		mRenderer->DestroyImage(mImage, mImageMemory);
 	}
 }