Browse Source

Reworking GR's dynamic memory

Panagiotis Christopoulos Charitos 9 years ago
parent
commit
a0459dfe54

+ 11 - 6
include/anki/gr/common/GpuBlockAllocator.h

@@ -28,22 +28,22 @@ public:
 
 	/// Initialize the allocator using pre-allocated CPU mapped memory.
 	void init(GenericMemoryPoolAllocator<U8> alloc,
-		void* cpuMappedMem,
-		PtrSize cpuMappedMemSize,
+		PtrSize totalSize,
 		PtrSize blockSize);
 
 	/// Allocate GPU memory.
-	ANKI_USE_RESULT void* allocate(
-		PtrSize size, U alignment, DynamicBufferToken& handle);
+	ANKI_USE_RESULT Error allocate(PtrSize size,
+		U alignment,
+		DynamicBufferToken& handle,
+		Bool handleOomError = true);
 
 	/// Free GPU memory.
-	void free(void* ptr);
+	void free(const DynamicBufferToken& handle);
 
 private:
 	class Block;
 
 	GenericMemoryPoolAllocator<U8> m_alloc;
-	U8* m_mem = nullptr;
 	PtrSize m_size = 0;
 	PtrSize m_blockSize = 0;
 	DynamicArray<Block> m_blocks;
@@ -53,6 +53,11 @@ private:
 	U32 m_currentBlock = MAX_U32;
 	Mutex m_mtx;
 
+	Bool isCreated() const
+	{
+		return m_size > 0;
+	}
+
 	Bool blockHasEnoughSpace(U blockIdx, PtrSize size, U alignment) const;
 };
 /// @}

+ 11 - 9
include/anki/gr/common/GpuFrameRingAllocator.h

@@ -13,7 +13,7 @@ namespace anki
 /// @addtogroup graphics
 /// @{
 
-/// Manages pre-allocated, always mapped GPU memory for per frame usage.
+/// Manages pre-allocated GPU memory for per frame usage.
 class GpuFrameRingAllocator : public NonCopyable
 {
 	friend class DynamicMemorySerializeCommand;
@@ -28,31 +28,33 @@ public:
 	}
 
 	/// Initialize with pre-allocated always mapped memory.
-	/// @param[in] cpuMappedMem Pre-allocated always mapped GPU memory.
-	/// @param size The size of the cpuMappedMem.
+	/// @param size The size of the GPU buffer.
 	/// @param alignment The working alignment.
 	/// @param maxAllocationSize The size in @a allocate cannot exceed
 	///        maxAllocationSize.
-	void init(void* cpuMappedMem,
-		PtrSize size,
-		U32 alignment,
-		PtrSize maxAllocationSize = MAX_PTR_SIZE);
+	void init(
+		PtrSize size, U32 alignment, PtrSize maxAllocationSize = MAX_PTR_SIZE);
 
 	/// Allocate memory for a dynamic buffer.
-	ANKI_USE_RESULT void* allocate(PtrSize size, DynamicBufferToken& token);
+	ANKI_USE_RESULT Error allocate(
+		PtrSize size, DynamicBufferToken& token, Bool handleOomErrors = true);
 
 	/// Call this at the end of the frame.
 	/// @return The bytes that were not used. Used for statistics.
 	PtrSize endFrame();
 
 private:
-	U8* m_cpuAddress = nullptr; ///< Host address of the buffer.
 	PtrSize m_size = 0; ///< The full size of the buffer.
 	U32 m_alignment = 0; ///< Always work in that alignment.
 	PtrSize m_maxAllocationSize = 0; ///< For debugging.
 
 	Atomic<PtrSize> m_offset = {0};
 	U64 m_frame = 0;
+
+	Bool isCreated() const
+	{
+		return m_size > 0;
+	}
 };
 /// @}
 

+ 4 - 0
include/anki/gr/gl/Common.h

@@ -31,6 +31,10 @@ class RenderingThread;
 /// @addtogroup opengl
 /// @{
 
+// Spec limits
+const U MAX_UNIFORM_BLOCK_SIZE = 16384; // GL guarantees GL_MAX_UNIFORM_BLOCK_SIZE >= 16384
+const U MAX_STORAGE_BLOCK_SIZE = 2 << 27; // NOTE(review): 2 << 27 == 2^28 (256MiB); GL's guaranteed minimum is 2^27 — confirm this is intended
+
 /// Converter.
 GLenum convertCompareOperation(CompareOperation in);
 

+ 65 - 0
include/anki/gr/gl/DynamicMemoryManager.h

@@ -0,0 +1,65 @@
+// Copyright (C) 2009-2016, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <anki/gr/gl/Common.h>
+#include <anki/gr/common/GpuBlockAllocator.h>
+#include <anki/gr/common/GpuFrameRingAllocator.h>
+
+namespace anki
+{
+
+// Forward
+class ConfigSet;
+
+/// @addtogroup opengl
+/// @{
+
+/// Manages all dynamic memory.
+class DynamicMemoryManager : public NonCopyable
+{
+public:
+	DynamicMemoryManager()
+	{
+	}
+
+	~DynamicMemoryManager();
+
+	/// Create the buffers and their sub-allocators using sizes from @a cfg.
+	void init(GenericMemoryPoolAllocator<U8> alloc, const ConfigSet& cfg);
+
+	/// Release the buffers. Must run before destruction: the destructor
+	/// asserts that every buffer name has been released.
+	void destroy();
+
+	/// Allocate per-frame (ring) memory.
+	/// @return The CPU-visible address of the allocation.
+	ANKI_USE_RESULT void* allocatePerFrame(
+		BufferUsage usage, PtrSize size, DynamicBufferToken& handle);
+
+	/// Allocate persistent (block) memory.
+	/// @return The CPU-visible address, or nullptr on out-of-memory.
+	ANKI_USE_RESULT void* allocatePersistent(
+		BufferUsage usage, PtrSize size, DynamicBufferToken& handle);
+
+	/// Free memory previously obtained by allocatePersistent().
+	void freePersistent(BufferUsage usage, const DynamicBufferToken& handle);
+
+private:
+	/// Forces 16-byte alignment/granularity of CPU-side storage.
+	class alignas(16) Aligned16Type
+	{
+		U8 _m_val[16];
+	};
+
+	// CPU or GPU buffer.
+	class DynamicBuffer
+	{
+	public:
+		GLuint m_name = 0; ///< GL buffer name; 0 when no GL buffer exists.
+		DynamicArray<Aligned16Type> m_cpuBuff; ///< Backing store for CPU-only buffers.
+		U8* m_mappedMem = nullptr; ///< Persistently mapped pointer (or CPU buffer address).
+		GpuBlockAllocator m_persistentAlloc;
+		GpuFrameRingAllocator m_frameAlloc;
+	};
+
+	GenericMemoryPoolAllocator<U8> m_alloc;
+	Array<DynamicBuffer, U(BufferUsage::COUNT)> m_buffers;
+};
+/// @}
+
+} // end namespace anki

+ 0 - 4
include/anki/gr/gl/GlState.h

@@ -30,10 +30,6 @@ enum class GpuVendor : U8
 class GlState
 {
 public:
-	// Spec limits
-	static const U MAX_UNIFORM_BLOCK_SIZE = 16384;
-	static const U MAX_STORAGE_BLOCK_SIZE = 2 << 27;
-
 	I32 m_version = -1; ///< Minor major GL version. Something like 430
 	GpuVendor m_gpu = GpuVendor::UNKNOWN;
 	Bool8 m_registerMessages = false;

+ 3 - 4
sandbox/config.xml

@@ -41,10 +41,9 @@
 	<tessellation>1</tessellation>
 	<clusterSizeZ>32</clusterSizeZ>
 	<imageReflectionMaxDistance>30</imageReflectionMaxDistance>
-	<gr.frameUniformsSize>16777216</gr.frameUniformsSize>
-	<gr.frameStorageSize>16777216</gr.frameStorageSize>
-	<gr.frameVertexSize>2097152</gr.frameVertexSize>
-	<gr.frameTransferSize>33554432</gr.frameTransferSize>
+	<gr.uniformPerFrameMemorySize>16777216</gr.uniformPerFrameMemorySize>
+	<gr.storagePerFrameMemorySize>16777216</gr.storagePerFrameMemorySize>
+	<gr.transferPersistentMemorySize>33554432</gr.transferPersistentMemorySize>
 	<maxTextureSize>1048576</maxTextureSize>
 	<textureAnisotropy>8</textureAnisotropy>
 	<dataPaths>assets:.</dataPaths>

+ 3 - 4
src/core/Config.cpp

@@ -70,10 +70,9 @@ Config::Config()
 	//
 	// GR
 	//
-	newOption("gr.frameUniformsSize", 1024 * 1024 * 16);
-	newOption("gr.frameStorageSize", 1024 * 1024 * 16);
-	newOption("gr.frameVertexSize", 1024 * 1024 * 2);
-	newOption("gr.frameTransferSize", 1024 * 1024 * 32);
+	newOption("gr.uniformPerFrameMemorySize", 1024 * 1024 * 16);
+	newOption("gr.storagePerFrameMemorySize", 1024 * 1024 * 16);
+	newOption("gr.transferPersistentMemorySize", 1024 * 1024 * 32);
 
 	//
 	// Resource

+ 29 - 24
src/gr/common/GpuBlockAllocator.cpp

@@ -36,19 +36,18 @@ GpuBlockAllocator::~GpuBlockAllocator()
 }
 
 //==============================================================================
-void GpuBlockAllocator::init(GenericMemoryPoolAllocator<U8> alloc,
-	void* cpuMappedMem,
-	PtrSize cpuMappedMemSize,
-	PtrSize blockSize)
+void GpuBlockAllocator::init(
+	GenericMemoryPoolAllocator<U8> alloc, PtrSize totalSize, PtrSize blockSize)
 {
-	ANKI_ASSERT(cpuMappedMem && cpuMappedMemSize > 0 && blockSize > 0);
-	ANKI_ASSERT((cpuMappedMemSize % blockSize) == 0);
+	ANKI_ASSERT(!isCreated());
+	ANKI_ASSERT(totalSize > 0 && blockSize > 0);
+	ANKI_ASSERT(totalSize > blockSize);
+	ANKI_ASSERT((totalSize % blockSize) == 0);
 
 	m_alloc = alloc;
-	m_mem = static_cast<U8*>(cpuMappedMem);
-	m_size = cpuMappedMemSize;
+	m_size = totalSize;
 	m_blockSize = blockSize;
-	m_blocks.create(alloc, cpuMappedMemSize / blockSize);
+	m_blocks.create(alloc, totalSize / blockSize);
 
 	m_freeBlocksStack.create(alloc, m_blocks.getSize());
 	m_freeBlockCount = m_blocks.getSize();
@@ -69,20 +68,21 @@ Bool GpuBlockAllocator::blockHasEnoughSpace(
 
 	const Block& block = m_blocks[blockIdx];
 
-	U8* allocEnd = getAlignedRoundUp(alignment, m_mem + block.m_offset) + size;
-	U8* blockEnd = m_mem + blockIdx * m_blockSize + m_blockSize;
+	PtrSize allocEnd = getAlignedRoundUp(alignment, block.m_offset) + size;
+	PtrSize blockEnd = (blockIdx + 1) * m_blockSize;
 
 	return allocEnd <= blockEnd;
 }
 
 //==============================================================================
-void* GpuBlockAllocator::allocate(
-	PtrSize size, U alignment, DynamicBufferToken& handle)
+Error GpuBlockAllocator::allocate(
+	PtrSize size, U alignment, DynamicBufferToken& handle, Bool handleOomError)
 {
+	ANKI_ASSERT(isCreated());
 	ANKI_ASSERT(size < m_blockSize);
 
 	Block* block = nullptr;
-	U8* ptr = nullptr;
+	Error err = ErrorCode::NONE;
 
 	LockGuard<Mutex> lock(m_mtx);
 
@@ -106,9 +106,7 @@ void* GpuBlockAllocator::allocate(
 
 	if(block)
 	{
-		ptr = getAlignedRoundUp(alignment, m_mem + block->m_offset);
-
-		PtrSize outOffset = ptr - m_mem;
+		PtrSize outOffset = getAlignedRoundUp(alignment, block->m_offset);
 		block->m_offset = outOffset + size;
 		ANKI_ASSERT(
 			block->m_offset <= (block - &m_blocks[0] + 1) * m_blockSize);
@@ -119,19 +117,26 @@ void* GpuBlockAllocator::allocate(
 		handle.m_offset = outOffset;
 		handle.m_range = size;
 	}
+	else if(handleOomError)
+	{
+		ANKI_LOGF("Out of memory");
+	}
+	else
+	{
+		err = ErrorCode::OUT_OF_MEMORY;
+	}
 
-	return static_cast<void*>(ptr);
+	return err;
 }
 
 //==============================================================================
-void GpuBlockAllocator::free(void* vptr)
+void GpuBlockAllocator::free(const DynamicBufferToken& handle)
 {
-	U8* ptr = static_cast<U8*>(vptr);
-	ANKI_ASSERT(ptr);
-	ANKI_ASSERT(ptr >= m_mem && ptr < m_mem + m_size);
+	ANKI_ASSERT(isCreated());
+	ANKI_ASSERT(handle.m_range > 0);
+	ANKI_ASSERT(handle.m_offset < m_size);
 
-	PtrSize offset = static_cast<PtrSize>(ptr - m_mem);
-	U blockIdx = offset / m_blockSize;
+	U blockIdx = handle.m_offset / m_blockSize;
 
 	LockGuard<Mutex> lock(m_mtx);
 

+ 16 - 12
src/gr/common/GpuFrameRingAllocator.cpp

@@ -10,16 +10,15 @@ namespace anki
 
 //==============================================================================
 void GpuFrameRingAllocator::init(
-	void* cpuMappedMem, PtrSize size, U32 alignment, PtrSize maxAllocationSize)
+	PtrSize size, U32 alignment, PtrSize maxAllocationSize)
 {
-	ANKI_ASSERT(
-		cpuMappedMem && size > 0 && alignment > 0 && maxAllocationSize > 0);
+	ANKI_ASSERT(!isCreated());
+	ANKI_ASSERT(size > 0 && alignment > 0 && maxAllocationSize > 0);
 
 	PtrSize perFrameSize = size / MAX_FRAMES_IN_FLIGHT;
 	alignRoundDown(alignment, perFrameSize);
 	m_size = perFrameSize * MAX_FRAMES_IN_FLIGHT;
 
-	m_cpuAddress = static_cast<U8*>(cpuMappedMem);
 	m_alignment = alignment;
 	m_maxAllocationSize = maxAllocationSize;
 }
@@ -27,6 +26,8 @@ void GpuFrameRingAllocator::init(
 //==============================================================================
 PtrSize GpuFrameRingAllocator::endFrame()
 {
+	ANKI_ASSERT(isCreated());
+
 	PtrSize perFrameSize = m_size / MAX_FRAMES_IN_FLIGHT;
 
 	PtrSize crntFrameStartOffset =
@@ -47,11 +48,12 @@ PtrSize GpuFrameRingAllocator::endFrame()
 }
 
 //==============================================================================
-void* GpuFrameRingAllocator::allocate(
-	PtrSize originalSize, DynamicBufferToken& token)
+Error GpuFrameRingAllocator::allocate(
+	PtrSize originalSize, DynamicBufferToken& token, Bool handleOomError)
 {
+	ANKI_ASSERT(isCreated());
 	ANKI_ASSERT(originalSize > 0);
-	ANKI_ASSERT(m_cpuAddress);
+	Error err = ErrorCode::NONE;
 
 	// Align size
 	PtrSize size = getAlignedRoundUp(m_alignment, originalSize);
@@ -64,21 +66,23 @@ void* GpuFrameRingAllocator::allocate(
 
 	if(offset - crntFrameStartOffset + size <= perFrameSize)
 	{
-		ANKI_ASSERT(isAligned(m_alignment, m_cpuAddress + offset));
+		ANKI_ASSERT(isAligned(m_alignment, offset));
 		ANKI_ASSERT((offset + size) <= m_size);
 
 		// Encode token
 		token.m_offset = offset;
 		token.m_range = originalSize;
-
-		return static_cast<void*>(m_cpuAddress + offset);
 	}
-	else
+	else if(handleOomError)
 	{
 		ANKI_LOGF("Out of GPU dynamic memory");
 	}
+	else
+	{
+		err = ErrorCode::OUT_OF_MEMORY;
+	}
 
-	return nullptr;
+	return err;
 }
 
 } // end namespace anki

+ 131 - 0
src/gr/gl/DynamicMemoryManager.cpp

@@ -0,0 +1,131 @@
+// Copyright (C) 2009-2016, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <anki/gr/gl/DynamicMemoryManager.h>
+#include <anki/core/Config.h>
+
+namespace anki
+{
+
+//==============================================================================
+// Sanity check: every GL buffer name must have been released (via destroy())
+// before the manager is destructed.
+DynamicMemoryManager::~DynamicMemoryManager()
+{
+	for(DynamicBuffer& buff : m_buffers)
+	{
+		ANKI_ASSERT(buff.m_name == 0);
+	}
+}
+
+//==============================================================================
+/// Release all GL buffers and CPU-side storage.
+void DynamicMemoryManager::destroy()
+{
+	for(DynamicBuffer& buff : m_buffers)
+	{
+		if(buff.m_name != 0)
+		{
+			// Deleting a mapped buffer implicitly unmaps it.
+			glDeleteBuffers(1, &buff.m_name);
+
+			// Reset the name: the destructor asserts m_name == 0, so leaving
+			// the stale name here would trip that assert on every normal
+			// shutdown.
+			buff.m_name = 0;
+		}
+
+		// The mapped pointer is invalid once the buffer (or CPU storage) dies.
+		buff.m_mappedMem = nullptr;
+
+		buff.m_cpuBuff.destroy(m_alloc);
+	}
+}
+
+//==============================================================================
+/// Create the GL buffers/CPU storage and initialize the sub-allocators with
+/// sizes taken from @a cfg.
+void DynamicMemoryManager::init(
+	GenericMemoryPoolAllocator<U8> alloc, const ConfigSet& cfg)
+{
+	// NOTE(review): persistent mapping without GL_MAP_COHERENT_BIT requires
+	// explicit flushes/barriers before the GPU consumes the writes — confirm
+	// the command execution side handles that.
+	const U BUFF_FLAGS = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+
+	m_alloc = alloc;
+
+	// Uniform: GPU buffer with a per-frame ring allocator.
+	{
+		// Create buffer
+		PtrSize size = cfg.getNumber("gr.uniformPerFrameMemorySize");
+		DynamicBuffer& buff = m_buffers[BufferUsage::UNIFORM];
+		glGenBuffers(1, &buff.m_name);
+
+		// Allocate immutable storage and map it persistently.
+		glNamedBufferStorage(buff.m_name, size, nullptr, BUFF_FLAGS);
+		buff.m_mappedMem = static_cast<U8*>(
+			glMapNamedBufferRange(buff.m_name, 0, size, BUFF_FLAGS));
+		ANKI_ASSERT(buff.m_mappedMem);
+
+		// Create the allocator. The offset alignment is a small value so the
+		// GLint64 -> U32 narrowing is safe.
+		GLint64 blockAlignment;
+		glGetInteger64v(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &blockAlignment);
+		buff.m_frameAlloc.init(size, blockAlignment, MAX_UNIFORM_BLOCK_SIZE);
+	}
+
+	// Storage: same as uniform but with the SSBO alignment and limits.
+	{
+		// Create buffer
+		PtrSize size = cfg.getNumber("gr.storagePerFrameMemorySize");
+		DynamicBuffer& buff = m_buffers[BufferUsage::STORAGE];
+		glGenBuffers(1, &buff.m_name);
+
+		// Map it
+		glNamedBufferStorage(buff.m_name, size, nullptr, BUFF_FLAGS);
+		buff.m_mappedMem = static_cast<U8*>(
+			glMapNamedBufferRange(buff.m_name, 0, size, BUFF_FLAGS));
+		ANKI_ASSERT(buff.m_mappedMem);
+
+		// Create the allocator
+		GLint64 blockAlignment;
+		glGetInteger64v(
+			GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &blockAlignment);
+		buff.m_frameAlloc.init(size, blockAlignment, MAX_STORAGE_BLOCK_SIZE);
+	}
+
+	// Transfer: CPU-side buffer with a persistent block allocator.
+	{
+		// Big enough block to hold a texture surface
+		const PtrSize BLOCK_SIZE = (4096 * 4096) / 4 * 16 + 512;
+
+		PtrSize size = cfg.getNumber("gr.transferPersistentMemorySize");
+		DynamicBuffer& buff = m_buffers[BufferUsage::TRANSFER];
+
+		// Fix: m_cpuBuff holds 16-byte Aligned16Type elements, so requesting
+		// `size` elements would allocate 16x the configured byte size.
+		// Allocate just enough elements to cover `size` bytes.
+		buff.m_cpuBuff.create(m_alloc,
+			(size + sizeof(Aligned16Type) - 1) / sizeof(Aligned16Type));
+
+		buff.m_mappedMem = reinterpret_cast<U8*>(&buff.m_cpuBuff[0]);
+
+		// NOTE(review): GpuBlockAllocator::init() asserts totalSize > blockSize
+		// and totalSize % blockSize == 0, but the default config value (32MiB)
+		// is smaller than BLOCK_SIZE (~64MiB) — confirm the config defaults.
+		buff.m_persistentAlloc.init(m_alloc, size, BLOCK_SIZE);
+	}
+}
+
+//==============================================================================
+// Allocate per-frame ring memory and return its CPU-visible address.
+// NOTE(review): only UNIFORM and STORAGE get m_frameAlloc initialized in
+// init(); any other usage would trip GpuFrameRingAllocator's isCreated()
+// assert — confirm callers are restricted to those usages.
+void* DynamicMemoryManager::allocatePerFrame(
+	BufferUsage usage, PtrSize size, DynamicBufferToken& handle)
+{
+	DynamicBuffer& buff = m_buffers[usage];
+	// handleOomErrors=true: allocate() fatally logs on OOM, so err can never
+	// report failure here and is deliberately ignored.
+	Error err = buff.m_frameAlloc.allocate(size, handle, true);
+	(void)err;
+	return buff.m_mappedMem + handle.m_offset;
+}
+
+//==============================================================================
+// Allocate persistent (block) memory. Unlike the per-frame path, OOM is
+// recoverable: the caller gets nullptr and is expected to serialize/retry.
+void* DynamicMemoryManager::allocatePersistent(
+	BufferUsage usage, PtrSize size, DynamicBufferToken& handle)
+{
+	DynamicBuffer& buff = m_buffers[usage];
+	// handleOomError=false: receive OUT_OF_MEMORY instead of a fatal log.
+	// Allocations are 16-byte aligned (matches Aligned16Type granularity).
+	Error err = buff.m_persistentAlloc.allocate(size, 16, handle, false);
+	if(!err)
+	{
+		return buff.m_mappedMem + handle.m_offset;
+	}
+	else
+	{
+		ANKI_LOGW("Out of persistent dynamic memory. Someone should serialize");
+		return nullptr;
+	}
+}
+
+//==============================================================================
+// Return a persistent allocation (made by allocatePersistent()) to its
+// block allocator.
+void DynamicMemoryManager::freePersistent(
+	BufferUsage usage, const DynamicBufferToken& handle)
+{
+	DynamicBuffer& buff = m_buffers[usage];
+	buff.m_persistentAlloc.free(handle);
+}
+
+} // end namespace anki

+ 1 - 1
thirdparty

@@ -1 +1 @@
-Subproject commit e7b5666520d2202d02a846156a6afdb011a08bd7
+Subproject commit f4bd88a88ec48298b460426af93515984e3772a2