@@ -4,185 +4,66 @@
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/GpuMemoryManager.h>
-#include <AnKi/Util/List.h>

namespace anki {

-class ClassInf
-{
-public:
-	PtrSize m_slotSize;
-	PtrSize m_chunkSize;
-};
-
-static constexpr Array<ClassInf, 7> CLASSES{{{256_B, 16_KB},
-                                              {4_KB, 256_KB},
-                                              {128_KB, 8_MB},
-                                              {1_MB, 64_MB},
-                                              {16_MB, 128_MB},
-                                              {64_MB, 256_MB},
-                                              {128_MB, 256_MB}}};
+static constexpr Array<GpuMemoryManagerClassInfo, 7> CLASSES{{{256_B, 16_KB},
+                                                               {4_KB, 256_KB},
+                                                               {128_KB, 8_MB},
+                                                               {1_MB, 64_MB},
+                                                               {16_MB, 128_MB},
+                                                               {64_MB, 256_MB},
+                                                               {128_MB, 256_MB}}};

/// Special classes for the ReBAR memory. Treat it as a special case because it's very limited and needs special care.
-static constexpr Array<ClassInf, 3> REBAR_CLASSES{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};
-
-class GpuMemoryManager::Memory final :
-	public ClassGpuAllocatorMemory,
-	public IntrusiveListEnabled<GpuMemoryManager::Memory>
-{
-public:
-	VkDeviceMemory m_handle = VK_NULL_HANDLE;
-
-	void* m_mappedAddress = nullptr;
-	SpinLock m_mtx;
-
-	U8 m_classIdx = MAX_U8;
-};
+static constexpr Array<GpuMemoryManagerClassInfo, 3> REBAR_CLASSES{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};

-class GpuMemoryManager::Interface final : public ClassGpuAllocatorInterface
+Error GpuMemoryManagerInterface::allocateChunk(U32 classIdx, GpuMemoryManagerChunk*& chunk)
{
-public:
-	GrAllocator<U8> m_alloc;
-	Array<IntrusiveList<Memory>, CLASSES.getSize()> m_vacantMemory;
-	Array<ClassInf, CLASSES.getSize()> m_classes = {};
-	U8 m_classCount = 0;
-	Mutex m_mtx;
-	VkDevice m_dev = VK_NULL_HANDLE;
-	U8 m_memTypeIdx = MAX_U8;
-	Bool m_exposesBufferGpuAddress = false;
-
-	Error allocate(U32 classIdx, ClassGpuAllocatorMemory*& cmem) override
-	{
-		ANKI_ASSERT(classIdx < m_classCount);
-		Memory* mem;
-
-		LockGuard<Mutex> lock(m_mtx);
-
-		if(!m_vacantMemory[classIdx].isEmpty())
-		{
-			// Recycle
-			mem = &m_vacantMemory[classIdx].getFront();
-			m_vacantMemory[classIdx].popFront();
-		}
-		else
-		{
-			// Create new
-
-			VkMemoryAllocateInfo ci = {};
-			ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-			ci.allocationSize = m_classes[classIdx].m_chunkSize;
-			ci.memoryTypeIndex = m_memTypeIdx;
-
-			VkMemoryAllocateFlagsInfo flags = {};
-			flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
-			flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
-			if(m_exposesBufferGpuAddress)
-			{
-				ci.pNext = &flags;
-			}
-
-			VkDeviceMemory memHandle;
-			if(ANKI_UNLIKELY(vkAllocateMemory(m_dev, &ci, nullptr, &memHandle) < 0))
-			{
-				ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx,
-							 m_classes[classIdx].m_chunkSize);
-			}
-
-			mem = m_alloc.newInstance<Memory>();
-			mem->m_handle = memHandle;
-			mem->m_classIdx = U8(classIdx);
-		}
-
-		ANKI_ASSERT(mem);
-		ANKI_ASSERT(mem->m_handle);
-		ANKI_ASSERT(mem->m_classIdx == classIdx);
-		ANKI_ASSERT(mem->m_mappedAddress == nullptr);
-		cmem = mem;
-
-		return Error::NONE;
-	}
-
-	void free(ClassGpuAllocatorMemory* cmem) override
-	{
-		ANKI_ASSERT(cmem);
-
-		Memory* mem = static_cast<Memory*>(cmem);
-		ANKI_ASSERT(mem->m_handle);
-
-		LockGuard<Mutex> lock(m_mtx);
-		m_vacantMemory[mem->m_classIdx].pushBack(mem);
-
-		// Unmap
-		if(mem->m_mappedAddress)
-		{
-			vkUnmapMemory(m_dev, mem->m_handle);
-			mem->m_mappedAddress = nullptr;
-		}
-	}
-
-	U32 getClassCount() const override
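+	// Allocate a whole new chunk; its size is fixed by the memory class.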
+	VkMemoryAllocateInfo ci = {};
+	ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+	ci.allocationSize = m_classInfos[classIdx].m_chunkSize;
+	ci.memoryTypeIndex = m_memTypeIdx;
+
+	VkMemoryAllocateFlagsInfo flags = {};
+	flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
+	flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
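+	// Memory that may back buffers exposing their GPU address must be allocated with the device-address flag.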
+	if(m_exposesBufferGpuAddress)
	{
-		return m_classCount;
+		ci.pNext = &flags;
	}

-	void getClassInfo(U32 classIdx, PtrSize& slotSize, PtrSize& chunkSize) const override
+	VkDeviceMemory memHandle;
+	if(ANKI_UNLIKELY(vkAllocateMemory(m_parent->m_dev, &ci, nullptr, &memHandle) < 0))
	{
-		ANKI_ASSERT(classIdx < m_classCount);
-		slotSize = m_classes[classIdx].m_slotSize;
-		chunkSize = m_classes[classIdx].m_chunkSize;
+		ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx,
+					 m_classInfos[classIdx].m_chunkSize);
	}

-	void collectGarbage()
-	{
-		LockGuard<Mutex> lock(m_mtx);
-
-		for(U classIdx = 0; classIdx < m_classCount; ++classIdx)
-		{
-			while(!m_vacantMemory[classIdx].isEmpty())
-			{
-				Memory* mem = &m_vacantMemory[classIdx].getFront();
-				m_vacantMemory[classIdx].popFront();
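+	// Wrap the raw Vulkan handle in a chunk object that the class allocator on top suballocates from.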
+	chunk = m_parent->m_alloc.newInstance<GpuMemoryManagerChunk>();
+	chunk->m_handle = memHandle;
+	chunk->m_size = m_classInfos[classIdx].m_chunkSize;

-				if(mem->m_mappedAddress)
-				{
-					vkUnmapMemory(m_dev, mem->m_handle);
-				}
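+	// Keep a running total so getAllocatedMemory() can report usage.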
+	m_allocatedMemory += m_classInfos[classIdx].m_chunkSize;

-				vkFreeMemory(m_dev, mem->m_handle, nullptr);
+	return Error::NONE;
+}

-				m_alloc.deleteInstance(mem);
-			}
-		}
-	}
+void GpuMemoryManagerInterface::freeChunk(GpuMemoryManagerChunk* chunk)
+{
+	ANKI_ASSERT(chunk);
+	ANKI_ASSERT(chunk->m_handle != VK_NULL_HANDLE);

-	// Map memory
-	void* mapMemory(ClassGpuAllocatorMemory* cmem)
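+	// The chunk may still be mapped from getMappedAddress(); unmap it before freeing the device memory.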
+	if(chunk->m_mappedAddress)
	{
-		ANKI_ASSERT(cmem);
-		Memory* mem = static_cast<Memory*>(cmem);
-		void* out;
-
-		LockGuard<SpinLock> lock(mem->m_mtx);
-		if(mem->m_mappedAddress)
-		{
-			out = mem->m_mappedAddress;
-		}
-		else
-		{
-			ANKI_VK_CHECKF(vkMapMemory(m_dev, mem->m_handle, 0, m_classes[mem->m_classIdx].m_chunkSize, 0, &out));
-			mem->m_mappedAddress = out;
-		}
-
-		ANKI_ASSERT(out);
-		return out;
+		vkUnmapMemory(m_parent->m_dev, chunk->m_handle);
	}
-};

-class GpuMemoryManager::ClassAllocator : public ClassGpuAllocator
-{
-public:
-	Bool m_isDeviceMemory;
-};
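+	// Return the memory to the driver and update the allocation statistics.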
+	vkFreeMemory(m_parent->m_dev, chunk->m_handle, nullptr);
+
+	ANKI_ASSERT(m_allocatedMemory >= chunk->m_size);
+	m_allocatedMemory -= chunk->m_size;
+}

GpuMemoryManager::~GpuMemoryManager()
{
@@ -190,15 +71,6 @@ GpuMemoryManager::~GpuMemoryManager()

void GpuMemoryManager::destroy()
{
-	for(U32 i = 0; i < m_ifaces.getSize(); ++i)
-	{
-		for(U32 j = 0; j < 2; j++)
-		{
-			m_ifaces[i][j].collectGarbage();
-		}
-	}
-
-	m_ifaces.destroy(m_alloc);
	m_callocs.destroy(m_alloc);
}

@@ -209,27 +81,31 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>

	// Print some info
	ANKI_VK_LOGI("Initializing memory manager");
-	for(const ClassInf& c : CLASSES)
+	for(const GpuMemoryManagerClassInfo& c : CLASSES)
	{
-		ANKI_VK_LOGI("\tGPU mem class. Chunk size: %lu, slotSize: %lu, allocsPerChunk %lu", c.m_chunkSize, c.m_slotSize,
-					 c.m_chunkSize / c.m_slotSize);
+		ANKI_VK_LOGI("\tGPU mem class. Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu", c.m_chunkSize,
+					 c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize);
	}

	vkGetPhysicalDeviceMemoryProperties(pdev, &m_memoryProperties);

	m_alloc = alloc;
+	m_dev = dev;

-	m_ifaces.create(alloc, m_memoryProperties.memoryTypeCount);
-	for(U32 memTypeIdx = 0; memTypeIdx < m_ifaces.getSize(); ++memTypeIdx)
+	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount);
+	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
	{
		for(U32 linear = 0; linear < 2; ++linear)
		{
-			Interface& iface = m_ifaces[memTypeIdx][linear];
-			iface.m_alloc = alloc;
-			iface.m_dev = dev;
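+			// One allocator per memory type and per linear/non-linear resource type.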
+			GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx][linear].getInterface();
+			iface.m_parent = this;
			iface.m_memTypeIdx = U8(memTypeIdx);
			iface.m_exposesBufferGpuAddress = (linear == 1) && exposeBufferGpuAddress;
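
+			// Flag device-local heaps so getAllocatedMemory() can split GPU from host-visible memory.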
+			const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
+			iface.m_isDeviceMemory =
+				!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
+
			// Find if it's ReBAR
			const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
			const VkMemoryPropertyFlags reBarProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
@@ -247,28 +123,15 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>
			// Choose different classes
			if(!isReBar)
			{
-				iface.m_classCount = CLASSES.getSize();
-				iface.m_classes = CLASSES;
+				iface.m_classInfos = CLASSES;
			}
			else
			{
-				iface.m_classCount = REBAR_CLASSES.getSize();
-				memcpy(&iface.m_classes[0], &REBAR_CLASSES[0], REBAR_CLASSES.getSizeInBytes());
+				iface.m_classInfos = REBAR_CLASSES;
			}
-		}
-	}
-
-	// One allocator per linear/non-linear resources
-	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount);
-	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
-	{
-		for(U32 linear = 0; linear < 2; ++linear)
-		{
-			m_callocs[memTypeIdx][linear].init(m_alloc, &m_ifaces[memTypeIdx][linear]);

-			const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
-			m_callocs[memTypeIdx][linear].m_isDeviceMemory =
-				!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
+			// The interface is initialized; now init the builder.
+			m_callocs[memTypeIdx][linear].init(m_alloc);
		}
	}
}
@@ -276,21 +139,26 @@ void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8>
void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource,
                                      GpuMemoryHandle& handle)
{
-	ClassGpuAllocator& calloc = m_callocs[memTypeIdx][linearResource];
-	const Error err = calloc.allocate(size, alignment, handle.m_classHandle);
+	ClassAllocator& calloc = m_callocs[memTypeIdx][linearResource];
+
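+	// The class allocator returns a chunk and an offset inside it; both are stored in the handle.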
+	GpuMemoryManagerChunk* chunk;
+	PtrSize offset;
+	const Error err = calloc.allocate(size, alignment, chunk, offset);
	(void)err;

-	handle.m_memory = static_cast<Memory*>(handle.m_classHandle.m_memory)->m_handle;
-	handle.m_offset = handle.m_classHandle.m_offset;
-	handle.m_linear = linearResource;
+	handle.m_memory = chunk->m_handle;
+	handle.m_offset = offset;
+	handle.m_chunk = chunk;
	handle.m_memTypeIdx = U8(memTypeIdx);
+	handle.m_linear = linearResource;
}

void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
{
	ANKI_ASSERT(handle);
-	ClassGpuAllocator& calloc = m_callocs[handle.m_memTypeIdx][handle.m_linear];
-	calloc.free(handle.m_classHandle);
+	ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx][handle.m_linear];
+
+	calloc.free(handle.m_chunk, handle.m_offset);

	handle = {};
}
@@ -299,9 +167,15 @@ void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
{
	ANKI_ASSERT(handle);

-	Interface& iface = m_ifaces[handle.m_memTypeIdx][handle.m_linear];
-	U8* out = static_cast<U8*>(iface.mapMemory(handle.m_classHandle.m_memory));
-	return static_cast<void*>(out + handle.m_offset);
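+	// Chunks are mapped lazily on first CPU access and stay mapped until freeChunk() unmaps them.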
+	LockGuard<SpinLock> lock(handle.m_chunk->m_mappedAddressMtx);
+
+	if(handle.m_chunk->m_mappedAddress == nullptr)
+	{
+		ANKI_VK_CHECKF(vkMapMemory(m_dev, handle.m_chunk->m_handle, 0, handle.m_chunk->m_size, 0,
+								   &handle.m_chunk->m_mappedAddress));
+	}
+
+	return static_cast<void*>(static_cast<U8*>(handle.m_chunk->m_mappedAddress) + handle.m_offset);
}

U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFlags preferFlags,
@@ -326,7 +200,7 @@ U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFl
			}
			else
			{
-				// On some Intel drivers there are identical memory types pointing to different heaps. Chose the
+				// On some Intel drivers there are identical memory types pointing to different heaps. Choose the
				// biggest heap

				const PtrSize crntHeapSize =
@@ -355,13 +229,14 @@ void GpuMemoryManager::getAllocatedMemory(PtrSize& gpuMemory, PtrSize& cpuMemory
	{
		for(U32 linear = 0; linear < 2; ++linear)
		{
-			if(m_callocs[memTypeIdx][linear].m_isDeviceMemory)
+			const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx][linear].getInterface();
+			if(iface.m_isDeviceMemory)
			{
-				gpuMemory += m_callocs[memTypeIdx][linear].getAllocatedMemory();
+				gpuMemory += iface.m_allocatedMemory;
			}
			else
			{
-				cpuMemory += m_callocs[memTypeIdx][linear].getAllocatedMemory();
+				cpuMemory += iface.m_allocatedMemory;
			}
		}
	}