// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors. // All rights reserved. // Code licensed under the BSD License. // http://www.anki3d.org/LICENSE #include #include namespace anki { static constexpr Array kClasses{ {{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}}}; /// Special classes for the ReBAR memory. Have that as a special case because it's so limited and needs special care. static constexpr Array kRebarClasses{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}}; Error GpuMemoryManagerInterface::allocateChunk(U32 classIdx, GpuMemoryManagerChunk*& chunk) { VkMemoryAllocateInfo ci = {}; ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; ci.allocationSize = m_classInfos[classIdx].m_chunkSize; ci.memoryTypeIndex = m_memTypeIdx; VkMemoryAllocateFlagsInfo flags = {}; flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; if(m_exposesBufferGpuAddress) { ci.pNext = &flags; } VkDeviceMemory memHandle; if(vkAllocateMemory(getVkDevice(), &ci, nullptr, &memHandle) != VK_SUCCESS) [[unlikely]] { ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx, m_classInfos[classIdx].m_suballocationSize); } chunk = newInstance(GrMemoryPool::getSingleton()); chunk->m_handle = memHandle; chunk->m_size = m_classInfos[classIdx].m_chunkSize; m_allocatedMemory += m_classInfos[classIdx].m_chunkSize; return Error::kNone; } void GpuMemoryManagerInterface::freeChunk(GpuMemoryManagerChunk* chunk) { ANKI_ASSERT(chunk); ANKI_ASSERT(chunk->m_handle != VK_NULL_HANDLE); if(chunk->m_mappedAddress) { vkUnmapMemory(getVkDevice(), chunk->m_handle); } vkFreeMemory(getVkDevice(), chunk->m_handle, nullptr); ANKI_ASSERT(m_allocatedMemory >= chunk->m_size); m_allocatedMemory -= chunk->m_size; deleteInstance(GrMemoryPool::getSingleton(), chunk); } GpuMemoryManager::GpuMemoryManager() { } GpuMemoryManager::~GpuMemoryManager() { } void GpuMemoryManager::destroy() { ANKI_VK_LOGV("Destroying memory manager"); m_callocs.destroy(); } void GpuMemoryManager::init(Bool exposeBufferGpuAddress) { // Print some info ANKI_VK_LOGV("Initializing memory manager"); for(const GpuMemoryManagerClassInfo& c : kClasses) { ANKI_VK_LOGV("\tGPU mem class. Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu", c.m_chunkSize, c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize); } // Image buffer granularity { VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(getGrManagerImpl().getPhysicalDevice(), &props); m_bufferImageGranularity = U32(props.limits.bufferImageGranularity); ANKI_ASSERT(m_bufferImageGranularity > 0 && isPowerOfTwo(m_bufferImageGranularity)); if(m_bufferImageGranularity > 4_KB) { ANKI_VK_LOGW("Buffer/image mem granularity is too high (%u). It will force high alignments and it will waste memory", m_bufferImageGranularity); } for(const GpuMemoryManagerClassInfo& c : kClasses) { if(!isAligned(m_bufferImageGranularity, c.m_suballocationSize)) { ANKI_VK_LOGW("Memory class is not aligned to buffer/image granularity (%u). It won't be used in " "allocations: Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu", m_bufferImageGranularity, c.m_chunkSize, c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize); } } } vkGetPhysicalDeviceMemoryProperties(getGrManagerImpl().getPhysicalDevice(), &m_memoryProperties); m_callocs.resize(m_memoryProperties.memoryTypeCount); for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx) { GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface(); iface.m_parent = this; iface.m_memTypeIdx = U8(memTypeIdx); iface.m_exposesBufferGpuAddress = exposeBufferGpuAddress; const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex; iface.m_isDeviceMemory = !!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT); // Find if it's ReBAR const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags; const VkMemoryPropertyFlags reBarProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; const PtrSize heapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size; const Bool isReBar = props == reBarProps && heapSize <= 256_MB; if(isReBar) { ANKI_VK_LOGV("Memory type %u is ReBAR", memTypeIdx); } // Choose different classes if(!isReBar) { iface.m_classInfos = kClasses; } else { iface.m_classInfos = kRebarClasses; } // The interface is initialized, init the builder m_callocs[memTypeIdx].init(); } } void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, GpuMemoryHandle& handle) { ClassAllocator& calloc = m_callocs[memTypeIdx]; alignment = max(alignment, m_bufferImageGranularity); GpuMemoryManagerChunk* chunk; PtrSize offset; [[maybe_unused]] const Error err = calloc.allocate(size, alignment, chunk, offset); handle.m_memory = chunk->m_handle; handle.m_offset = offset; handle.m_chunk = chunk; handle.m_memTypeIdx = U8(memTypeIdx); handle.m_size = size; } void GpuMemoryManager::allocateMemoryDedicated(U32 memTypeIdx, PtrSize size, VkImage image, GpuMemoryHandle& handle) { VkMemoryDedicatedAllocateInfoKHR dedicatedInfo = {}; dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; dedicatedInfo.image = image; VkMemoryAllocateInfo memoryAllocateInfo = {}; memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; memoryAllocateInfo.pNext = &dedicatedInfo; memoryAllocateInfo.allocationSize = size; memoryAllocateInfo.memoryTypeIndex = memTypeIdx; VkDeviceMemory mem; ANKI_VK_CHECKF(vkAllocateMemory(getVkDevice(), &memoryAllocateInfo, nullptr, &mem)); handle.m_memory = mem; handle.m_offset = 0; handle.m_chunk = nullptr; handle.m_memTypeIdx = U8(memTypeIdx); handle.m_size = size; m_dedicatedAllocatedMemory.fetchAdd(size); m_dedicatedAllocationCount.fetchAdd(1); } void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle) { ANKI_ASSERT(handle); if(handle.isDedicated()) { vkFreeMemory(getVkDevice(), handle.m_memory, nullptr); [[maybe_unused]] const PtrSize prevSize = m_dedicatedAllocatedMemory.fetchSub(handle.m_size); ANKI_ASSERT(prevSize >= handle.m_size); [[maybe_unused]] const U32 count = m_dedicatedAllocationCount.fetchSub(1); ANKI_ASSERT(count > 0); } else { ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx]; calloc.free(handle.m_chunk, handle.m_offset); } handle = {}; } void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle) { ANKI_ASSERT(handle); ANKI_ASSERT(!handle.isDedicated()); LockGuard lock(handle.m_chunk->m_mappedAddressMtx); if(handle.m_chunk->m_mappedAddress == nullptr) { ANKI_VK_CHECKF(vkMapMemory(getVkDevice(), handle.m_chunk->m_handle, 0, handle.m_chunk->m_size, 0, &handle.m_chunk->m_mappedAddress)); } return static_cast(static_cast(handle.m_chunk->m_mappedAddress) + handle.m_offset); } U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFlags preferFlags, VkMemoryPropertyFlags avoidFlags) const { U32 prefered = kMaxU32; // Iterate all mem types for(U32 i = 0; i < m_memoryProperties.memoryTypeCount; i++) { if(resourceMemTypeBits & (1u << i)) { const VkMemoryPropertyFlags flags = m_memoryProperties.memoryTypes[i].propertyFlags; if((flags & preferFlags) == preferFlags && (flags & avoidFlags) == 0) { // It's the candidate we want if(prefered == kMaxU32) { prefered = i; } else { // On some Intel drivers there are identical memory types pointing to different heaps. Choose the // biggest heap const PtrSize crntHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[i].heapIndex].size; const PtrSize prevHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[prefered].heapIndex].size; if(crntHeapSize > prevHeapSize) { prefered = i; } } } } } return prefered; } void GpuMemoryManager::getStats(GpuMemoryManagerStats& stats) const { stats = {}; for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx) { const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface(); ClassAllocatorBuilderStats cstats; m_callocs[memTypeIdx].getStats(cstats); if(iface.m_isDeviceMemory) { stats.m_deviceMemoryAllocated += cstats.m_allocatedSize; stats.m_deviceMemoryInUse += cstats.m_inUseSize; stats.m_deviceMemoryAllocationCount += cstats.m_chunkCount; } else { stats.m_hostMemoryAllocated += cstats.m_allocatedSize; stats.m_hostMemoryInUse += cstats.m_inUseSize; stats.m_hostMemoryAllocationCount += cstats.m_chunkCount; } } // Add dedicated stats const PtrSize dedicatedAllocatedMemory = m_dedicatedAllocatedMemory.load(); stats.m_deviceMemoryAllocated += dedicatedAllocatedMemory; stats.m_deviceMemoryInUse += dedicatedAllocatedMemory; stats.m_deviceMemoryAllocationCount += m_dedicatedAllocationCount.load(); } } // end namespace anki