// GpuMemoryManager.cpp
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Gr/Vulkan/GpuMemoryManager.h>
  6. #include <AnKi/Gr/Vulkan/GrManagerImpl.h>
  7. namespace anki {
/// The memory classes used for regular (non-ReBAR) suballocations. Each entry pairs a suballocation size with a chunk
/// size (presumably {suballocationSize, chunkSize} given the values — confirm against GpuMemoryManagerClassInfo's
/// member order).
static constexpr Array<GpuMemoryManagerClassInfo, 7> kClasses{
	{{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}}};

/// Special classes for the ReBAR memory. Have that as a special case because it's so limited and needs special care.
static constexpr Array<GpuMemoryManagerClassInfo, 3> kRebarClasses{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};
  12. Error GpuMemoryManagerInterface::allocateChunk(U32 classIdx, GpuMemoryManagerChunk*& chunk)
  13. {
  14. VkMemoryAllocateInfo ci = {};
  15. ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  16. ci.allocationSize = m_classInfos[classIdx].m_chunkSize;
  17. ci.memoryTypeIndex = m_memTypeIdx;
  18. VkMemoryAllocateFlagsInfo flags = {};
  19. flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
  20. flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
  21. if(m_exposesBufferGpuAddress)
  22. {
  23. ci.pNext = &flags;
  24. }
  25. VkDeviceMemory memHandle;
  26. if(vkAllocateMemory(getVkDevice(), &ci, nullptr, &memHandle) != VK_SUCCESS) [[unlikely]]
  27. {
  28. ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx, m_classInfos[classIdx].m_suballocationSize);
  29. }
  30. chunk = newInstance<GpuMemoryManagerChunk>(GrMemoryPool::getSingleton());
  31. chunk->m_handle = memHandle;
  32. chunk->m_size = m_classInfos[classIdx].m_chunkSize;
  33. m_allocatedMemory += m_classInfos[classIdx].m_chunkSize;
  34. return Error::kNone;
  35. }
  36. void GpuMemoryManagerInterface::freeChunk(GpuMemoryManagerChunk* chunk)
  37. {
  38. ANKI_ASSERT(chunk);
  39. ANKI_ASSERT(chunk->m_handle != VK_NULL_HANDLE);
  40. if(chunk->m_mappedAddress)
  41. {
  42. vkUnmapMemory(getVkDevice(), chunk->m_handle);
  43. }
  44. vkFreeMemory(getVkDevice(), chunk->m_handle, nullptr);
  45. ANKI_ASSERT(m_allocatedMemory >= chunk->m_size);
  46. m_allocatedMemory -= chunk->m_size;
  47. deleteInstance(GrMemoryPool::getSingleton(), chunk);
  48. }
  49. GpuMemoryManager::GpuMemoryManager()
  50. {
  51. }
  52. GpuMemoryManager::~GpuMemoryManager()
  53. {
  54. }
  55. void GpuMemoryManager::destroy()
  56. {
  57. ANKI_VK_LOGV("Destroying memory manager");
  58. m_callocs.destroy();
  59. }
  60. void GpuMemoryManager::init(Bool exposeBufferGpuAddress)
  61. {
  62. // Print some info
  63. ANKI_VK_LOGV("Initializing memory manager");
  64. for(const GpuMemoryManagerClassInfo& c : kClasses)
  65. {
  66. ANKI_VK_LOGV("\tGPU mem class. Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu", c.m_chunkSize, c.m_suballocationSize,
  67. c.m_chunkSize / c.m_suballocationSize);
  68. }
  69. // Image buffer granularity
  70. {
  71. VkPhysicalDeviceProperties props;
  72. vkGetPhysicalDeviceProperties(getGrManagerImpl().getPhysicalDevice(), &props);
  73. m_bufferImageGranularity = U32(props.limits.bufferImageGranularity);
  74. ANKI_ASSERT(m_bufferImageGranularity > 0 && isPowerOfTwo(m_bufferImageGranularity));
  75. if(m_bufferImageGranularity > 4_KB)
  76. {
  77. ANKI_VK_LOGW("Buffer/image mem granularity is too high (%u). It will force high alignments and it will waste memory",
  78. m_bufferImageGranularity);
  79. }
  80. for(const GpuMemoryManagerClassInfo& c : kClasses)
  81. {
  82. if(!isAligned(m_bufferImageGranularity, c.m_suballocationSize))
  83. {
  84. ANKI_VK_LOGW("Memory class is not aligned to buffer/image granularity (%u). It won't be used in "
  85. "allocations: Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu",
  86. m_bufferImageGranularity, c.m_chunkSize, c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize);
  87. }
  88. }
  89. }
  90. vkGetPhysicalDeviceMemoryProperties(getGrManagerImpl().getPhysicalDevice(), &m_memoryProperties);
  91. m_callocs.resize(m_memoryProperties.memoryTypeCount);
  92. for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
  93. {
  94. GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
  95. iface.m_parent = this;
  96. iface.m_memTypeIdx = U8(memTypeIdx);
  97. iface.m_exposesBufferGpuAddress = exposeBufferGpuAddress;
  98. const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
  99. iface.m_isDeviceMemory = !!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
  100. // Find if it's ReBAR
  101. const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
  102. const VkMemoryPropertyFlags reBarProps =
  103. VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
  104. const PtrSize heapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size;
  105. const Bool isReBar = props == reBarProps && heapSize <= 256_MB;
  106. if(isReBar)
  107. {
  108. ANKI_VK_LOGV("Memory type %u is ReBAR", memTypeIdx);
  109. }
  110. // Choose different classes
  111. if(!isReBar)
  112. {
  113. iface.m_classInfos = kClasses;
  114. }
  115. else
  116. {
  117. iface.m_classInfos = kRebarClasses;
  118. }
  119. // The interface is initialized, init the builder
  120. m_callocs[memTypeIdx].init();
  121. }
  122. }
  123. void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, GpuMemoryHandle& handle)
  124. {
  125. ClassAllocator& calloc = m_callocs[memTypeIdx];
  126. alignment = max(alignment, m_bufferImageGranularity);
  127. GpuMemoryManagerChunk* chunk;
  128. PtrSize offset;
  129. [[maybe_unused]] const Error err = calloc.allocate(size, alignment, chunk, offset);
  130. handle.m_memory = chunk->m_handle;
  131. handle.m_offset = offset;
  132. handle.m_chunk = chunk;
  133. handle.m_memTypeIdx = U8(memTypeIdx);
  134. handle.m_size = size;
  135. }
  136. void GpuMemoryManager::allocateMemoryDedicated(U32 memTypeIdx, PtrSize size, VkImage image, GpuMemoryHandle& handle)
  137. {
  138. VkMemoryDedicatedAllocateInfoKHR dedicatedInfo = {};
  139. dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
  140. dedicatedInfo.image = image;
  141. VkMemoryAllocateInfo memoryAllocateInfo = {};
  142. memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  143. memoryAllocateInfo.pNext = &dedicatedInfo;
  144. memoryAllocateInfo.allocationSize = size;
  145. memoryAllocateInfo.memoryTypeIndex = memTypeIdx;
  146. VkDeviceMemory mem;
  147. ANKI_VK_CHECKF(vkAllocateMemory(getVkDevice(), &memoryAllocateInfo, nullptr, &mem));
  148. handle.m_memory = mem;
  149. handle.m_offset = 0;
  150. handle.m_chunk = nullptr;
  151. handle.m_memTypeIdx = U8(memTypeIdx);
  152. handle.m_size = size;
  153. m_dedicatedAllocatedMemory.fetchAdd(size);
  154. m_dedicatedAllocationCount.fetchAdd(1);
  155. }
  156. void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
  157. {
  158. ANKI_ASSERT(handle);
  159. if(handle.isDedicated())
  160. {
  161. vkFreeMemory(getVkDevice(), handle.m_memory, nullptr);
  162. [[maybe_unused]] const PtrSize prevSize = m_dedicatedAllocatedMemory.fetchSub(handle.m_size);
  163. ANKI_ASSERT(prevSize >= handle.m_size);
  164. [[maybe_unused]] const U32 count = m_dedicatedAllocationCount.fetchSub(1);
  165. ANKI_ASSERT(count > 0);
  166. }
  167. else
  168. {
  169. ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx];
  170. calloc.free(handle.m_chunk, handle.m_offset);
  171. }
  172. handle = {};
  173. }
  174. void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
  175. {
  176. ANKI_ASSERT(handle);
  177. ANKI_ASSERT(!handle.isDedicated());
  178. LockGuard<SpinLock> lock(handle.m_chunk->m_mappedAddressMtx);
  179. if(handle.m_chunk->m_mappedAddress == nullptr)
  180. {
  181. ANKI_VK_CHECKF(vkMapMemory(getVkDevice(), handle.m_chunk->m_handle, 0, handle.m_chunk->m_size, 0, &handle.m_chunk->m_mappedAddress));
  182. }
  183. return static_cast<void*>(static_cast<U8*>(handle.m_chunk->m_mappedAddress) + handle.m_offset);
  184. }
  185. U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFlags preferFlags, VkMemoryPropertyFlags avoidFlags) const
  186. {
  187. U32 prefered = kMaxU32;
  188. // Iterate all mem types
  189. for(U32 i = 0; i < m_memoryProperties.memoryTypeCount; i++)
  190. {
  191. if(resourceMemTypeBits & (1u << i))
  192. {
  193. const VkMemoryPropertyFlags flags = m_memoryProperties.memoryTypes[i].propertyFlags;
  194. if((flags & preferFlags) == preferFlags && (flags & avoidFlags) == 0)
  195. {
  196. // It's the candidate we want
  197. if(prefered == kMaxU32)
  198. {
  199. prefered = i;
  200. }
  201. else
  202. {
  203. // On some Intel drivers there are identical memory types pointing to different heaps. Choose the
  204. // biggest heap
  205. const PtrSize crntHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[i].heapIndex].size;
  206. const PtrSize prevHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[prefered].heapIndex].size;
  207. if(crntHeapSize > prevHeapSize)
  208. {
  209. prefered = i;
  210. }
  211. }
  212. }
  213. }
  214. }
  215. return prefered;
  216. }
  217. void GpuMemoryManager::getStats(GpuMemoryManagerStats& stats) const
  218. {
  219. stats = {};
  220. for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
  221. {
  222. const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
  223. ClassAllocatorBuilderStats cstats;
  224. m_callocs[memTypeIdx].getStats(cstats);
  225. if(iface.m_isDeviceMemory)
  226. {
  227. stats.m_deviceMemoryAllocated += cstats.m_allocatedSize;
  228. stats.m_deviceMemoryInUse += cstats.m_inUseSize;
  229. stats.m_deviceMemoryAllocationCount += cstats.m_chunkCount;
  230. }
  231. else
  232. {
  233. stats.m_hostMemoryAllocated += cstats.m_allocatedSize;
  234. stats.m_hostMemoryInUse += cstats.m_inUseSize;
  235. stats.m_hostMemoryAllocationCount += cstats.m_chunkCount;
  236. }
  237. }
  238. // Add dedicated stats
  239. const PtrSize dedicatedAllocatedMemory = m_dedicatedAllocatedMemory.load();
  240. stats.m_deviceMemoryAllocated += dedicatedAllocatedMemory;
  241. stats.m_deviceMemoryInUse += dedicatedAllocatedMemory;
  242. stats.m_deviceMemoryAllocationCount += m_dedicatedAllocationCount.load();
  243. }
  244. } // end namespace anki