// VkGpuMemoryManager.cpp
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Gr/Vulkan/VkGpuMemoryManager.h>
  6. #include <AnKi/Gr/Vulkan/VkGrManager.h>
  7. #include <AnKi/Core/StatsSet.h>
  8. namespace anki {
// Stat counters under the GpuMem category: allocated bytes, in-use bytes and allocation counts,
// split between device-local and host-visible memory. Refreshed by GpuMemoryManager::updateStats().
ANKI_SVAR(DeviceMemoryAllocated, StatCategory::kGpuMem, "Device mem", StatFlag::kBytes)
ANKI_SVAR(DeviceMemoryInUse, StatCategory::kGpuMem, "Device mem in use", StatFlag::kBytes)
ANKI_SVAR(DeviceMemoryAllocationCount, StatCategory::kGpuMem, "Device mem allocations", StatFlag::kNone)
ANKI_SVAR(HostMemoryAllocated, StatCategory::kGpuMem, "Host mem", StatFlag::kBytes)
ANKI_SVAR(HostMemoryInUse, StatCategory::kGpuMem, "Host mem in use", StatFlag::kBytes)
ANKI_SVAR(HostMemoryAllocationCount, StatCategory::kGpuMem, "Host mem allocations", StatFlag::kNone)

/// Suballocation size classes. Each entry defines a suballocation size and the VkDeviceMemory chunk
/// size it is served from (judging by init()'s log, chunkSize / suballocationSize = allocs per chunk).
static constexpr Array<GpuMemoryManagerClassInfo, 8> kClasses{
	{{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}, {512_MB, 512_MB}}};

/// Special classes for the ReBAR memory. Have that as a special case because it's so limited and needs special care.
static constexpr Array<GpuMemoryManagerClassInfo, 3> kRebarClasses{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};
  19. Error GpuMemoryManagerInterface::allocateChunk(U32 classIdx, GpuMemoryManagerChunk*& chunk)
  20. {
  21. VkMemoryAllocateInfo ci = {};
  22. ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  23. ci.allocationSize = m_classInfos[classIdx].m_chunkSize;
  24. ci.memoryTypeIndex = m_memTypeIdx;
  25. VkMemoryAllocateFlagsInfo flags = {};
  26. flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
  27. flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
  28. if(m_exposesBufferGpuAddress)
  29. {
  30. ci.pNext = &flags;
  31. }
  32. VkDeviceMemory memHandle;
  33. if(vkAllocateMemory(getVkDevice(), &ci, nullptr, &memHandle) != VK_SUCCESS) [[unlikely]]
  34. {
  35. ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx, m_classInfos[classIdx].m_suballocationSize);
  36. }
  37. chunk = newInstance<GpuMemoryManagerChunk>(GrMemoryPool::getSingleton());
  38. chunk->m_handle = memHandle;
  39. chunk->m_size = m_classInfos[classIdx].m_chunkSize;
  40. m_allocatedMemory += m_classInfos[classIdx].m_chunkSize;
  41. return Error::kNone;
  42. }
  43. void GpuMemoryManagerInterface::freeChunk(GpuMemoryManagerChunk* chunk)
  44. {
  45. ANKI_ASSERT(chunk);
  46. ANKI_ASSERT(chunk->m_handle != VK_NULL_HANDLE);
  47. if(chunk->m_mappedAddress)
  48. {
  49. vkUnmapMemory(getVkDevice(), chunk->m_handle);
  50. }
  51. vkFreeMemory(getVkDevice(), chunk->m_handle, nullptr);
  52. ANKI_ASSERT(m_allocatedMemory >= chunk->m_size);
  53. m_allocatedMemory -= chunk->m_size;
  54. deleteInstance(GrMemoryPool::getSingleton(), chunk);
  55. }
  56. void GpuMemoryManager::destroy()
  57. {
  58. ANKI_VK_LOGV("Destroying memory manager");
  59. m_callocs.destroy();
  60. }
  61. void GpuMemoryManager::init()
  62. {
  63. // Print some info
  64. ANKI_VK_LOGV("Initializing memory manager");
  65. for(const GpuMemoryManagerClassInfo& c : kClasses)
  66. {
  67. ANKI_VK_LOGV("\tGPU mem class. Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu", c.m_chunkSize, c.m_suballocationSize,
  68. c.m_chunkSize / c.m_suballocationSize);
  69. }
  70. // Image buffer granularity
  71. {
  72. VkPhysicalDeviceProperties props;
  73. vkGetPhysicalDeviceProperties(getGrManagerImpl().getPhysicalDevice(), &props);
  74. m_bufferImageGranularity = U32(props.limits.bufferImageGranularity);
  75. ANKI_ASSERT(m_bufferImageGranularity > 0 && isPowerOfTwo(m_bufferImageGranularity));
  76. if(m_bufferImageGranularity > 4_KB)
  77. {
  78. ANKI_VK_LOGW("Buffer/image mem granularity is too high (%u). It will force high alignments and it will waste memory",
  79. m_bufferImageGranularity);
  80. }
  81. for(const GpuMemoryManagerClassInfo& c : kClasses)
  82. {
  83. if(!isAligned(m_bufferImageGranularity, c.m_suballocationSize))
  84. {
  85. ANKI_VK_LOGW("Memory class is not aligned to buffer/image granularity (%u). It won't be used in "
  86. "allocations: Chunk size: %lu, suballocationSize: %lu, allocsPerChunk %lu",
  87. m_bufferImageGranularity, c.m_chunkSize, c.m_suballocationSize, c.m_chunkSize / c.m_suballocationSize);
  88. }
  89. }
  90. }
  91. vkGetPhysicalDeviceMemoryProperties(getGrManagerImpl().getPhysicalDevice(), &m_memoryProperties);
  92. m_callocs.resize(m_memoryProperties.memoryTypeCount);
  93. for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
  94. {
  95. GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
  96. iface.m_parent = this;
  97. iface.m_memTypeIdx = U8(memTypeIdx);
  98. iface.m_exposesBufferGpuAddress = true;
  99. const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
  100. iface.m_isDeviceMemory = !!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
  101. // Find if it's ReBAR
  102. const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
  103. const VkMemoryPropertyFlags reBarProps =
  104. VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
  105. const PtrSize heapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size;
  106. const Bool isReBar = props == reBarProps && heapSize <= 256_MB;
  107. if(isReBar)
  108. {
  109. ANKI_VK_LOGV("Memory type %u is ReBAR", memTypeIdx);
  110. }
  111. // Choose different classes
  112. if(!isReBar)
  113. {
  114. iface.m_classInfos = kClasses;
  115. }
  116. else
  117. {
  118. iface.m_classInfos = kRebarClasses;
  119. }
  120. // The interface is initialized, init the builder
  121. m_callocs[memTypeIdx].init();
  122. }
  123. }
  124. void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, GpuMemoryHandle& handle)
  125. {
  126. ClassAllocator& calloc = m_callocs[memTypeIdx];
  127. alignment = max(alignment, m_bufferImageGranularity);
  128. GpuMemoryManagerChunk* chunk;
  129. PtrSize offset;
  130. [[maybe_unused]] const Error err = calloc.allocate(size, alignment, chunk, offset);
  131. handle.m_memory = chunk->m_handle;
  132. handle.m_offset = offset;
  133. handle.m_chunk = chunk;
  134. handle.m_memTypeIdx = U8(memTypeIdx);
  135. handle.m_size = size;
  136. }
  137. void GpuMemoryManager::allocateMemoryDedicated(U32 memTypeIdx, PtrSize size, VkImage image, GpuMemoryHandle& handle)
  138. {
  139. VkMemoryDedicatedAllocateInfoKHR dedicatedInfo = {};
  140. dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
  141. dedicatedInfo.image = image;
  142. VkMemoryAllocateInfo memoryAllocateInfo = {};
  143. memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  144. memoryAllocateInfo.pNext = &dedicatedInfo;
  145. memoryAllocateInfo.allocationSize = size;
  146. memoryAllocateInfo.memoryTypeIndex = memTypeIdx;
  147. VkDeviceMemory mem;
  148. ANKI_VK_CHECKF(vkAllocateMemory(getVkDevice(), &memoryAllocateInfo, nullptr, &mem));
  149. handle.m_memory = mem;
  150. handle.m_offset = 0;
  151. handle.m_chunk = nullptr;
  152. handle.m_memTypeIdx = U8(memTypeIdx);
  153. handle.m_size = size;
  154. m_dedicatedAllocatedMemory.fetchAdd(size);
  155. m_dedicatedAllocationCount.fetchAdd(1);
  156. }
  157. void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
  158. {
  159. ANKI_ASSERT(handle);
  160. if(handle.isDedicated())
  161. {
  162. vkFreeMemory(getVkDevice(), handle.m_memory, nullptr);
  163. [[maybe_unused]] const PtrSize prevSize = m_dedicatedAllocatedMemory.fetchSub(handle.m_size);
  164. ANKI_ASSERT(prevSize >= handle.m_size);
  165. [[maybe_unused]] const U32 count = m_dedicatedAllocationCount.fetchSub(1);
  166. ANKI_ASSERT(count > 0);
  167. }
  168. else
  169. {
  170. ClassAllocator& calloc = m_callocs[handle.m_memTypeIdx];
  171. calloc.free(handle.m_chunk, handle.m_offset);
  172. }
  173. handle = {};
  174. }
  175. void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
  176. {
  177. ANKI_ASSERT(handle);
  178. ANKI_ASSERT(!handle.isDedicated());
  179. LockGuard<SpinLock> lock(handle.m_chunk->m_mappedAddressMtx);
  180. if(handle.m_chunk->m_mappedAddress == nullptr)
  181. {
  182. ANKI_VK_CHECKF(vkMapMemory(getVkDevice(), handle.m_chunk->m_handle, 0, handle.m_chunk->m_size, 0, &handle.m_chunk->m_mappedAddress));
  183. }
  184. return static_cast<void*>(static_cast<U8*>(handle.m_chunk->m_mappedAddress) + handle.m_offset);
  185. }
  186. U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFlags preferFlags, VkMemoryPropertyFlags avoidFlags) const
  187. {
  188. U32 prefered = kMaxU32;
  189. // Iterate all mem types
  190. for(U32 i = 0; i < m_memoryProperties.memoryTypeCount; i++)
  191. {
  192. if(resourceMemTypeBits & (1u << i))
  193. {
  194. const VkMemoryPropertyFlags flags = m_memoryProperties.memoryTypes[i].propertyFlags;
  195. if((flags & preferFlags) == preferFlags && (flags & avoidFlags) == 0)
  196. {
  197. // It's the candidate we want
  198. if(prefered == kMaxU32)
  199. {
  200. prefered = i;
  201. }
  202. else
  203. {
  204. // On some Intel drivers there are identical memory types pointing to different heaps. Choose the biggest heap
  205. const PtrSize crntHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[i].heapIndex].size;
  206. const PtrSize prevHeapSize = m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[prefered].heapIndex].size;
  207. if(crntHeapSize > prevHeapSize)
  208. {
  209. prefered = i;
  210. }
  211. }
  212. }
  213. }
  214. }
  215. return prefered;
  216. }
  217. void GpuMemoryManager::updateStats() const
  218. {
  219. g_svarDeviceMemoryAllocated.set(0);
  220. g_svarDeviceMemoryAllocationCount.set(0);
  221. g_svarDeviceMemoryInUse.set(0);
  222. g_svarHostMemoryAllocated.set(0);
  223. g_svarHostMemoryAllocationCount.set(0);
  224. g_svarHostMemoryInUse.set(0);
  225. for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
  226. {
  227. const GpuMemoryManagerInterface& iface = m_callocs[memTypeIdx].getInterface();
  228. ClassAllocatorBuilderStats cstats;
  229. m_callocs[memTypeIdx].getStats(cstats);
  230. if(iface.m_isDeviceMemory)
  231. {
  232. g_svarDeviceMemoryAllocated.increment(cstats.m_allocatedSize);
  233. g_svarDeviceMemoryInUse.increment(cstats.m_inUseSize);
  234. g_svarDeviceMemoryAllocationCount.increment(cstats.m_chunkCount);
  235. }
  236. else
  237. {
  238. g_svarHostMemoryAllocated.increment(cstats.m_allocatedSize);
  239. g_svarHostMemoryInUse.increment(cstats.m_inUseSize);
  240. g_svarHostMemoryAllocationCount.increment(cstats.m_chunkCount);
  241. }
  242. }
  243. // Add dedicated stats
  244. const PtrSize dedicatedAllocatedMemory = m_dedicatedAllocatedMemory.load();
  245. g_svarDeviceMemoryAllocated.increment(dedicatedAllocatedMemory);
  246. g_svarDeviceMemoryInUse.increment(dedicatedAllocatedMemory);
  247. g_svarDeviceMemoryAllocationCount.increment(m_dedicatedAllocationCount.load());
  248. }
  249. void GpuMemoryManager::getImageMemoryRequirements(VkImage image, VkMemoryDedicatedRequirementsKHR& dedicatedRequirements,
  250. VkMemoryRequirements2& requirements)
  251. {
  252. dedicatedRequirements = {};
  253. dedicatedRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR;
  254. requirements = {};
  255. requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
  256. requirements.pNext = &dedicatedRequirements;
  257. VkImageMemoryRequirementsInfo2 imageRequirements = {};
  258. imageRequirements.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
  259. imageRequirements.image = image;
  260. vkGetImageMemoryRequirements2(getVkDevice(), &imageRequirements, &requirements);
  261. if(requirements.memoryRequirements.size > kClasses.getBack().m_chunkSize)
  262. {
  263. // Allocation to big, force a dedicated allocation
  264. dedicatedRequirements.prefersDedicatedAllocation = true;
  265. dedicatedRequirements.requiresDedicatedAllocation = true;
  266. }
  267. }
  268. } // end namespace anki