// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Gr/Vulkan/GpuMemoryManager.h>
#include <AnKi/Util/List.h>

namespace anki
{

class ClassInf
{
public:
	PtrSize m_slotSize;
	PtrSize m_chunkSize;
};

static constexpr Array<ClassInf, 7> CLASSES{{{256_B, 16_KB},
											 {4_KB, 256_KB},
											 {128_KB, 8_MB},
											 {1_MB, 64_MB},
											 {16_MB, 128_MB},
											 {64_MB, 256_MB},
											 {128_MB, 256_MB}}};
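
// Worked example (assuming ClassGpuAllocator serves a request from the smallest class whose slot fits it):
// a 3_KB allocation would land in the {4_KB, 256_KB} class, and each 256_KB chunk of that class can hold
// 256_KB / 4_KB = 64 such allocations before another vkAllocateMemory() is needed.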

/// Special classes for ReBAR memory. It's treated as a special case because it's so limited and needs
/// special care.
static constexpr Array<ClassInf, 3> REBAR_CLASSES{{{1_MB, 1_MB}, {12_MB, 12_MB}, {24_MB, 24_MB}}};
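// Note that every ReBAR class has m_slotSize == m_chunkSize, i.e. each chunk holds exactly one allocation,
// presumably to keep waste minimal in such a small heap.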

class GpuMemoryManager::Memory final : public ClassGpuAllocatorMemory,
									   public IntrusiveListEnabled<GpuMemoryManager::Memory>
{
public:
	VkDeviceMemory m_handle = VK_NULL_HANDLE;
	void* m_mappedAddress = nullptr;
	SpinLock m_mtx; ///< Protects m_mappedAddress.
	U8 m_classIdx = MAX_U8;
};

class GpuMemoryManager::Interface final : public ClassGpuAllocatorInterface
{
public:
	GrAllocator<U8> m_alloc;
	Array<IntrusiveList<Memory>, CLASSES.getSize()> m_vacantMemory;
	Array<ClassInf, CLASSES.getSize()> m_classes = {};
	U8 m_classCount = 0;
	Mutex m_mtx;
	VkDevice m_dev = VK_NULL_HANDLE;
	U8 m_memTypeIdx = MAX_U8;
	Bool m_exposesBufferGpuAddress = false;

	Error allocate(U32 classIdx, ClassGpuAllocatorMemory*& cmem) override
	{
		ANKI_ASSERT(classIdx < m_classCount);

		Memory* mem;
		LockGuard<Mutex> lock(m_mtx);

		if(!m_vacantMemory[classIdx].isEmpty())
		{
			// Recycle
			mem = &m_vacantMemory[classIdx].getFront();
			m_vacantMemory[classIdx].popFront();
		}
		else
		{
			// Create new
			VkMemoryAllocateInfo ci = {};
			ci.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
			ci.allocationSize = m_classes[classIdx].m_chunkSize;
			ci.memoryTypeIndex = m_memTypeIdx;

			VkMemoryAllocateFlagsInfo flags = {};
			flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
			flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
			if(m_exposesBufferGpuAddress)
			{
				ci.pNext = &flags;
			}

			VkDeviceMemory memHandle;
			if(ANKI_UNLIKELY(vkAllocateMemory(m_dev, &ci, nullptr, &memHandle) < 0))
			{
				ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx,
							 m_classes[classIdx].m_chunkSize);
			}

			mem = m_alloc.newInstance<Memory>();
			mem->m_handle = memHandle;
			mem->m_classIdx = U8(classIdx);
		}

		ANKI_ASSERT(mem);
		ANKI_ASSERT(mem->m_handle);
		ANKI_ASSERT(mem->m_classIdx == classIdx);
		ANKI_ASSERT(mem->m_mappedAddress == nullptr);
		cmem = mem;

		return Error::NONE;
	}
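
	// Note: free() below parks chunks on a vacant list rather than calling vkFreeMemory immediately,
	// presumably to avoid vkAllocateMemory churn and pressure on maxMemoryAllocationCount;
	// collectGarbage() does the actual release.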

	void free(ClassGpuAllocatorMemory* cmem) override
	{
		ANKI_ASSERT(cmem);
		Memory* mem = static_cast<Memory*>(cmem);
		ANKI_ASSERT(mem->m_handle);

		LockGuard<Mutex> lock(m_mtx);
		m_vacantMemory[mem->m_classIdx].pushBack(mem);

		// Unmap
		if(mem->m_mappedAddress)
		{
			vkUnmapMemory(m_dev, mem->m_handle);
			mem->m_mappedAddress = nullptr;
		}
	}

	U32 getClassCount() const override
	{
		return m_classCount;
	}

	void getClassInfo(U32 classIdx, PtrSize& slotSize, PtrSize& chunkSize) const override
	{
		ANKI_ASSERT(classIdx < m_classCount);
		slotSize = m_classes[classIdx].m_slotSize;
		chunkSize = m_classes[classIdx].m_chunkSize;
	}

	void collectGarbage()
	{
		LockGuard<Mutex> lock(m_mtx);

		for(U classIdx = 0; classIdx < m_classCount; ++classIdx)
		{
			while(!m_vacantMemory[classIdx].isEmpty())
			{
				Memory* mem = &m_vacantMemory[classIdx].getFront();
				m_vacantMemory[classIdx].popFront();

				if(mem->m_mappedAddress)
				{
					vkUnmapMemory(m_dev, mem->m_handle);
				}

				vkFreeMemory(m_dev, mem->m_handle, nullptr);
				m_alloc.deleteInstance(mem);
			}
		}
	}

	// Lazily map the whole chunk. The mapping persists until free() or collectGarbage() unmaps it.
	void* mapMemory(ClassGpuAllocatorMemory* cmem)
	{
		ANKI_ASSERT(cmem);
		Memory* mem = static_cast<Memory*>(cmem);
		void* out;

		LockGuard<SpinLock> lock(mem->m_mtx);
		if(mem->m_mappedAddress)
		{
			out = mem->m_mappedAddress;
		}
		else
		{
			ANKI_VK_CHECKF(vkMapMemory(m_dev, mem->m_handle, 0, m_classes[mem->m_classIdx].m_chunkSize, 0, &out));
			mem->m_mappedAddress = out;
		}

		ANKI_ASSERT(out);
		return out;
	}
};

class GpuMemoryManager::ClassAllocator : public ClassGpuAllocator
{
public:
	Bool m_isDeviceMemory;
};

GpuMemoryManager::~GpuMemoryManager()
{
}

void GpuMemoryManager::destroy()
{
	for(U32 i = 0; i < m_ifaces.getSize(); ++i)
	{
		for(U32 j = 0; j < 2; j++)
		{
			m_ifaces[i][j].collectGarbage();
		}
	}

	m_ifaces.destroy(m_alloc);
	m_callocs.destroy(m_alloc);
}

void GpuMemoryManager::init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8> alloc, Bool exposeBufferGpuAddress)
{
	ANKI_ASSERT(pdev);
	ANKI_ASSERT(dev);

	// Print some info
	ANKI_VK_LOGI("Initializing memory manager");
	for(const ClassInf& c : CLASSES)
	{
		ANKI_VK_LOGI("\tGPU mem class. Chunk size: %zu, slotSize: %zu, allocsPerChunk %zu", c.m_chunkSize,
					 c.m_slotSize, c.m_chunkSize / c.m_slotSize);
	}

	vkGetPhysicalDeviceMemoryProperties(pdev, &m_memoryProperties);

	m_alloc = alloc;

	m_ifaces.create(alloc, m_memoryProperties.memoryTypeCount);
	for(U32 memTypeIdx = 0; memTypeIdx < m_ifaces.getSize(); ++memTypeIdx)
	{
		for(U32 linear = 0; linear < 2; ++linear)
		{
			Interface& iface = m_ifaces[memTypeIdx][linear];
			iface.m_alloc = alloc;
			iface.m_dev = dev;
			iface.m_memTypeIdx = U8(memTypeIdx);
			iface.m_exposesBufferGpuAddress = (linear == 1) && exposeBufferGpuAddress;

			// Find if it's ReBAR
			const VkMemoryPropertyFlags props = m_memoryProperties.memoryTypes[memTypeIdx].propertyFlags;
			const VkMemoryPropertyFlags reBarProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
													 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
													 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
			const PtrSize heapSize =
				m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[memTypeIdx].heapIndex].size;
			const Bool isReBar = props == reBarProps && heapSize <= 256_MB;

			if(isReBar)
			{
				ANKI_VK_LOGI("Memory type %u is ReBAR", memTypeIdx);
			}

			// Choose different classes
			if(!isReBar)
			{
				iface.m_classCount = CLASSES.getSize();
				iface.m_classes = CLASSES;
			}
			else
			{
				iface.m_classCount = REBAR_CLASSES.getSize();
				memcpy(&iface.m_classes[0], &REBAR_CLASSES[0], REBAR_CLASSES.getSizeInBytes());
			}
		}
	}

	// One allocator per memory type for linear resources and another for non-linear ones
	m_callocs.create(alloc, m_memoryProperties.memoryTypeCount);
	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
	{
		for(U32 linear = 0; linear < 2; ++linear)
		{
			m_callocs[memTypeIdx][linear].init(m_alloc, &m_ifaces[memTypeIdx][linear]);

			const U32 heapIdx = m_memoryProperties.memoryTypes[memTypeIdx].heapIndex;
			m_callocs[memTypeIdx][linear].m_isDeviceMemory =
				!!(m_memoryProperties.memoryHeaps[heapIdx].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
		}
	}
}

void GpuMemoryManager::allocateMemory(U32 memTypeIdx, PtrSize size, U32 alignment, Bool linearResource,
									  GpuMemoryHandle& handle)
{
	ClassGpuAllocator& calloc = m_callocs[memTypeIdx][linearResource];
	const Error err = calloc.allocate(size, alignment, handle.m_classHandle);
	(void)err;

	handle.m_memory = static_cast<Memory*>(handle.m_classHandle.m_memory)->m_handle;
	handle.m_offset = handle.m_classHandle.m_offset;
	handle.m_linear = linearResource;
	handle.m_memTypeIdx = U8(memTypeIdx);
}
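
// Note: the handle points into a shared VkDeviceMemory chunk, so callers are expected to bind with both
// handle.m_memory and handle.m_offset; getMappedAddress() below applies the same offset on the CPU side.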

void GpuMemoryManager::freeMemory(GpuMemoryHandle& handle)
{
	ANKI_ASSERT(handle);
	ClassGpuAllocator& calloc = m_callocs[handle.m_memTypeIdx][handle.m_linear];
	calloc.free(handle.m_classHandle);
	handle = {};
}

void* GpuMemoryManager::getMappedAddress(GpuMemoryHandle& handle)
{
	ANKI_ASSERT(handle);
	Interface& iface = m_ifaces[handle.m_memTypeIdx][handle.m_linear];
	U8* out = static_cast<U8*>(iface.mapMemory(handle.m_classHandle.m_memory));
	return static_cast<void*>(out + handle.m_offset);
}

U32 GpuMemoryManager::findMemoryType(U32 resourceMemTypeBits, VkMemoryPropertyFlags preferFlags,
									 VkMemoryPropertyFlags avoidFlags) const
{
	U32 preferred = MAX_U32;

	// Iterate all mem types
	for(U32 i = 0; i < m_memoryProperties.memoryTypeCount; i++)
	{
		if(resourceMemTypeBits & (1u << i))
		{
			const VkMemoryPropertyFlags flags = m_memoryProperties.memoryTypes[i].propertyFlags;
			if((flags & preferFlags) == preferFlags && (flags & avoidFlags) == 0)
			{
				// It's the candidate we want
				if(preferred == MAX_U32)
				{
					preferred = i;
				}
				else
				{
					// On some Intel drivers there are identical memory types pointing to different heaps.
					// Choose the biggest heap
					const PtrSize crntHeapSize =
						m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[i].heapIndex].size;
					const PtrSize prevHeapSize =
						m_memoryProperties.memoryHeaps[m_memoryProperties.memoryTypes[preferred].heapIndex].size;
					if(crntHeapSize > prevHeapSize)
					{
						preferred = i;
					}
				}
			}
		}
	}

	return preferred;
}
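
// Example (illustrative only): for a CPU-written staging buffer one might call
// findMemoryType(req.memoryTypeBits,
//                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
//                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
// where "req" is assumed to come from vkGetBufferMemoryRequirements(). A MAX_U32 result means no
// compatible memory type exists.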

void GpuMemoryManager::getAllocatedMemory(PtrSize& gpuMemory, PtrSize& cpuMemory) const
{
	gpuMemory = 0;
	cpuMemory = 0;

	for(U32 memTypeIdx = 0; memTypeIdx < m_callocs.getSize(); ++memTypeIdx)
	{
		for(U32 linear = 0; linear < 2; ++linear)
		{
			if(m_callocs[memTypeIdx][linear].m_isDeviceMemory)
			{
				gpuMemory += m_callocs[memTypeIdx][linear].getAllocatedMemory();
			}
			else
			{
				cpuMemory += m_callocs[memTypeIdx][linear].getAllocatedMemory();
			}
		}
	}
}

} // end namespace anki
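
// Usage sketch (illustrative only; "mgr", "req" and the flag choices are assumptions, not part of this file):
//
//	GpuMemoryManager mgr;
//	mgr.init(pdev, dev, alloc, true /* exposeBufferGpuAddress */);
//
//	// "req" from vkGetBufferMemoryRequirements() for some buffer:
//	const U32 memTypeIdx = mgr.findMemoryType(req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
//
//	GpuMemoryHandle handle;
//	mgr.allocateMemory(memTypeIdx, req.size, U32(req.alignment), true /* linearResource */, handle);
//	void* cpuAddr = mgr.getMappedAddress(handle); // Host-visible types only; mapping persists until freed
//	// ... vkBindBufferMemory(dev, buffer, handle.m_memory, handle.m_offset) ...
//	mgr.freeMemory(handle);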