tracy_rpmalloc.cpp

  1. #ifdef TRACY_ENABLE
  2. /* rpmalloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
  3. *
  4. * This library provides a cross-platform lock free thread caching malloc implementation in C11.
  5. * The latest source code is always available at
  6. *
  7. * https://github.com/mjansson/rpmalloc
  8. *
  9. * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
  10. *
  11. */
  12. #include "tracy_rpmalloc.hpp"
  13. /// Build time configurable limits
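  // Each limit below is wrapped in #ifndef, so it can be overridden by defining it before this point,
  // for example on the compiler command line (e.g. -DENABLE_STATISTICS=1 -DDEFAULT_SPAN_MAP_COUNT=32 with GCC/Clang).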
  14. #ifndef HEAP_ARRAY_SIZE
  15. //! Size of heap hashmap
  16. #define HEAP_ARRAY_SIZE 47
  17. #endif
  18. #ifndef ENABLE_THREAD_CACHE
  19. //! Enable per-thread cache
  20. #define ENABLE_THREAD_CACHE 1
  21. #endif
  22. #ifndef ENABLE_GLOBAL_CACHE
  23. //! Enable global cache shared between all threads, requires thread cache
  24. #define ENABLE_GLOBAL_CACHE 1
  25. #endif
  26. #ifndef ENABLE_VALIDATE_ARGS
  27. //! Enable validation of args to public entry points
  28. #define ENABLE_VALIDATE_ARGS 0
  29. #endif
  30. #ifndef ENABLE_STATISTICS
  31. //! Enable statistics collection
  32. #define ENABLE_STATISTICS 0
  33. #endif
  34. #ifndef ENABLE_ASSERTS
  35. //! Enable asserts
  36. #define ENABLE_ASSERTS 0
  37. #endif
  38. #ifndef ENABLE_OVERRIDE
  39. //! Override standard library malloc/free and new/delete entry points
  40. #define ENABLE_OVERRIDE 0
  41. #endif
  42. #ifndef ENABLE_PRELOAD
  43. //! Support preloading
  44. #define ENABLE_PRELOAD 0
  45. #endif
  46. #ifndef DISABLE_UNMAP
  47. //! Disable unmapping memory pages
  48. #define DISABLE_UNMAP 0
  49. #endif
  50. #ifndef DEFAULT_SPAN_MAP_COUNT
  51. //! Default number of spans to map in a call to map more virtual memory (default values yield 4MiB here)
  52. #define DEFAULT_SPAN_MAP_COUNT 64
  53. #endif
  54. #if ENABLE_THREAD_CACHE
  55. #ifndef ENABLE_UNLIMITED_CACHE
  56. //! Unlimited thread and global cache
  57. #define ENABLE_UNLIMITED_CACHE 0
  58. #endif
  59. #ifndef ENABLE_UNLIMITED_THREAD_CACHE
  60. //! Unlimited cache disables any thread cache limitations
  61. #define ENABLE_UNLIMITED_THREAD_CACHE ENABLE_UNLIMITED_CACHE
  62. #endif
  63. #if !ENABLE_UNLIMITED_THREAD_CACHE
  64. #ifndef THREAD_CACHE_MULTIPLIER
  65. //! Multiplier for thread cache (cache limit will be span release count multiplied by this value)
  66. #define THREAD_CACHE_MULTIPLIER 16
  67. #endif
  68. #ifndef ENABLE_ADAPTIVE_THREAD_CACHE
  69. //! Enable adaptive size of per-thread cache (still bounded by THREAD_CACHE_MULTIPLIER hard limit)
  70. #define ENABLE_ADAPTIVE_THREAD_CACHE 0
  71. #endif
  72. #endif
  73. #endif
  74. #if ENABLE_GLOBAL_CACHE && ENABLE_THREAD_CACHE
  75. #ifndef ENABLE_UNLIMITED_GLOBAL_CACHE
  76. //! Unlimited cache disables any global cache limitations
  77. #define ENABLE_UNLIMITED_GLOBAL_CACHE ENABLE_UNLIMITED_CACHE
  78. #endif
  79. #if !ENABLE_UNLIMITED_GLOBAL_CACHE
  80. //! Multiplier for global cache (cache limit will be span release count multiplied by this value)
  81. #define GLOBAL_CACHE_MULTIPLIER (THREAD_CACHE_MULTIPLIER * 6)
  82. #endif
  83. #else
  84. # undef ENABLE_GLOBAL_CACHE
  85. # define ENABLE_GLOBAL_CACHE 0
  86. #endif
  87. #if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE
  88. # undef ENABLE_ADAPTIVE_THREAD_CACHE
  89. # define ENABLE_ADAPTIVE_THREAD_CACHE 0
  90. #endif
  91. #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
  92. # error Must use global cache if unmap is disabled
  93. #endif
  94. #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
  95. # define PLATFORM_WINDOWS 1
  96. # define PLATFORM_POSIX 0
  97. #else
  98. # define PLATFORM_WINDOWS 0
  99. # define PLATFORM_POSIX 1
  100. #endif
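  // The code below originates from C; _Static_assert is the C11 keyword, mapped here to the C++ static_assert.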
  101. #define _Static_assert static_assert
  102. /// Platform and arch specifics
  103. #ifndef FORCEINLINE
  104. # if defined(_MSC_VER) && !defined(__clang__)
  105. # define FORCEINLINE inline __forceinline
  106. # else
  107. # define FORCEINLINE inline __attribute__((__always_inline__))
  108. # endif
  109. #endif
  110. #if PLATFORM_WINDOWS
  111. # ifndef WIN32_LEAN_AND_MEAN
  112. # define WIN32_LEAN_AND_MEAN
  113. # endif
  114. # include <windows.h>
  115. # if ENABLE_VALIDATE_ARGS
  116. # include <Intsafe.h>
  117. # endif
  118. #else
  119. # include <unistd.h>
  120. # include <stdio.h>
  121. # include <stdlib.h>
  122. # if defined(__APPLE__)
  123. # include <mach/mach_vm.h>
  124. # include <mach/vm_statistics.h>
  125. # include <pthread.h>
  126. # endif
  127. # if defined(__HAIKU__)
  128. # include <OS.h>
  129. # include <pthread.h>
  130. # endif
  131. #endif
  132. #include <stdint.h>
  133. #include <string.h>
  134. #if ENABLE_ASSERTS
  135. # undef NDEBUG
  136. # if defined(_MSC_VER) && !defined(_DEBUG)
  137. # define _DEBUG
  138. # endif
  139. # include <assert.h>
  140. #else
  141. # undef assert
  142. # define assert(x) do {} while(0)
  143. #endif
  144. #if ENABLE_STATISTICS
  145. # include <stdio.h>
  146. #endif
  147. #include <atomic>
  148. namespace tracy
  149. {
  150. typedef std::atomic<int32_t> atomic32_t;
  151. typedef std::atomic<int64_t> atomic64_t;
  152. typedef std::atomic<void*> atomicptr_t;
  153. #define atomic_thread_fence_acquire() std::atomic_thread_fence(std::memory_order_acquire)
  154. #define atomic_thread_fence_release() std::atomic_thread_fence(std::memory_order_release)
  155. static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); }
  156. static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); }
  157. static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, 1, std::memory_order_relaxed) + 1; }
  158. #if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
  159. static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, -1, std::memory_order_relaxed) - 1; }
  160. #endif
  161. static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return std::atomic_fetch_add_explicit(val, add, std::memory_order_relaxed) + add; }
  162. static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); }
  163. static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); }
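  //! Weak compare-and-swap on a pointer; returns nonzero on success and may fail spuriously, so callers retry in a loop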
  164. static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return std::atomic_compare_exchange_weak_explicit(dst, &ref, val, std::memory_order_release, std::memory_order_acquire); }
  165. #if defined(_MSC_VER) && !defined(__clang__)
  166. # define EXPECTED(x) (x)
  167. # define UNEXPECTED(x) (x)
  168. #else
  169. # define EXPECTED(x) __builtin_expect((x), 1)
  170. # define UNEXPECTED(x) __builtin_expect((x), 0)
  171. #endif
  172. /// Preconfigured limits and sizes
  173. //! Granularity of a small allocation block
  174. #define SMALL_GRANULARITY 16
  175. //! Small granularity shift count
  176. #define SMALL_GRANULARITY_SHIFT 4
  177. //! Number of small block size classes
  178. #define SMALL_CLASS_COUNT 65
  179. //! Maximum size of a small block
  180. #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
  181. //! Granularity of a medium allocation block
  182. #define MEDIUM_GRANULARITY 512
  183. //! Medium granularity shift count
  184. #define MEDIUM_GRANULARITY_SHIFT 9
  185. //! Number of medium block size classes
  186. #define MEDIUM_CLASS_COUNT 61
  187. //! Total number of small + medium size classes
  188. #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
  189. //! Number of large block size classes
  190. #define LARGE_CLASS_COUNT 32
  191. //! Maximum size of a medium block
  192. #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
  193. //! Maximum size of a large block
  194. #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
  195. //! Size of a span header (must be a multiple of SMALL_GRANULARITY)
  196. #define SPAN_HEADER_SIZE 96
  197. #if ENABLE_VALIDATE_ARGS
  198. //! Maximum allocation size to avoid integer overflow
  199. #undef MAX_ALLOC_SIZE
  200. #define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size)
  201. #endif
  202. #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
  203. #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
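  //! Sentinel value used to mark a span's deferred free list as busy while it is being swapped out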
  204. #define INVALID_POINTER ((void*)((uintptr_t)-1))
  205. /// Data types
  206. //! A memory heap, per thread
  207. typedef struct heap_t heap_t;
  208. //! Heap spans per size class
  209. typedef struct heap_class_t heap_class_t;
  210. //! Span of memory pages
  211. typedef struct span_t span_t;
  212. //! Span list
  213. typedef struct span_list_t span_list_t;
  214. //! Span active data
  215. typedef struct span_active_t span_active_t;
  216. //! Size class definition
  217. typedef struct size_class_t size_class_t;
  218. //! Global cache
  219. typedef struct global_cache_t global_cache_t;
  220. //! Flag indicating span is the first (master) span of a split superspan
  221. #define SPAN_FLAG_MASTER 1U
  222. //! Flag indicating span is a secondary (sub) span of a split superspan
  223. #define SPAN_FLAG_SUBSPAN 2U
  224. //! Flag indicating span has blocks with increased alignment
  225. #define SPAN_FLAG_ALIGNED_BLOCKS 4U
  226. #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  227. struct span_use_t {
  228. //! Current number of spans used (actually used, not in cache)
  229. atomic32_t current;
  230. //! High water mark of spans used
  231. uint32_t high;
  232. #if ENABLE_STATISTICS
  233. //! Number of spans transitioned to global cache
  234. uint32_t spans_to_global;
  235. //! Number of spans transitioned from global cache
  236. uint32_t spans_from_global;
  237. //! Number of spans transitioned to thread cache
  238. uint32_t spans_to_cache;
  239. //! Number of spans transitioned from thread cache
  240. uint32_t spans_from_cache;
  241. //! Number of spans transitioned to reserved state
  242. uint32_t spans_to_reserved;
  243. //! Number of spans transitioned from reserved state
  244. uint32_t spans_from_reserved;
  245. //! Number of raw memory map calls
  246. uint32_t spans_map_calls;
  247. #endif
  248. };
  249. typedef struct span_use_t span_use_t;
  250. #endif
  251. #if ENABLE_STATISTICS
  252. struct size_class_use_t {
  253. //! Current number of allocations
  254. atomic32_t alloc_current;
  255. //! Peak number of allocations
  256. int32_t alloc_peak;
  257. //! Total number of allocations
  258. int32_t alloc_total;
  259. //! Total number of frees
  260. atomic32_t free_total;
  261. //! Number of spans in use
  262. uint32_t spans_current;
  263. //! Peak number of spans in use
  264. uint32_t spans_peak;
  265. //! Number of spans transitioned to cache
  266. uint32_t spans_to_cache;
  267. //! Number of spans transitioned from cache
  268. uint32_t spans_from_cache;
  269. //! Number of spans transitioned from reserved state
  270. uint32_t spans_from_reserved;
  271. //! Number of spans mapped
  272. uint32_t spans_map_calls;
  273. };
  274. typedef struct size_class_use_t size_class_use_t;
  275. #endif
  276. typedef enum span_state_t {
  277. SPAN_STATE_ACTIVE = 0,
  278. SPAN_STATE_PARTIAL,
  279. SPAN_STATE_FULL
  280. } span_state_t;
  281. //A span can either represent a single span of memory pages, with size declared by the span_map_count configuration variable,
  282. //or a set of spans in a contiguous region, a super span. Any reference to the term "span" usually refers to either a single
  283. //span or a super span. A super span can further be divided into multiple spans (or, in turn, further super spans), where the first
  284. //(super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
  285. //are still alive and mapped in virtual memory; once all subspans and the master have been unmapped, the entire
  286. //superspan region is released and unmapped (on Windows, for example, the entire superspan range has to be released
  287. //in the same call that releases the virtual memory range, while individual subranges can be decommitted individually
  288. //to reduce physical memory use).
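  //For a subspan, total_spans_or_distance holds the distance in spans back to the master span, so the master can be
  //recovered as pointer_offset(subspan, -(total_spans_or_distance * _memory_span_size)), see _memory_unmap_span below.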
  289. struct span_t {
  290. //! Free list
  291. void* free_list;
  292. //! State
  293. uint32_t state;
  294. //! Used count when not active (not including deferred free list)
  295. uint32_t used_count;
  296. //! Block count
  297. uint32_t block_count;
  298. //! Size class
  299. uint32_t size_class;
  300. //! Index of last block initialized in free list
  301. uint32_t free_list_limit;
  302. //! Span list size when part of a cache list, or size of deferred free list when partial/full
  303. uint32_t list_size;
  304. //! Deferred free list
  305. atomicptr_t free_list_deferred;
  306. //! Size of a block
  307. uint32_t block_size;
  308. //! Flags and counters
  309. uint32_t flags;
  310. //! Number of spans
  311. uint32_t span_count;
  312. //! Total span counter for master spans, distance for subspans
  313. uint32_t total_spans_or_distance;
  314. //! Remaining span counter, for master spans
  315. atomic32_t remaining_spans;
  316. //! Alignment offset
  317. uint32_t align_offset;
  318. //! Owning heap
  319. heap_t* heap;
  320. //! Next span
  321. span_t* next;
  322. //! Previous span
  323. span_t* prev;
  324. };
  325. _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
  326. struct heap_class_t {
  327. //! Free list of active span
  328. void* free_list;
  329. //! Doubly linked list of partially used spans with free blocks for each size class.
  330. // Current active span is at head of list. Previous span pointer in head points to tail span of list.
  331. span_t* partial_span;
  332. };
  333. struct heap_t {
  334. //! Active and semi-used span data per size class
  335. heap_class_t span_class[SIZE_CLASS_COUNT];
  336. #if ENABLE_THREAD_CACHE
  337. //! List of free spans (single linked list)
  338. span_t* span_cache[LARGE_CLASS_COUNT];
  339. //! List of deferred free spans of class 0 (single linked list)
  340. atomicptr_t span_cache_deferred;
  341. #endif
  342. #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  343. //! Current and high water mark of spans used per span count
  344. span_use_t span_use[LARGE_CLASS_COUNT];
  345. #endif
  346. //! Mapped but unused spans
  347. span_t* span_reserve;
  348. //! Master span for mapped but unused spans
  349. span_t* span_reserve_master;
  350. //! Number of mapped but unused spans
  351. size_t spans_reserved;
  352. //! Next heap in id list
  353. heap_t* next_heap;
  354. //! Next heap in orphan list
  355. heap_t* next_orphan;
  356. //! Memory pages alignment offset
  357. size_t align_offset;
  358. //! Heap ID
  359. int32_t id;
  360. #if ENABLE_STATISTICS
  361. //! Number of bytes transitioned thread -> global
  362. size_t thread_to_global;
  363. //! Number of bytes transitioned global -> thread
  364. size_t global_to_thread;
  365. //! Allocation stats per size class
  366. size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
  367. #endif
  368. };
  369. struct size_class_t {
  370. //! Size of blocks in this class
  371. uint32_t block_size;
  372. //! Number of blocks in each chunk
  373. uint16_t block_count;
  374. //! Class index this class is merged with
  375. uint16_t class_idx;
  376. };
  377. _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
  378. struct global_cache_t {
  379. //! Cache list pointer
  380. atomicptr_t cache;
  381. //! Cache size
  382. atomic32_t size;
  383. //! ABA counter
  384. atomic32_t counter;
  385. };
  386. /// Global data
  387. //! Initialized flag
  388. static int _rpmalloc_initialized;
  389. //! Configuration
  390. static rpmalloc_config_t _memory_config;
  391. //! Memory page size
  392. static size_t _memory_page_size;
  393. //! Shift to divide by page size
  394. static size_t _memory_page_size_shift;
  395. //! Granularity at which memory pages are mapped by OS
  396. static size_t _memory_map_granularity;
  397. #if RPMALLOC_CONFIGURABLE
  398. //! Size of a span of memory pages
  399. static size_t _memory_span_size;
  400. //! Shift to divide by span size
  401. static size_t _memory_span_size_shift;
  402. //! Mask to get to start of a memory span
  403. static uintptr_t _memory_span_mask;
  404. #else
  405. //! Hardwired span size (64KiB)
  406. #define _memory_span_size (64 * 1024)
  407. #define _memory_span_size_shift 16
  408. #define _memory_span_mask (~((uintptr_t)(_memory_span_size - 1)))
  409. #endif
  410. //! Number of spans to map in each map call
  411. static size_t _memory_span_map_count;
  412. //! Number of spans to release from thread cache to global cache (single spans)
  413. static size_t _memory_span_release_count;
  414. //! Number of spans to release from thread cache to global cache (large multiple spans)
  415. static size_t _memory_span_release_count_large;
  416. //! Global size classes
  417. static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
  418. //! Run-time size limit of medium blocks
  419. static size_t _memory_medium_size_limit;
  420. //! Heap ID counter
  421. static atomic32_t _memory_heap_id;
  422. //! Huge page support
  423. static int _memory_huge_pages;
  424. #if ENABLE_GLOBAL_CACHE
  425. //! Global span cache
  426. static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
  427. #endif
  428. //! All heaps
  429. static atomicptr_t _memory_heaps[HEAP_ARRAY_SIZE];
  430. //! Orphaned heaps
  431. static atomicptr_t _memory_orphan_heaps;
  432. //! Running orphan counter to avoid ABA issues in linked list
  433. static atomic32_t _memory_orphan_counter;
  434. #if ENABLE_STATISTICS
  435. //! Active heap count
  436. static atomic32_t _memory_active_heaps;
  437. //! Number of currently mapped memory pages
  438. static atomic32_t _mapped_pages;
  439. //! Peak number of concurrently mapped memory pages
  440. static int32_t _mapped_pages_peak;
  441. //! Number of currently unused spans
  442. static atomic32_t _reserved_spans;
  443. //! Running counter of total number of mapped memory pages since start
  444. static atomic32_t _mapped_total;
  445. //! Running counter of total number of unmapped memory pages since start
  446. static atomic32_t _unmapped_total;
  447. //! Number of currently mapped memory pages in OS calls
  448. static atomic32_t _mapped_pages_os;
  449. //! Number of currently allocated pages in huge allocations
  450. static atomic32_t _huge_pages_current;
  451. //! Peak number of currently allocated pages in huge allocations
  452. static int32_t _huge_pages_peak;
  453. #endif
  454. //! Current thread heap
  455. #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  456. static pthread_key_t _memory_thread_heap;
  457. #else
  458. # ifdef _MSC_VER
  459. # define _Thread_local __declspec(thread)
  460. # define TLS_MODEL
  461. # else
  462. # define TLS_MODEL __attribute__((tls_model("initial-exec")))
  463. # if !defined(__clang__) && defined(__GNUC__)
  464. # define _Thread_local __thread
  465. # endif
  466. # endif
  467. static _Thread_local heap_t* _memory_thread_heap TLS_MODEL;
  468. #endif
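  //! Get the raw thread heap pointer without triggering initialization (may be null)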
  469. static inline heap_t*
  470. get_thread_heap_raw(void) {
  471. #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  472. return (heap_t*)pthread_getspecific(_memory_thread_heap);
  473. #else
  474. return _memory_thread_heap;
  475. #endif
  476. }
  477. //! Get the current thread heap
  478. static inline heap_t*
  479. get_thread_heap(void) {
  480. heap_t* heap = get_thread_heap_raw();
  481. #if ENABLE_PRELOAD
  482. if (EXPECTED(heap != 0))
  483. return heap;
  484. rpmalloc_initialize();
  485. return get_thread_heap_raw();
  486. #else
  487. return heap;
  488. #endif
  489. }
  490. //! Set the current thread heap
  491. static void
  492. set_thread_heap(heap_t* heap) {
  493. #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  494. pthread_setspecific(_memory_thread_heap, heap);
  495. #else
  496. _memory_thread_heap = heap;
  497. #endif
  498. }
  499. //! Default implementation to map more virtual memory
  500. static void*
  501. _memory_map_os(size_t size, size_t* offset);
  502. //! Default implementation to unmap virtual memory
  503. static void
  504. _memory_unmap_os(void* address, size_t size, size_t offset, size_t release);
  505. //! Lookup a memory heap from heap ID
  506. static heap_t*
  507. _memory_heap_lookup(int32_t id) {
  508. uint32_t list_idx = id % HEAP_ARRAY_SIZE;
  509. heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
  510. while (heap && (heap->id != id))
  511. heap = heap->next_heap;
  512. return heap;
  513. }
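  // The statistics helpers below compile to no-ops when ENABLE_STATISTICS is 0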
  514. #if ENABLE_STATISTICS
  515. # define _memory_statistics_inc(counter, value) counter += value
  516. # define _memory_statistics_dec(counter, value) counter -= value
  517. # define _memory_statistics_add(atomic_counter, value) atomic_add32(atomic_counter, (int32_t)(value))
  518. # define _memory_statistics_add_peak(atomic_counter, value, peak) do { int32_t _cur_count = atomic_add32(atomic_counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0)
  519. # define _memory_statistics_sub(atomic_counter, value) atomic_add32(atomic_counter, -(int32_t)(value))
  520. # define _memory_statistics_inc_alloc(heap, class_idx) do { \
  521. int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
  522. if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
  523. heap->size_class_use[class_idx].alloc_peak = alloc_current; \
  524. heap->size_class_use[class_idx].alloc_total++; \
  525. } while(0)
  526. # define _memory_statistics_inc_free(heap, class_idx) do { \
  527. atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
  528. atomic_incr32(&heap->size_class_use[class_idx].free_total); \
  529. } while(0)
  530. #else
  531. # define _memory_statistics_inc(counter, value) do {} while(0)
  532. # define _memory_statistics_dec(counter, value) do {} while(0)
  533. # define _memory_statistics_add(atomic_counter, value) do {} while(0)
  534. # define _memory_statistics_add_peak(atomic_counter, value, peak) do {} while (0)
  535. # define _memory_statistics_sub(atomic_counter, value) do {} while(0)
  536. # define _memory_statistics_inc_alloc(heap, class_idx) do {} while(0)
  537. # define _memory_statistics_inc_free(heap, class_idx) do {} while(0)
  538. #endif
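  //! Forward declaration: insert a single span into the thread heap cache, releasing to the global cache on overflow (defined below)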
  539. static void
  540. _memory_heap_cache_insert(heap_t* heap, span_t* span);
  541. //! Map more virtual memory
  542. static void*
  543. _memory_map(size_t size, size_t* offset) {
  544. assert(!(size % _memory_page_size));
  545. assert(size >= _memory_page_size);
  546. _memory_statistics_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
  547. _memory_statistics_add(&_mapped_total, (size >> _memory_page_size_shift));
  548. return _memory_config.memory_map(size, offset);
  549. }
  550. //! Unmap virtual memory
  551. static void
  552. _memory_unmap(void* address, size_t size, size_t offset, size_t release) {
  553. assert(!release || (release >= size));
  554. assert(!release || (release >= _memory_page_size));
  555. if (release) {
  556. assert(!(release % _memory_page_size));
  557. _memory_statistics_sub(&_mapped_pages, (release >> _memory_page_size_shift));
  558. _memory_statistics_add(&_unmapped_total, (release >> _memory_page_size_shift));
  559. }
  560. _memory_config.memory_unmap(address, size, offset, release);
  561. }
  562. //! Declare the span to be a subspan and store distance from master span and span count
  563. static void
  564. _memory_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) {
  565. assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER));
  566. if (subspan != master) {
  567. subspan->flags = SPAN_FLAG_SUBSPAN;
  568. subspan->total_spans_or_distance = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
  569. subspan->align_offset = 0;
  570. }
  571. subspan->span_count = (uint32_t)span_count;
  572. }
  573. //! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
  574. static span_t*
  575. _memory_map_from_reserve(heap_t* heap, size_t span_count) {
  576. //Update the heap span reserve
  577. span_t* span = heap->span_reserve;
  578. heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size);
  579. heap->spans_reserved -= span_count;
  580. _memory_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
  581. if (span_count <= LARGE_CLASS_COUNT)
  582. _memory_statistics_inc(heap->span_use[span_count - 1].spans_from_reserved, 1);
  583. return span;
  584. }
  585. //! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size
  586. static size_t
  587. _memory_map_align_span_count(size_t span_count) {
  588. size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
  589. if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
  590. request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
  591. return request_count;
  592. }
  593. //! Store the given spans as reserve in the given heap
  594. static void
  595. _memory_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) {
  596. heap->span_reserve_master = master;
  597. heap->span_reserve = reserve;
  598. heap->spans_reserved = reserve_span_count;
  599. }
  600. //! Setup a newly mapped span
  601. static void
  602. _memory_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) {
  603. span->total_spans_or_distance = (uint32_t)total_span_count;
  604. span->span_count = (uint32_t)span_count;
  605. span->align_offset = (uint32_t)align_offset;
  606. span->flags = SPAN_FLAG_MASTER;
  607. atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
  608. }
  609. //! Map an aligned set of spans, taking the configured mapping granularity and the page size into account
  610. static span_t*
  611. _memory_map_aligned_span_count(heap_t* heap, size_t span_count) {
  612. //If we already have some, but not enough, reserved spans, release those to heap cache and map a new
  613. //full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
  614. size_t aligned_span_count = _memory_map_align_span_count(span_count);
  615. size_t align_offset = 0;
  616. span_t* span = (span_t*)_memory_map(aligned_span_count * _memory_span_size, &align_offset);
  617. if (!span)
  618. return 0;
  619. _memory_span_initialize(span, aligned_span_count, span_count, align_offset);
  620. _memory_statistics_add(&_reserved_spans, aligned_span_count);
  621. if (span_count <= LARGE_CLASS_COUNT)
  622. _memory_statistics_inc(heap->span_use[span_count - 1].spans_map_calls, 1);
  623. if (aligned_span_count > span_count) {
  624. if (heap->spans_reserved) {
  625. _memory_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
  626. _memory_heap_cache_insert(heap, heap->span_reserve);
  627. }
  628. _memory_heap_set_reserved_spans(heap, span, (span_t*)pointer_offset(span, span_count * _memory_span_size), aligned_span_count - span_count);
  629. }
  630. return span;
  631. }
  632. //! Map in memory pages for the given number of spans (or use previously reserved pages)
  633. static span_t*
  634. _memory_map_spans(heap_t* heap, size_t span_count) {
  635. if (span_count <= heap->spans_reserved)
  636. return _memory_map_from_reserve(heap, span_count);
  637. return _memory_map_aligned_span_count(heap, span_count);
  638. }
  639. //! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
  640. static void
  641. _memory_unmap_span(span_t* span) {
  642. assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
  643. assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
  644. int is_master = !!(span->flags & SPAN_FLAG_MASTER);
  645. span_t* master = is_master ? span : (span_t*)(pointer_offset(span, -(int32_t)(span->total_spans_or_distance * _memory_span_size)));
  646. assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN));
  647. assert(master->flags & SPAN_FLAG_MASTER);
  648. size_t span_count = span->span_count;
  649. if (!is_master) {
  650. //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
  651. assert(span->align_offset == 0);
  652. if (_memory_span_size >= _memory_page_size) {
  653. _memory_unmap(span, span_count * _memory_span_size, 0, 0);
  654. _memory_statistics_sub(&_reserved_spans, span_count);
  655. }
  656. } else {
  657. //Special double flag to denote an unmapped master
  658. //It must be kept in memory since the span header is still needed
  659. span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
  660. }
  661. if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
  662. //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
  663. assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN));
  664. size_t unmap_count = master->span_count;
  665. if (_memory_span_size < _memory_page_size)
  666. unmap_count = master->total_spans_or_distance;
  667. _memory_statistics_sub(&_reserved_spans, unmap_count);
  668. _memory_unmap(master, unmap_count * _memory_span_size, master->align_offset, master->total_spans_or_distance * _memory_span_size);
  669. }
  670. }
  671. #if ENABLE_THREAD_CACHE
  672. //! Unmap a single linked list of spans
  673. static void
  674. _memory_unmap_span_list(span_t* span) {
  675. size_t list_size = span->list_size;
  676. for (size_t ispan = 0; ispan < list_size; ++ispan) {
  677. span_t* next_span = span->next;
  678. _memory_unmap_span(span);
  679. span = next_span;
  680. }
  681. assert(!span);
  682. }
  683. //! Add span to head of single linked span list
  684. static size_t
  685. _memory_span_list_push(span_t** head, span_t* span) {
  686. span->next = *head;
  687. if (*head)
  688. span->list_size = (*head)->list_size + 1;
  689. else
  690. span->list_size = 1;
  691. *head = span;
  692. return span->list_size;
  693. }
  694. //! Remove span from head of single linked span list, returning the popped span
  695. static span_t*
  696. _memory_span_list_pop(span_t** head) {
  697. span_t* span = *head;
  698. span_t* next_span = 0;
  699. if (span->list_size > 1) {
  700. assert(span->next);
  701. next_span = span->next;
  702. assert(next_span);
  703. next_span->list_size = span->list_size - 1;
  704. }
  705. *head = next_span;
  706. return span;
  707. }
  708. //! Split a single linked span list
  709. static span_t*
  710. _memory_span_list_split(span_t* span, size_t limit) {
  711. span_t* next = 0;
  712. if (limit < 2)
  713. limit = 2;
  714. if (span->list_size > limit) {
  715. uint32_t list_size = 1;
  716. span_t* last = span;
  717. next = span->next;
  718. while (list_size < limit) {
  719. last = next;
  720. next = next->next;
  721. ++list_size;
  722. }
  723. last->next = 0;
  724. assert(next);
  725. next->list_size = span->list_size - list_size;
  726. span->list_size = list_size;
  727. span->prev = 0;
  728. }
  729. return next;
  730. }
  731. #endif
  732. //! Add a span to partial span double linked list at the head
  733. static void
  734. _memory_span_partial_list_add(span_t** head, span_t* span) {
  735. if (*head) {
  736. span->next = *head;
  737. //Maintain pointer to tail span
  738. span->prev = (*head)->prev;
  739. (*head)->prev = span;
  740. } else {
  741. span->next = 0;
  742. span->prev = span;
  743. }
  744. *head = span;
  745. }
  746. //! Add a span to partial span double linked list at the tail
  747. static void
  748. _memory_span_partial_list_add_tail(span_t** head, span_t* span) {
  749. span->next = 0;
  750. if (*head) {
  751. span_t* tail = (*head)->prev;
  752. tail->next = span;
  753. span->prev = tail;
  754. //Maintain pointer to tail span
  755. (*head)->prev = span;
  756. } else {
  757. span->prev = span;
  758. *head = span;
  759. }
  760. }
  761. //! Pop head span from partial span double linked list
  762. static void
  763. _memory_span_partial_list_pop_head(span_t** head) {
  764. span_t* span = *head;
  765. *head = span->next;
  766. if (*head) {
  767. //Maintain pointer to tail span
  768. (*head)->prev = span->prev;
  769. }
  770. }
  771. //! Remove a span from partial span double linked list
  772. static void
  773. _memory_span_partial_list_remove(span_t** head, span_t* span) {
  774. if (UNEXPECTED(*head == span)) {
  775. _memory_span_partial_list_pop_head(head);
  776. } else {
  777. span_t* next_span = span->next;
  778. span_t* prev_span = span->prev;
  779. prev_span->next = next_span;
  780. if (EXPECTED(next_span != 0)) {
  781. next_span->prev = prev_span;
  782. } else {
  783. //Update pointer to tail span
  784. (*head)->prev = prev_span;
  785. }
  786. }
  787. }
  788. #if ENABLE_GLOBAL_CACHE
  789. //! Insert the given list of memory page spans in the global cache
  790. static void
  791. _memory_cache_insert(global_cache_t* cache, span_t* span, size_t cache_limit) {
  792. assert((span->list_size == 1) || (span->next != 0));
  793. int32_t list_size = (int32_t)span->list_size;
  794. //Unmap if cache has reached the limit
  795. if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
  796. #if !ENABLE_UNLIMITED_GLOBAL_CACHE
  797. _memory_unmap_span_list(span);
  798. atomic_add32(&cache->size, -list_size);
  799. return;
  800. #endif
  801. }
  802. void* current_cache, *new_cache;
  803. do {
  804. current_cache = atomic_load_ptr(&cache->cache);
  805. span->prev = (span_t*)((uintptr_t)current_cache & _memory_span_mask);
  806. new_cache = (void*)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
  807. } while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
  808. }
  809. //! Extract a number of memory page spans from the global cache
  810. static span_t*
  811. _memory_cache_extract(global_cache_t* cache) {
  812. uintptr_t span_ptr;
  813. do {
  814. void* global_span = atomic_load_ptr(&cache->cache);
  815. span_ptr = (uintptr_t)global_span & _memory_span_mask;
  816. if (span_ptr) {
  817. span_t* span = (span_t*)span_ptr;
  818. //By accessing the span pointer before it is swapped out of the list we assume that a contending thread
  819. //does not manage to traverse the list and unmap the span before we access it
  820. void* new_cache = (void*)((uintptr_t)span->prev | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
  821. if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
  822. atomic_add32(&cache->size, -(int32_t)span->list_size);
  823. return span;
  824. }
  825. }
  826. } while (span_ptr);
  827. return 0;
  828. }
  829. //! Finalize a global cache, only valid from allocator finalization (not thread safe)
  830. static void
  831. _memory_cache_finalize(global_cache_t* cache) {
  832. void* current_cache = atomic_load_ptr(&cache->cache);
  833. span_t* span = (span_t*)((uintptr_t)current_cache & _memory_span_mask);
  834. while (span) {
  835. span_t* skip_span = (span_t*)((uintptr_t)span->prev & _memory_span_mask);
  836. atomic_add32(&cache->size, -(int32_t)span->list_size);
  837. _memory_unmap_span_list(span);
  838. span = skip_span;
  839. }
  840. assert(!atomic_load32(&cache->size));
  841. atomic_store_ptr(&cache->cache, 0);
  842. atomic_store32(&cache->size, 0);
  843. }
  844. //! Insert the given list of memory page spans in the global cache
  845. static void
  846. _memory_global_cache_insert(span_t* span) {
  847. size_t span_count = span->span_count;
  848. #if ENABLE_UNLIMITED_GLOBAL_CACHE
  849. _memory_cache_insert(&_memory_span_cache[span_count - 1], span, 0);
  850. #else
  851. const size_t cache_limit = (GLOBAL_CACHE_MULTIPLIER * ((span_count == 1) ? _memory_span_release_count : _memory_span_release_count_large));
  852. _memory_cache_insert(&_memory_span_cache[span_count - 1], span, cache_limit);
  853. #endif
  854. }
  855. //! Extract a number of memory page spans from the global cache for large blocks
  856. static span_t*
  857. _memory_global_cache_extract(size_t span_count) {
  858. span_t* span = _memory_cache_extract(&_memory_span_cache[span_count - 1]);
  859. assert(!span || (span->span_count == span_count));
  860. return span;
  861. }
  862. #endif
  863. #if ENABLE_THREAD_CACHE
  864. //! Adopt the deferred span cache list
  865. static void
  866. _memory_heap_cache_adopt_deferred(heap_t* heap) {
  867. atomic_thread_fence_acquire();
  868. span_t* span = (span_t*)atomic_load_ptr(&heap->span_cache_deferred);
  869. if (!span)
  870. return;
  871. do {
  872. span = (span_t*)atomic_load_ptr(&heap->span_cache_deferred);
  873. } while (!atomic_cas_ptr(&heap->span_cache_deferred, 0, span));
  874. while (span) {
  875. span_t* next_span = span->next;
  876. _memory_span_list_push(&heap->span_cache[0], span);
  877. #if ENABLE_STATISTICS
  878. atomic_decr32(&heap->span_use[span->span_count - 1].current);
  879. ++heap->size_class_use[span->size_class].spans_to_cache;
  880. --heap->size_class_use[span->size_class].spans_current;
  881. #endif
  882. span = next_span;
  883. }
  884. }
  885. #endif
  886. //! Insert a single span into thread heap cache, releasing to global cache if overflow
  887. static void
  888. _memory_heap_cache_insert(heap_t* heap, span_t* span) {
  889. #if ENABLE_THREAD_CACHE
  890. size_t span_count = span->span_count;
  891. size_t idx = span_count - 1;
  892. _memory_statistics_inc(heap->span_use[idx].spans_to_cache, 1);
  893. if (!idx)
  894. _memory_heap_cache_adopt_deferred(heap);
  895. #if ENABLE_UNLIMITED_THREAD_CACHE
  896. _memory_span_list_push(&heap->span_cache[idx], span);
  897. #else
  898. const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large);
  899. size_t current_cache_size = _memory_span_list_push(&heap->span_cache[idx], span);
  900. if (current_cache_size <= release_count)
  901. return;
  902. const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER;
  903. if (current_cache_size <= hard_limit) {
  904. #if ENABLE_ADAPTIVE_THREAD_CACHE
  905. //Require 25% of high water mark to remain in cache (and at least 1, if use is 0)
  906. const size_t high_mark = heap->span_use[idx].high;
  907. const size_t min_limit = (high_mark >> 2) + release_count + 1;
  908. if (current_cache_size < min_limit)
  909. return;
  910. #else
  911. return;
  912. #endif
  913. }
  914. heap->span_cache[idx] = _memory_span_list_split(span, release_count);
  915. assert(span->list_size == release_count);
  916. #if ENABLE_STATISTICS
  917. heap->thread_to_global += (size_t)span->list_size * span_count * _memory_span_size;
  918. heap->span_use[idx].spans_to_global += span->list_size;
  919. #endif
  920. #if ENABLE_GLOBAL_CACHE
  921. _memory_global_cache_insert(span);
  922. #else
  923. _memory_unmap_span_list(span);
  924. #endif
  925. #endif
  926. #else
  927. (void)sizeof(heap);
  928. _memory_unmap_span(span);
  929. #endif
  930. }
  931. //! Extract a span of the given span count from the thread cache
  932. static span_t*
  933. _memory_heap_thread_cache_extract(heap_t* heap, size_t span_count) {
  934. #if ENABLE_THREAD_CACHE
  935. size_t idx = span_count - 1;
  936. if (!idx)
  937. _memory_heap_cache_adopt_deferred(heap);
  938. if (heap->span_cache[idx]) {
  939. #if ENABLE_STATISTICS
  940. heap->span_use[idx].spans_from_cache++;
  941. #endif
  942. return _memory_span_list_pop(&heap->span_cache[idx]);
  943. }
  944. #endif
  945. return 0;
  946. }
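  //! Use the heap's reserved spans to fulfill the request, if enough spans are reserved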
  947. static span_t*
  948. _memory_heap_reserved_extract(heap_t* heap, size_t span_count) {
  949. if (heap->spans_reserved >= span_count)
  950. return _memory_map_spans(heap, span_count);
  951. return 0;
  952. }
  953. //! Extract a span from the global cache
  954. static span_t*
  955. _memory_heap_global_cache_extract(heap_t* heap, size_t span_count) {
  956. #if ENABLE_GLOBAL_CACHE
  957. size_t idx = span_count - 1;
  958. heap->span_cache[idx] = _memory_global_cache_extract(span_count);
  959. if (heap->span_cache[idx]) {
  960. #if ENABLE_STATISTICS
  961. heap->global_to_thread += (size_t)heap->span_cache[idx]->list_size * span_count * _memory_span_size;
  962. heap->span_use[idx].spans_from_global += heap->span_cache[idx]->list_size;
  963. #endif
  964. return _memory_span_list_pop(&heap->span_cache[idx]);
  965. }
  966. #endif
  967. return 0;
  968. }
  969. //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
  970. static span_t*
  971. _memory_heap_extract_new_span(heap_t* heap, size_t span_count, uint32_t class_idx) {
  972. (void)sizeof(class_idx);
  973. #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  974. uint32_t idx = (uint32_t)span_count - 1;
  975. uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
  976. if (current_count > heap->span_use[idx].high)
  977. heap->span_use[idx].high = current_count;
  978. #if ENABLE_STATISTICS
  979. uint32_t spans_current = ++heap->size_class_use[class_idx].spans_current;
  980. if (spans_current > heap->size_class_use[class_idx].spans_peak)
  981. heap->size_class_use[class_idx].spans_peak = spans_current;
  982. #endif
  983. #endif
  984. span_t* span = _memory_heap_thread_cache_extract(heap, span_count);
  985. if (EXPECTED(span != 0)) {
  986. _memory_statistics_inc(heap->size_class_use[class_idx].spans_from_cache, 1);
  987. return span;
  988. }
  989. span = _memory_heap_reserved_extract(heap, span_count);
  990. if (EXPECTED(span != 0)) {
  991. _memory_statistics_inc(heap->size_class_use[class_idx].spans_from_reserved, 1);
  992. return span;
  993. }
  994. span = _memory_heap_global_cache_extract(heap, span_count);
  995. if (EXPECTED(span != 0)) {
  996. _memory_statistics_inc(heap->size_class_use[class_idx].spans_from_cache, 1);
  997. return span;
  998. }
  999. //Final fallback, map in more virtual memory
  1000. span = _memory_map_spans(heap, span_count);
  1001. _memory_statistics_inc(heap->size_class_use[class_idx].spans_map_calls, 1);
  1002. return span;
  1003. }
  1004. //! Move the span (used for small or medium allocations) to the heap thread cache
  1005. static void
  1006. _memory_span_release_to_cache(heap_t* heap, span_t* span) {
  1007. heap_class_t* heap_class = heap->span_class + span->size_class;
  1008. assert(heap_class->partial_span != span);
  1009. if (span->state == SPAN_STATE_PARTIAL)
  1010. _memory_span_partial_list_remove(&heap_class->partial_span, span);
  1011. #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  1012. atomic_decr32(&heap->span_use[0].current);
  1013. #endif
  1014. _memory_statistics_inc(heap->span_use[0].spans_to_cache, 1);
  1015. _memory_statistics_inc(heap->size_class_use[span->size_class].spans_to_cache, 1);
  1016. _memory_statistics_dec(heap->size_class_use[span->size_class].spans_current, 1);
  1017. _memory_heap_cache_insert(heap, span);
  1018. }
  1019. //! Initialize a (partial) free list up to the next system memory page, while reserving the first block
  1020. //! as allocated, returning the number of blocks in the list
  1021. static uint32_t
  1022. free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start,
  1023. uint32_t block_count, uint32_t block_size) {
  1024. assert(block_count);
  1025. *first_block = block_start;
  1026. if (block_count > 1) {
  1027. void* free_block = pointer_offset(block_start, block_size);
  1028. void* block_end = pointer_offset(block_start, block_size * block_count);
  1029. //If block size is less than half a memory page, bound init to next memory page boundary
  1030. if (block_size < (_memory_page_size >> 1)) {
  1031. void* page_end = pointer_offset(page_start, _memory_page_size);
  1032. if (page_end < block_end)
  1033. block_end = page_end;
  1034. }
  1035. *list = free_block;
  1036. block_count = 2;
  1037. void* next_block = pointer_offset(free_block, block_size);
  1038. while (next_block < block_end) {
  1039. *((void**)free_block) = next_block;
  1040. free_block = next_block;
  1041. ++block_count;
  1042. next_block = pointer_offset(next_block, block_size);
  1043. }
  1044. *((void**)free_block) = 0;
  1045. } else {
  1046. *list = 0;
  1047. }
  1048. return block_count;
  1049. }
  1050. //! Initialize an unused span (from cache or mapped) to be new active span
  1051. static void*
  1052. _memory_span_set_new_active(heap_t* heap, heap_class_t* heap_class, span_t* span, uint32_t class_idx) {
  1053. assert(span->span_count == 1);
  1054. size_class_t* size_class = _memory_size_class + class_idx;
  1055. span->size_class = class_idx;
  1056. span->heap = heap;
  1057. span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
  1058. span->block_count = size_class->block_count;
  1059. span->block_size = size_class->block_size;
  1060. span->state = SPAN_STATE_ACTIVE;
  1061. span->free_list = 0;
  1062. //Setup free list. Only initialize one system page worth of free blocks in list
  1063. void* block;
  1064. span->free_list_limit = free_list_partial_init(&heap_class->free_list, &block,
  1065. span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
  1066. atomic_store_ptr(&span->free_list_deferred, 0);
  1067. span->list_size = 0;
  1068. atomic_thread_fence_release();
  1069. _memory_span_partial_list_add(&heap_class->partial_span, span);
  1070. return block;
  1071. }
  1072. //! Promote a partially used span (from heap used list) to be new active span
  1073. static void
  1074. _memory_span_set_partial_active(heap_class_t* heap_class, span_t* span) {
  1075. assert(span->state == SPAN_STATE_PARTIAL);
  1076. assert(span->block_count == _memory_size_class[span->size_class].block_count);
  1077. //Move data to heap size class and set span as active
  1078. heap_class->free_list = span->free_list;
  1079. span->state = SPAN_STATE_ACTIVE;
  1080. span->free_list = 0;
  1081. assert(heap_class->free_list);
  1082. }
  1083. //! Mark span as full (from active)
  1084. static void
  1085. _memory_span_set_active_full(heap_class_t* heap_class, span_t* span) {
  1086. assert(span->state == SPAN_STATE_ACTIVE);
  1087. assert(span == heap_class->partial_span);
  1088. _memory_span_partial_list_pop_head(&heap_class->partial_span);
  1089. span->used_count = span->block_count;
  1090. span->state = SPAN_STATE_FULL;
  1091. span->free_list = 0;
  1092. }
  1093. //! Move span from full to partial state
  1094. static void
  1095. _memory_span_set_full_partial(heap_t* heap, span_t* span) {
  1096. assert(span->state == SPAN_STATE_FULL);
  1097. heap_class_t* heap_class = &heap->span_class[span->size_class];
  1098. span->state = SPAN_STATE_PARTIAL;
  1099. _memory_span_partial_list_add_tail(&heap_class->partial_span, span);
  1100. }
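  //! Atomically take ownership of a span's deferred free list, reset it and return the extracted list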
  1101. static void*
  1102. _memory_span_extract_deferred(span_t* span) {
  1103. void* free_list;
  1104. do {
  1105. free_list = atomic_load_ptr(&span->free_list_deferred);
  1106. } while ((free_list == INVALID_POINTER) || !atomic_cas_ptr(&span->free_list_deferred, INVALID_POINTER, free_list));
  1107. span->list_size = 0;
  1108. atomic_store_ptr(&span->free_list_deferred, 0);
  1109. atomic_thread_fence_release();
  1110. return free_list;
  1111. }
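// Note on _memory_span_extract_deferred above: the deferred list head is
// briefly replaced by the INVALID_POINTER sentinel, which acts as a spin lock.
// Both this owner-side extraction and cross-thread frees in
// _memory_deallocate_defer loop until they can install the sentinel themselves.
// Once the list is taken, list_size is reset and the head is restored to 0
// with release ordering.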
  1112. //! Pop first block from a free list
  1113. static void*
  1114. free_list_pop(void** list) {
  1115. void* block = *list;
  1116. *list = *((void**)block);
  1117. return block;
  1118. }
  1119. //! Allocate a small/medium sized memory block from the given heap
  1120. static void*
  1121. _memory_allocate_from_heap_fallback(heap_t* heap, uint32_t class_idx) {
  1122. heap_class_t* heap_class = &heap->span_class[class_idx];
  1123. void* block;
  1124. span_t* active_span = heap_class->partial_span;
  1125. if (EXPECTED(active_span != 0)) {
  1126. assert(active_span->state == SPAN_STATE_ACTIVE);
  1127. assert(active_span->block_count == _memory_size_class[active_span->size_class].block_count);
  1128. //Swap in free list if not empty
  1129. if (active_span->free_list) {
  1130. heap_class->free_list = active_span->free_list;
  1131. active_span->free_list = 0;
  1132. return free_list_pop(&heap_class->free_list);
  1133. }
  1134. //If the span did not fully initialize free list, link up another page worth of blocks
  1135. if (active_span->free_list_limit < active_span->block_count) {
  1136. void* block_start = pointer_offset(active_span, SPAN_HEADER_SIZE + (active_span->free_list_limit * active_span->block_size));
  1137. active_span->free_list_limit += free_list_partial_init(&heap_class->free_list, &block,
  1138. (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
  1139. active_span->block_count - active_span->free_list_limit, active_span->block_size);
  1140. return block;
  1141. }
  1142. //Swap in deferred free list
  1143. atomic_thread_fence_acquire();
  1144. if (atomic_load_ptr(&active_span->free_list_deferred)) {
  1145. heap_class->free_list = _memory_span_extract_deferred(active_span);
  1146. return free_list_pop(&heap_class->free_list);
  1147. }
  1148. //If the active span is fully allocated, mark span as free floating (fully allocated and not part of any list)
  1149. assert(!heap_class->free_list);
  1150. assert(active_span->free_list_limit >= active_span->block_count);
  1151. _memory_span_set_active_full(heap_class, active_span);
  1152. }
  1153. assert(!heap_class->free_list);
  1154. //Try promoting a semi-used span to active
  1155. active_span = heap_class->partial_span;
  1156. if (EXPECTED(active_span != 0)) {
  1157. _memory_span_set_partial_active(heap_class, active_span);
  1158. return free_list_pop(&heap_class->free_list);
  1159. }
  1160. assert(!heap_class->free_list);
  1161. assert(!heap_class->partial_span);
  1162. //Find a span in one of the cache levels
  1163. active_span = _memory_heap_extract_new_span(heap, 1, class_idx);
  1164. //Mark span as owned by this heap and set base data, return first block
  1165. return _memory_span_set_new_active(heap, heap_class, active_span, class_idx);
  1166. }
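// Fallback order in _memory_allocate_from_heap_fallback above:
//  1. swap in the active span's own free list if it is non-empty
//  2. lazily link another page worth of blocks if the span was only partially initialized
//  3. swap in the deferred free list filled by frees from other threads
//  4. otherwise the active span is full: retire it and promote the next partial span
//  5. with no partial span left, extract a span from reserved/cached spans and activate it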
  1167. //! Allocate a small sized memory block from the given heap
  1168. static void*
  1169. _memory_allocate_small(heap_t* heap, size_t size) {
  1170. //Small sizes have unique size classes
  1171. const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
  1172. _memory_statistics_inc_alloc(heap, class_idx);
  1173. if (EXPECTED(heap->span_class[class_idx].free_list != 0))
  1174. return free_list_pop(&heap->span_class[class_idx].free_list);
  1175. return _memory_allocate_from_heap_fallback(heap, class_idx);
  1176. }
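// Small sizes map 1:1 to size classes by rounding up to the allocation
// granularity. For example, if SMALL_GRANULARITY is 16 (shift 4), a 24 byte
// request yields class_idx (24 + 15) >> 4 == 2, i.e. a 32 byte block.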
  1177. //! Allocate a medium sized memory block from the given heap
  1178. static void*
  1179. _memory_allocate_medium(heap_t* heap, size_t size) {
  1180. //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
  1181. const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
  1182. const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
  1183. _memory_statistics_inc_alloc(heap, class_idx);
  1184. if (EXPECTED(heap->span_class[class_idx].free_list != 0))
  1185. return free_list_pop(&heap->span_class[class_idx].free_list);
  1186. return _memory_allocate_from_heap_fallback(heap, class_idx);
  1187. }
  1188. //! Allocate a large sized memory block from the given heap
  1189. static void*
  1190. _memory_allocate_large(heap_t* heap, size_t size) {
  1191. //Calculate number of needed max sized spans (including header)
  1192. //Since this function is never called if size > LARGE_SIZE_LIMIT
  1193. //the span_count is guaranteed to be <= LARGE_CLASS_COUNT
  1194. size += SPAN_HEADER_SIZE;
  1195. size_t span_count = size >> _memory_span_size_shift;
  1196. if (size & (_memory_span_size - 1))
  1197. ++span_count;
  1198. size_t idx = span_count - 1;
  1199. //Find a span in one of the cache levels
  1200. span_t* span = _memory_heap_extract_new_span(heap, span_count, SIZE_CLASS_COUNT);
  1201. //Mark span as owned by this heap and set base data
  1202. assert(span->span_count == span_count);
  1203. span->size_class = (uint32_t)(SIZE_CLASS_COUNT + idx);
  1204. span->heap = heap;
  1205. atomic_thread_fence_release();
  1206. return pointer_offset(span, SPAN_HEADER_SIZE);
  1207. }
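// Large allocations are sized in whole spans: span_count is the request plus
// header rounded up to span granularity, and the class is encoded as
// SIZE_CLASS_COUNT + (span_count - 1). For example, with 64 KiB spans a
// 100 KiB request (plus header) occupies two spans.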
  1208. //! Allocate a huge block by mapping memory pages directly
  1209. static void*
  1210. _memory_allocate_huge(size_t size) {
  1211. size += SPAN_HEADER_SIZE;
  1212. size_t num_pages = size >> _memory_page_size_shift;
  1213. if (size & (_memory_page_size - 1))
  1214. ++num_pages;
  1215. size_t align_offset = 0;
  1216. span_t* span = (span_t*)_memory_map(num_pages * _memory_page_size, &align_offset);
  1217. if (!span)
  1218. return span;
  1219. //Store page count in span_count
  1220. span->size_class = (uint32_t)-1;
  1221. span->span_count = (uint32_t)num_pages;
  1222. span->align_offset = (uint32_t)align_offset;
  1223. _memory_statistics_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
  1224. return pointer_offset(span, SPAN_HEADER_SIZE);
  1225. }
  1226. //! Allocate a block larger than medium size
  1227. static void*
  1228. _memory_allocate_oversized(heap_t* heap, size_t size) {
  1229. if (size <= LARGE_SIZE_LIMIT)
  1230. return _memory_allocate_large(heap, size);
  1231. return _memory_allocate_huge(size);
  1232. }
  1233. //! Allocate a block of the given size
  1234. static void*
  1235. _memory_allocate(heap_t* heap, size_t size) {
  1236. if (EXPECTED(size <= SMALL_SIZE_LIMIT))
  1237. return _memory_allocate_small(heap, size);
  1238. else if (size <= _memory_medium_size_limit)
  1239. return _memory_allocate_medium(heap, size);
  1240. return _memory_allocate_oversized(heap, size);
  1241. }
  1242. //! Allocate a new heap
  1243. static heap_t*
  1244. _memory_allocate_heap(void) {
  1245. void* raw_heap;
  1246. void* next_raw_heap;
  1247. uintptr_t orphan_counter;
  1248. heap_t* heap;
  1249. heap_t* next_heap;
  1250. //Try getting an orphaned heap
  1251. atomic_thread_fence_acquire();
  1252. do {
  1253. raw_heap = atomic_load_ptr(&_memory_orphan_heaps);
  1254. heap = (heap_t*)((uintptr_t)raw_heap & ~(uintptr_t)0x1FF);
  1255. if (!heap)
  1256. break;
  1257. next_heap = heap->next_orphan;
  1258. orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
  1259. next_raw_heap = (void*)((uintptr_t)next_heap | (orphan_counter & (uintptr_t)0x1FF));
  1260. } while (!atomic_cas_ptr(&_memory_orphan_heaps, next_raw_heap, raw_heap));
  1261. if (!heap) {
  1262. //Map in pages for a new heap
  1263. size_t align_offset = 0;
  1264. heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset);
  1265. if (!heap)
  1266. return heap;
  1267. memset((char*)heap, 0, sizeof(heap_t));
  1268. heap->align_offset = align_offset;
  1269. //Get a new heap ID
  1270. do {
  1271. heap->id = atomic_incr32(&_memory_heap_id);
  1272. if (_memory_heap_lookup(heap->id))
  1273. heap->id = 0;
  1274. } while (!heap->id);
  1275. //Link in heap in heap ID map
  1276. size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
  1277. do {
  1278. next_heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
  1279. heap->next_heap = next_heap;
  1280. } while (!atomic_cas_ptr(&_memory_heaps[list_idx], heap, next_heap));
  1281. }
  1282. return heap;
  1283. }
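// The orphan heap list above is a lock-free stack with an ABA tag: the low
// 9 bits of the stored pointer hold a counter (orphan_counter & 0x1FF) and the
// heap address is recovered by masking with ~0x1FF. This works because heaps
// are mapped page aligned and the page size is clamped to at least 512 bytes
// in rpmalloc_initialize_config.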
  1284. //! Deallocate the given small/medium memory block in the current thread local heap
  1285. static void
  1286. _memory_deallocate_direct(span_t* span, void* block) {
  1287. assert(span->heap == get_thread_heap_raw());
  1288. uint32_t state = span->state;
  1289. //Add block to free list
  1290. *((void**)block) = span->free_list;
  1291. span->free_list = block;
  1292. if (UNEXPECTED(state == SPAN_STATE_ACTIVE))
  1293. return;
  1294. uint32_t used = --span->used_count;
  1295. uint32_t free = span->list_size;
  1296. if (UNEXPECTED(used == free))
  1297. _memory_span_release_to_cache(span->heap, span);
  1298. else if (UNEXPECTED(state == SPAN_STATE_FULL))
  1299. _memory_span_set_full_partial(span->heap, span);
  1300. }
  1301. //! Put the block in the deferred free list of the owning span
  1302. static void
  1303. _memory_deallocate_defer(span_t* span, void* block) {
  1304. atomic_thread_fence_acquire();
  1305. if (span->state == SPAN_STATE_FULL) {
  1306. if ((span->list_size + 1) == span->block_count) {
  1307. //Span will be completely freed by deferred deallocations, no other thread can
  1308. //currently touch it. Safe to move to owner heap deferred cache
  1309. span_t* last_head;
  1310. heap_t* heap = span->heap;
  1311. do {
  1312. last_head = (span_t*)atomic_load_ptr(&heap->span_cache_deferred);
  1313. span->next = last_head;
  1314. } while (!atomic_cas_ptr(&heap->span_cache_deferred, span, last_head));
  1315. return;
  1316. }
  1317. }
  1318. void* free_list;
  1319. do {
  1320. atomic_thread_fence_acquire();
  1321. free_list = atomic_load_ptr(&span->free_list_deferred);
  1322. *((void**)block) = free_list;
  1323. } while ((free_list == INVALID_POINTER) || !atomic_cas_ptr(&span->free_list_deferred, INVALID_POINTER, free_list));
  1324. ++span->list_size;
  1325. atomic_store_ptr(&span->free_list_deferred, block);
  1326. }
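// Cross-thread frees above push the block onto span->free_list_deferred using
// the same INVALID_POINTER sentinel protocol as _memory_span_extract_deferred;
// holding the sentinel between the successful CAS and the final store makes
// the ++list_size update safe. If the span is FULL and this free would make it
// completely free, the whole span is handed to the owning heap's
// span_cache_deferred list instead.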
  1327. static void
  1328. _memory_deallocate_small_or_medium(span_t* span, void* p) {
  1329. _memory_statistics_inc_free(span->heap, span->size_class);
  1330. if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
  1331. //Realign pointer to block start
  1332. void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
  1333. uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
  1334. p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
  1335. }
  1336. //Check if block belongs to this heap or if deallocation should be deferred
  1337. if (span->heap == get_thread_heap_raw())
  1338. _memory_deallocate_direct(span, p);
  1339. else
  1340. _memory_deallocate_defer(span, p);
  1341. }
  1342. //! Deallocate the given large memory block to the current heap
  1343. static void
  1344. _memory_deallocate_large(span_t* span) {
  1345. //Decrease counter
  1346. assert(span->span_count == ((size_t)span->size_class - SIZE_CLASS_COUNT + 1));
  1347. assert(span->size_class >= SIZE_CLASS_COUNT);
  1348. assert(span->size_class - SIZE_CLASS_COUNT < LARGE_CLASS_COUNT);
  1349. assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
  1350. assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
  1351. //Large blocks can always be deallocated and transferred between heaps
  1352. //Investigate if it is better to defer large spans as well through span_cache_deferred,
  1353. //possibly with some heuristics to pick either scheme at runtime per deallocation
  1354. heap_t* heap = get_thread_heap();
  1355. if (!heap) return;
  1356. #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
  1357. size_t idx = span->span_count - 1;
  1358. atomic_decr32(&span->heap->span_use[idx].current);
  1359. #endif
  1360. if ((span->span_count > 1) && !heap->spans_reserved) {
  1361. heap->span_reserve = span;
  1362. heap->spans_reserved = span->span_count;
  1363. if (span->flags & SPAN_FLAG_MASTER) {
  1364. heap->span_reserve_master = span;
  1365. } else { //SPAN_FLAG_SUBSPAN
  1366. uint32_t distance = span->total_spans_or_distance;
  1367. span_t* master = (span_t*)pointer_offset(span, -(int32_t)(distance * _memory_span_size));
  1368. heap->span_reserve_master = master;
  1369. assert(master->flags & SPAN_FLAG_MASTER);
  1370. assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count);
  1371. }
  1372. _memory_statistics_inc(heap->span_use[idx].spans_to_reserved, 1);
  1373. } else {
  1374. //Insert into cache list
  1375. _memory_heap_cache_insert(heap, span);
  1376. }
  1377. }
  1378. //! Deallocate the given huge span
  1379. static void
  1380. _memory_deallocate_huge(span_t* span) {
  1381. //Oversized allocation, page count is stored in span_count
  1382. size_t num_pages = span->span_count;
  1383. _memory_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size);
  1384. _memory_statistics_sub(&_huge_pages_current, num_pages);
  1385. }
  1386. //! Deallocate the given block
  1387. static void
  1388. _memory_deallocate(void* p) {
  1389. //Grab the span (always at start of span, using span alignment)
  1390. span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
  1391. if (UNEXPECTED(!span))
  1392. return;
  1393. if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
  1394. _memory_deallocate_small_or_medium(span, p);
  1395. else if (span->size_class != (uint32_t)-1)
  1396. _memory_deallocate_large(span);
  1397. else
  1398. _memory_deallocate_huge(span);
  1399. }
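// Deallocation dispatches on the span header found by masking the pointer with
// the span alignment: size_class < SIZE_CLASS_COUNT means small/medium,
// size_class == (uint32_t)-1 marks a huge page-mapped span (page count kept in
// span_count), anything else is a large multi-span allocation.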
  1400. //! Reallocate the given block to the given size
  1401. static void*
  1402. _memory_reallocate(void* p, size_t size, size_t oldsize, unsigned int flags) {
  1403. if (p) {
  1404. //Grab the span using guaranteed span alignment
  1405. span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
  1406. if (span->heap) {
  1407. if (span->size_class < SIZE_CLASS_COUNT) {
  1408. //Small/medium sized block
  1409. assert(span->span_count == 1);
  1410. void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
  1411. uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
  1412. uint32_t block_idx = block_offset / span->block_size;
  1413. void* block = pointer_offset(blocks_start, block_idx * span->block_size);
  1414. if (!oldsize)
  1415. oldsize = span->block_size - (uint32_t)pointer_diff(p, block);
  1416. if ((size_t)span->block_size >= size) {
  1417. //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
  1418. if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
  1419. memmove(block, p, oldsize);
  1420. return block;
  1421. }
  1422. } else {
  1423. //Large block
  1424. size_t total_size = size + SPAN_HEADER_SIZE;
  1425. size_t num_spans = total_size >> _memory_span_size_shift;
1426. if (total_size & (_memory_span_size - 1))
  1427. ++num_spans;
  1428. size_t current_spans = span->span_count;
  1429. assert(current_spans == ((span->size_class - SIZE_CLASS_COUNT) + 1));
  1430. void* block = pointer_offset(span, SPAN_HEADER_SIZE);
  1431. if (!oldsize)
  1432. oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
  1433. if ((current_spans >= num_spans) && (num_spans >= (current_spans / 2))) {
  1434. //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
  1435. if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
  1436. memmove(block, p, oldsize);
  1437. return block;
  1438. }
  1439. }
  1440. } else {
  1441. //Oversized block
  1442. size_t total_size = size + SPAN_HEADER_SIZE;
  1443. size_t num_pages = total_size >> _memory_page_size_shift;
  1444. if (total_size & (_memory_page_size - 1))
  1445. ++num_pages;
  1446. //Page count is stored in span_count
  1447. size_t current_pages = span->span_count;
  1448. void* block = pointer_offset(span, SPAN_HEADER_SIZE);
  1449. if (!oldsize)
  1450. oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
  1451. if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
  1452. //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
  1453. if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
  1454. memmove(block, p, oldsize);
  1455. return block;
  1456. }
  1457. }
  1458. } else {
  1459. oldsize = 0;
  1460. }
  1461. //Size is greater than block size, need to allocate a new block and deallocate the old
  1462. heap_t* heap = get_thread_heap();
  1463. //Avoid hysteresis by overallocating if increase is small (below 37%)
  1464. size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
  1465. size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
  1466. void* block = _memory_allocate(heap, new_size);
  1467. if (p && block) {
  1468. if (!(flags & RPMALLOC_NO_PRESERVE))
  1469. memcpy(block, p, oldsize < new_size ? oldsize : new_size);
  1470. _memory_deallocate(p);
  1471. }
  1472. return block;
  1473. }
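// The grow threshold above works out to oldsize + oldsize/4 + oldsize/8, i.e.
// 1.375x the old size (the "37%" in the comment). For example, growing a
// 1000 byte block to 1100 bytes actually allocates 1375 bytes, so subsequent
// small increases are likely to fit in the existing block.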
  1474. //! Get the usable size of the given block
  1475. static size_t
  1476. _memory_usable_size(void* p) {
  1477. //Grab the span using guaranteed span alignment
  1478. span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
  1479. if (span->heap) {
  1480. //Small/medium block
  1481. if (span->size_class < SIZE_CLASS_COUNT) {
  1482. void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
  1483. return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size);
  1484. }
  1485. //Large block
  1486. size_t current_spans = (span->size_class - SIZE_CLASS_COUNT) + 1;
  1487. return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
  1488. }
  1489. //Oversized block, page count is stored in span_count
  1490. size_t current_pages = span->span_count;
  1491. return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
  1492. }
  1493. //! Adjust and optimize the size class properties for the given class
  1494. static void
  1495. _memory_adjust_size_class(size_t iclass) {
  1496. size_t block_size = _memory_size_class[iclass].block_size;
  1497. size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
  1498. _memory_size_class[iclass].block_count = (uint16_t)block_count;
  1499. _memory_size_class[iclass].class_idx = (uint16_t)iclass;
  1500. //Check if previous size classes can be merged
  1501. size_t prevclass = iclass;
  1502. while (prevclass > 0) {
  1503. --prevclass;
1504. //A previous class can be merged into this one if the number of blocks per span is equal
  1505. if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count)
  1506. memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
  1507. else
  1508. break;
  1509. }
  1510. }
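// Example of the merge above: if two adjacent medium classes both fit the same
// number of blocks in a span, the smaller class is overwritten with the larger
// one, and allocations reach the representative class through the
// _memory_size_class[...].class_idx indirection used in _memory_allocate_medium.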
  1511. static void
  1512. _memory_heap_finalize(void* heapptr) {
  1513. heap_t* heap = (heap_t*)heapptr;
  1514. if (!heap)
  1515. return;
  1516. //Release thread cache spans back to global cache
  1517. #if ENABLE_THREAD_CACHE
  1518. _memory_heap_cache_adopt_deferred(heap);
  1519. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  1520. span_t* span = heap->span_cache[iclass];
  1521. #if ENABLE_GLOBAL_CACHE
  1522. while (span) {
  1523. assert(span->span_count == (iclass + 1));
  1524. size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
  1525. span_t* next = _memory_span_list_split(span, (uint32_t)release_count);
  1526. #if ENABLE_STATISTICS
  1527. heap->thread_to_global += (size_t)span->list_size * span->span_count * _memory_span_size;
  1528. heap->span_use[iclass].spans_to_global += span->list_size;
  1529. #endif
  1530. _memory_global_cache_insert(span);
  1531. span = next;
  1532. }
  1533. #else
  1534. if (span)
  1535. _memory_unmap_span_list(span);
  1536. #endif
  1537. heap->span_cache[iclass] = 0;
  1538. }
  1539. #endif
  1540. //Orphan the heap
  1541. void* raw_heap;
  1542. uintptr_t orphan_counter;
  1543. heap_t* last_heap;
  1544. do {
  1545. last_heap = (heap_t*)atomic_load_ptr(&_memory_orphan_heaps);
  1546. heap->next_orphan = (heap_t*)((uintptr_t)last_heap & ~(uintptr_t)0x1FF);
  1547. orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
  1548. raw_heap = (void*)((uintptr_t)heap | (orphan_counter & (uintptr_t)0x1FF));
  1549. } while (!atomic_cas_ptr(&_memory_orphan_heaps, raw_heap, last_heap));
  1550. set_thread_heap(0);
  1551. #if ENABLE_STATISTICS
  1552. atomic_decr32(&_memory_active_heaps);
  1553. assert(atomic_load32(&_memory_active_heaps) >= 0);
  1554. #endif
  1555. }
  1556. #if defined(_MSC_VER) && !defined(__clang__) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
  1557. #include <fibersapi.h>
  1558. static DWORD fls_key;
  1559. static void NTAPI
  1560. rp_thread_destructor(void* value) {
  1561. if (value)
  1562. rpmalloc_thread_finalize();
  1563. }
  1564. #endif
  1565. #if PLATFORM_POSIX
  1566. # include <sys/mman.h>
  1567. # include <sched.h>
  1568. # ifdef __FreeBSD__
  1569. # include <sys/sysctl.h>
  1570. # define MAP_HUGETLB MAP_ALIGNED_SUPER
  1571. # endif
  1572. # ifndef MAP_UNINITIALIZED
  1573. # define MAP_UNINITIALIZED 0
  1574. # endif
  1575. #endif
  1576. #include <errno.h>
  1577. //! Initialize the allocator and setup global data
  1578. TRACY_API int
  1579. rpmalloc_initialize(void) {
  1580. if (_rpmalloc_initialized) {
  1581. rpmalloc_thread_initialize();
  1582. return 0;
  1583. }
  1584. memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
  1585. return rpmalloc_initialize_config(0);
  1586. }
  1587. int
  1588. rpmalloc_initialize_config(const rpmalloc_config_t* config) {
  1589. if (_rpmalloc_initialized) {
  1590. rpmalloc_thread_initialize();
  1591. return 0;
  1592. }
  1593. _rpmalloc_initialized = 1;
  1594. if (config)
  1595. memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
  1596. if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
  1597. _memory_config.memory_map = _memory_map_os;
  1598. _memory_config.memory_unmap = _memory_unmap_os;
  1599. }
  1600. #if RPMALLOC_CONFIGURABLE
  1601. _memory_page_size = _memory_config.page_size;
  1602. #else
  1603. _memory_page_size = 0;
  1604. #endif
  1605. _memory_huge_pages = 0;
  1606. _memory_map_granularity = _memory_page_size;
  1607. if (!_memory_page_size) {
  1608. #if PLATFORM_WINDOWS
  1609. SYSTEM_INFO system_info;
  1610. memset(&system_info, 0, sizeof(system_info));
  1611. GetSystemInfo(&system_info);
  1612. _memory_page_size = system_info.dwPageSize;
  1613. _memory_map_granularity = system_info.dwAllocationGranularity;
  1614. if (config && config->enable_huge_pages) {
  1615. HANDLE token = 0;
  1616. size_t large_page_minimum = GetLargePageMinimum();
  1617. if (large_page_minimum)
  1618. OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
  1619. if (token) {
  1620. LUID luid;
  1621. if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
  1622. TOKEN_PRIVILEGES token_privileges;
  1623. memset(&token_privileges, 0, sizeof(token_privileges));
  1624. token_privileges.PrivilegeCount = 1;
  1625. token_privileges.Privileges[0].Luid = luid;
  1626. token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
  1627. if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
  1628. DWORD err = GetLastError();
  1629. if (err == ERROR_SUCCESS) {
  1630. _memory_huge_pages = 1;
  1631. _memory_page_size = large_page_minimum;
  1632. _memory_map_granularity = large_page_minimum;
  1633. }
  1634. }
  1635. }
  1636. CloseHandle(token);
  1637. }
  1638. }
  1639. #else
  1640. _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
  1641. _memory_map_granularity = _memory_page_size;
  1642. if (config && config->enable_huge_pages) {
  1643. #if defined(__linux__)
  1644. size_t huge_page_size = 0;
  1645. FILE* meminfo = fopen("/proc/meminfo", "r");
  1646. if (meminfo) {
  1647. char line[128];
  1648. while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
  1649. line[sizeof(line) - 1] = 0;
  1650. if (strstr(line, "Hugepagesize:"))
  1651. huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
  1652. }
  1653. fclose(meminfo);
  1654. }
  1655. if (huge_page_size) {
  1656. _memory_huge_pages = 1;
  1657. _memory_page_size = huge_page_size;
  1658. _memory_map_granularity = huge_page_size;
  1659. }
  1660. #elif defined(__FreeBSD__)
  1661. int rc;
  1662. size_t sz = sizeof(rc);
  1663. if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
  1664. _memory_huge_pages = 1;
  1665. _memory_page_size = 2 * 1024 * 1024;
  1666. _memory_map_granularity = _memory_page_size;
  1667. }
  1668. #elif defined(__APPLE__)
  1669. _memory_huge_pages = 1;
  1670. _memory_page_size = 2 * 1024 * 1024;
  1671. _memory_map_granularity = _memory_page_size;
  1672. #endif
  1673. }
  1674. #endif
  1675. } else {
  1676. if (config && config->enable_huge_pages)
  1677. _memory_huge_pages = 1;
  1678. }
1679. //The ABA counter stored in the low bits of the heap orphan list pointer relies on heaps being aligned to at least 512 bytes (bitmask 0x1FF)
  1680. if (_memory_page_size < 512)
  1681. _memory_page_size = 512;
  1682. if (_memory_page_size > (64 * 1024 * 1024))
  1683. _memory_page_size = (64 * 1024 * 1024);
  1684. _memory_page_size_shift = 0;
  1685. size_t page_size_bit = _memory_page_size;
  1686. while (page_size_bit != 1) {
  1687. ++_memory_page_size_shift;
  1688. page_size_bit >>= 1;
  1689. }
  1690. _memory_page_size = ((size_t)1 << _memory_page_size_shift);
  1691. #if RPMALLOC_CONFIGURABLE
  1692. size_t span_size = _memory_config.span_size;
  1693. if (!span_size)
  1694. span_size = (64 * 1024);
  1695. if (span_size > (256 * 1024))
  1696. span_size = (256 * 1024);
  1697. _memory_span_size = 4096;
  1698. _memory_span_size_shift = 12;
  1699. while (_memory_span_size < span_size) {
  1700. _memory_span_size <<= 1;
  1701. ++_memory_span_size_shift;
  1702. }
  1703. _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
  1704. #endif
1705. _memory_span_map_count = (_memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
  1706. if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
  1707. _memory_span_map_count = (_memory_page_size / _memory_span_size);
  1708. if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
  1709. _memory_span_map_count = (_memory_page_size / _memory_span_size);
  1710. _memory_config.page_size = _memory_page_size;
  1711. _memory_config.span_size = _memory_span_size;
  1712. _memory_config.span_map_count = _memory_span_map_count;
  1713. _memory_config.enable_huge_pages = _memory_huge_pages;
  1714. _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
  1715. _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
  1716. #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  1717. if (pthread_key_create(&_memory_thread_heap, _memory_heap_finalize))
  1718. return -1;
  1719. #endif
  1720. #if defined(_MSC_VER) && !defined(__clang__) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
  1721. fls_key = FlsAlloc(&rp_thread_destructor);
  1722. #endif
  1723. atomic_store32(&_memory_heap_id, 0);
  1724. atomic_store32(&_memory_orphan_counter, 0);
  1725. #if ENABLE_STATISTICS
  1726. atomic_store32(&_memory_active_heaps, 0);
  1727. atomic_store32(&_reserved_spans, 0);
  1728. atomic_store32(&_mapped_pages, 0);
  1729. _mapped_pages_peak = 0;
  1730. atomic_store32(&_mapped_total, 0);
  1731. atomic_store32(&_unmapped_total, 0);
  1732. atomic_store32(&_mapped_pages_os, 0);
  1733. atomic_store32(&_huge_pages_current, 0);
  1734. _huge_pages_peak = 0;
  1735. #endif
  1736. //Setup all small and medium size classes
  1737. size_t iclass = 0;
  1738. _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
  1739. _memory_adjust_size_class(iclass);
  1740. for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
  1741. size_t size = iclass * SMALL_GRANULARITY;
  1742. _memory_size_class[iclass].block_size = (uint32_t)size;
  1743. _memory_adjust_size_class(iclass);
  1744. }
  1745. //At least two blocks per span, then fall back to large allocations
  1746. _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
  1747. if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
  1748. _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
  1749. for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
  1750. size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
  1751. if (size > _memory_medium_size_limit)
  1752. break;
  1753. _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
  1754. _memory_adjust_size_class(SMALL_CLASS_COUNT + iclass);
  1755. }
  1756. for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx)
  1757. atomic_store_ptr(&_memory_heaps[list_idx], 0);
  1758. //Initialize this thread
  1759. rpmalloc_thread_initialize();
  1760. return 0;
  1761. }
  1762. //! Finalize the allocator
  1763. TRACY_API void
  1764. rpmalloc_finalize(void) {
  1765. atomic_thread_fence_acquire();
  1766. rpmalloc_thread_finalize();
  1767. //rpmalloc_dump_statistics(stderr);
  1768. //Free all thread caches
  1769. for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
  1770. heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
  1771. while (heap) {
  1772. if (heap->spans_reserved) {
  1773. span_t* span = _memory_map_spans(heap, heap->spans_reserved);
  1774. _memory_unmap_span(span);
  1775. }
  1776. for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
  1777. heap_class_t* heap_class = heap->span_class + iclass;
  1778. span_t* span = heap_class->partial_span;
  1779. while (span) {
  1780. span_t* next = span->next;
  1781. if (span->state == SPAN_STATE_ACTIVE) {
  1782. uint32_t used_blocks = span->block_count;
  1783. if (span->free_list_limit < span->block_count)
  1784. used_blocks = span->free_list_limit;
  1785. uint32_t free_blocks = 0;
  1786. void* block = heap_class->free_list;
  1787. while (block) {
  1788. ++free_blocks;
  1789. block = *((void**)block);
  1790. }
  1791. block = span->free_list;
  1792. while (block) {
  1793. ++free_blocks;
  1794. block = *((void**)block);
  1795. }
  1796. if (used_blocks == (free_blocks + span->list_size))
  1797. _memory_heap_cache_insert(heap, span);
  1798. } else {
  1799. if (span->used_count == span->list_size)
  1800. _memory_heap_cache_insert(heap, span);
  1801. }
  1802. span = next;
  1803. }
  1804. }
  1805. #if ENABLE_THREAD_CACHE
1806. //Free span caches (another thread might have deferred spans to this heap after its owning thread finalized)
  1807. _memory_heap_cache_adopt_deferred(heap);
  1808. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  1809. if (heap->span_cache[iclass])
  1810. _memory_unmap_span_list(heap->span_cache[iclass]);
  1811. }
  1812. #endif
  1813. heap_t* next_heap = heap->next_heap;
  1814. size_t heap_size = (1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size;
  1815. _memory_unmap(heap, heap_size, heap->align_offset, heap_size);
  1816. heap = next_heap;
  1817. }
  1818. }
  1819. #if ENABLE_GLOBAL_CACHE
  1820. //Free global caches
  1821. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
  1822. _memory_cache_finalize(&_memory_span_cache[iclass]);
  1823. #endif
  1824. atomic_store_ptr(&_memory_orphan_heaps, 0);
  1825. atomic_thread_fence_release();
  1826. #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
  1827. pthread_key_delete(_memory_thread_heap);
  1828. #endif
  1829. #if defined(_MSC_VER) && !defined(__clang__) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
  1830. FlsFree(fls_key);
  1831. #endif
  1832. #if ENABLE_STATISTICS
  1833. //If you hit these asserts you probably have memory leaks or double frees in your code
  1834. assert(!atomic_load32(&_mapped_pages));
  1835. assert(!atomic_load32(&_reserved_spans));
  1836. assert(!atomic_load32(&_mapped_pages_os));
  1837. #endif
  1838. _rpmalloc_initialized = 0;
  1839. }
  1840. //! Initialize thread, assign heap
  1841. TRACY_API void
  1842. rpmalloc_thread_initialize(void) {
  1843. if (!get_thread_heap_raw()) {
  1844. heap_t* heap = _memory_allocate_heap();
  1845. if (heap) {
  1846. atomic_thread_fence_acquire();
  1847. #if ENABLE_STATISTICS
  1848. atomic_incr32(&_memory_active_heaps);
  1849. #endif
  1850. set_thread_heap(heap);
  1851. #if defined(_MSC_VER) && !defined(__clang__) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
  1852. FlsSetValue(fls_key, heap);
  1853. #endif
  1854. }
  1855. }
  1856. }
  1857. //! Finalize thread, orphan heap
  1858. TRACY_API void
  1859. rpmalloc_thread_finalize(void) {
  1860. heap_t* heap = get_thread_heap_raw();
  1861. if (heap)
  1862. _memory_heap_finalize(heap);
  1863. }
  1864. int
  1865. rpmalloc_is_thread_initialized(void) {
  1866. return (get_thread_heap_raw() != 0) ? 1 : 0;
  1867. }
  1868. const rpmalloc_config_t*
  1869. rpmalloc_config(void) {
  1870. return &_memory_config;
  1871. }
  1872. //! Map new pages to virtual memory
  1873. static void*
  1874. _memory_map_os(size_t size, size_t* offset) {
1875. //Size is either a heap (a single page) or one or more spans - we only need to align spans, and only if larger than map granularity
  1876. size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
  1877. assert(size >= _memory_page_size);
  1878. #if PLATFORM_WINDOWS
  1879. //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
  1880. void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
  1881. if (!ptr) {
  1882. assert(!"Failed to map virtual memory block");
  1883. return 0;
  1884. }
  1885. #else
  1886. int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
  1887. # if defined(__APPLE__)
  1888. int fd = (int)VM_MAKE_TAG(240U);
  1889. if (_memory_huge_pages)
  1890. fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
  1891. void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
  1892. # elif defined(MAP_HUGETLB)
  1893. void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
  1894. # else
  1895. void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
  1896. # endif
  1897. if ((ptr == MAP_FAILED) || !ptr) {
  1898. assert("Failed to map virtual memory block" == 0);
  1899. return 0;
  1900. }
  1901. #endif
  1902. #if ENABLE_STATISTICS
  1903. atomic_add32(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift));
  1904. #endif
  1905. if (padding) {
  1906. size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
  1907. assert(final_padding <= _memory_span_size);
  1908. assert(final_padding <= padding);
  1909. assert(!(final_padding % 8));
  1910. ptr = pointer_offset(ptr, final_padding);
  1911. *offset = final_padding >> 3;
  1912. }
  1913. assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask));
  1914. return ptr;
  1915. }
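// When a span-sized (or larger) mapping is requested and the map granularity is
// smaller than the span size, one extra span of padding is mapped so the result
// can be advanced to a span boundary. The applied offset is always a multiple
// of 8 and is stored divided by 8 in *offset; _memory_unmap_os shifts it back
// (offset <<= 3) to recover the original mapping address.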
  1916. //! Unmap pages from virtual memory
  1917. static void
  1918. _memory_unmap_os(void* address, size_t size, size_t offset, size_t release) {
  1919. assert(release || (offset == 0));
  1920. assert(!release || (release >= _memory_page_size));
  1921. assert(size >= _memory_page_size);
  1922. if (release && offset) {
  1923. offset <<= 3;
  1924. address = pointer_offset(address, -(int32_t)offset);
  1925. #if PLATFORM_POSIX
  1926. //Padding is always one span size
  1927. release += _memory_span_size;
  1928. #endif
  1929. }
  1930. #if !DISABLE_UNMAP
  1931. #if PLATFORM_WINDOWS
  1932. if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
  1933. assert(!"Failed to unmap virtual memory block");
  1934. }
  1935. #else
  1936. if (release) {
  1937. if (munmap(address, release)) {
  1938. assert("Failed to unmap virtual memory block" == 0);
  1939. }
  1940. }
  1941. else {
  1942. #if defined(POSIX_MADV_FREE)
  1943. if (posix_madvise(address, size, POSIX_MADV_FREE))
  1944. #endif
  1945. #if defined(POSIX_MADV_DONTNEED)
  1946. if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
  1947. assert("Failed to madvise virtual memory block as free" == 0);
  1948. }
  1949. #endif
  1950. }
  1951. #endif
  1952. #endif
  1953. #if ENABLE_STATISTICS
  1954. if (release)
  1955. atomic_add32(&_mapped_pages_os, -(int32_t)(release >> _memory_page_size_shift));
  1956. #endif
  1957. }
  1958. // Extern interface
  1959. TRACY_API RPMALLOC_ALLOCATOR void*
  1960. rpmalloc(size_t size) {
  1961. #if ENABLE_VALIDATE_ARGS
  1962. if (size >= MAX_ALLOC_SIZE) {
  1963. errno = EINVAL;
  1964. return 0;
  1965. }
  1966. #endif
  1967. heap_t* heap = get_thread_heap();
  1968. return _memory_allocate(heap, size);
  1969. }
  1970. TRACY_API void
  1971. rpfree(void* ptr) {
  1972. _memory_deallocate(ptr);
  1973. }
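// Minimal usage sketch of the extern interface (illustrative only):
//
//   rpmalloc_initialize();            // once per process (also inits this thread)
//   rpmalloc_thread_initialize();     // once per additional thread
//   void* p = rpmalloc(128);
//   p = rprealloc(p, 256);
//   rpfree(p);
//   rpmalloc_thread_finalize();       // before a thread exits
//   rpmalloc_finalize();              // at process shutdown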
  1974. extern inline RPMALLOC_ALLOCATOR void*
  1975. rpcalloc(size_t num, size_t size) {
  1976. size_t total;
  1977. #if ENABLE_VALIDATE_ARGS
  1978. #if PLATFORM_WINDOWS
  1979. int err = SizeTMult(num, size, &total);
  1980. if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
  1981. errno = EINVAL;
  1982. return 0;
  1983. }
  1984. #else
  1985. int err = __builtin_umull_overflow(num, size, &total);
  1986. if (err || (total >= MAX_ALLOC_SIZE)) {
  1987. errno = EINVAL;
  1988. return 0;
  1989. }
  1990. #endif
  1991. #else
  1992. total = num * size;
  1993. #endif
  1994. heap_t* heap = get_thread_heap();
  1995. void* block = _memory_allocate(heap, total);
  1996. memset(block, 0, total);
  1997. return block;
  1998. }
  1999. TRACY_API RPMALLOC_ALLOCATOR void*
  2000. rprealloc(void* ptr, size_t size) {
  2001. #if ENABLE_VALIDATE_ARGS
  2002. if (size >= MAX_ALLOC_SIZE) {
  2003. errno = EINVAL;
  2004. return ptr;
  2005. }
  2006. #endif
  2007. return _memory_reallocate(ptr, size, 0, 0);
  2008. }
  2009. extern RPMALLOC_ALLOCATOR void*
  2010. rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
  2011. unsigned int flags) {
  2012. #if ENABLE_VALIDATE_ARGS
  2013. if ((size + alignment < size) || (alignment > _memory_page_size)) {
  2014. errno = EINVAL;
  2015. return 0;
  2016. }
  2017. #endif
  2018. void* block;
  2019. if (alignment > 32) {
2020. size_t usablesize = (ptr ? _memory_usable_size(ptr) : 0);
  2021. if ((usablesize >= size) && (size >= (usablesize / 2)) && !((uintptr_t)ptr & (alignment - 1)))
  2022. return ptr;
  2023. block = rpaligned_alloc(alignment, size);
  2024. if (ptr) {
  2025. if (!oldsize)
  2026. oldsize = usablesize;
  2027. if (!(flags & RPMALLOC_NO_PRESERVE))
  2028. memcpy(block, ptr, oldsize < size ? oldsize : size);
  2029. rpfree(ptr);
  2030. }
  2031. //Mark as having aligned blocks
  2032. span_t* span = (span_t*)((uintptr_t)block & _memory_span_mask);
  2033. span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
  2034. } else {
  2035. block = _memory_reallocate(ptr, size, oldsize, flags);
  2036. }
  2037. return block;
  2038. }
  2039. extern RPMALLOC_ALLOCATOR void*
  2040. rpaligned_alloc(size_t alignment, size_t size) {
  2041. if (alignment <= 16)
  2042. return rpmalloc(size);
  2043. #if ENABLE_VALIDATE_ARGS
  2044. if ((size + alignment) < size) {
  2045. errno = EINVAL;
  2046. return 0;
  2047. }
  2048. if (alignment & (alignment - 1)) {
  2049. errno = EINVAL;
  2050. return 0;
  2051. }
  2052. #endif
  2053. void* ptr = 0;
  2054. size_t align_mask = alignment - 1;
  2055. if (alignment < _memory_page_size) {
  2056. ptr = rpmalloc(size + alignment);
  2057. if ((uintptr_t)ptr & align_mask)
  2058. ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
  2059. //Mark as having aligned blocks
  2060. span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask);
  2061. span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
  2062. return ptr;
  2063. }
  2064. // Fallback to mapping new pages for this request. Since pointers passed
  2065. // to rpfree must be able to reach the start of the span by bitmasking of
  2066. // the address with the span size, the returned aligned pointer from this
2067. // function must be within a span size of the start of the mapped area.
  2068. // In worst case this requires us to loop and map pages until we get a
  2069. // suitable memory address. It also means we can never align to span size
  2070. // or greater, since the span header will push alignment more than one
  2071. // span size away from span start (thus causing pointer mask to give us
  2072. // an invalid span start on free)
  2073. if (alignment & align_mask) {
  2074. errno = EINVAL;
  2075. return 0;
  2076. }
  2077. if (alignment >= _memory_span_size) {
  2078. errno = EINVAL;
  2079. return 0;
  2080. }
  2081. size_t extra_pages = alignment / _memory_page_size;
  2082. // Since each span has a header, we will at least need one extra memory page
  2083. size_t num_pages = 1 + (size / _memory_page_size);
  2084. if (size & (_memory_page_size - 1))
  2085. ++num_pages;
  2086. if (extra_pages > num_pages)
  2087. num_pages = 1 + extra_pages;
  2088. size_t original_pages = num_pages;
  2089. size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
  2090. if (limit_pages < (original_pages * 2))
  2091. limit_pages = original_pages * 2;
  2092. size_t mapped_size, align_offset;
  2093. span_t* span;
  2094. retry:
  2095. align_offset = 0;
  2096. mapped_size = num_pages * _memory_page_size;
  2097. span = (span_t*)_memory_map(mapped_size, &align_offset);
  2098. if (!span) {
  2099. errno = ENOMEM;
  2100. return 0;
  2101. }
  2102. ptr = pointer_offset(span, SPAN_HEADER_SIZE);
  2103. if ((uintptr_t)ptr & align_mask)
  2104. ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
  2105. if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
  2106. (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
  2107. (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
  2108. _memory_unmap(span, mapped_size, align_offset, mapped_size);
  2109. ++num_pages;
  2110. if (num_pages > limit_pages) {
  2111. errno = EINVAL;
  2112. return 0;
  2113. }
  2114. goto retry;
  2115. }
  2116. //Store page count in span_count
  2117. span->size_class = (uint32_t)-1;
  2118. span->span_count = (uint32_t)num_pages;
  2119. span->align_offset = (uint32_t)align_offset;
  2120. _memory_statistics_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
  2121. return ptr;
  2122. }
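// The retry loop above grows the mapping by one page per attempt and gives up
// with EINVAL once num_pages exceeds limit_pages, i.e. the larger of two spans
// worth of pages and twice the originally computed page count.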
  2123. extern inline RPMALLOC_ALLOCATOR void*
  2124. rpmemalign(size_t alignment, size_t size) {
  2125. return rpaligned_alloc(alignment, size);
  2126. }
  2127. extern inline int
  2128. rpposix_memalign(void **memptr, size_t alignment, size_t size) {
  2129. if (memptr)
  2130. *memptr = rpaligned_alloc(alignment, size);
  2131. else
  2132. return EINVAL;
  2133. return *memptr ? 0 : ENOMEM;
  2134. }
  2135. extern inline size_t
  2136. rpmalloc_usable_size(void* ptr) {
  2137. return (ptr ? _memory_usable_size(ptr) : 0);
  2138. }
  2139. extern inline void
  2140. rpmalloc_thread_collect(void) {
  2141. }
  2142. void
  2143. rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
  2144. memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
  2145. heap_t* heap = get_thread_heap_raw();
  2146. if (!heap)
  2147. return;
  2148. for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
  2149. size_class_t* size_class = _memory_size_class + iclass;
  2150. heap_class_t* heap_class = heap->span_class + iclass;
  2151. span_t* span = heap_class->partial_span;
  2152. while (span) {
  2153. atomic_thread_fence_acquire();
  2154. size_t free_count = span->list_size;
  2155. if (span->state == SPAN_STATE_PARTIAL)
  2156. free_count += (size_class->block_count - span->used_count);
  2157. stats->sizecache = free_count * size_class->block_size;
  2158. span = span->next;
  2159. }
  2160. }
  2161. #if ENABLE_THREAD_CACHE
  2162. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  2163. if (heap->span_cache[iclass])
  2164. stats->spancache = (size_t)heap->span_cache[iclass]->list_size * (iclass + 1) * _memory_span_size;
  2165. span_t* deferred_list = !iclass ? (span_t*)atomic_load_ptr(&heap->span_cache_deferred) : 0;
  2166. //TODO: Incorrect, for deferred lists the size is NOT stored in list_size
  2167. if (deferred_list)
  2168. stats->spancache = (size_t)deferred_list->list_size * (iclass + 1) * _memory_span_size;
  2169. }
  2170. #endif
  2171. #if ENABLE_STATISTICS
  2172. stats->thread_to_global = heap->thread_to_global;
  2173. stats->global_to_thread = heap->global_to_thread;
  2174. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  2175. stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
  2176. stats->span_use[iclass].peak = (size_t)heap->span_use[iclass].high;
  2177. stats->span_use[iclass].to_global = (size_t)heap->span_use[iclass].spans_to_global;
  2178. stats->span_use[iclass].from_global = (size_t)heap->span_use[iclass].spans_from_global;
  2179. stats->span_use[iclass].to_cache = (size_t)heap->span_use[iclass].spans_to_cache;
  2180. stats->span_use[iclass].from_cache = (size_t)heap->span_use[iclass].spans_from_cache;
  2181. stats->span_use[iclass].to_reserved = (size_t)heap->span_use[iclass].spans_to_reserved;
  2182. stats->span_use[iclass].from_reserved = (size_t)heap->span_use[iclass].spans_from_reserved;
  2183. stats->span_use[iclass].map_calls = (size_t)heap->span_use[iclass].spans_map_calls;
  2184. }
  2185. for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
  2186. stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
  2187. stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
  2188. stats->size_use[iclass].alloc_total = (size_t)heap->size_class_use[iclass].alloc_total;
  2189. stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
  2190. stats->size_use[iclass].spans_to_cache = (size_t)heap->size_class_use[iclass].spans_to_cache;
  2191. stats->size_use[iclass].spans_from_cache = (size_t)heap->size_class_use[iclass].spans_from_cache;
  2192. stats->size_use[iclass].spans_from_reserved = (size_t)heap->size_class_use[iclass].spans_from_reserved;
  2193. stats->size_use[iclass].map_calls = (size_t)heap->size_class_use[iclass].spans_map_calls;
  2194. }
  2195. #endif
  2196. }
  2197. void
  2198. rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
  2199. memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
  2200. #if ENABLE_STATISTICS
  2201. stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
  2202. stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
  2203. stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
  2204. stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
  2205. stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
  2206. stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
  2207. #endif
  2208. #if ENABLE_GLOBAL_CACHE
  2209. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  2210. stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
  2211. }
  2212. #endif
  2213. }
  2214. void
  2215. rpmalloc_dump_statistics(void* file) {
  2216. #if ENABLE_STATISTICS
  2217. //If you hit this assert, you still have active threads or forgot to finalize some thread(s)
  2218. assert(atomic_load32(&_memory_active_heaps) == 0);
  2219. for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2220. heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
  2221. while (heap) {
  2222. fprintf(file, "Heap %d stats:\n", heap->id);
  2223. fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
  2224. for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
  2225. if (!heap->size_class_use[iclass].alloc_total) {
  2226. assert(!atomic_load32(&heap->size_class_use[iclass].free_total));
  2227. assert(!heap->size_class_use[iclass].spans_map_calls);
  2228. continue;
  2229. }
  2230. fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
  2231. atomic_load32(&heap->size_class_use[iclass].alloc_current),
  2232. heap->size_class_use[iclass].alloc_peak,
  2233. heap->size_class_use[iclass].alloc_total,
  2234. atomic_load32(&heap->size_class_use[iclass].free_total),
  2235. _memory_size_class[iclass].block_size,
  2236. _memory_size_class[iclass].block_count,
  2237. heap->size_class_use[iclass].spans_current,
  2238. heap->size_class_use[iclass].spans_peak,
  2239. ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
  2240. ((size_t)heap->size_class_use[iclass].spans_to_cache * _memory_span_size) / (size_t)(1024 * 1024),
  2241. ((size_t)heap->size_class_use[iclass].spans_from_cache * _memory_span_size) / (size_t)(1024 * 1024),
  2242. ((size_t)heap->size_class_use[iclass].spans_from_reserved * _memory_span_size) / (size_t)(1024 * 1024),
  2243. heap->size_class_use[iclass].spans_map_calls);
  2244. }
  2245. fprintf(file, "Spans Current Peak PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
  2246. for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
  2247. if (!heap->span_use[iclass].high && !heap->span_use[iclass].spans_map_calls)
  2248. continue;
  2249. fprintf(file, "%4u: %8d %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
  2250. atomic_load32(&heap->span_use[iclass].current),
  2251. heap->span_use[iclass].high,
  2252. ((size_t)heap->span_use[iclass].high * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
  2253. heap->span_cache[iclass] ? heap->span_cache[iclass]->list_size : 0,
  2254. ((size_t)heap->span_use[iclass].spans_to_cache * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
  2255. ((size_t)heap->span_use[iclass].spans_from_cache * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
  2256. ((size_t)heap->span_use[iclass].spans_to_reserved * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
  2257. ((size_t)heap->span_use[iclass].spans_from_reserved * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
  2258. ((size_t)heap->span_use[iclass].spans_to_global * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
  2259. ((size_t)heap->span_use[iclass].spans_from_global * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
  2260. heap->span_use[iclass].spans_map_calls);
  2261. }
  2262. fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
  2263. fprintf(file, "%17zu %17zu\n", (size_t)heap->thread_to_global / (size_t)(1024 * 1024), (size_t)heap->global_to_thread / (size_t)(1024 * 1024));
  2264. heap = heap->next_heap;
  2265. }
  2266. }
  2267. fprintf(file, "Global stats:\n");
  2268. size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
  2269. size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
  2270. fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
  2271. fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));
  2272. size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
  2273. size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
  2274. size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
  2275. size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
  2276. size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
  2277. size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
  2278. fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
  2279. fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
  2280. mapped / (size_t)(1024 * 1024),
  2281. mapped_os / (size_t)(1024 * 1024),
  2282. mapped_peak / (size_t)(1024 * 1024),
  2283. mapped_total / (size_t)(1024 * 1024),
  2284. unmapped_total / (size_t)(1024 * 1024),
  2285. reserved_total / (size_t)(1024 * 1024));
  2286. fprintf(file, "\n");
  2287. #else
  2288. (void)sizeof(file);
  2289. #endif
  2290. }
  2291. }
  2292. #endif