alloc.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "alloc.h"
  4. #include "intrinsics.h"
  5. #include "sysinfo.h"
  6. #include "mutex.h"
  7. ////////////////////////////////////////////////////////////////////////////////
  8. /// All Platforms
  9. ////////////////////////////////////////////////////////////////////////////////
  10. namespace embree
  11. {
  // Counter incremented once per non-empty request in alignedSYCLMalloc(context, device, ...).
  // Nothing visible in this file reads or decrements it.
  // NOTE(review): the increment is a plain ++ on a size_t with no lock — confirm whether
  // concurrent SYCL allocations are expected.
  12. size_t total_allocations = 0;
  13. #if defined(EMBREE_SYCL_SUPPORT)
  // Per-thread SYCL context/device pairs used by the USM allocation helpers below.
  // The "tutorial" pair is written by enableUSMAllocTutorial()/disableUSMAllocTutorial(),
  // the "embree" pair by enableUSMAllocEmbree()/disableUSMAllocEmbree().
  // A null context pointer means that pair is not currently registered.
  14. __thread sycl::context* tls_context_tutorial = nullptr;
  15. __thread sycl::device* tls_device_tutorial = nullptr;
  16. __thread sycl::context* tls_context_embree = nullptr;
  17. __thread sycl::device* tls_device_embree = nullptr;
  18. void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
  19. {
  20. //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
  21. //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
  22. if (tls_context_embree != nullptr) {
  23. abort();
  24. }
  25. if (tls_device_embree != nullptr) {
  26. abort();
  27. }
  28. tls_context_embree = context;
  29. tls_device_embree = device;
  30. }
  31. void disableUSMAllocEmbree()
  32. {
  33. //if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
  34. //if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
  35. if (tls_context_embree == nullptr) {
  36. abort();
  37. }
  38. if (tls_device_embree == nullptr) {
  39. abort();
  40. }
  41. tls_context_embree = nullptr;
  42. tls_device_embree = nullptr;
  43. }
  44. void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
  45. {
  46. //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
  47. //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
  48. tls_context_tutorial = context;
  49. tls_device_tutorial = device;
  50. }
  51. void disableUSMAllocTutorial()
  52. {
  53. //if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
  54. //if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
  55. if (tls_context_tutorial == nullptr) {
  56. abort();
  57. }
  58. if (tls_device_tutorial == nullptr) {
  59. abort();
  60. }
  61. tls_context_tutorial = nullptr;
  62. tls_device_tutorial = nullptr;
  63. }
  64. #endif
  /*! Allocates size bytes with the requested alignment through _mm_malloc.
   *  \param size   number of bytes; a size of 0 returns nullptr without allocating
   *  \param align  alignment in bytes; asserted to have at most one bit set
   *  \return       the allocated memory; the process is aborted when the
   *                allocation fails, so a non-zero request never returns nullptr */
  void* alignedMalloc(size_t size, size_t align)
  {
    /* empty requests produce no allocation */
    if (size == 0)
      return nullptr;

    assert((align & (align-1)) == 0);

    void* const mem = _mm_malloc(size,align);

    /* size is known to be non-zero here, so a null result means the
     * allocation failed (abort() replaces the former std::bad_alloc) */
    if (mem == nullptr)
      abort();

    return mem;
  }
  /*! Releases memory obtained from alignedMalloc via _mm_free.
   *  A null pointer is ignored. */
  void alignedFree(void* ptr)
  {
    if (ptr == nullptr)
      return;

    _mm_free(ptr);
  }
  83. #if defined(EMBREE_SYCL_SUPPORT)
  84. void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
  85. {
  86. assert(context);
  87. assert(device);
  88. if (size == 0)
  89. return nullptr;
  90. assert((align & (align-1)) == 0);
  91. total_allocations++;
  92. void* ptr = nullptr;
  93. if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
  94. ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
  95. else
  96. ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
  97. //if (size != 0 && ptr == nullptr)
  98. // throw std::bad_alloc();
  99. if (size != 0 && ptr == nullptr) {
  100. abort();
  101. }
  102. return ptr;
  103. }
  // File-local mutex. No code visible in this file locks it.
  // NOTE(review): possibly a leftover — confirm before removing.
  104. static MutexSys g_alloc_mutex;
  105. void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
  106. {
  107. if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
  108. if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode);
  109. return nullptr;
  110. }
  111. void alignedSYCLFree(sycl::context* context, void* ptr)
  112. {
  113. assert(context);
  114. if (ptr) {
  115. sycl::free(ptr,*context);
  116. }
  117. }
  118. void alignedSYCLFree(void* ptr)
  119. {
  120. if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
  121. if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr);
  122. }
  123. #endif
  124. void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
  125. {
  126. #if defined(EMBREE_SYCL_SUPPORT)
  127. if (tls_context_embree || tls_context_tutorial)
  128. return alignedSYCLMalloc(size,align,mode);
  129. else
  130. #endif
  131. return alignedMalloc(size,align);
  132. }
  133. void alignedUSMFree(void* ptr)
  134. {
  135. #if defined(EMBREE_SYCL_SUPPORT)
  136. if (tls_context_embree || tls_context_tutorial)
  137. return alignedSYCLFree(ptr);
  138. else
  139. #endif
  140. return alignedFree(ptr);
  141. }
  // Written by os_init(): true when huge pages were requested and that platform's
  // check in os_init (if it has one) passed. Read by isHugePageCandidate().
  142. static bool huge_pages_enabled = false;
  // Held (via Lock<MutexSys>) for the duration of os_init().
  143. static MutexSys os_init_mutex;
  144. __forceinline bool isHugePageCandidate(const size_t bytes)
  145. {
  146. if (!huge_pages_enabled)
  147. return false;
  148. /* use huge pages only when memory overhead is low */
  149. const size_t hbytes = (bytes+PAGE_SIZE_2M-1) & ~size_t(PAGE_SIZE_2M-1);
  150. return 66*(hbytes-bytes) < bytes; // at most 1.5% overhead
  151. }
  152. }
  153. ////////////////////////////////////////////////////////////////////////////////
  154. /// Windows Platform
  155. ////////////////////////////////////////////////////////////////////////////////
  156. #ifdef _WIN32
  157. #define WIN32_LEAN_AND_MEAN
  158. #include <windows.h>
  159. #include <malloc.h>
  160. namespace embree
  161. {
  162. bool win_enable_selockmemoryprivilege (bool verbose)
  163. {
  164. HANDLE hToken;
  165. if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES, &hToken)) {
  166. if (verbose) std::cout << "WARNING: OpenProcessToken failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
  167. return false;
  168. }
  169. TOKEN_PRIVILEGES tp;
  170. tp.PrivilegeCount = 1;
  171. tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
  172. if (!LookupPrivilegeValueW(nullptr, L"SeLockMemoryPrivilege", &tp.Privileges[0].Luid)) {
  173. if (verbose) std::cout << "WARNING: LookupPrivilegeValue failed while trying to enable SeLockMemoryPrivilege: " << GetLastError() << std::endl;
  174. return false;
  175. }
  176. SetLastError(ERROR_SUCCESS);
  177. if (!AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, 0)) {
  178. if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed while trying to enable SeLockMemoryPrivilege" << std::endl;
  179. return false;
  180. }
  181. if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) {
  182. if (verbose) std::cout << "WARNING: AdjustTokenPrivileges failed to enable SeLockMemoryPrivilege: Add SeLockMemoryPrivilege for current user and run process in elevated mode (Run as administrator)." << std::endl;
  183. return false;
  184. }
  185. return true;
  186. }
  187. bool os_init(bool hugepages, bool verbose)
  188. {
  189. Lock<MutexSys> lock(os_init_mutex);
  190. if (!hugepages) {
  191. huge_pages_enabled = false;
  192. return true;
  193. }
  194. if (GetLargePageMinimum() != PAGE_SIZE_2M) {
  195. huge_pages_enabled = false;
  196. return false;
  197. }
  198. huge_pages_enabled = true;
  199. return true;
  200. }
  201. void* os_malloc(size_t bytes, bool& hugepages)
  202. {
  203. if (bytes == 0) {
  204. hugepages = false;
  205. return nullptr;
  206. }
  207. /* try direct huge page allocation first */
  208. if (isHugePageCandidate(bytes))
  209. {
  210. int flags = MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES;
  211. char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
  212. if (ptr != nullptr) {
  213. hugepages = true;
  214. return ptr;
  215. }
  216. }
  217. /* fall back to 4k pages */
  218. int flags = MEM_COMMIT | MEM_RESERVE;
  219. char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
  220. //if (ptr == nullptr) throw std::bad_alloc();
  221. if (ptr == nullptr) {
  222. abort();
  223. }
  224. hugepages = false;
  225. return ptr;
  226. }
  227. size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
  228. {
  229. if (hugepages) // decommitting huge pages seems not to work under Windows
  230. return bytesOld;
  231. const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
  232. bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
  233. bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
  234. if (bytesNew >= bytesOld)
  235. return bytesOld;
  236. //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
  237. // throw std::bad_alloc();
  238. if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
  239. abort();
  240. }
  241. return bytesNew;
  242. }
  243. void os_free(void* ptr, size_t bytes, bool hugepages)
  244. {
  245. if (bytes == 0)
  246. return;
  247. //if (!VirtualFree(ptr,0,MEM_RELEASE))
  248. // throw std::bad_alloc();
  249. if (!VirtualFree(ptr,0,MEM_RELEASE)) {
  250. abort();
  251. }
  252. }
  /*! Huge page hint for a memory range; this Windows implementation
   *  intentionally does nothing. */
  void os_advise(void *ptr, size_t bytes)
  {
    /* no-op */
  }
  256. }
  257. #endif
  258. ////////////////////////////////////////////////////////////////////////////////
  259. /// Unix Platform
  260. ////////////////////////////////////////////////////////////////////////////////
  261. #if defined(__UNIX__)
  262. #include <sys/mman.h>
  263. #include <errno.h>
  264. #include <stdlib.h>
  265. #include <string.h>
  266. #include <sstream>
  267. #if defined(__MACOSX__)
  268. #include <mach/vm_statistics.h>
  269. #endif
  270. namespace embree
  271. {
  272. bool os_init(bool hugepages, bool verbose)
  273. {
  274. Lock<MutexSys> lock(os_init_mutex);
  275. if (!hugepages) {
  276. huge_pages_enabled = false;
  277. return true;
  278. }
  279. #if defined(__LINUX__)
  280. int hugepagesize = 0;
  281. std::ifstream file;
  282. file.open("/proc/meminfo",std::ios::in);
  283. if (!file.is_open()) {
  284. if (verbose) std::cout << "WARNING: Could not open /proc/meminfo. Huge page support cannot get enabled!" << std::endl;
  285. huge_pages_enabled = false;
  286. return false;
  287. }
  288. std::string line;
  289. while (getline(file,line))
  290. {
  291. std::stringstream sline(line);
  292. while (!sline.eof() && sline.peek() == ' ') sline.ignore();
  293. std::string tag; getline(sline,tag,' ');
  294. while (!sline.eof() && sline.peek() == ' ') sline.ignore();
  295. std::string val; getline(sline,val,' ');
  296. while (!sline.eof() && sline.peek() == ' ') sline.ignore();
  297. std::string unit; getline(sline,unit,' ');
  298. if (tag == "Hugepagesize:" && unit == "kB") {
  299. hugepagesize = std::stoi(val)*1024;
  300. break;
  301. }
  302. }
  303. if (hugepagesize != PAGE_SIZE_2M)
  304. {
  305. if (verbose) std::cout << "WARNING: Only 2MB huge pages supported. Huge page support cannot get enabled!" << std::endl;
  306. huge_pages_enabled = false;
  307. return false;
  308. }
  309. #endif
  310. huge_pages_enabled = true;
  311. return true;
  312. }
  313. void* os_malloc(size_t bytes, bool& hugepages)
  314. {
  315. if (bytes == 0) {
  316. hugepages = false;
  317. return nullptr;
  318. }
  319. /* try direct huge page allocation first */
  320. if (isHugePageCandidate(bytes))
  321. {
  322. #if defined(__MACOSX__)
  323. void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
  324. if (ptr != MAP_FAILED) {
  325. hugepages = true;
  326. return ptr;
  327. }
  328. #elif defined(MAP_HUGETLB)
  329. void* ptr = mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
  330. if (ptr != MAP_FAILED) {
  331. hugepages = true;
  332. return ptr;
  333. }
  334. #endif
  335. }
  336. /* fallback to 4k pages */
  337. void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
  338. //if (ptr == MAP_FAILED) throw std::bad_alloc();
  339. if (ptr == MAP_FAILED) {
  340. abort();
  341. }
  342. hugepages = false;
  343. /* advise huge page hint for THP */
  344. os_advise(ptr,bytes);
  345. return ptr;
  346. }
  347. size_t os_shrink(void* ptr, size_t bytesNew, size_t bytesOld, bool hugepages)
  348. {
  349. const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
  350. bytesNew = (bytesNew+pageSize-1) & ~(pageSize-1);
  351. bytesOld = (bytesOld+pageSize-1) & ~(pageSize-1);
  352. if (bytesNew >= bytesOld)
  353. return bytesOld;
  354. //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
  355. // throw std::bad_alloc();
  356. if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
  357. abort();
  358. }
  359. return bytesNew;
  360. }
  361. void os_free(void* ptr, size_t bytes, bool hugepages)
  362. {
  363. if (bytes == 0)
  364. return;
  365. /* for hugepages we need to also align the size */
  366. const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
  367. bytes = (bytes+pageSize-1) & ~(pageSize-1);
  368. //if (munmap(ptr,bytes) == -1)
  369. // throw std::bad_alloc();
  370. if (munmap(ptr,bytes) == -1) {
  371. abort();
  372. }
  373. }
  /*! Transparent huge page (THP) hint: where MADV_HUGEPAGE is available the
   *  range is passed to madvise with that advice, and the result of the
   *  call is ignored. Without MADV_HUGEPAGE this function does nothing. */
  void os_advise(void* pptr, size_t bytes)
  {
#if defined(MADV_HUGEPAGE)
    /* best effort only */
    madvise(pptr,bytes,MADV_HUGEPAGE);
#endif
  }
  381. }
  382. #endif