sysinfo.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. // Copyright 2009-2020 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "sysinfo.h"
  4. #include "intrinsics.h"
  5. #include "string.h"
  6. #include "ref.h"
  7. #if defined(__FREEBSD__)
  8. #include <sys/cpuset.h>
  9. #include <pthread_np.h>
  10. typedef cpuset_t cpu_set_t;
  11. #endif
  12. ////////////////////////////////////////////////////////////////////////////////
  13. /// All Platforms
  14. ////////////////////////////////////////////////////////////////////////////////
  15. namespace embree
  16. {
  17. NullTy null;
  /* Returns a human readable name of the platform (operating system and
   * architecture) this file was compiled for. The choice is made purely at
   * compile time from the platform macros; "Unknown" is returned when no
   * known combination matches.
   *
   * 64-bit ARM builds are reported as "(aarch64)" instead of falling into
   * the "not x86-64, hence 32bit" branches.
   *
   * NOTE(review): the iOS branches only test whether TARGET_IPHONE_SIMULATOR /
   * TARGET_OS_IPHONE are defined. If these come from Apple's
   * TargetConditionals.h they are presumably defined as 0 or 1 on every Apple
   * platform - confirm that the header is not visible here on macOS builds. */
  std::string getPlatformName()
  {
#if defined(__LINUX__) && defined(__ANDROID__) && defined(__aarch64__) && defined(__ARM_NEON)
    return "Android Linux (aarch64 / arm64)";
#elif defined(__LINUX__) && defined(__ANDROID__) && defined(__X86_64__)
    return "Android Linux (x64)";
#elif defined(__LINUX__) && defined(__ANDROID__) && (defined(_X86_) || defined(__X86__) || defined(_M_IX86))
    return "Android Linux (x86)";
#elif defined(__LINUX__) && defined(__aarch64__)
    return "Linux (aarch64)";
#elif defined(__LINUX__) && !defined(__X86_64__)
    return "Linux (32bit)";
#elif defined(__LINUX__) && defined(__X86_64__)
    return "Linux (64bit)";
#elif defined(__FREEBSD__) && defined(__aarch64__)
    return "FreeBSD (aarch64)";
#elif defined(__FREEBSD__) && !defined(__X86_64__)
    return "FreeBSD (32bit)";
#elif defined(__FREEBSD__) && defined(__X86_64__)
    return "FreeBSD (64bit)";
#elif defined(__CYGWIN__) && !defined(__X86_64__)
    return "Cygwin (32bit)";
#elif defined(__CYGWIN__) && defined(__X86_64__)
    return "Cygwin (64bit)";
#elif defined(__WIN32__) && !defined(__X86_64__)
    return "Windows (32bit)";
#elif defined(__WIN32__) && defined(__X86_64__)
    return "Windows (64bit)";
#elif defined(TARGET_IPHONE_SIMULATOR) && defined(__X86_64__)
    return "iOS Simulator (x64)";
#elif defined(TARGET_OS_IPHONE) && defined(__aarch64__) && defined(__ARM_NEON)
    return "iOS (aarch64 / arm64)";
#elif defined(__MACOSX__) && defined(__aarch64__)
    return "Mac OS X (aarch64)";
#elif defined(__MACOSX__) && !defined(__X86_64__)
    return "Mac OS X (32bit)";
#elif defined(__MACOSX__) && defined(__X86_64__)
    return "Mac OS X (64bit)";
#elif defined(__UNIX__) && defined(__aarch64__)
    return "Unix (aarch64)";
#elif defined(__UNIX__) && !defined(__X86_64__)
    return "Unix (32bit)";
#elif defined(__UNIX__) && defined(__X86_64__)
    return "Unix (64bit)";
#else
    return "Unknown";
#endif
  }
  /* Returns the name and version of the compiler that built this file.
   * The compiler is identified at compile time through its predefined
   * macros; "Unknown Compiler" is returned if none of them is set. */
  std::string getCompilerName()
  {
#if defined(__INTEL_COMPILER)
    /* __INTEL_COMPILER encodes major and minor version as decimal digit pairs */
    const int major_version = __INTEL_COMPILER / 100 % 100;
    const int minor_version = __INTEL_COMPILER % 100;
    std::string name = "Intel Compiler ";
    name += toString(major_version);
    name += "." + toString(minor_version);
#if defined(__INTEL_COMPILER_UPDATE)
    name += "." + toString(__INTEL_COMPILER_UPDATE);
#endif
    return name;
#elif defined(__clang__)
    return std::string("CLANG ") + __clang_version__;
#elif defined (__GNUC__)
    return std::string("GCC ") + __VERSION__;
#elif defined(_MSC_VER)
    /* split the digits of _MSC_FULL_VER with dots; the insert positions
     * refer to the string as it grows, so their order matters */
    std::string digits = toString(_MSC_FULL_VER);
    digits.insert(4,".");
    digits.insert(9,".");
    digits.insert(2,".");
    return "Visual C++ Compiler " + digits;
#else
    return "Unknown Compiler";
#endif
  }
  86. std::string getCPUVendor()
  87. {
  88. int cpuinfo[4];
  89. __cpuid (cpuinfo, 0);
  90. int name[4];
  91. name[0] = cpuinfo[1];
  92. name[1] = cpuinfo[3];
  93. name[2] = cpuinfo[2];
  94. name[3] = 0;
  95. return (char*)name;
  96. }
  97. CPU getCPUModel()
  98. {
  99. if (getCPUVendor() != "GenuineIntel")
  100. return CPU::UNKNOWN;
  101. int out[4];
  102. __cpuid(out, 0);
  103. if (out[0] < 1) return CPU::UNKNOWN;
  104. __cpuid(out, 1);
  105. /* please see CPUID documentation for these formulas */
  106. uint32_t family_ID = (out[0] >> 8) & 0x0F;
  107. uint32_t extended_family_ID = (out[0] >> 20) & 0xFF;
  108. uint32_t model_ID = (out[0] >> 4) & 0x0F;
  109. uint32_t extended_model_ID = (out[0] >> 16) & 0x0F;
  110. uint32_t DisplayFamily = family_ID;
  111. if (family_ID == 0x0F)
  112. DisplayFamily += extended_family_ID;
  113. uint32_t DisplayModel = model_ID;
  114. if (family_ID == 0x06 || family_ID == 0x0F)
  115. DisplayModel += extended_model_ID << 4;
  116. uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);
  117. // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
  118. if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
  119. if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
  120. if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
  121. if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE;
  122. if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE;
  123. if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE;
  124. if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE;
  125. if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE;
  126. if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE;
  127. if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE;
  128. if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE;
  129. if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE;
  130. if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE;
  131. if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL;
  132. if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL;
  133. if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL;
  134. if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL;
  135. if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL;
  136. if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL;
  137. if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL;
  138. if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL;
  139. if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE;
  140. if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE;
  141. if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE;
  142. if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE;
  143. if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE;
  144. if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM;
  145. if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM;
  146. if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM;
  147. if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM;
  148. if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM;
  149. if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM;
  150. if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM;
  151. if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2;
  152. if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2;
  153. if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1;
  154. if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL;
  155. if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING;
  156. return CPU::UNKNOWN;
  157. }
  158. std::string stringOfCPUModel(CPU model)
  159. {
  160. switch (model) {
  161. case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake";
  162. case CPU::CORE_ICE_LAKE : return "Core Ice Lake";
  163. case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake";
  164. case CPU::CORE_COMET_LAKE : return "Core Comet Lake";
  165. case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake";
  166. case CPU::CORE_KABY_LAKE : return "Core Kaby Lake";
  167. case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake";
  168. case CPU::CORE_SKY_LAKE : return "Core Sky Lake";
  169. case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill";
  170. case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing";
  171. case CPU::XEON_BROADWELL : return "Xeon Broadwell";
  172. case CPU::CORE_BROADWELL : return "Core Broadwell";
  173. case CPU::XEON_HASWELL : return "Xeon Haswell";
  174. case CPU::CORE_HASWELL : return "Core Haswell";
  175. case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge";
  176. case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge";
  177. case CPU::SANDY_BRIDGE : return "Sandy Bridge";
  178. case CPU::NEHALEM : return "Nehalem";
  179. case CPU::CORE2 : return "Core2";
  180. case CPU::CORE1 : return "Core";
  181. case CPU::ARM : return "Arm";
  182. case CPU::UNKNOWN : return "Unknown CPU";
  183. }
  184. return "Unknown CPU (error)";
  185. }
  #if !defined(__ARM_NEON)

  /* indices of the four destination registers in a CPUID result array */
  static const int EAX = 0;
  static const int EBX = 1;
  static const int ECX = 2;
  static const int EDX = 3;

  /* feature bits of cpuid[eax=1].ecx */
  static const int CPU_FEATURE_BIT_SSE3   = 1 <<  0;
  static const int CPU_FEATURE_BIT_SSSE3  = 1 <<  9;
  static const int CPU_FEATURE_BIT_FMA3   = 1 << 12;
  static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19;
  static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20;
  //static const int CPU_FEATURE_BIT_MOVBE  = 1 << 22;
  static const int CPU_FEATURE_BIT_POPCNT = 1 << 23;
  //static const int CPU_FEATURE_BIT_XSAVE  = 1 << 26;
  static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27;
  static const int CPU_FEATURE_BIT_AVX    = 1 << 28;
  static const int CPU_FEATURE_BIT_F16C   = 1 << 29;
  static const int CPU_FEATURE_BIT_RDRAND = 1 << 30;

  /* feature bits of cpuid[eax=1].edx */
  static const int CPU_FEATURE_BIT_SSE  = 1 << 25;
  static const int CPU_FEATURE_BIT_SSE2 = 1 << 26;

  /* feature bits of cpuid[eax=0x80000001].ecx */
  static const int CPU_FEATURE_BIT_LZCNT = 1 << 5;

  /* feature bits of cpuid[eax=7,ecx=0].ebx, listed in bit order */
  static const int CPU_FEATURE_BIT_BMI1       = 1 <<  3;
  static const int CPU_FEATURE_BIT_AVX2       = 1 <<  5;
  static const int CPU_FEATURE_BIT_BMI2       = 1 <<  8;
  static const int CPU_FEATURE_BIT_AVX512F    = 1 << 16; // AVX512F (foundation)
  static const int CPU_FEATURE_BIT_AVX512DQ   = 1 << 17; // AVX512DQ (doubleword and quadword instructions)
  static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions)
  static const int CPU_FEATURE_BIT_AVX512PF   = 1 << 26; // AVX512PF (prefetch gather/scatter instructions)
  static const int CPU_FEATURE_BIT_AVX512ER   = 1 << 27; // AVX512ER (exponential and reciprocal instructions)
  static const int CPU_FEATURE_BIT_AVX512CD   = 1 << 28; // AVX512CD (conflict detection instructions)
  static const int CPU_FEATURE_BIT_AVX512BW   = 1 << 30; // AVX512BW (byte and word instructions)
  static const int CPU_FEATURE_BIT_AVX512VL   = 1 << 31; // AVX512VL (vector length extensions)

  /* feature bits of cpuid[eax=7,ecx=0].ecx */
  static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions)

  #endif
  225. #if !defined(__ARM_NEON)
  226. __noinline int64_t get_xcr0()
  227. {
  228. // https://github.com/opencv/opencv/blob/master/modules/core/src/system.cpp#L466
  229. #if defined (__WIN32__) && defined(_XCR_XFEATURE_ENABLED_MASK)
  230. int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
  231. xcr0 = _xgetbv(0);
  232. return xcr0;
  233. #else
  234. int xcr0 = 0;
  235. __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
  236. return xcr0;
  237. #endif
  238. }
  239. #endif
  240. int getCPUFeatures()
  241. {
  242. #if defined(__ARM_NEON)
  243. int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2;
  244. #if defined(NEON_AVX2_EMULATION)
  245. cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42;
  246. cpu_features |= CPU_FEATURE_XMM_ENABLED;
  247. cpu_features |= CPU_FEATURE_YMM_ENABLED;
  248. cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C;
  249. cpu_features |= CPU_FEATURE_POPCNT;
  250. cpu_features |= CPU_FEATURE_AVX;
  251. cpu_features |= CPU_FEATURE_AVX2;
  252. cpu_features |= CPU_FEATURE_FMA3;
  253. cpu_features |= CPU_FEATURE_LZCNT;
  254. cpu_features |= CPU_FEATURE_BMI1;
  255. cpu_features |= CPU_FEATURE_BMI2;
  256. cpu_features |= CPU_FEATURE_NEON_2X;
  257. #endif
  258. return cpu_features;
  259. #else
  260. /* cache CPU features access */
  261. static int cpu_features = 0;
  262. if (cpu_features)
  263. return cpu_features;
  264. /* get number of CPUID leaves */
  265. int cpuid_leaf0[4];
  266. __cpuid(cpuid_leaf0, 0x00000000);
  267. unsigned nIds = cpuid_leaf0[EAX];
  268. /* get number of extended CPUID leaves */
  269. int cpuid_leafe[4];
  270. __cpuid(cpuid_leafe, 0x80000000);
  271. unsigned nExIds = cpuid_leafe[EAX];
  272. /* get CPUID leaves for EAX = 1,7, and 0x80000001 */
  273. int cpuid_leaf_1[4] = { 0,0,0,0 };
  274. int cpuid_leaf_7[4] = { 0,0,0,0 };
  275. int cpuid_leaf_e1[4] = { 0,0,0,0 };
  276. if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
  277. #if _WIN32
  278. #if _MSC_VER && (_MSC_FULL_VER < 160040219)
  279. #else
  280. if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
  281. #endif
  282. #else
  283. if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
  284. #endif
  285. if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);
  286. /* detect if OS saves XMM, YMM, and ZMM states */
  287. bool xmm_enabled = true;
  288. bool ymm_enabled = false;
  289. bool zmm_enabled = false;
  290. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
  291. int64_t xcr0 = get_xcr0();
  292. xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */
  293. ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
  294. zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
  295. }
  296. if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED;
  297. if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED;
  298. if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED;
  299. if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE;
  300. if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2;
  301. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3;
  302. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3;
  303. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41;
  304. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42;
  305. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT;
  306. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX;
  307. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C;
  308. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND;
  309. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2;
  310. if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3;
  311. if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT;
  312. if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1;
  313. if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2;
  314. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F;
  315. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ;
  316. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF;
  317. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER;
  318. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD;
  319. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW;
  320. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA;
  321. if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL;
  322. if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
  323. return cpu_features;
  324. #endif
  325. }
  326. std::string stringOfCPUFeatures(int features)
  327. {
  328. std::string str;
  329. if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
  330. if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM ";
  331. if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM ";
  332. if (features & CPU_FEATURE_SSE ) str += "SSE ";
  333. if (features & CPU_FEATURE_SSE2 ) str += "SSE2 ";
  334. if (features & CPU_FEATURE_SSE3 ) str += "SSE3 ";
  335. if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 ";
  336. if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 ";
  337. if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 ";
  338. if (features & CPU_FEATURE_POPCNT) str += "POPCNT ";
  339. if (features & CPU_FEATURE_AVX ) str += "AVX ";
  340. if (features & CPU_FEATURE_F16C ) str += "F16C ";
  341. if (features & CPU_FEATURE_RDRAND) str += "RDRAND ";
  342. if (features & CPU_FEATURE_AVX2 ) str += "AVX2 ";
  343. if (features & CPU_FEATURE_FMA3 ) str += "FMA3 ";
  344. if (features & CPU_FEATURE_LZCNT ) str += "LZCNT ";
  345. if (features & CPU_FEATURE_BMI1 ) str += "BMI1 ";
  346. if (features & CPU_FEATURE_BMI2 ) str += "BMI2 ";
  347. if (features & CPU_FEATURE_AVX512F) str += "AVX512F ";
  348. if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ ";
  349. if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF ";
  350. if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER ";
  351. if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD ";
  352. if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW ";
  353. if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
  354. if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
  355. if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
  356. if (features & CPU_FEATURE_NEON) str += "NEON ";
  357. if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
  358. return str;
  359. }
  360. std::string stringOfISA (int isa)
  361. {
  362. if (isa == SSE) return "SSE";
  363. if (isa == SSE2) return "SSE2";
  364. if (isa == SSE3) return "SSE3";
  365. if (isa == SSSE3) return "SSSE3";
  366. if (isa == SSE41) return "SSE4.1";
  367. if (isa == SSE42) return "SSE4.2";
  368. if (isa == AVX) return "AVX";
  369. if (isa == AVX2) return "AVX2";
  370. if (isa == AVX512KNL) return "AVX512KNL";
  371. if (isa == AVX512SKX) return "AVX512SKX";
  372. if (isa == NEON) return "NEON";
  373. if (isa == NEON_2X) return "2xNEON";
  374. return "UNKNOWN";
  375. }
  /* Tests whether every bit of the mask 'isa' is also set in 'features'. */
  bool hasISA(int features, int isa)
  {
    /* bits requested by 'isa' that 'features' does not provide */
    const int missing = isa & ~features;
    return missing == 0;
  }
  379. std::string supportedTargetList (int features)
  380. {
  381. std::string v;
  382. if (hasISA(features,SSE)) v += "SSE ";
  383. if (hasISA(features,SSE2)) v += "SSE2 ";
  384. if (hasISA(features,SSE3)) v += "SSE3 ";
  385. if (hasISA(features,SSSE3)) v += "SSSE3 ";
  386. if (hasISA(features,SSE41)) v += "SSE4.1 ";
  387. if (hasISA(features,SSE42)) v += "SSE4.2 ";
  388. if (hasISA(features,AVX)) v += "AVX ";
  389. if (hasISA(features,AVXI)) v += "AVXI ";
  390. if (hasISA(features,AVX2)) v += "AVX2 ";
  391. if (hasISA(features,AVX512KNL)) v += "AVX512KNL ";
  392. if (hasISA(features,AVX512SKX)) v += "AVX512SKX ";
  393. if (hasISA(features,NEON)) v += "NEON ";
  394. if (hasISA(features,NEON_2X)) v += "2xNEON ";
  395. return v;
  396. }
  397. }
  398. ////////////////////////////////////////////////////////////////////////////////
  399. /// Windows Platform
  400. ////////////////////////////////////////////////////////////////////////////////
  401. #if defined(__WIN32__)
  402. #define WIN32_LEAN_AND_MEAN
  403. #include <windows.h>
  404. #include <psapi.h>
  405. namespace embree
  406. {
  407. std::string getExecutableFileName() {
  408. char filename[1024];
  409. if (!GetModuleFileName(nullptr, filename, sizeof(filename)))
  410. return std::string();
  411. return std::string(filename);
  412. }
  413. unsigned int getNumberOfLogicalThreads()
  414. {
  415. static int nThreads = -1;
  416. if (nThreads != -1) return nThreads;
  417. typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  418. typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  419. HMODULE hlib = LoadLibrary("Kernel32");
  420. GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  421. GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount");
  422. if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount)
  423. {
  424. int groups = pGetActiveProcessorGroupCount();
  425. int totalProcessors = 0;
  426. for (int i = 0; i < groups; i++)
  427. totalProcessors += pGetActiveProcessorCount(i);
  428. nThreads = totalProcessors;
  429. }
  430. else
  431. {
  432. SYSTEM_INFO sysinfo;
  433. GetSystemInfo(&sysinfo);
  434. nThreads = sysinfo.dwNumberOfProcessors;
  435. }
  436. assert(nThreads);
  437. return nThreads;
  438. }
  439. int getTerminalWidth()
  440. {
  441. HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);
  442. if (handle == INVALID_HANDLE_VALUE) return 80;
  443. CONSOLE_SCREEN_BUFFER_INFO info;
  444. memset(&info,0,sizeof(info));
  445. GetConsoleScreenBufferInfo(handle, &info);
  446. return info.dwSize.X;
  447. }
  448. double getSeconds()
  449. {
  450. LARGE_INTEGER freq, val;
  451. QueryPerformanceFrequency(&freq);
  452. QueryPerformanceCounter(&val);
  453. return (double)val.QuadPart / (double)freq.QuadPart;
  454. }
  455. void sleepSeconds(double t) {
  456. Sleep(DWORD(1000.0*t));
  457. }
  458. size_t getVirtualMemoryBytes()
  459. {
  460. PROCESS_MEMORY_COUNTERS info;
  461. GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
  462. return (size_t)info.QuotaPeakPagedPoolUsage;
  463. }
  464. size_t getResidentMemoryBytes()
  465. {
  466. PROCESS_MEMORY_COUNTERS info;
  467. GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
  468. return (size_t)info.WorkingSetSize;
  469. }
  470. }
  471. #endif
  472. ////////////////////////////////////////////////////////////////////////////////
  473. /// Linux Platform
  474. ////////////////////////////////////////////////////////////////////////////////
  475. #if defined(__LINUX__)
  476. #include <stdio.h>
  477. #include <unistd.h>
  478. namespace embree
  479. {
  480. std::string getExecutableFileName()
  481. {
  482. std::string pid = "/proc/" + toString(getpid()) + "/exe";
  483. char buf[4096];
  484. memset(buf,0,sizeof(buf));
  485. if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1)
  486. return std::string();
  487. return std::string(buf);
  488. }
  /* Returns the first value of /proc/self/statm multiplied by the system
   * page size, i.e. the size of the process in bytes as reported there.
   * Returns 0 if the file cannot be opened or parsed (previously an
   * uninitialized value was used in that case). */
  size_t getVirtualMemoryBytes()
  {
    size_t virt = 0;
    std::ifstream buffer("/proc/self/statm");
    if (!(buffer >> virt))
      return 0;
    return virt*sysconf(_SC_PAGE_SIZE);
  }
  /* Returns the second value of /proc/self/statm multiplied by the system
   * page size, i.e. the resident size of the process in bytes as reported
   * there. Returns 0 if the file cannot be opened or parsed (previously an
   * uninitialized value was used in that case). */
  size_t getResidentMemoryBytes()
  {
    size_t virt = 0, resident = 0;
    std::ifstream buffer("/proc/self/statm");
    if (!(buffer >> virt >> resident))
      return 0;
    return resident*sysconf(_SC_PAGE_SIZE);
  }
  503. }
  504. #endif
  505. ////////////////////////////////////////////////////////////////////////////////
  506. /// FreeBSD Platform
  507. ////////////////////////////////////////////////////////////////////////////////
  508. #if defined (__FreeBSD__)
  509. #include <sys/sysctl.h>
  510. namespace embree
  511. {
  512. std::string getExecutableFileName()
  513. {
  514. const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
  515. char buf[4096];
  516. memset(buf,0,sizeof(buf));
  517. size_t len = sizeof(buf)-1;
  518. if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
  519. return std::string();
  520. return std::string(buf);
  521. }
  /* Not implemented in this platform section: always reports 0 bytes. */
  size_t getVirtualMemoryBytes()
  {
    return size_t(0);
  }
  /* Not implemented in this platform section: always reports 0 bytes. */
  size_t getResidentMemoryBytes()
  {
    return size_t(0);
  }
  528. }
  529. #endif
  530. ////////////////////////////////////////////////////////////////////////////////
  531. /// Mac OS X Platform
  532. ////////////////////////////////////////////////////////////////////////////////
  533. #if defined(__MACOSX__)
  534. #include <mach-o/dyld.h>
  535. namespace embree
  536. {
  537. std::string getExecutableFileName()
  538. {
  539. char buf[4096];
  540. uint32_t size = sizeof(buf);
  541. if (_NSGetExecutablePath(buf, &size) != 0)
  542. return std::string();
  543. return std::string(buf);
  544. }
  /* Not implemented in this platform section: always reports 0 bytes. */
  size_t getVirtualMemoryBytes()
  {
    return size_t(0);
  }
  /* Not implemented in this platform section: always reports 0 bytes. */
  size_t getResidentMemoryBytes()
  {
    return size_t(0);
  }
  551. }
  552. #endif
  553. ////////////////////////////////////////////////////////////////////////////////
  554. /// Unix Platform
  555. ////////////////////////////////////////////////////////////////////////////////
  556. #if defined(__UNIX__)
  557. #include <unistd.h>
  558. #include <sys/ioctl.h>
  559. #include <sys/time.h>
  560. #include <pthread.h>
  561. namespace embree
  562. {
  /* Returns the number of logical processors available to this process.
   * The value is determined on the first call and cached in a
   * function-local static.
   *
   * Except on Mac OS X and Android, the CPU affinity mask of the calling
   * thread is counted first. If that is not possible, the number of online
   * processors is queried through sysconf. The result is never smaller
   * than 1; previously a failed affinity query left the cache at -1, which
   * was then returned converted to unsigned. */
  unsigned int getNumberOfLogicalThreads()
  {
    static int nThreads = -1;
    if (nThreads != -1) return nThreads;

    long count = -1;
#if !defined(__MACOSX__) && !defined(__ANDROID__)
    cpu_set_t set;
    if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
      count = CPU_COUNT(&set);
#endif
    if (count < 1)
      count = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
    if (count < 1)
      count = 1; // last resort: there is at least the processor we run on

    nThreads = int(count);
    assert(nThreads);
    return nThreads;
  }
  /* Returns the width, in character columns, of the terminal attached to
   * standard output. 80 is returned if the window size cannot be queried
   * or if the terminal reports a width of 0 columns. */
  int getTerminalWidth()
  {
    const int fallback_width = 80;

    struct winsize info;
    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0)
      return fallback_width;
    if (info.ws_col == 0)
      return fallback_width;

    return info.ws_col;
  }
  /* Returns the time reported by gettimeofday as seconds, with the
   * microsecond part as fraction. */
  double getSeconds()
  {
    struct timeval now;
    gettimeofday(&now,nullptr);

    const double whole_seconds = double(now.tv_sec);
    const double microseconds  = double(now.tv_usec);
    return whole_seconds + microseconds/1E6;
  }
  /* Suspends the calling thread for 't' seconds.
   *
   * Zero, negative and NaN durations return immediately; converting such
   * a value to the unsigned argument of usleep would not be well defined.
   * Durations of one second or more are slept in pieces below one million
   * microseconds, since usleep is allowed to reject larger arguments. */
  void sleepSeconds(double t)
  {
    if (!(t > 0.0))
      return;

    const double chunk = 999999.0; // largest argument every usleep accepts
    double remaining = 1000000.0*t;
    while (remaining > chunk) {
      usleep(useconds_t(chunk));
      remaining -= chunk;
    }
    usleep(useconds_t(remaining));
  }
  591. }
  592. #endif