sysinfo.cpp 25 KB


  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #if defined(__INTEL_LLVM_COMPILER)
  4. // prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
  5. #pragma clang diagnostic push
  6. #pragma clang diagnostic ignored "-Wignored-attributes"
  7. #endif
  8. #include "sysinfo.h"
  9. #include "intrinsics.h"
  10. #include "estring.h"
  11. #include "ref.h"
  12. #if defined(__FREEBSD__)
  13. #include <sys/cpuset.h>
  14. #include <pthread_np.h>
  15. typedef cpuset_t cpu_set_t;
  16. #endif
  17. ////////////////////////////////////////////////////////////////////////////////
  18. /// All Platforms
  19. ////////////////////////////////////////////////////////////////////////////////
  20. namespace embree
  21. {
  /* Definition of the namespace-scope object `null` of type NullTy.
     NullTy itself is not defined in this file. */
  22. NullTy null;
  /*! Returns a human-readable name of the platform this file was compiled
   *  for, e.g. "Linux (64bit)". The answer is chosen at compile time from
   *  the platform macros (__ANDROID__, __LINUX__, __FREEBSD__, __CYGWIN__,
   *  __WIN32__, __MACOSX__, __UNIX__) combined with __64BIT__; "Unknown" is
   *  returned when none of the platform macros is defined. */
  std::string getPlatformName()
  {
    /* bitness suffix shared by every known platform */
#if defined(__64BIT__)
    const char* const bitness = " (64bit)";
#else
    const char* const bitness = " (32bit)";
#endif

    /* the first platform macro that is defined wins; keep this order */
#if defined(__ANDROID__)
    return std::string("Android") + bitness;
#elif defined(__LINUX__)
    return std::string("Linux") + bitness;
#elif defined(__FREEBSD__)
    return std::string("FreeBSD") + bitness;
#elif defined(__CYGWIN__)
    return std::string("Cygwin") + bitness;
#elif defined(__WIN32__)
    return std::string("Windows") + bitness;
#elif defined(__MACOSX__)
    return std::string("Mac OS X") + bitness;
#elif defined(__UNIX__)
    return std::string("Unix") + bitness;
#else
    (void)bitness; // not needed when the platform is unknown
    return "Unknown";
#endif
  }
  /*! Returns the name and version of the compiler that built this file.
   *  The Intel compiler is checked first, then Clang, GCC and Visual C++;
   *  "Unknown Compiler" is returned if none of their macros is defined. */
  std::string getCompilerName()
  {
#if defined(__INTEL_COMPILER)
    /* __INTEL_COMPILER is split into two 2-digit decimal fields */
    const int iccMajor = __INTEL_COMPILER / 100 % 100;
    const int iccMinor = __INTEL_COMPILER % 100;
    std::string name = "Intel Compiler ";
    name += toString(iccMajor);
    name += "." + toString(iccMinor);
#if defined(__INTEL_COMPILER_UPDATE)
    name += "." + toString(__INTEL_COMPILER_UPDATE);
#endif
    return name;
#elif defined(__clang__)
    /* adjacent string literals are concatenated by the compiler */
    return "CLANG " __clang_version__;
#elif defined (__GNUC__)
    return "GCC " __VERSION__;
#elif defined(_MSC_VER)
    /* dots are inserted into the decimal digits of _MSC_FULL_VER; the
       insertion order matters because every insert shifts later positions */
    std::string digits = toString(_MSC_FULL_VER);
    digits.insert(4,".");
    digits.insert(9,".");
    digits.insert(2,".");
    return "Visual C++ Compiler " + digits;
#else
    return "Unknown Compiler";
#endif
  }
  /*! Returns the CPU vendor string. On x86 builds (__X86_ASM__) it is read
   *  from CPUID leaf 0; builds with __ARM_NEON report "ARM"; everything
   *  else reports "Unknown". */
  std::string getCPUVendor()
  {
#if defined(__X86_ASM__)
    int regs[4];
    __cpuid (regs, 0);
    /* the vendor text is taken from registers 1, 3 and 2 (in that order);
       the trailing zero word terminates the C string */
    const int vendor[4] = { regs[1], regs[3], regs[2], 0 };
    return reinterpret_cast<const char*>(vendor);
#elif defined(__ARM_NEON)
    return "ARM";
#else
    return "Unknown";
#endif
  }
  100. CPU getCPUModel()
  101. {
  102. #if defined(__X86_ASM__)
  103. if (getCPUVendor() != "GenuineIntel")
  104. return CPU::UNKNOWN;
  105. int out[4];
  106. __cpuid(out, 0);
  107. if (out[0] < 1) return CPU::UNKNOWN;
  108. __cpuid(out, 1);
  109. /* please see CPUID documentation for these formulas */
  110. uint32_t family_ID = (out[0] >> 8) & 0x0F;
  111. uint32_t extended_family_ID = (out[0] >> 20) & 0xFF;
  112. uint32_t model_ID = (out[0] >> 4) & 0x0F;
  113. uint32_t extended_model_ID = (out[0] >> 16) & 0x0F;
  114. uint32_t DisplayFamily = family_ID;
  115. if (family_ID == 0x0F)
  116. DisplayFamily += extended_family_ID;
  117. uint32_t DisplayModel = model_ID;
  118. if (family_ID == 0x06 || family_ID == 0x0F)
  119. DisplayModel += extended_model_ID << 4;
  120. uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);
  121. // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
  122. if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
  123. if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
  124. if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
  125. if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE;
  126. if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE;
  127. if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE;
  128. if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE;
  129. if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE;
  130. if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE;
  131. if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE;
  132. if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE;
  133. if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE;
  134. if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE;
  135. if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL;
  136. if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL;
  137. if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL;
  138. if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL;
  139. if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL;
  140. if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL;
  141. if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL;
  142. if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL;
  143. if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE;
  144. if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE;
  145. if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE;
  146. if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE;
  147. if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE;
  148. if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM;
  149. if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM;
  150. if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM;
  151. if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM;
  152. if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM;
  153. if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM;
  154. if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM;
  155. if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2;
  156. if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2;
  157. if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1;
  158. if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL;
  159. if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING;
  160. #elif defined(__ARM_NEON)
  161. return CPU::ARM;
  162. #endif
  163. return CPU::UNKNOWN;
  164. }
  165. std::string stringOfCPUModel(CPU model)
  166. {
  167. switch (model) {
  168. case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake";
  169. case CPU::CORE_ICE_LAKE : return "Core Ice Lake";
  170. case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake";
  171. case CPU::CORE_COMET_LAKE : return "Core Comet Lake";
  172. case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake";
  173. case CPU::CORE_KABY_LAKE : return "Core Kaby Lake";
  174. case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake";
  175. case CPU::CORE_SKY_LAKE : return "Core Sky Lake";
  176. case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill";
  177. case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing";
  178. case CPU::XEON_BROADWELL : return "Xeon Broadwell";
  179. case CPU::CORE_BROADWELL : return "Core Broadwell";
  180. case CPU::XEON_HASWELL : return "Xeon Haswell";
  181. case CPU::CORE_HASWELL : return "Core Haswell";
  182. case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge";
  183. case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge";
  184. case CPU::SANDY_BRIDGE : return "Sandy Bridge";
  185. case CPU::NEHALEM : return "Nehalem";
  186. case CPU::CORE2 : return "Core2";
  187. case CPU::CORE1 : return "Core";
  188. case CPU::ARM : return "ARM";
  189. case CPU::UNKNOWN : return "Unknown CPU";
  190. }
  191. return "Unknown CPU (error)";
  192. }
  193. #if defined(__X86_ASM__)
  /* indices of the four CPUID result registers inside an int[4] result */
  static const int EAX = 0;
  static const int EBX = 1;
  static const int ECX = 2;
  static const int EDX = 3;

  /* feature bits of cpuid[eax=1].ecx, lowest bit first */
  static const int CPU_FEATURE_BIT_SSE3   = 1 << 0;
  static const int CPU_FEATURE_BIT_SSSE3  = 1 << 9;
  static const int CPU_FEATURE_BIT_FMA3   = 1 << 12;
  static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19;
  static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20;
  //static const int CPU_FEATURE_BIT_MOVBE  = 1 << 22;
  static const int CPU_FEATURE_BIT_POPCNT = 1 << 23;
  //static const int CPU_FEATURE_BIT_XSAVE  = 1 << 26;
  static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27;
  static const int CPU_FEATURE_BIT_AVX    = 1 << 28;
  static const int CPU_FEATURE_BIT_F16C   = 1 << 29;
  static const int CPU_FEATURE_BIT_RDRAND = 1 << 30;

  /* feature bits of cpuid[eax=1].edx */
  static const int CPU_FEATURE_BIT_SSE  = 1 << 25;
  static const int CPU_FEATURE_BIT_SSE2 = 1 << 26;

  /* feature bits of cpuid[eax=0x80000001].ecx */
  static const int CPU_FEATURE_BIT_LZCNT = 1 << 5;

  /* feature bits of cpuid[eax=7,ecx=0].ebx, lowest bit first */
  static const int CPU_FEATURE_BIT_BMI1       = 1 << 3;
  static const int CPU_FEATURE_BIT_AVX2       = 1 << 5;
  static const int CPU_FEATURE_BIT_BMI2       = 1 << 8;
  static const int CPU_FEATURE_BIT_AVX512F    = 1 << 16; // foundation
  static const int CPU_FEATURE_BIT_AVX512DQ   = 1 << 17; // doubleword and quadword instructions
  static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // integer fused multiply-add instructions
  static const int CPU_FEATURE_BIT_AVX512PF   = 1 << 26; // prefetch gather/scatter instructions
  static const int CPU_FEATURE_BIT_AVX512ER   = 1 << 27; // exponential and reciprocal instructions
  static const int CPU_FEATURE_BIT_AVX512CD   = 1 << 28; // conflict detection instructions
  static const int CPU_FEATURE_BIT_AVX512BW   = 1 << 30; // byte and word instructions
  static const int CPU_FEATURE_BIT_AVX512VL   = 1 << 31; // vector length extensions

  /* feature bits of cpuid[eax=7,ecx=0].ecx */
  static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // vector bit manipulation instructions
  231. #endif
  232. #if defined(__X86_ASM__)
  233. __noinline int64_t get_xcr0()
  234. {
  235. #if defined (__WIN32__) && !defined (__MINGW32__) && defined(_XCR_XFEATURE_ENABLED_MASK)
  236. int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
  237. xcr0 = _xgetbv(0);
  238. return xcr0;
  239. #else
  240. int xcr0 = 0;
  241. __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
  242. return xcr0;
  243. #endif
  244. }
  245. #endif
  /*! Detects the feature set of the CPU as a bit mask of CPU_FEATURE_* flags.
   *
   *  x86 builds (__X86_ASM__) query CPUID leaves 1, 7 and 0x80000001 and,
   *  when OSXSAVE is reported, XCR0 to find out which register states the
   *  OS has enabled. The result is cached after the first call.
   *  Builds with __ARM_NEON or __EMSCRIPTEN__ return a fixed mask, and all
   *  other builds return 0.
   *
   *  \return bitwise OR of the detected CPU_FEATURE_* flags */
  int getCPUFeatures()
  {
#if defined(__X86_ASM__)
    /* cache CPU features access */
    static int cpu_features = 0;
    if (cpu_features)
      return cpu_features;

    /* Assemble the mask in a local and publish it with a single store at
       the end. Accumulating directly into the cached static would let a
       concurrent caller pass the check above and return a partial mask. */
    int features = 0;

    /* get number of CPUID leaves */
    int cpuid_leaf0[4];
    __cpuid(cpuid_leaf0, 0x00000000);
    unsigned nIds = cpuid_leaf0[EAX];

    /* get number of extended CPUID leaves */
    int cpuid_leafe[4];
    __cpuid(cpuid_leafe, 0x80000000);
    unsigned nExIds = cpuid_leafe[EAX];

    /* get CPUID leaves for EAX = 1,7, and 0x80000001 */
    int cpuid_leaf_1[4] = { 0,0,0,0 };
    int cpuid_leaf_7[4] = { 0,0,0,0 };
    int cpuid_leaf_e1[4] = { 0,0,0,0 };
    if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
    /* NOTE(review): with _WIN32 set and _MSC_VER undefined, neither branch
       below queries leaf 7, so its features stay undetected - confirm
       whether that is intended for such toolchains */
#if _WIN32
#if _MSC_VER && (_MSC_FULL_VER < 160040219)
#elif defined(_MSC_VER)
    if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
#endif
#else
    if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
#endif
    if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);

    /* detect if OS saves XMM, YMM, and ZMM states */
    bool xmm_enabled = true;
    bool ymm_enabled = false;
    bool zmm_enabled = false;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
      int64_t xcr0 = get_xcr0();
      xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */
      ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
      zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
    }
    if (xmm_enabled) features |= CPU_FEATURE_XMM_ENABLED;
    if (ymm_enabled) features |= CPU_FEATURE_YMM_ENABLED;
    if (zmm_enabled) features |= CPU_FEATURE_ZMM_ENABLED;

    if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE   ) features |= CPU_FEATURE_SSE;
    if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2  ) features |= CPU_FEATURE_SSE2;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3  ) features |= CPU_FEATURE_SSE3;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) features |= CPU_FEATURE_SSSE3;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) features |= CPU_FEATURE_SSE41;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) features |= CPU_FEATURE_SSE42;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) features |= CPU_FEATURE_POPCNT;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX   ) features |= CPU_FEATURE_AVX;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C  ) features |= CPU_FEATURE_F16C;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) features |= CPU_FEATURE_RDRAND;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2  ) features |= CPU_FEATURE_AVX2;
    if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3  ) features |= CPU_FEATURE_FMA3;
    if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) features |= CPU_FEATURE_LZCNT;
    if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) features |= CPU_FEATURE_BMI1;
    if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) features |= CPU_FEATURE_BMI2;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F   ) features |= CPU_FEATURE_AVX512F;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ  ) features |= CPU_FEATURE_AVX512DQ;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF  ) features |= CPU_FEATURE_AVX512PF;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER  ) features |= CPU_FEATURE_AVX512ER;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD  ) features |= CPU_FEATURE_AVX512CD;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW  ) features |= CPU_FEATURE_AVX512BW;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) features |= CPU_FEATURE_AVX512IFMA;
    if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL  ) features |= CPU_FEATURE_AVX512VL;
    if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) features |= CPU_FEATURE_AVX512VBMI;

    /* publish the complete mask in one store */
    cpu_features = features;
    return features;

#elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)
    /* fixed mask for these builds; the x86 feature flags are set as well */
    int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2;
    cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42;
    cpu_features |= CPU_FEATURE_XMM_ENABLED;
    cpu_features |= CPU_FEATURE_YMM_ENABLED;
    cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C;
    cpu_features |= CPU_FEATURE_POPCNT;
    cpu_features |= CPU_FEATURE_AVX;
    cpu_features |= CPU_FEATURE_AVX2;
    cpu_features |= CPU_FEATURE_FMA3;
    cpu_features |= CPU_FEATURE_LZCNT;
    cpu_features |= CPU_FEATURE_BMI1;
    cpu_features |= CPU_FEATURE_BMI2;
    cpu_features |= CPU_FEATURE_NEON_2X;
    return cpu_features;

#else
    /* Unknown CPU. */
    return 0;
#endif
  }
  333. std::string stringOfCPUFeatures(int features)
  334. {
  335. std::string str;
  336. if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
  337. if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM ";
  338. if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM ";
  339. if (features & CPU_FEATURE_SSE ) str += "SSE ";
  340. if (features & CPU_FEATURE_SSE2 ) str += "SSE2 ";
  341. if (features & CPU_FEATURE_SSE3 ) str += "SSE3 ";
  342. if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 ";
  343. if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 ";
  344. if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 ";
  345. if (features & CPU_FEATURE_POPCNT) str += "POPCNT ";
  346. if (features & CPU_FEATURE_AVX ) str += "AVX ";
  347. if (features & CPU_FEATURE_F16C ) str += "F16C ";
  348. if (features & CPU_FEATURE_RDRAND) str += "RDRAND ";
  349. if (features & CPU_FEATURE_AVX2 ) str += "AVX2 ";
  350. if (features & CPU_FEATURE_FMA3 ) str += "FMA3 ";
  351. if (features & CPU_FEATURE_LZCNT ) str += "LZCNT ";
  352. if (features & CPU_FEATURE_BMI1 ) str += "BMI1 ";
  353. if (features & CPU_FEATURE_BMI2 ) str += "BMI2 ";
  354. if (features & CPU_FEATURE_AVX512F) str += "AVX512F ";
  355. if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ ";
  356. if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF ";
  357. if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER ";
  358. if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD ";
  359. if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW ";
  360. if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
  361. if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
  362. if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
  363. if (features & CPU_FEATURE_NEON) str += "NEON ";
  364. if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
  365. return str;
  366. }
  367. std::string stringOfISA (int isa)
  368. {
  369. if (isa == SSE) return "SSE";
  370. if (isa == SSE2) return "SSE2";
  371. if (isa == SSE3) return "SSE3";
  372. if (isa == SSSE3) return "SSSE3";
  373. if (isa == SSE41) return "SSE4.1";
  374. if (isa == SSE42) return "SSE4.2";
  375. if (isa == AVX) return "AVX";
  376. if (isa == AVX2) return "AVX2";
  377. if (isa == AVX512) return "AVX512";
  378. if (isa == NEON) return "NEON";
  379. if (isa == NEON_2X) return "2xNEON";
  380. return "UNKNOWN";
  381. }
  /*! Tests whether a feature mask covers an ISA.
   *  \param features  available feature bits
   *  \param isa       required feature bits
   *  \return true if every bit set in \a isa is also set in \a features */
  bool hasISA(int features, int isa) {
    /* bits the ISA requires but the feature mask lacks */
    const int missing = isa & ~features;
    return missing == 0;
  }
  385. std::string supportedTargetList (int features)
  386. {
  387. std::string v;
  388. if (hasISA(features,SSE)) v += "SSE ";
  389. if (hasISA(features,SSE2)) v += "SSE2 ";
  390. if (hasISA(features,SSE3)) v += "SSE3 ";
  391. if (hasISA(features,SSSE3)) v += "SSSE3 ";
  392. if (hasISA(features,SSE41)) v += "SSE4.1 ";
  393. if (hasISA(features,SSE42)) v += "SSE4.2 ";
  394. if (hasISA(features,AVX)) v += "AVX ";
  395. if (hasISA(features,AVXI)) v += "AVXI ";
  396. if (hasISA(features,AVX2)) v += "AVX2 ";
  397. if (hasISA(features,AVX512)) v += "AVX512 ";
  398. if (hasISA(features,NEON)) v += "NEON ";
  399. if (hasISA(features,NEON_2X)) v += "2xNEON ";
  400. return v;
  401. }
  402. }
  403. ////////////////////////////////////////////////////////////////////////////////
  404. /// Windows Platform
  405. ////////////////////////////////////////////////////////////////////////////////
  406. #if defined(__WIN32__)
  407. #define WIN32_LEAN_AND_MEAN
  408. #include <windows.h>
  409. #include <psapi.h>
  410. namespace embree
  411. {
  412. std::string getExecutableFileName() {
  413. char filename[1024];
  414. if (!GetModuleFileName(nullptr, filename, sizeof(filename)))
  415. return std::string();
  416. return std::string(filename);
  417. }
  418. unsigned int getNumberOfLogicalThreads()
  419. {
  420. static int nThreads = -1;
  421. if (nThreads != -1) return nThreads;
  422. typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  423. typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  424. HMODULE hlib = LoadLibrary("Kernel32");
  425. GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  426. GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount");
  427. if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount)
  428. {
  429. int groups = pGetActiveProcessorGroupCount();
  430. int totalProcessors = 0;
  431. for (int i = 0; i < groups; i++)
  432. totalProcessors += pGetActiveProcessorCount(i);
  433. nThreads = totalProcessors;
  434. }
  435. else
  436. {
  437. SYSTEM_INFO sysinfo;
  438. GetSystemInfo(&sysinfo);
  439. nThreads = sysinfo.dwNumberOfProcessors;
  440. }
  441. assert(nThreads);
  442. return nThreads;
  443. }
  444. int getTerminalWidth()
  445. {
  446. HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);
  447. if (handle == INVALID_HANDLE_VALUE) return 80;
  448. CONSOLE_SCREEN_BUFFER_INFO info;
  449. memset(&info,0,sizeof(info));
  450. GetConsoleScreenBufferInfo(handle, &info);
  451. return info.dwSize.X;
  452. }
  453. double getSeconds()
  454. {
  455. LARGE_INTEGER freq, val;
  456. QueryPerformanceFrequency(&freq);
  457. QueryPerformanceCounter(&val);
  458. return (double)val.QuadPart / (double)freq.QuadPart;
  459. }
  460. void sleepSeconds(double t) {
  461. Sleep(DWORD(1000.0*t));
  462. }
  463. size_t getVirtualMemoryBytes()
  464. {
  465. PROCESS_MEMORY_COUNTERS info;
  466. GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
  467. return (size_t)info.QuotaPeakPagedPoolUsage;
  468. }
  469. size_t getResidentMemoryBytes()
  470. {
  471. PROCESS_MEMORY_COUNTERS info;
  472. GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
  473. return (size_t)info.WorkingSetSize;
  474. }
  475. }
  476. #endif
  477. ////////////////////////////////////////////////////////////////////////////////
  478. /// Linux Platform
  479. ////////////////////////////////////////////////////////////////////////////////
  480. #if defined(__LINUX__)
  481. #include <stdio.h>
  482. #include <unistd.h>
  483. namespace embree
  484. {
  485. std::string getExecutableFileName()
  486. {
  487. std::string pid = "/proc/" + toString(getpid()) + "/exe";
  488. char buf[4096];
  489. memset(buf,0,sizeof(buf));
  490. if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1)
  491. return std::string();
  492. return std::string(buf);
  493. }
  /*! Returns the virtual memory size of the current process in bytes.
   *  The first field of /proc/self/statm is multiplied by the page size.
   *  \return the size in bytes, or 0 if the file cannot be read or the page
   *          size cannot be determined */
  size_t getVirtualMemoryBytes()
  {
    /* initialized so that a failed open or parse can never lead to reading
       an indeterminate value */
    size_t virt = 0;
    std::ifstream buffer("/proc/self/statm");
    if (!(buffer >> virt))
      return 0;

    /* sysconf() reports errors with -1, which must not be multiplied in */
    const long pageSize = sysconf(_SC_PAGE_SIZE);
    if (pageSize <= 0)
      return 0;
    return virt*size_t(pageSize);
  }
  /*! Returns the resident memory size of the current process in bytes.
   *  The second field of /proc/self/statm is multiplied by the page size.
   *  \return the size in bytes, or 0 if the file cannot be read or the page
   *          size cannot be determined */
  size_t getResidentMemoryBytes()
  {
    /* initialized so that a failed open or parse can never lead to reading
       an indeterminate value */
    size_t virt = 0;
    size_t resident = 0;
    std::ifstream buffer("/proc/self/statm");
    if (!(buffer >> virt >> resident))
      return 0;

    /* sysconf() reports errors with -1, which must not be multiplied in */
    const long pageSize = sysconf(_SC_PAGE_SIZE);
    if (pageSize <= 0)
      return 0;
    return resident*size_t(pageSize);
  }
  508. }
  509. #endif
  510. ////////////////////////////////////////////////////////////////////////////////
  511. /// FreeBSD Platform
  512. ////////////////////////////////////////////////////////////////////////////////
  513. #if defined (__FreeBSD__)
  514. #include <sys/sysctl.h>
  515. namespace embree
  516. {
  517. std::string getExecutableFileName()
  518. {
  519. const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
  520. char buf[4096];
  521. memset(buf,0,sizeof(buf));
  522. size_t len = sizeof(buf)-1;
  523. if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
  524. return std::string();
  525. return std::string(buf);
  526. }
  /*! Virtual memory usage is not queried in this platform section; the
   *  function always reports 0 bytes. */
  size_t getVirtualMemoryBytes() {
    const size_t notAvailable = 0;
    return notAvailable;
  }
  /*! Resident memory usage is not queried in this platform section; the
   *  function always reports 0 bytes. */
  size_t getResidentMemoryBytes() {
    const size_t notAvailable = 0;
    return notAvailable;
  }
  533. }
  534. #endif
  535. ////////////////////////////////////////////////////////////////////////////////
  536. /// Mac OS X Platform
  537. ////////////////////////////////////////////////////////////////////////////////
  538. #if defined(__MACOSX__)
  539. #include <mach-o/dyld.h>
  540. namespace embree
  541. {
  542. std::string getExecutableFileName()
  543. {
  544. char buf[4096];
  545. uint32_t size = sizeof(buf);
  546. if (_NSGetExecutablePath(buf, &size) != 0)
  547. return std::string();
  548. return std::string(buf);
  549. }
  /*! Virtual memory usage is not queried in this platform section; the
   *  function always reports 0 bytes. */
  size_t getVirtualMemoryBytes() {
    const size_t notAvailable = 0;
    return notAvailable;
  }
  /*! Resident memory usage is not queried in this platform section; the
   *  function always reports 0 bytes. */
  size_t getResidentMemoryBytes() {
    const size_t notAvailable = 0;
    return notAvailable;
  }
  556. }
  557. #endif
  558. ////////////////////////////////////////////////////////////////////////////////
  559. /// Unix Platform
  560. ////////////////////////////////////////////////////////////////////////////////
  561. #if defined(__UNIX__)
  562. #include <unistd.h>
  563. #include <sys/ioctl.h>
  564. #include <sys/time.h>
  565. #include <pthread.h>
  566. #if defined(__EMSCRIPTEN__)
  567. #include <emscripten.h>
  568. extern "C" {
  569. extern int godot_js_os_hw_concurrency_get();
  570. }
  571. #endif
  572. namespace embree
  573. {
  /*! Returns the number of logical threads available to this process.
   *
   *  macOS and Android builds ask sysconf(_SC_NPROCESSORS_ONLN), Emscripten
   *  builds ask godot_js_os_hw_concurrency_get(), and all other builds
   *  count the CPUs in the affinity mask of the calling thread. The value
   *  is computed on the first call and cached in a function-local static.
   *
   *  \return the thread count, always at least 1 */
  unsigned int getNumberOfLogicalThreads()
  {
    static int nThreads = -1;
    if (nThreads != -1) return nThreads;

    int count = -1;

#if defined(__MACOSX__) || defined(__ANDROID__)
    count = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
#elif defined(__EMSCRIPTEN__)
    count = godot_js_os_hw_concurrency_get();
#if 0
    // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
    // threads from the browser or Node.js using JavaScript.
    count = MAIN_THREAD_EM_ASM_INT({
        const isBrowser = typeof window !== 'undefined';
        const isNode = typeof process !== 'undefined' && process.versions != null &&
                       process.versions.node != null;
        if (isBrowser) {
          // Return 1 if the browser does not expose hardwareConcurrency.
          return window.navigator.hardwareConcurrency || 1;
        } else if (isNode) {
          return require('os').cpus().length;
        } else {
          return 1;
        }
    });
#endif
#else
    cpu_set_t set;
    if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
      count = CPU_COUNT(&set);
    else
      count = sysconf(_SC_NPROCESSORS_ONLN); // affinity query failed: use the online CPU count
#endif

    /* Never cache or return a non-positive value. A failed affinity query
       used to leave the -1 sentinel in place, which passed the assert and
       was returned as a huge unsigned number; 0 or negative results of the
       other queries are clamped as well. */
    if (count < 1)
      count = 1;

    nThreads = count;
    assert(nThreads > 0);
    return nThreads;
  }
  /*! Returns the width in columns of the terminal attached to standard
   *  output, queried with the TIOCGWINSZ ioctl.
   *  \return the reported column count, or 80 if the ioctl fails */
  int getTerminalWidth()
  {
    struct winsize size;
    const bool queried = !(ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) < 0);
    if (queried)
      return size.ws_col;
    return 80;
  }
  /*! Returns the current time of day from gettimeofday() as seconds,
   *  including the microsecond part as a fraction. */
  double getSeconds() {
    struct timeval now;
    gettimeofday(&now,nullptr);

    const double wholeSeconds = double(now.tv_sec);
    const double microSeconds = double(now.tv_usec);
    return wholeSeconds + microSeconds/1E6;
  }
  /*! Suspends the calling thread.
   *  \param t  duration in seconds; values that are zero, negative or NaN
   *            return immediately
   *
   *  The sleep is a single nanosleep() call and is not restarted when it
   *  is interrupted, so the function can return early. */
  void sleepSeconds(double t) {
    /* written as a negated comparison so that NaN is rejected as well */
    if (!(t > 0.0))
      return;

    /* nanosleep() takes whole seconds and nanoseconds separately. This
       avoids usleep(), whose unsigned microsecond argument makes the
       conversion from a negative or very large double undefined and which
       POSIX allows to reject values of 1,000,000 or more. */
    struct timespec request;
    request.tv_sec = static_cast<time_t>(t);

    long nanos = static_cast<long>((t - double(request.tv_sec)) * 1E9);
    if (nanos < 0) nanos = 0;
    if (nanos > 999999999L) nanos = 999999999L; // tv_nsec must stay below one second
    request.tv_nsec = nanos;

    nanosleep(&request, nullptr);
  }
  621. }
  622. #endif
  623. #if defined(__INTEL_LLVM_COMPILER)
  624. #pragma clang diagnostic pop
  625. #endif