cpu_intel.odin 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. #+build i386, amd64
  2. package sysinfo
  3. import "base:intrinsics"
  // CPU_Feature lists the x86 instruction-set extensions this package can report.
  //
  // The declaration order determines each member's bit in `CPU_Features`,
  // so members must not be reordered.
  CPU_Feature :: enum u64 {
      // General-purpose, crypto and legacy SIMD extensions.
      aes,       // AES hardware implementation (AES-NI)
      adx,       // Multi-precision add-carry instruction extensions
      avx,       // Advanced Vector Extensions
      avx2,      // Advanced Vector Extensions 2
      bmi1,      // Bit manipulation instruction set 1
      bmi2,      // Bit manipulation instruction set 2
      erms,      // Enhanced REP for MOVSB and STOSB
      fma,       // Fused multiply-add instructions
      os_xsave,  // OS supports XSAVE/XRESTOR for saving/restoring XMM registers
      pclmulqdq, // PCLMULQDQ instruction - most often used for AES-GCM
      popcnt,    // POPCNT instruction (Hamming weight)
      rdrand,    // RDRAND instruction (on-chip random number generator)
      rdseed,    // RDSEED instruction (on-chip random number generator)
      sha,       // SHA extensions (SHA-1, SHA-224, SHA-256)
      sse2,      // Streaming SIMD Extensions 2 (always available on amd64)
      sse3,      // Streaming SIMD Extensions 3
      ssse3,     // Supplemental Streaming SIMD Extensions 3
      sse41,     // Streaming SIMD Extensions 4 and 4.1
      sse42,     // Streaming SIMD Extensions 4 and 4.2

      // AVX-512 family.
      avx512bf16,         // Vector Neural Network Instructions supporting bfloat16
      avx512bitalg,       // Bit Algorithms
      avx512bw,           // Byte and Word instructions
      avx512cd,           // Conflict Detection instructions
      avx512dq,           // Doubleword and Quadword instructions
      avx512er,           // Exponential and Reciprocal instructions
      avx512f,            // Foundation
      avx512fp16,         // Vector 16-bit float instructions
      avx512ifma,         // Integer Fused Multiply Add
      avx512pf,           // Prefetch instructions
      avx512vbmi,         // Vector Byte Manipulation Instructions
      avx512vbmi2,        // Vector Byte Manipulation Instructions 2
      avx512vl,           // Vector Length extensions
      avx512vnni,         // Vector Neural Network Instructions
      avx512vp2intersect, // Vector Pair Intersection to a Pair of Mask Registers
      avx512vpopcntdq,    // Vector Population Count for Doubleword and Quadword
  }
  // CPU_Features is a set of `CPU_Feature` flags packed into a single u64.
  CPU_Features :: distinct bit_set[CPU_Feature; u64]

  // CPU gathers what this package knows about the host processor.
  CPU :: struct {
      // Processor name; nil until it has been determined.
      name: Maybe(string),

      // Detected instruction-set extensions; nil until they have been determined.
      features: Maybe(CPU_Features),

      // Core counts. Initialized by cpu_<os>.odin
      physical_cores, logical_cores: int,
  }

  // Package-level record describing the processor the program is running on.
  cpu: CPU
  // init_cpu_features runs at program initialization and fills in `cpu.features`
  // from the CPUID instruction (plus XGETBV where it is safe to execute).
  //
  // `cpu.features` stays nil only when CPUID reports that leaf 1 is unavailable;
  // in every other case whatever was detected is published, even if the probe
  // has to stop early.
  @(init, private)
  init_cpu_features :: proc "contextless" () {
      // Reports whether bit number `bit` of `value` is 1.
      is_set :: #force_inline proc "c" (bit: u32, value: u32) -> bool {
          return (value>>bit) & 0x1 != 0
      }
      // Adds `feature` to `set` when bit number `bit` of `value` is 1.
      try_set :: #force_inline proc "c" (set: ^CPU_Features, feature: CPU_Feature, bit: u32, value: u32) {
          if is_set(bit, value) {
              set^ += {feature}
          }
      }

      // Leaf 0 returns the highest supported basic leaf in EAX.
      max_id, _, _, _ := cpuid(0, 0)
      if max_id < 1 {
          return
      }

      set: CPU_Features

      // Leaf 1: baseline feature flags in ECX/EDX.
      _, _, ecx1, edx1 := cpuid(1, 0)

      try_set(&set, .sse2,      26, edx1)
      try_set(&set, .sse3,       0, ecx1)
      try_set(&set, .pclmulqdq,  1, ecx1)
      try_set(&set, .ssse3,      9, ecx1)
      try_set(&set, .fma,       12, ecx1)
      try_set(&set, .sse41,     19, ecx1)
      try_set(&set, .sse42,     20, ecx1)
      try_set(&set, .popcnt,    23, ecx1)
      try_set(&set, .aes,       25, ecx1)
      try_set(&set, .os_xsave,  27, ecx1)
      try_set(&set, .rdrand,    30, ecx1)

      when ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
          // xgetbv is an illegal instruction under FreeBSD 13, OpenBSD 7.1 and NetBSD 10
          // return before probing further
          cpu.features = set
          return
      }

      // In certain rare cases (reason unknown), XGETBV generates an
      // illegal instruction, even if OSXSAVE is set per CPUID.
      //
      // When Chrome ran into this problem, the problem went away
      // after they started checking both OSXSAVE and XSAVE.
      //
      // See: crbug.com/375968
      os_supports_avx    := false
      os_supports_avx512 := false
      if .os_xsave in set && is_set(26, ecx1) { // ECX bit 26 is the XSAVE flag
          // XCR0 tells us which register state the OS actually saves/restores.
          eax, _ := xgetbv(0)
          // Bits 1 and 2: XMM and YMM state.
          os_supports_avx = is_set(1, eax) && is_set(2, eax)
          // Bits 5, 6 and 7: the additional state AVX-512 needs (opmask and ZMM).
          os_supports_avx512 = is_set(5, eax) && is_set(6, eax) && is_set(7, eax)
      }

      if os_supports_avx {
          try_set(&set, .avx, 28, ecx1)
      }

      if max_id < 7 {
          // Leaf 7 is unavailable, but everything detected from leaf 1 is still
          // valid: publish it instead of silently discarding it.
          cpu.features = set
          return
      }

      // Leaf 7, sub-leaf 0: structured extended feature flags.
      _, ebx7, ecx7, edx7 := cpuid(7, 0)

      try_set(&set, .bmi1,  3, ebx7)
      try_set(&set, .sha,  29, ebx7)
      if os_supports_avx {
          try_set(&set, .avx2, 5, ebx7)
      }
      if os_supports_avx512 {
          try_set(&set, .avx512f,    16, ebx7)
          try_set(&set, .avx512dq,   17, ebx7)
          try_set(&set, .avx512ifma, 21, ebx7)
          try_set(&set, .avx512pf,   26, ebx7)
          try_set(&set, .avx512er,   27, ebx7)
          try_set(&set, .avx512cd,   28, ebx7)
          try_set(&set, .avx512bw,   30, ebx7)

          // XMM/YMM are also required for 128/256-bit instructions
          if os_supports_avx {
              try_set(&set, .avx512vl, 31, ebx7)
          }

          try_set(&set, .avx512vbmi,       1, ecx7)
          try_set(&set, .avx512vbmi2,      6, ecx7)
          try_set(&set, .avx512vnni,      11, ecx7)
          try_set(&set, .avx512bitalg,    12, ecx7)
          try_set(&set, .avx512vpopcntdq, 14, ecx7)

          try_set(&set, .avx512vp2intersect,  8, edx7)
          try_set(&set, .avx512fp16,         23, edx7)

          // Leaf 7, sub-leaf 1 carries the BF16 flag in EAX.
          eax7_1, _, _, _ := cpuid(7, 1)
          try_set(&set, .avx512bf16, 5, eax7_1)
      }

      try_set(&set, .bmi2,    8, ebx7)
      try_set(&set, .erms,    9, ebx7)
      try_set(&set, .rdseed, 18, ebx7)
      try_set(&set, .adx,    19, ebx7)

      cpu.features = set
  }
  // Static backing storage for the processor brand string; `cpu.name` is a
  // slice into this buffer, so it must outlive `init_cpu_name`.
  @(private)
  _cpu_name_buf: [72]u8

  // init_cpu_name runs at program initialization and stores the processor brand
  // string, read from CPUID leaves 0x8000_0002..0x8000_0004, in `cpu.name`.
  //
  // `cpu.name` is left nil when the CPU does not report those extended leaves.
  @(init, private)
  init_cpu_name :: proc "contextless" () {
      // Leaf 0x8000_0000 returns the highest supported extended leaf in EAX.
      number_of_extended_ids, _, _, _ := cpuid(0x8000_0000, 0)
      if number_of_extended_ids < 0x8000_0004 {
          return
      }

      // View the byte buffer as u32 words (0x12 words == 72 bytes) so each
      // CPUID result can be written directly; only the first 12 words are used.
      _buf := (^[0x12]u32)(&_cpu_name_buf)
      _buf[ 0], _buf[ 1], _buf[ 2], _buf[ 3] = cpuid(0x8000_0002, 0)
      _buf[ 4], _buf[ 5], _buf[ 6], _buf[ 7] = cpuid(0x8000_0003, 0)
      _buf[ 8], _buf[ 9], _buf[10], _buf[11] = cpuid(0x8000_0004, 0)

      // Some CPUs may include leading or trailing spaces. Trim them, along with
      // the NUL padding.
      // e.g. `       Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz`
      //
      // The parentheses matter: `&&` binds tighter than `||`, and without them
      // the second comparison would index an empty string once everything has
      // been trimmed away.
      brand := string(_cpu_name_buf[:])
      for len(brand) > 0 && (brand[0] == 0 || brand[0] == ' ') {
          brand = brand[1:]
      }
      for len(brand) > 0 && (brand[len(brand) - 1] == 0 || brand[len(brand) - 1] == ' ') {
          brand = brand[:len(brand) - 1]
      }
      cpu.name = brand
  }
  // Short package-local names for the compiler's x86 intrinsics.
  //
  //     cpuid  :: proc(ax, cx: u32) -> (eax, ebx, ecx, edx: u32) ---
  //     xgetbv :: proc(cx: u32) -> (eax, edx: u32) ---
  cpuid  :: intrinsics.x86_cpuid
  xgetbv :: intrinsics.x86_xgetbv