simd.odin 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package simd
  2. import "base:builtin"
  3. import "base:intrinsics"
  4. // IS_EMULATED is true iff the compile-time target lacks hardware support
  5. // for at least 128-bit SIMD.
  //
  // The value is a chain of compile-time `when` expressions, one per
  // architecture family, each testing a single target feature:
  //   amd64 / i386       -> "sse2"
  //   arm64 / arm32      -> "neon"
  //   wasm64p32 / wasm32 -> "simd128"
  //   riscv64            -> "v"
  // Any ODIN_ARCH not listed above falls through to the final `false`, i.e. it
  // is reported as NOT emulated without any feature test being performed.
  6. IS_EMULATED :: true when (ODIN_ARCH == .amd64 || ODIN_ARCH == .i386) && !intrinsics.has_target_feature("sse2") else
  7. true when (ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32) && !intrinsics.has_target_feature("neon") else
  8. true when (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") else
  9. true when (ODIN_ARCH == .riscv64) && !intrinsics.has_target_feature("v") else
  10. false
  11. // 128-bit vector aliases
  // Naming scheme: <element type>x<lane count>. For the integer and float
  // aliases, element bits * lane count = 128 (e.g. u8x16 is 16 lanes of u8).
  12. u8x16 :: #simd[16]u8
  13. i8x16 :: #simd[16]i8
  14. u16x8 :: #simd[8]u16
  15. i16x8 :: #simd[8]i16
  16. u32x4 :: #simd[4]u32
  17. i32x4 :: #simd[4]i32
  18. u64x2 :: #simd[2]u64
  19. i64x2 :: #simd[2]i64
  20. f32x4 :: #simd[4]f32
  21. f64x2 :: #simd[2]f64
  // Boolean-lane vectors: plain `bool` plus the sized booleans b8/b16/b32/b64,
  // using the same lane counts as the integer aliases of matching width.
  22. boolx16 :: #simd[16]bool
  23. b8x16 :: #simd[16]b8
  24. b16x8 :: #simd[8]b16
  25. b32x4 :: #simd[4]b32
  26. b64x2 :: #simd[2]b64
  27. // 256-bit vector aliases
  // Naming scheme: <element type>x<lane count>. For the integer and float
  // aliases, element bits * lane count = 256 (e.g. f32x8 is 8 lanes of f32).
  28. u8x32 :: #simd[32]u8
  29. i8x32 :: #simd[32]i8
  30. u16x16 :: #simd[16]u16
  31. i16x16 :: #simd[16]i16
  32. u32x8 :: #simd[8]u32
  33. i32x8 :: #simd[8]i32
  34. u64x4 :: #simd[4]u64
  35. i64x4 :: #simd[4]i64
  36. f32x8 :: #simd[8]f32
  37. f64x4 :: #simd[4]f64
  // Boolean-lane vectors: plain `bool` plus the sized booleans b8/b16/b32/b64,
  // using the same lane counts as the integer aliases of matching width.
  38. boolx32 :: #simd[32]bool
  39. b8x32 :: #simd[32]b8
  40. b16x16 :: #simd[16]b16
  41. b32x8 :: #simd[8]b32
  42. b64x4 :: #simd[4]b64
  43. // 512-bit vector aliases
  // Naming scheme: <element type>x<lane count>. For the integer and float
  // aliases, element bits * lane count = 512 (e.g. u64x8 is 8 lanes of u64).
  44. u8x64 :: #simd[64]u8
  45. i8x64 :: #simd[64]i8
  46. u16x32 :: #simd[32]u16
  47. i16x32 :: #simd[32]i16
  48. u32x16 :: #simd[16]u32
  49. i32x16 :: #simd[16]i32
  50. u64x8 :: #simd[8]u64
  51. i64x8 :: #simd[8]i64
  52. f32x16 :: #simd[16]f32
  53. f64x8 :: #simd[8]f64
  // Boolean-lane vectors: plain `bool` plus the sized booleans b8/b16/b32/b64,
  // using the same lane counts as the integer aliases of matching width.
  54. boolx64 :: #simd[64]bool
  55. b8x64 :: #simd[64]b8
  56. b16x32 :: #simd[32]b16
  57. b32x16 :: #simd[16]b32
  58. b64x8 :: #simd[8]b64
  // Basic arithmetic: short names bound to the matching intrinsics.simd_*
  // procedures.
  59. add :: intrinsics.simd_add
  60. sub :: intrinsics.simd_sub
  61. mul :: intrinsics.simd_mul
  62. div :: intrinsics.simd_div // floats only
  63. // Shifts that keep Odin's behaviour for out-of-range shift amounts:
  64. // (x << y) if y <= mask else 0
  // NOTE(review): `mask` is presumably (element bit width - 1) -- confirm
  // against the intrinsics documentation.
  65. shl :: intrinsics.simd_shl
  66. shr :: intrinsics.simd_shr
  67. // Shifts similar to C's behaviour, where the shift amount is masked first:
  68. // x << (y & mask)
  69. shl_masked :: intrinsics.simd_shl_masked
  70. shr_masked :: intrinsics.simd_shr_masked
  71. // Saturating arithmetic
  72. saturating_add :: intrinsics.simd_saturating_add
  73. saturating_sub :: intrinsics.simd_saturating_sub
  // Bitwise operations (note the `bit_` prefix on every name in this group).
  74. bit_and :: intrinsics.simd_bit_and
  75. bit_or :: intrinsics.simd_bit_or
  76. bit_xor :: intrinsics.simd_bit_xor
  77. bit_and_not :: intrinsics.simd_bit_and_not
  // Negation, absolute value, and min/max/clamp helpers.
  78. neg :: intrinsics.simd_neg
  79. abs :: intrinsics.simd_abs
  80. min :: intrinsics.simd_min
  81. max :: intrinsics.simd_max
  82. clamp :: intrinsics.simd_clamp
  83. // Lane comparisons. Each returns unsigned integers of the same size as the
  84. // input type -- NOT booleans.
  85. // Element-wise result:
  86. // false => 0x00...00
  87. // true => 0xff...ff
  88. lanes_eq :: intrinsics.simd_lanes_eq
  89. lanes_ne :: intrinsics.simd_lanes_ne
  90. lanes_lt :: intrinsics.simd_lanes_lt
  91. lanes_le :: intrinsics.simd_lanes_le
  92. lanes_gt :: intrinsics.simd_lanes_gt
  93. lanes_ge :: intrinsics.simd_lanes_ge
  94. // Gather and scatter intrinsics
  95. gather :: intrinsics.simd_gather
  96. scatter :: intrinsics.simd_scatter
  // Masked load/store variants, bound to the matching intrinsics.simd_masked_*
  // procedures.
  97. masked_load :: intrinsics.simd_masked_load
  98. masked_store :: intrinsics.simd_masked_store
  99. masked_expand_load :: intrinsics.simd_masked_expand_load
  100. masked_compress_store :: intrinsics.simd_masked_compress_store
  // Single-lane access. The signatures in the comments below are informal
  // sketches, not compilable declarations.
  101. // extract :: proc(a: #simd[N]T, idx: uint) -> T
  102. extract :: intrinsics.simd_extract
  103. // replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T
  104. replace :: intrinsics.simd_replace
  // Reductions: short names bound to the matching intrinsics.simd_reduce_*
  // procedures.
  // NOTE(review): presumably each folds all lanes of one vector into a single
  // value -- confirm exact semantics (e.g. of `_ordered`) in the intrinsics docs.
  105. reduce_add_ordered :: intrinsics.simd_reduce_add_ordered
  106. reduce_mul_ordered :: intrinsics.simd_reduce_mul_ordered
  107. reduce_min :: intrinsics.simd_reduce_min
  108. reduce_max :: intrinsics.simd_reduce_max
  109. reduce_and :: intrinsics.simd_reduce_and
  110. reduce_or :: intrinsics.simd_reduce_or
  111. reduce_xor :: intrinsics.simd_reduce_xor
  112. reduce_any :: intrinsics.simd_reduce_any
  113. reduce_all :: intrinsics.simd_reduce_all
  // Lane rearrangement and selection. The signatures in the comments below are
  // informal sketches, not compilable declarations.
  // Note that swizzle is bound to builtin.swizzle rather than an intrinsic.
  114. // swizzle :: proc(a: #simd[N]T, indices: ..int) -> #simd[len(indices)]T
  115. swizzle :: builtin.swizzle
  116. // shuffle :: proc(a, b: #simd[N]T, indices: #simd[max 2*N]u32) -> #simd[len(indices)]T
  117. shuffle :: intrinsics.simd_shuffle
  118. // select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T
  119. select :: intrinsics.simd_select
  // Square root and rounding. sqrt is bound to intrinsics.sqrt (no `simd_`
  // prefix); the rounding procedures are bound to intrinsics.simd_*.
  120. sqrt :: intrinsics.sqrt
  121. ceil :: intrinsics.simd_ceil
  122. floor :: intrinsics.simd_floor
  123. trunc :: intrinsics.simd_trunc
  124. nearest :: intrinsics.simd_nearest
  // Bit reinterpretation and whole-lane reordering.
  125. to_bits :: intrinsics.simd_to_bits
  126. lanes_reverse :: intrinsics.simd_lanes_reverse
  127. lanes_rotate_left :: intrinsics.simd_lanes_rotate_left
  128. lanes_rotate_right :: intrinsics.simd_lanes_rotate_right
  // Bit counting/reversal: these are bound to the intrinsics of the same name
  // without a `simd_` prefix.
  129. count_ones :: intrinsics.count_ones
  130. count_zeros :: intrinsics.count_zeros
  131. count_trailing_zeros :: intrinsics.count_trailing_zeros
  132. count_leading_zeros :: intrinsics.count_leading_zeros
  133. reverse_bits :: intrinsics.reverse_bits
  // fused_mul_add and fma are two names for the same procedure,
  // intrinsics.fused_mul_add.
  134. fused_mul_add :: intrinsics.fused_mul_add
  135. fma :: intrinsics.fused_mul_add
  // to_array_ptr reinterprets a pointer to a SIMD vector as a pointer to a
  // fixed array with the same lane count and element type.
  //
  // No data is copied: the returned pointer refers to the same memory as `v`,
  // so writes through it are visible in the vector.
  to_array_ptr :: #force_inline proc "contextless" (v: ^#simd[$LANES]$E) -> ^[LANES]E {
      return cast(^[LANES]E)v
  }
  // to_array returns the lanes of SIMD vector `v` as a fixed array of the same
  // length and element type, by reinterpreting the vector's bits.
  to_array :: #force_inline proc "contextless" (v: #simd[$LANES]$E) -> [LANES]E {
      lanes := transmute([LANES]E)v
      return lanes
  }
  // from_array builds a SIMD vector from a fixed array with the same lane count
  // and element type, by reinterpreting the array's bits.
  from_array :: #force_inline proc "contextless" (v: $A/[$LANES]$E) -> #simd[LANES]E {
      vec := transmute(#simd[LANES]E)v
      return vec
  }
  // from_slice builds a SIMD vector of type T from the first LANES elements of
  // `slice`.
  //
  // Inputs:
  // - T:     the #simd[LANES]E vector type to produce
  // - slice: source elements; must hold at least LANES values, any elements
  //          beyond the first LANES are ignored
  //
  // Returns the vector whose lane i equals slice[i].
  // Asserts that len(slice) >= LANES.
  from_slice :: proc($T: typeid/#simd[$LANES]$E, slice: []E) -> T {
      assert(len(slice) >= LANES, "slice length must be at least the number of lanes")
      array: [LANES]E
      // The length was validated by the assert above, so the per-element
      // bounds checks are turned off for the copy loop.
      #no_bounds_check for i in 0..<LANES {
          array[i] = slice[i]
      }
      return transmute(T)array
  }
  // bit_not returns `v` with every bit of every lane inverted.
  //
  // Only available for integer element types (enforced by the `where` clause).
  // Implemented as an XOR with a vector whose lanes are all ~E(0), i.e. all
  // bits set.
  bit_not :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_integer(E) {
      // Use this package's `bit_xor` alias; no procedure named `xor` is
      // declared in this file.
      return bit_xor(v, T(~E(0)))
  }
  // copysign returns a vector whose lanes take their magnitude from `v` and
  // their sign bit from `sign`.
  //
  // Only available for floating-point element types (enforced by the `where`
  // clause). Works purely on the bit patterns: the bits of -0.0 are used as the
  // mask that separates the sign bit from the remaining bits.
  copysign :: #force_inline proc "contextless" (v, sign: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      sign_mask := to_bits(T(-0.0))
      // Everything except the masked bit comes from `v`, the masked bit from `sign`.
      combined := (to_bits(v) &~ sign_mask) | (to_bits(sign) & sign_mask)
      return transmute(T)combined
  }
  // signum returns, per lane, 1 carrying the sign of `v`; lanes where `v`
  // compares unequal to itself (NaN) are returned unchanged.
  //
  // Only available for floating-point element types (enforced by the `where`
  // clause). Note that zero lanes also yield 1 with the sign of the zero,
  // since the result is copysign(1, v).
  signum :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      signed_one := copysign(T(1), v)
      // lanes_ne(v, v) is the per-lane "is NaN" mask.
      return select(lanes_ne(v, v), v, signed_one)
  }
  // recip returns the per-lane reciprocal of `v`, computed as 1 / v.
  //
  // Only available for floating-point element types (enforced by the `where`
  // clause).
  recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      one := T(1)
      return one / v
  }