simd.odin 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. package simd
  2. import "core:builtin"
  3. import "core:intrinsics"
  // 128-bit vector aliases.
  // Naming scheme: <element type>x<lane count>.

  // Unsigned and signed integer lanes
  u8x16 :: #simd[16]u8
  i8x16 :: #simd[16]i8
  u16x8 :: #simd[ 8]u16
  i16x8 :: #simd[ 8]i16
  u32x4 :: #simd[ 4]u32
  i32x4 :: #simd[ 4]i32
  u64x2 :: #simd[ 2]u64
  i64x2 :: #simd[ 2]i64

  // Floating-point lanes
  f32x4 :: #simd[4]f32
  f64x2 :: #simd[2]f64

  // Boolean lanes (`bool` plus the sized b8/b16/b32/b64 booleans)
  boolx16 :: #simd[16]bool
  b8x16   :: #simd[16]b8
  b16x8   :: #simd[ 8]b16
  b32x4   :: #simd[ 4]b32
  b64x2   :: #simd[ 2]b64
  // 256-bit vector aliases.
  // Naming scheme: <element type>x<lane count>.

  // Unsigned and signed integer lanes
  u8x32  :: #simd[32]u8
  i8x32  :: #simd[32]i8
  u16x16 :: #simd[16]u16
  i16x16 :: #simd[16]i16
  u32x8  :: #simd[ 8]u32
  i32x8  :: #simd[ 8]i32
  u64x4  :: #simd[ 4]u64
  i64x4  :: #simd[ 4]i64

  // Floating-point lanes
  f32x8 :: #simd[8]f32
  f64x4 :: #simd[4]f64

  // Boolean lanes (`bool` plus the sized b8/b16/b32/b64 booleans)
  boolx32 :: #simd[32]bool
  b8x32   :: #simd[32]b8
  b16x16  :: #simd[16]b16
  b32x8   :: #simd[ 8]b32
  b64x4   :: #simd[ 4]b64
  // 512-bit vector aliases.
  // Naming scheme: <element type>x<lane count>.

  // Unsigned and signed integer lanes
  u8x64  :: #simd[64]u8
  i8x64  :: #simd[64]i8
  u16x32 :: #simd[32]u16
  i16x32 :: #simd[32]i16
  u32x16 :: #simd[16]u32
  i32x16 :: #simd[16]i32
  u64x8  :: #simd[ 8]u64
  i64x8  :: #simd[ 8]i64

  // Floating-point lanes
  f32x16 :: #simd[16]f32
  f64x8  :: #simd[ 8]f64

  // Boolean lanes (`bool` plus the sized b8/b16/b32/b64 booleans)
  boolx64 :: #simd[64]bool
  b8x64   :: #simd[64]b8
  b16x32  :: #simd[32]b16
  b32x16  :: #simd[16]b32
  b64x8   :: #simd[ 8]b64
  // Every name in this group is a direct alias of the compiler intrinsic
  // written on its right-hand side.

  // Arithmetic
  add :: intrinsics.simd_add
  sub :: intrinsics.simd_sub
  mul :: intrinsics.simd_mul
  div :: intrinsics.simd_div // floats only

  // Shifts that keep Odin's behaviour:
  //     (x << y) if y <= mask else 0
  shl :: intrinsics.simd_shl
  shr :: intrinsics.simd_shr

  // Shifts that behave similarly to C:
  //     x << (y & mask)
  shl_masked :: intrinsics.simd_shl_masked
  shr_masked :: intrinsics.simd_shr_masked

  // Saturation arithmetic
  add_sat :: intrinsics.simd_add_sat
  sub_sat :: intrinsics.simd_sub_sat

  // Bitwise operations
  and     :: intrinsics.simd_and
  or      :: intrinsics.simd_or
  xor     :: intrinsics.simd_xor
  and_not :: intrinsics.simd_and_not

  // Negation, absolute value and range limiting
  neg   :: intrinsics.simd_neg
  abs   :: intrinsics.simd_abs
  min   :: intrinsics.simd_min
  max   :: intrinsics.simd_max
  clamp :: intrinsics.simd_clamp
  // Lane comparisons.
  //
  // The result is NOT A BOOLEAN vector: each lane is an unsigned integer of
  // the same size as the input element type, set element-wise to
  //     false => 0x00...00
  //     true  => 0xff...ff
  lanes_eq :: intrinsics.simd_lanes_eq
  lanes_ne :: intrinsics.simd_lanes_ne
  lanes_lt :: intrinsics.simd_lanes_lt
  lanes_le :: intrinsics.simd_lanes_le
  lanes_gt :: intrinsics.simd_lanes_gt
  lanes_ge :: intrinsics.simd_lanes_ge

  // Single-lane access.
  //     extract :: proc(a: #simd[N]T, idx: uint) -> T
  //     replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T
  extract :: intrinsics.simd_extract
  replace :: intrinsics.simd_replace

  // Reductions
  reduce_add_ordered :: intrinsics.simd_reduce_add_ordered
  reduce_mul_ordered :: intrinsics.simd_reduce_mul_ordered
  reduce_min         :: intrinsics.simd_reduce_min
  reduce_max         :: intrinsics.simd_reduce_max
  reduce_and         :: intrinsics.simd_reduce_and
  reduce_or          :: intrinsics.simd_reduce_or
  reduce_xor         :: intrinsics.simd_reduce_xor
  // Lane selection and blending.
  //     swizzle :: proc(a: #simd[N]T, indices: ..int) -> #simd[len(indices)]T
  //     shuffle :: proc(a, b: #simd[N]T, indices: #simd[max 2*N]u32) -> #simd[len(indices)]T
  //     select  :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T
  swizzle :: builtin.swizzle
  shuffle :: intrinsics.simd_shuffle
  select  :: intrinsics.simd_select

  // Square root and rounding
  sqrt    :: intrinsics.sqrt
  ceil    :: intrinsics.simd_ceil
  floor   :: intrinsics.simd_floor
  trunc   :: intrinsics.simd_trunc
  nearest :: intrinsics.simd_nearest

  // Bit-pattern view of a vector
  to_bits :: intrinsics.simd_to_bits

  // Lane reordering
  lanes_reverse      :: intrinsics.simd_lanes_reverse
  lanes_rotate_left  :: intrinsics.simd_lanes_rotate_left
  lanes_rotate_right :: intrinsics.simd_lanes_rotate_right

  // Bit counting and reversal (aliases of the general, non-`simd_` intrinsics)
  count_ones           :: intrinsics.count_ones
  count_zeros          :: intrinsics.count_zeros
  count_trailing_zeros :: intrinsics.count_trailing_zeros
  count_leading_zeros  :: intrinsics.count_leading_zeros
  reverse_bits         :: intrinsics.reverse_bits

  // Fused multiply-add; `fma` is a second name for the same intrinsic.
  fused_mul_add :: intrinsics.fused_mul_add
  fma           :: intrinsics.fused_mul_add
  // Reinterprets a pointer to a SIMD vector as a pointer to a fixed-size array
  // with the same lane count and element type.
  // Nothing is copied: the returned pointer refers to the same memory as `v`.
  to_array_ptr :: #force_inline proc "contextless" (v: ^#simd[$LANES]$E) -> ^[LANES]E {
      return cast(^[LANES]E)v
  }
  // Converts a SIMD vector into a fixed-size array of the same lane count and
  // element type by reinterpreting its bits (`transmute`).
  to_array :: #force_inline proc "contextless" (v: #simd[$LANES]$E) -> [LANES]E {
      lanes := transmute([LANES]E)v
      return lanes
  }
  // Converts a fixed-size array into a SIMD vector with the same lane count and
  // element type by reinterpreting its bits (`transmute`).
  from_array :: #force_inline proc "contextless" (v: $A/[$LANES]$E) -> #simd[LANES]E {
      vector := transmute(#simd[LANES]E)v
      return vector
  }
  // Builds a SIMD vector of type `T` from the first `LANES` elements of `slice`.
  //
  // Inputs:
  //     T     - the `#simd[LANES]E` vector type to produce
  //     slice - source elements; only indices 0..<LANES are read, extras are ignored
  //
  // Asserts that `len(slice) >= LANES`.
  from_slice :: proc($T: typeid/#simd[$LANES]$E, slice: []E) -> T {
      assert(len(slice) >= LANES, "slice length must be at least the number of lanes")
      array: [LANES]E
      // Bounds checks are skipped here; the assertion above already covers every index used.
      #no_bounds_check for i in 0..<LANES {
          array[i] = slice[i]
      }
      return transmute(T)array
  }
  // Bitwise complement of every lane of an integer vector.
  // Implemented as an XOR of `v` with a vector whose lanes are all `~E(0)`.
  bit_not :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_integer(E) {
      all_ones := T(~E(0))
      return xor(v, all_ones)
  }
  // Combines the magnitude of `v` with the sign of `sign`, lane by lane, for
  // floating-point vectors.
  //
  // The bit pattern of -0.0 is used as the mask that separates the sign from
  // the rest of each lane.
  copysign :: #force_inline proc "contextless" (v, sign: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      sign_mask := to_bits(T(-0.0))

      // Everything in `v` except the bits set in the mask.
      magnitude_bits := to_bits(v) &~ sign_mask
      // Only the masked bits of `sign`.
      sign_bits := to_bits(sign) & sign_mask

      return transmute(T)(magnitude_bits | sign_bits)
  }
  // Lane-wise sign of a floating-point vector.
  //
  // Lanes that compare unequal to themselves (NaN) are returned unchanged.
  // Every other lane becomes 1 carrying the sign of the input lane, so a zero
  // lane yields +1 or -1 according to its sign bit, never 0.
  signum :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      signed_one := copysign(T(1), v)
      nan_lanes  := lanes_ne(v, v)
      return select(nan_lanes, v, signed_one)
  }
  // Lane-wise reciprocal (1 / v) of a floating-point vector.
  recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
      one := T(1)
      return one / v
  }