// ssse3.odin — SSSE3 (Supplemental SSE3) intrinsic wrappers for package simd_x86.
  1. //+build i386, amd64
  2. package simd_x86
  3. import "core:intrinsics"
  4. import "core:simd"
  5. _ :: simd
  6. @(require_results, enable_target_feature="ssse3")
  7. _mm_abs_epi8 :: #force_inline proc "c" (a: __m128i) -> __m128i {
  8. return transmute(__m128i)pabsb128(transmute(i8x16)a)
  9. }
  10. @(require_results, enable_target_feature="ssse3")
  11. _mm_abs_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i {
  12. return transmute(__m128i)pabsw128(transmute(i16x8)a)
  13. }
  14. @(require_results, enable_target_feature="ssse3")
  15. _mm_abs_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
  16. return transmute(__m128i)pabsd128(transmute(i32x4)a)
  17. }
  18. @(require_results, enable_target_feature="ssse3")
  19. _mm_shuffle_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  20. return transmute(__m128i)pshufb128(transmute(u8x16)a, transmute(u8x16)b)
  21. }
// Concatenates `a` (high half) and `b` (low half) into a 32-byte
// intermediate, shifts it right by `IMM8` bytes, and returns the low
// 16 bytes (SSSE3 `palignr`). `IMM8` is a compile-time constant, so
// every branch and shuffle index below folds at compile time.
@(require_results, enable_target_feature="ssse3")
_mm_alignr_epi8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u32) -> __m128i {
	shift :: IMM8
	// If palignr is shifting the pair of vectors more than the size of two
	// lanes, emit zero.
	if shift > 32 {
		return _mm_set1_epi8(0)
	}
	a, b := a, b
	// Shifting by more than one vector width: only (former) `a` bytes can
	// survive, so replace the pair {a, b} with {0, a} and shift the
	// remaining amount below.
	if shift > 16 {
		a, b = _mm_set1_epi8(0), a
	}
	// simd.shuffle indexes `b` with 0..15 and `a` with 16..31. Each index
	// is a `when` ternary chain evaluated at compile time:
	//   shift > 32 -> 0                (unreachable; handled above)
	//   shift > 16 -> shift - 16 + i   (for the swapped {0, a} pair)
	//   otherwise  -> shift + i
	return transmute(__m128i)simd.shuffle(
		transmute(i8x16)b,
		transmute(i8x16)a,
		0 when shift > 32 else shift - 16 + 0 when shift > 16 else shift + 0,
		1 when shift > 32 else shift - 16 + 1 when shift > 16 else shift + 1,
		2 when shift > 32 else shift - 16 + 2 when shift > 16 else shift + 2,
		3 when shift > 32 else shift - 16 + 3 when shift > 16 else shift + 3,
		4 when shift > 32 else shift - 16 + 4 when shift > 16 else shift + 4,
		5 when shift > 32 else shift - 16 + 5 when shift > 16 else shift + 5,
		6 when shift > 32 else shift - 16 + 6 when shift > 16 else shift + 6,
		7 when shift > 32 else shift - 16 + 7 when shift > 16 else shift + 7,
		8 when shift > 32 else shift - 16 + 8 when shift > 16 else shift + 8,
		9 when shift > 32 else shift - 16 + 9 when shift > 16 else shift + 9,
		10 when shift > 32 else shift - 16 + 10 when shift > 16 else shift + 10,
		11 when shift > 32 else shift - 16 + 11 when shift > 16 else shift + 11,
		12 when shift > 32 else shift - 16 + 12 when shift > 16 else shift + 12,
		13 when shift > 32 else shift - 16 + 13 when shift > 16 else shift + 13,
		14 when shift > 32 else shift - 16 + 14 when shift > 16 else shift + 14,
		15 when shift > 32 else shift - 16 + 15 when shift > 16 else shift + 15,
	)
}
  55. @(require_results, enable_target_feature="ssse3")
  56. _mm_hadd_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  57. return transmute(__m128i)phaddw128(transmute(i16x8)a, transmute(i16x8)b)
  58. }
  59. @(require_results, enable_target_feature="ssse3")
  60. _mm_hadds_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  61. return transmute(__m128i)phaddsw128(transmute(i16x8)a, transmute(i16x8)b)
  62. }
  63. @(require_results, enable_target_feature="ssse3")
  64. _mm_hadd_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  65. return transmute(__m128i)phaddd128(transmute(i32x4)a, transmute(i32x4)b)
  66. }
  67. @(require_results, enable_target_feature="ssse3")
  68. _mm_hsub_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  69. return transmute(__m128i)phsubw128(transmute(i16x8)a, transmute(i16x8)b)
  70. }
  71. @(require_results, enable_target_feature="ssse3")
  72. _mm_hsubs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  73. return transmute(__m128i)phsubsw128(transmute(i16x8)a, transmute(i16x8)b)
  74. }
  75. @(require_results, enable_target_feature="ssse3")
  76. _mm_hsub_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  77. return transmute(__m128i)phsubd128(transmute(i32x4)a, transmute(i32x4)b)
  78. }
  79. @(require_results, enable_target_feature="ssse3")
  80. _mm_maddubs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  81. return transmute(__m128i)pmaddubsw128(transmute(u8x16)a, transmute(i8x16)b)
  82. }
  83. @(require_results, enable_target_feature="ssse3")
  84. _mm_mulhrs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  85. return transmute(__m128i)pmulhrsw128(transmute(i16x8)a, transmute(i16x8)b)
  86. }
  87. @(require_results, enable_target_feature="ssse3")
  88. _mm_sign_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  89. return transmute(__m128i)psignb128(transmute(i8x16)a, transmute(i8x16)b)
  90. }
  91. @(require_results, enable_target_feature="ssse3")
  92. _mm_sign_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  93. return transmute(__m128i)psignw128(transmute(i16x8)a, transmute(i16x8)b)
  94. }
  95. @(require_results, enable_target_feature="ssse3")
  96. _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
  97. return transmute(__m128i)psignd128(transmute(i32x4)a, transmute(i32x4)b)
  98. }
// Raw LLVM intrinsic bindings for SSSE3. The public `_mm_*` wrappers
// above transmute `__m128i` to/from the lane types these expect.
@(private, default_calling_convention="c")
foreign _ {
	// Packed absolute value: pabsb / pabsw / pabsd.
	@(link_name = "llvm.x86.ssse3.pabs.b.128")
	pabsb128 :: proc(a: i8x16) -> u8x16 ---
	@(link_name = "llvm.x86.ssse3.pabs.w.128")
	pabsw128 :: proc(a: i16x8) -> u16x8 ---
	@(link_name = "llvm.x86.ssse3.pabs.d.128")
	pabsd128 :: proc(a: i32x4) -> u32x4 ---
	// Byte shuffle: pshufb.
	@(link_name = "llvm.x86.ssse3.pshuf.b.128")
	pshufb128 :: proc(a, b: u8x16) -> u8x16 ---
	// Horizontal add: phaddw / phaddsw (saturating) / phaddd.
	@(link_name = "llvm.x86.ssse3.phadd.w.128")
	phaddw128 :: proc(a, b: i16x8) -> i16x8 ---
	@(link_name = "llvm.x86.ssse3.phadd.sw.128")
	phaddsw128 :: proc(a, b: i16x8) -> i16x8 ---
	@(link_name = "llvm.x86.ssse3.phadd.d.128")
	phaddd128 :: proc(a, b: i32x4) -> i32x4 ---
	// Horizontal subtract: phsubw / phsubsw (saturating) / phsubd.
	@(link_name = "llvm.x86.ssse3.phsub.w.128")
	phsubw128 :: proc(a, b: i16x8) -> i16x8 ---
	@(link_name = "llvm.x86.ssse3.phsub.sw.128")
	phsubsw128 :: proc(a, b: i16x8) -> i16x8 ---
	@(link_name = "llvm.x86.ssse3.phsub.d.128")
	phsubd128 :: proc(a, b: i32x4) -> i32x4 ---
	// Multiply-and-horizontal-add of u8 x i8 lanes: pmaddubsw.
	@(link_name = "llvm.x86.ssse3.pmadd.ub.sw.128")
	pmaddubsw128 :: proc(a: u8x16, b: i8x16) -> i16x8 ---
	// Rounded/scaled 16-bit multiply: pmulhrsw.
	@(link_name = "llvm.x86.ssse3.pmul.hr.sw.128")
	pmulhrsw128 :: proc(a, b: i16x8) -> i16x8 ---
	// Sign application: psignb / psignw / psignd.
	@(link_name = "llvm.x86.ssse3.psign.b.128")
	psignb128 :: proc(a, b: i8x16) -> i8x16 ---
	@(link_name = "llvm.x86.ssse3.psign.w.128")
	psignw128 :: proc(a, b: i16x8) -> i16x8 ---
	@(link_name = "llvm.x86.ssse3.psign.d.128")
	psignd128 :: proc(a, b: i32x4) -> i32x4 ---
}