  ; x86-vperm2.ll: InstCombine folds of the AVX vperm2f128 and AVX2 vperm2i128 intrinsics.
  ; RUN: opt < %s -instcombine -S | FileCheck %s
  ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
  ; The call is expected to survive unchanged and its result to be returned as-is.
  define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
    ret <4 x double> %res

  ; CHECK-LABEL: @perm2pd_non_const_imm
  ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ; CHECK-NEXT: ret <4 x double> [[RES]]
  }
  ; In the following 4 tests, both zero mask bits of the immediate (0x88) are set, so the result is all zeros.
  ; imm8 = 0x88 (136) on <4 x double>: expected to fold to zeroinitializer.
  define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
    ret <4 x double> %res

  ; CHECK-LABEL: @perm2pd_0x88
  ; CHECK-NEXT: ret <4 x double> zeroinitializer
  }
  ; imm8 = 0x88 (136) on <8 x float>: expected to fold to zeroinitializer.
  define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
    %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
    ret <8 x float> %res

  ; CHECK-LABEL: @perm2ps_0x88
  ; CHECK-NEXT: ret <8 x float> zeroinitializer
  }
  ; imm8 = 0x88 (136) on <8 x i32>: expected to fold to zeroinitializer.
  define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
    %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
    ret <8 x i32> %res

  ; CHECK-LABEL: @perm2si_0x88
  ; CHECK-NEXT: ret <8 x i32> zeroinitializer
  }
  ; imm8 = 0x88 (136) with the AVX2 integer intrinsic: expected to fold to zeroinitializer.
  define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
    %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
    ret <4 x i64> %res

  ; CHECK-LABEL: @perm2i_0x88
  ; CHECK-NEXT: ret <4 x i64> zeroinitializer
  }
  ; The other control bits are ignored when zero mask bits of the immediate are set.
  ; imm8 = 0xff (255) is therefore expected to fold to zeroinitializer as well.
  define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
    ret <4 x double> %res

  ; CHECK-LABEL: @perm2pd_0xff
  ; CHECK-NEXT: ret <4 x double> zeroinitializer
  }
  ; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
  ; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
  ; imm8 = 0x00: low half = %a0 low, high half = %a0 low (single-source shuffle).
  define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x00
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x01: low half = %a0 high, high half = %a0 low (single-source shuffle).
  define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x01
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x02: low half = %a1 low, high half = %a0 low.
  ; The expected shuffle lists %a1 first, so mask indices 4-7 refer to %a0.
  define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x02
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x03: low half = %a1 high, high half = %a0 low.
  ; The expected shuffle lists %a1 first, so mask indices 4-7 refer to %a0.
  define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x03
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x10: low half = %a0 low, high half = %a0 high, i.e. the result is %a0
  ; itself. This is one of the two cases that return a source vector directly.
  define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
    ret <4 x double> %res

  ; CHECK-LABEL: @perm2pd_0x10
  ; CHECK-NEXT: ret <4 x double> %a0
  }
  ; imm8 = 0x11: low half = %a0 high, high half = %a0 high (single-source shuffle).
  define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x11
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x12: low half = %a1 low, high half = %a0 high.
  ; The expected shuffle lists %a1 first, so mask indices 4-7 refer to %a0.
  define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x12
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x13: low half = %a1 high, high half = %a0 high.
  ; The expected shuffle lists %a1 first, so mask indices 4-7 refer to %a0.
  define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x13
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x20: low half = %a0 low, high half = %a1 low.
  define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x20
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x21: low half = %a0 high, high half = %a1 low.
  define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x21
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x22: low half = %a1 low, high half = %a1 low (single-source shuffle).
  define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x22
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x23: low half = %a1 high, high half = %a1 low (single-source shuffle).
  define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x23
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x30: low half = %a0 low, high half = %a1 high.
  define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x30
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x31: low half = %a0 high, high half = %a1 high.
  define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x31
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x32: low half = %a1 low, high half = %a1 high, i.e. the result is %a1
  ; itself. This is one of the two cases that return a source vector directly.
  define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
    ret <4 x double> %res

  ; CHECK-LABEL: @perm2pd_0x32
  ; CHECK-NEXT: ret <4 x double> %a1
  }
  ; imm8 = 0x33: low half = %a1 high, high half = %a1 high (single-source shuffle).
  define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
    ret <4 x double> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2pd_0x33
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; Confirm that a mask for 32-bit elements is also correct.
  ; imm8 = 0x31 on <8 x float>: each half spans four elements, so the expected
  ; mask selects elements 4-7 of %a0 followed by elements 4-7 of %a1 (12-15).
  define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
    %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
    ret <8 x float> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2ps_0x31
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ; CHECK-NEXT: ret <8 x float> [[SHUF]]
  }
  ; Confirm that the AVX2 version works the same.
  ; imm8 = 0x33: low half = %a1 high, high half = %a1 high (single-source shuffle).
  define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
    %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
    ret <4 x i64> %res

  ; The shuffle result is captured rather than spelled as a fixed value number.
  ; CHECK-LABEL: @perm2i_0x33
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x i64> [[SHUF]]
  }
  ; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
  ; imm8 = 0x81: low half = %a0 high, high half zeroed.
  ; The zero half comes from a constant vector used as the second shuffle operand.
  define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
    ret <4 x double> %res

  ; {{.*}} skips the rest of the constant vector; the shuffle result is captured
  ; so the ret line can be checked against it.
  ; CHECK-LABEL: @perm2pd_0x81
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x83: low half = %a1 high, high half zeroed.
  ; The zero half comes from a constant vector used as the second shuffle operand.
  define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
    ret <4 x double> %res

  ; {{.*}} skips the rest of the constant vector; the shuffle result is captured
  ; so the ret line can be checked against it.
  ; CHECK-LABEL: @perm2pd_0x83
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x28: low half zeroed, high half = %a1 low.
  ; The zero half comes from a constant vector used as the first shuffle operand.
  define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
    ret <4 x double> %res

  ; {{.*}} skips the rest of the constant vector; the shuffle result is captured
  ; so the ret line can be checked against it.
  ; CHECK-LABEL: @perm2pd_0x28
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; imm8 = 0x08: low half zeroed, high half = %a0 low.
  ; The zero half comes from a constant vector used as the first shuffle operand.
  define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
    ret <4 x double> %res

  ; {{.*}} skips the rest of the constant vector; the shuffle result is captured
  ; so the ret line can be checked against it.
  ; CHECK-LABEL: @perm2pd_0x08
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x double> [[SHUF]]
  }
  ; Check one more with the AVX2 version.
  ; imm8 = 0x28: low half zeroed, high half = %a1 low.
  define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
    %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
    ret <4 x i64> %res

  ; {{.*}} skips the rest of the constant vector; the shuffle result is captured
  ; so the ret line can be checked against it.
  ; CHECK-LABEL: @perm2i_0x28
  ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; CHECK-NEXT: ret <4 x i64> [[SHUF]]
  }
  ; Declarations of the x86 intrinsics called by the tests in this file: the three
  ; AVX vperm2f128 variants (pd / ps / si) and the AVX2 vperm2i128 form. Each takes
  ; two source vectors of the result type plus an i8 control byte.
  declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
  declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
  declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
  declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone