| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- ; RUN: opt < %s -instcombine -S | FileCheck %s
- ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
- ; The control byte is the function argument %b rather than a constant; the call is expected to be left unchanged.
- define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_non_const_imm
- ; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
- ; CHECK-NEXT: ret <4 x double>
- }
- ; In the following 4 tests, both zero mask bits of the immediate are set.
- ; Immediate 136 (0x88) on the <4 x double> intrinsic: the call is expected to fold to an all-zero vector.
- define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x88
- ; CHECK-NEXT: ret <4 x double> zeroinitializer
- }
- ; Immediate 136 (0x88) on the <8 x float> intrinsic: the call is expected to fold to an all-zero vector.
- define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
- %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
- ret <8 x float> %res
- ; CHECK-LABEL: @perm2ps_0x88
- ; CHECK-NEXT: ret <8 x float> zeroinitializer
- }
- ; Immediate 136 (0x88) on the <8 x i32> intrinsic: the call is expected to fold to an all-zero vector.
- define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
- %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
- ret <8 x i32> %res
- ; CHECK-LABEL: @perm2si_0x88
- ; CHECK-NEXT: ret <8 x i32> zeroinitializer
- }
- ; Immediate 136 (0x88) on the AVX2 <4 x i64> intrinsic: the call is expected to fold to an all-zero vector.
- define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
- %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
- ret <4 x i64> %res
- ; CHECK-LABEL: @perm2i_0x88
- ; CHECK-NEXT: ret <4 x i64> zeroinitializer
- }
- ; The other control bits are ignored when zero mask bits of the immediate are set.
- ; Immediate 255 (0xff), i.e. every bit set: the result is still expected to be an all-zero vector.
- define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0xff
- ; CHECK-NEXT: ret <4 x double> zeroinitializer
- }
- ; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
- ; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
- ; Immediate 0 (0x00): expected to become a single-source shuffle of %a0 that repeats elements 0-1 in both halves.
- define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x00
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 1 (0x01): expected to become a single-source shuffle of %a0 taking elements 2-3, then elements 0-1.
- define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x01
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 2 (0x02): expected to become a shuffle with %a1 as first operand, taking elements 0-1 of %a1, then elements 0-1 of %a0.
- define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x02
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 3 (0x03): expected to become a shuffle with %a1 as first operand, taking elements 2-3 of %a1, then elements 0-1 of %a0.
- define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x03
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 16 (0x10): no shuffle is expected; the function should simply return %a0.
- define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x10
- ; CHECK-NEXT: ret <4 x double> %a0
- }
- ; Immediate 17 (0x11): expected to become a single-source shuffle of %a0 that repeats elements 2-3 in both halves.
- define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x11
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 18 (0x12): expected to become a shuffle with %a1 as first operand, taking elements 0-1 of %a1, then elements 2-3 of %a0.
- define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x12
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 19 (0x13): expected to become a shuffle with %a1 as first operand, taking elements 2-3 of %a1, then elements 2-3 of %a0.
- define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x13
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 32 (0x20): expected to become a shuffle taking elements 0-1 of %a0, then elements 0-1 of %a1.
- define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x20
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 33 (0x21): expected to become a shuffle taking elements 2-3 of %a0, then elements 0-1 of %a1.
- define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x21
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 34 (0x22): expected to become a single-source shuffle of %a1 that repeats elements 0-1 in both halves.
- define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x22
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 35 (0x23): expected to become a single-source shuffle of %a1 taking elements 2-3, then elements 0-1.
- define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x23
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 48 (0x30): expected to become a shuffle taking elements 0-1 of %a0, then elements 2-3 of %a1.
- define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x30
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 49 (0x31): expected to become a shuffle taking elements 2-3 of %a0, then elements 2-3 of %a1.
- define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x31
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Immediate 50 (0x32): no shuffle is expected; the function should simply return %a1.
- define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x32
- ; CHECK-NEXT: ret <4 x double> %a1
- }
- ; Immediate 51 (0x33): expected to become a single-source shuffle of %a1 that repeats elements 2-3 in both halves.
- define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x33
- ; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
- ; CHECK-NEXT: ret <4 x double> %1
- }
- ; Confirm that a mask for 32-bit elements is also correct.
- ; Immediate 49 (0x31) on the <8 x float> intrinsic: the expected mask has eight entries, taking elements 4-7 of %a0, then elements 4-7 of %a1.
- define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
- %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
- ret <8 x float> %res
- ; CHECK-LABEL: @perm2ps_0x31
- ; CHECK-NEXT: %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- ; CHECK-NEXT: ret <8 x float> %1
- }
- ; Confirm that the AVX2 version works the same.
- ; Immediate 51 (0x33) on the AVX2 <4 x i64> intrinsic: expected to become a single-source shuffle of %a1 that repeats elements 2-3.
- define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
- %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
- ret <4 x i64> %res
- ; CHECK-LABEL: @perm2i_0x33
- ; CHECK-NEXT: %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
- ; CHECK-NEXT: ret <4 x i64> %1
- }
- ; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
- ; Immediate 129 (0x81): expected to become a shuffle taking elements 2-3 of %a0, then elements 0-1 of a constant
- ; vector whose first element is double 0.0. The wildcard in the pattern skips the rest of that constant; the
- ; pattern then requires the ", " separator before the mask operand, as the other zero-mask tests in this file do.
- define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x81
- ; CHECK-NEXT: shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double>
- }
- ; Immediate 131 (0x83): expected to become a shuffle taking elements 2-3 of %a1, then elements 0-1 of a constant
- ; vector whose first element is double 0.0 (the wildcard skips the rest of that constant).
- define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x83
- ; CHECK-NEXT: shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double>
- }
- ; Immediate 40 (0x28): expected to become a shuffle whose first operand is a constant vector starting with
- ; double 0.0, taking elements 0-1 of that constant, then elements 0-1 of %a1.
- define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x28
- ; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double>
- }
- ; Immediate 8 (0x08): expected to become a shuffle whose first operand is a constant vector starting with
- ; double 0.0, taking elements 0-1 of that constant, then elements 0-1 of %a0.
- define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
- %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
- ret <4 x double> %res
- ; CHECK-LABEL: @perm2pd_0x08
- ; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x double>
- }
- ; Check one more with the AVX2 version.
- ; Immediate 40 (0x28) on the AVX2 <4 x i64> intrinsic: expected to become a shuffle whose first operand is a
- ; constant vector starting with i64 0, taking elements 0-1 of that constant, then elements 0-1 of %a1.
- define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
- %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
- ret <4 x i64> %res
- ; CHECK-LABEL: @perm2i_0x28
- ; CHECK-NEXT: shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ; CHECK-NEXT: ret <4 x i64>
- }
- ; Declarations of the four intrinsics called by the tests in this file: three AVX vperm2f128 variants
- ; (<4 x double>, <8 x float>, <8 x i32>) and the AVX2 vperm2i128 (<4 x i64>). Each takes two vectors and an i8.
- declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
- declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
- declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
- declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone
|