| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
- ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
- ; <2 x i16> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 20 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype = type <2 x i16>
- ; SSE2-CODEGEN: shift2i16
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} lshr
- define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
- entry:
-   %result = lshr %shifttype %a, %b
-   ret %shifttype %result
- }
- ; <4 x i16> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 16 on the lshr line and the
- ; llc run emits psrld.
- %shifttype4i16 = type <4 x i16>
- ; SSE2-CODEGEN: shift4i16
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift4i16
- ; SSE2: cost of 16 {{.*}} lshr
- define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
- entry:
-   %result = lshr %shifttype4i16 %a, %b
-   ret %shifttype4i16 %result
- }
- ; <8 x i16> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 32 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype8i16 = type <8 x i16>
- ; SSE2-CODEGEN: shift8i16
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift8i16
- ; SSE2: cost of 32 {{.*}} lshr
- define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
- entry:
-   %result = lshr %shifttype8i16 %a, %b
-   ret %shifttype8i16 %result
- }
- ; <16 x i16> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 64 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype16i16 = type <16 x i16>
- ; SSE2-CODEGEN: shift16i16
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift16i16
- ; SSE2: cost of 64 {{.*}} lshr
- define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
- entry:
-   %result = lshr %shifttype16i16 %a, %b
-   ret %shifttype16i16 %result
- }
- ; <32 x i16> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 128 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype32i16 = type <32 x i16>
- ; SSE2-CODEGEN: shift32i16
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift32i16
- ; SSE2: cost of 128 {{.*}} lshr
- define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
- entry:
-   %result = lshr %shifttype32i16 %a, %b
-   ret %shifttype32i16 %result
- }
- ; <2 x i32> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 20 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype2i32 = type <2 x i32>
- ; SSE2-CODEGEN: shift2i32
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} lshr
- define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
- entry:
-   %result = lshr %shifttype2i32 %a, %b
-   ret %shifttype2i32 %result
- }
- ; <4 x i32> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 16 on the lshr line and the
- ; llc run emits psrld.
- %shifttype4i32 = type <4 x i32>
- ; SSE2-CODEGEN: shift4i32
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift4i32
- ; SSE2: cost of 16 {{.*}} lshr
- define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
- entry:
-   %result = lshr %shifttype4i32 %a, %b
-   ret %shifttype4i32 %result
- }
- ; <8 x i32> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 32 on the lshr line and the
- ; llc run emits psrld.
- %shifttype8i32 = type <8 x i32>
- ; SSE2-CODEGEN: shift8i32
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift8i32
- ; SSE2: cost of 32 {{.*}} lshr
- define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
- entry:
-   %result = lshr %shifttype8i32 %a, %b
-   ret %shifttype8i32 %result
- }
- ; <16 x i32> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 64 on the lshr line and the
- ; llc run emits psrld.
- %shifttype16i32 = type <16 x i32>
- ; SSE2-CODEGEN: shift16i32
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift16i32
- ; SSE2: cost of 64 {{.*}} lshr
- define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
- entry:
-   %result = lshr %shifttype16i32 %a, %b
-   ret %shifttype16i32 %result
- }
- ; <32 x i32> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 128 on the lshr line and the
- ; llc run emits psrld.
- %shifttype32i32 = type <32 x i32>
- ; SSE2-CODEGEN: shift32i32
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift32i32
- ; SSE2: cost of 128 {{.*}} lshr
- define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
- entry:
-   %result = lshr %shifttype32i32 %a, %b
-   ret %shifttype32i32 %result
- }
- ; <2 x i64> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 20 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype2i64 = type <2 x i64>
- ; SSE2-CODEGEN: shift2i64
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} lshr
- define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
- entry:
-   %result = lshr %shifttype2i64 %a, %b
-   ret %shifttype2i64 %result
- }
- ; <4 x i64> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 40 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype4i64 = type <4 x i64>
- ; SSE2-CODEGEN: shift4i64
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} lshr
- define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
- entry:
-   %result = lshr %shifttype4i64 %a, %b
-   ret %shifttype4i64 %result
- }
- ; <8 x i64> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 80 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype8i64 = type <8 x i64>
- ; SSE2-CODEGEN: shift8i64
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} lshr
- define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
- entry:
-   %result = lshr %shifttype8i64 %a, %b
-   ret %shifttype8i64 %result
- }
- ; <16 x i64> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 160 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype16i64 = type <16 x i64>
- ; SSE2-CODEGEN: shift16i64
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} lshr
- define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
- entry:
-   %result = lshr %shifttype16i64 %a, %b
-   ret %shifttype16i64 %result
- }
- ; <32 x i64> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 320 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype32i64 = type <32 x i64>
- ; SSE2-CODEGEN: shift32i64
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} lshr
- define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
- entry:
-   %result = lshr %shifttype32i64 %a, %b
-   ret %shifttype32i64 %result
- }
- ; <2 x i8> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 20 on the lshr line and the
- ; llc run emits psrlq.
- %shifttype2i8 = type <2 x i8>
- ; SSE2-CODEGEN: shift2i8
- ; SSE2-CODEGEN: psrlq
- ; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} lshr
- define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
- entry:
-   %result = lshr %shifttype2i8 %a, %b
-   ret %shifttype2i8 %result
- }
- ; <4 x i8> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 16 on the lshr line and the
- ; llc run emits psrld.
- %shifttype4i8 = type <4 x i8>
- ; SSE2-CODEGEN: shift4i8
- ; SSE2-CODEGEN: psrld
- ; SSE2: shift4i8
- ; SSE2: cost of 16 {{.*}} lshr
- define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
- entry:
-   %result = lshr %shifttype4i8 %a, %b
-   ret %shifttype4i8 %result
- }
- ; <8 x i8> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 32 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype8i8 = type <8 x i8>
- ; SSE2-CODEGEN: shift8i8
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift8i8
- ; SSE2: cost of 32 {{.*}} lshr
- define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
- entry:
-   %result = lshr %shifttype8i8 %a, %b
-   ret %shifttype8i8 %result
- }
- ; <16 x i8> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 26 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype16i8 = type <16 x i8>
- ; SSE2-CODEGEN: shift16i8
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift16i8
- ; SSE2: cost of 26 {{.*}} lshr
- define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
- entry:
-   %result = lshr %shifttype16i8 %a, %b
-   ret %shifttype16i8 %result
- }
- ; <32 x i8> logical right shift by a per-lane variable amount.
- ; Expected: the cost-model run prints a cost of 52 on the lshr line and the
- ; llc run emits psrlw.
- %shifttype32i8 = type <32 x i8>
- ; SSE2-CODEGEN: shift32i8
- ; SSE2-CODEGEN: psrlw
- ; SSE2: shift32i8
- ; SSE2: cost of 52 {{.*}} lshr
- define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
- entry:
-   %result = lshr %shifttype32i8 %a, %b
-   ret %shifttype32i8 %result
- }
- ; Test shifts by a uniform (splat) constant vector: every lane is shifted right by 3.
- ; <2 x i16> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec = type <2 x i16>
- ; SSE2-CODEGEN: shift2i16const
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift2i16const
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
- entry:
-   %result = lshr %shifttypec %a, <i16 3, i16 3>
-   ret %shifttypec %result
- }
- ; <4 x i16> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec4i16 = type <4 x i16>
- ; SSE2-CODEGEN: shift4i16const
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift4i16const
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
- entry:
-   %result = lshr %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
-   ret %shifttypec4i16 %result
- }
- ; <8 x i16> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec8i16 = type <8 x i16>
- ; SSE2-CODEGEN: shift8i16const
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift8i16const
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
- entry:
-   %result = lshr %shifttypec8i16 %a,
-       <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-   ret %shifttypec8i16 %result
- }
- ; <16 x i16> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 2 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec16i16 = type <16 x i16>
- ; SSE2-CODEGEN: shift16i16const
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift16i16const
- ; SSE2: cost of 2 {{.*}} lshr
- define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a, %shifttypec16i16 %b) {
- entry:
-   %result = lshr %shifttypec16i16 %a,
-       <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
-        i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-   ret %shifttypec16i16 %result
- }
- ; <32 x i16> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 4 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec32i16 = type <32 x i16>
- ; SSE2-CODEGEN: shift32i16const
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift32i16const
- ; SSE2: cost of 4 {{.*}} lshr
- define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a, %shifttypec32i16 %b) {
- entry:
-   %result = lshr %shifttypec32i16 %a,
-       <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
-        i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
-        i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
-        i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-   ret %shifttypec32i16 %result
- }
- ; <2 x i32> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec2i32 = type <2 x i32>
- ; SSE2-CODEGEN: shift2i32c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift2i32c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
- entry:
-   %result = lshr %shifttypec2i32 %a, <i32 3, i32 3>
-   ret %shifttypec2i32 %result
- }
- ; <4 x i32> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec4i32 = type <4 x i32>
- ; SSE2-CODEGEN: shift4i32c
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift4i32c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
- entry:
-   %result = lshr %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
-   ret %shifttypec4i32 %result
- }
- ; <8 x i32> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 2 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec8i32 = type <8 x i32>
- ; SSE2-CODEGEN: shift8i32c
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift8i32c
- ; SSE2: cost of 2 {{.*}} lshr
- define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
- entry:
-   %result = lshr %shifttypec8i32 %a,
-       <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-   ret %shifttypec8i32 %result
- }
- ; <16 x i32> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 4 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec16i32 = type <16 x i32>
- ; SSE2-CODEGEN: shift16i32c
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift16i32c
- ; SSE2: cost of 4 {{.*}} lshr
- define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
- entry:
-   %result = lshr %shifttypec16i32 %a,
-       <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
-        i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-   ret %shifttypec16i32 %result
- }
- ; <32 x i32> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 8 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec32i32 = type <32 x i32>
- ; SSE2-CODEGEN: shift32i32c
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift32i32c
- ; SSE2: cost of 8 {{.*}} lshr
- define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
- entry:
-   %result = lshr %shifttypec32i32 %a,
-       <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
-        i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
-        i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
-        i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-   ret %shifttypec32i32 %result
- }
- ; <2 x i64> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec2i64 = type <2 x i64>
- ; SSE2-CODEGEN: shift2i64c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift2i64c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
- entry:
-   %result = lshr %shifttypec2i64 %a, <i64 3, i64 3>
-   ret %shifttypec2i64 %result
- }
- ; <4 x i64> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 2 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec4i64 = type <4 x i64>
- ; SSE2-CODEGEN: shift4i64c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift4i64c
- ; SSE2: cost of 2 {{.*}} lshr
- define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
- entry:
-   %result = lshr %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
-   ret %shifttypec4i64 %result
- }
- ; <8 x i64> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 4 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec8i64 = type <8 x i64>
- ; SSE2-CODEGEN: shift8i64c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift8i64c
- ; SSE2: cost of 4 {{.*}} lshr
- define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
- entry:
-   %result = lshr %shifttypec8i64 %a,
-       <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
-   ret %shifttypec8i64 %result
- }
- ; <16 x i64> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 8 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec16i64 = type <16 x i64>
- ; SSE2-CODEGEN: shift16i64c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift16i64c
- ; SSE2: cost of 8 {{.*}} lshr
- define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
- entry:
-   %result = lshr %shifttypec16i64 %a,
-       <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
-        i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
-   ret %shifttypec16i64 %result
- }
- ; <32 x i64> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 16 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec32i64 = type <32 x i64>
- ; SSE2-CODEGEN: shift32i64c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift32i64c
- ; SSE2: cost of 16 {{.*}} lshr
- define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
- entry:
-   %result = lshr %shifttypec32i64 %a,
-       <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
-        i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
-        i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
-        i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
-   ret %shifttypec32i64 %result
- }
- ; <2 x i8> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlq $3.
- %shifttypec2i8 = type <2 x i8>
- ; SSE2-CODEGEN: shift2i8c
- ; SSE2-CODEGEN: psrlq $3
- ; SSE2: shift2i8c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
- entry:
-   %result = lshr %shifttypec2i8 %a, <i8 3, i8 3>
-   ret %shifttypec2i8 %result
- }
- ; <4 x i8> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrld $3.
- %shifttypec4i8 = type <4 x i8>
- ; SSE2-CODEGEN: shift4i8c
- ; SSE2-CODEGEN: psrld $3
- ; SSE2: shift4i8c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
- entry:
-   %result = lshr %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
-   ret %shifttypec4i8 %result
- }
- ; <8 x i8> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec8i8 = type <8 x i8>
- ; SSE2-CODEGEN: shift8i8c
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift8i8c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
- entry:
-   %result = lshr %shifttypec8i8 %a,
-       <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-   ret %shifttypec8i8 %result
- }
- ; <16 x i8> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 1 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec16i8 = type <16 x i8>
- ; SSE2-CODEGEN: shift16i8c
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift16i8c
- ; SSE2: cost of 1 {{.*}} lshr
- define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
- entry:
-   %result = lshr %shifttypec16i8 %a,
-       <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
-        i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-   ret %shifttypec16i8 %result
- }
- ; <32 x i8> logical right shift by the uniform constant 3 (%b is unused).
- ; Expected: the cost-model run prints a cost of 2 on the lshr line and the
- ; llc run emits psrlw $3.
- %shifttypec32i8 = type <32 x i8>
- ; SSE2-CODEGEN: shift32i8c
- ; SSE2-CODEGEN: psrlw $3
- ; SSE2: shift32i8c
- ; SSE2: cost of 2 {{.*}} lshr
- define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
- entry:
-   %result = lshr %shifttypec32i8 %a,
-       <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
-        i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
-        i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
-        i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-   ret %shifttypec32i8 %result
- }
|