; arith.ll — X86 cost-model tests for vector integer/FP arithmetic and shifts.
  1. ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  2. ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
  3. ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
  4. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  5. target triple = "x86_64-apple-macosx10.8.0"
  6. define i32 @add(i32 %arg) {
  7. ;CHECK: cost of 1 {{.*}} add
  8. %A = add <4 x i32> undef, undef
  9. ;CHECK: cost of 4 {{.*}} add
  10. %B = add <8 x i32> undef, undef
  11. ;CHECK: cost of 1 {{.*}} add
  12. %C = add <2 x i64> undef, undef
  13. ;CHECK: cost of 4 {{.*}} add
  14. %D = add <4 x i64> undef, undef
  15. ;CHECK: cost of 8 {{.*}} add
  16. %E = add <8 x i64> undef, undef
  17. ;CHECK: cost of 0 {{.*}} ret
  18. ret i32 undef
  19. }
  20. define i32 @xor(i32 %arg) {
  21. ;CHECK: cost of 1 {{.*}} xor
  22. %A = xor <4 x i32> undef, undef
  23. ;CHECK: cost of 1 {{.*}} xor
  24. %B = xor <8 x i32> undef, undef
  25. ;CHECK: cost of 1 {{.*}} xor
  26. %C = xor <2 x i64> undef, undef
  27. ;CHECK: cost of 1 {{.*}} xor
  28. %D = xor <4 x i64> undef, undef
  29. ;CHECK: cost of 0 {{.*}} ret
  30. ret i32 undef
  31. }
  32. ; CHECK: mul
  33. define void @mul() {
  34. ; A <2 x i32> gets expanded to a <2 x i64> vector.
  35. ; A <2 x i64> vector multiply is implemented using
  36. ; 3 PMULUDQ and 2 PADDS and 4 shifts.
  37. ;CHECK: cost of 9 {{.*}} mul
  38. %A0 = mul <2 x i32> undef, undef
  39. ;CHECK: cost of 9 {{.*}} mul
  40. %A1 = mul <2 x i64> undef, undef
  41. ;CHECK: cost of 18 {{.*}} mul
  42. %A2 = mul <4 x i64> undef, undef
  43. ret void
  44. }
  45. ; SSE3: sse3mull
  46. define void @sse3mull() {
  47. ; SSE3: cost of 6 {{.*}} mul
  48. %A0 = mul <4 x i32> undef, undef
  49. ret void
  50. ; SSE3: avx2mull
  51. }
  52. ; AVX2: avx2mull
  53. define void @avx2mull() {
  54. ; AVX2: cost of 9 {{.*}} mul
  55. %A0 = mul <4 x i64> undef, undef
  56. ret void
  57. ; AVX2: fmul
  58. }
  59. ; CHECK: fmul
  60. define i32 @fmul(i32 %arg) {
  61. ;CHECK: cost of 2 {{.*}} fmul
  62. %A = fmul <4 x float> undef, undef
  63. ;CHECK: cost of 2 {{.*}} fmul
  64. %B = fmul <8 x float> undef, undef
  65. ret i32 undef
  66. }
  67. ; AVX: shift
  68. ; AVX2: shift
  69. define void @shift() {
  70. ; AVX: cost of 2 {{.*}} shl
  71. ; AVX2: cost of 1 {{.*}} shl
  72. %A0 = shl <4 x i32> undef, undef
  73. ; AVX: cost of 2 {{.*}} shl
  74. ; AVX2: cost of 1 {{.*}} shl
  75. %A1 = shl <2 x i64> undef, undef
  76. ; AVX: cost of 2 {{.*}} lshr
  77. ; AVX2: cost of 1 {{.*}} lshr
  78. %B0 = lshr <4 x i32> undef, undef
  79. ; AVX: cost of 2 {{.*}} lshr
  80. ; AVX2: cost of 1 {{.*}} lshr
  81. %B1 = lshr <2 x i64> undef, undef
  82. ; AVX: cost of 2 {{.*}} ashr
  83. ; AVX2: cost of 1 {{.*}} ashr
  84. %C0 = ashr <4 x i32> undef, undef
  85. ; AVX: cost of 6 {{.*}} ashr
  86. ; AVX2: cost of 20 {{.*}} ashr
  87. %C1 = ashr <2 x i64> undef, undef
  88. ret void
  89. }
  90. ; AVX: avx2shift
  91. ; AVX2: avx2shift
  92. define void @avx2shift() {
  93. ; AVX: cost of 2 {{.*}} shl
  94. ; AVX2: cost of 1 {{.*}} shl
  95. %A0 = shl <8 x i32> undef, undef
  96. ; AVX: cost of 2 {{.*}} shl
  97. ; AVX2: cost of 1 {{.*}} shl
  98. %A1 = shl <4 x i64> undef, undef
  99. ; AVX: cost of 2 {{.*}} lshr
  100. ; AVX2: cost of 1 {{.*}} lshr
  101. %B0 = lshr <8 x i32> undef, undef
  102. ; AVX: cost of 2 {{.*}} lshr
  103. ; AVX2: cost of 1 {{.*}} lshr
  104. %B1 = lshr <4 x i64> undef, undef
  105. ; AVX: cost of 2 {{.*}} ashr
  106. ; AVX2: cost of 1 {{.*}} ashr
  107. %C0 = ashr <8 x i32> undef, undef
  108. ; AVX: cost of 12 {{.*}} ashr
  109. ; AVX2: cost of 40 {{.*}} ashr
  110. %C1 = ashr <4 x i64> undef, undef
  111. ret void
  112. }