vselect-cost.ll 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
  2. ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
  3. ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
  4. ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
  5. ; Verify the cost of vector select instructions.
  6. ; SSE41 added blend instructions with an immediate for <2 x double> and
  7. ; <4 x float>. Integers of the same size should also use those instructions.
  8. define <2 x i64> @test_2i64(<2 x i64> %a, <2 x i64> %b) {
  ; Cost of a <2 x i64> select between %a and %b whose condition is the
  ; constant mask <true, false>. Every checked configuration (SSE2, SSE41,
  ; AVX, AVX2) expects the cost model to report 1 for the select.
  9. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2i64':
  10. ; SSE2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  11. ; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  12. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  13. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  14. %sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
  15. ret <2 x i64> %sel
  16. }
  17. define <2 x double> @test_2double(<2 x double> %a, <2 x double> %b) {
  ; Cost of a <2 x double> select between %a and %b whose condition is the
  ; constant mask <true, false>. Every checked configuration (SSE2, SSE41,
  ; AVX, AVX2) expects the cost model to report 1 for the select.
  18. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2double':
  19. ; SSE2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  20. ; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  21. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  22. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
  23. %sel = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
  24. ret <2 x double> %sel
  25. }
  26. define <4 x i32> @test_4i32(<4 x i32> %a, <4 x i32> %b) {
  ; Cost of a <4 x i32> select between %a and %b whose condition is the
  ; constant mask <true, false, true, false>. Every checked configuration
  ; (SSE2, SSE41, AVX, AVX2) expects the cost model to report 1.
  27. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i32':
  28. ; SSE2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  29. ; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  30. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  31. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  32. %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
  33. ret <4 x i32> %sel
  34. }
  35. define <4 x float> @test_4float(<4 x float> %a, <4 x float> %b) {
  ; Cost of a <4 x float> select between %a and %b whose condition is the
  ; constant mask <true, false, true, true>. Every checked configuration
  ; (SSE2, SSE41, AVX, AVX2) expects the cost model to report 1.
  36. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4float':
  37. ; SSE2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  38. ; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  39. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  40. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  41. %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  42. ret <4 x float> %sel
  43. }
  44. define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) {
  ; Cost of a <16 x i8> select between %a and %b. The constant mask repeats
  ; the four-lane pattern <true, false, true, true> four times. Every checked
  ; configuration (SSE2, SSE41, AVX, AVX2) expects the cost model to report 1.
  45. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i8':
  46. ; SSE2: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  47. ; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  48. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  49. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  50. %sel = select <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <16 x i8> %a, <16 x i8> %b
  51. ret <16 x i8> %sel
  52. }
  53. ; AVX added blend instructions with an immediate for <4 x double> and
  54. ; <8 x float>. Integers of the same size should also use those instructions.
  55. define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) {
  ; Cost of a <4 x i64> select between %a and %b whose condition is the
  ; constant mask <true, false, false, true>. The SSE2 and SSE41 runs expect
  ; a reported cost of 2; the AVX and AVX2 runs expect 1.
  56. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i64':
  57. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
  58. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
  59. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  60. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  61. %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i64> %a, <4 x i64> %b
  62. ret <4 x i64> %sel
  63. }
  64. define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) {
  ; Cost of a <4 x double> select between %a and %b whose condition is the
  ; constant mask <true, false, true, false>. The SSE2 and SSE41 runs expect
  ; a reported cost of 2; the AVX and AVX2 runs expect 1.
  65. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4double':
  66. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
  67. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
  68. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  69. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
  70. %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a, <4 x double> %b
  71. ret <4 x double> %sel
  72. }
  73. define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) {
  ; Cost of an <8 x i32> select between %a and %b whose condition is the
  ; constant mask <true, false, true, true, true, false, true, false>.
  ; The SSE2 and SSE41 runs expect a reported cost of 2; the AVX and AVX2
  ; runs expect 1.
  74. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8i32':
  75. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
  76. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
  77. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
  78. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
  79. %sel = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a, <8 x i32> %b
  80. ret <8 x i32> %sel
  81. }
  82. define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) {
  ; Cost of an <8 x float> select between %a and %b. The constant mask repeats
  ; the four-lane pattern <true, false, false, false> twice. The SSE2 and
  ; SSE41 runs expect a reported cost of 2; the AVX and AVX2 runs expect 1.
  83. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8float':
  84. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
  85. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
  86. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
  87. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
  88. %sel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %a, <8 x float> %b
  89. ret <8 x float> %sel
  90. }
  91. ; AVX2 section. The remaining tests select between 256-bit vectors of i16 and i8 elements.
  92. define <16 x i16> @test_16i16(<16 x i16> %a, <16 x i16> %b) {
  ; Cost of a <16 x i16> select between %a and %b. The constant mask repeats
  ; the four-lane pattern <true, false, false, false> four times. The SSE2 and
  ; SSE41 runs expect a reported cost of 2; the AVX and AVX2 runs expect 1.
  93. ; CHECK:Printing analysis 'Cost Model Analysis' for function 'test_16i16':
  94. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <16 x i1>
  95. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <16 x i1>
  96. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  97. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
  98. %sel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i16> %a, <16 x i16> %b
  99. ret <16 x i16> %sel
  100. }
  101. define <32 x i8> @test_32i8(<32 x i8> %a, <32 x i8> %b) {
  ; Cost of a <32 x i8> select between %a and %b. The constant mask repeats
  ; the four-lane pattern <true, false, true, true> eight times. The SSE2 and
  ; SSE41 runs expect a reported cost of 2; the AVX and AVX2 runs expect 1.
  102. ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_32i8':
  103. ; SSE2: Cost Model: {{.*}} 2 for instruction: %sel = select <32 x i1>
  104. ; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <32 x i1>
  105. ; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <32 x i1>
  106. ; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <32 x i1>
  107. %sel = select <32 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <32 x i8> %a, <32 x i8> %b
  108. ret <32 x i8> %sel
  109. }