; x86-insertps.ll
  ; RUN: opt < %s -instcombine -S | FileCheck %s

  ; Intrinsic under test. The functions in this file call it with two
  ; <4 x float> operands and an i8 control byte as the third operand.
  declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
  ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
  ; The control byte %c is a function argument rather than a constant, so the
  ; expected output keeps the intrinsic call exactly as written.
  define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_non_const_imm
  ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ; CHECK-NEXT: ret <4 x float>
  }
  ; If all zero mask bits are set, return a zero regardless of the other control bits.
  ; Control byte 0x0f (15): the low four bits are all set and the upper four are
  ; clear. The expected output is a direct return of zeroinitializer.
  define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x0f
  ; CHECK-NEXT: ret <4 x float> zeroinitializer
  }
  ; Control byte 0xff (255): every bit is set, including the low four. The
  ; expected output is still a direct return of zeroinitializer.
  define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xff
  ; CHECK-NEXT: ret <4 x float> zeroinitializer
  }
  ; If some zero mask bits are set that do not override the insertion, we do not change anything.
  ; Control byte 0x0c (12): only bits 2 and 3 of the low nibble are set and the
  ; two operands are distinct. The expected output keeps the call untouched.
  define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x0c
  ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ; CHECK-NEXT: ret <4 x float>
  }
  ; ...unless both input vectors are the same operand.
  ; Control byte 0x15 (21) with %v1 passed as both vector operands. In the
  ; expected shuffle, lanes 0 and 2 read zeros from the constant vector
  ; (indices 4 and 6), lane 1 reads element 0 of %v1, and lane 3 keeps element 3.
  define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x15_single_input
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; The zero mask overrides the insertion lane.
  ; Control byte 0x1a (26) with %v1 passed as both vector operands. In the
  ; expected shuffle, lanes 1 and 3 read zeros from the constant vector
  ; (indices 5 and 7) while lanes 0 and 2 keep the matching elements of %v1.
  define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x1a_single_input
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; The zero mask overrides the insertion lane, so the second input vector is not used.
  ; Control byte 0xc1 (193). In the expected shuffle, lane 0 reads a zero from
  ; the constant vector (index 4), lanes 1-3 keep %v1, and %v2 does not appear.
  define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xc1
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; If no zero mask bits are set, convert to a shuffle.
  ; Control byte 0x00. The expected shuffle puts element 0 of %v2 (index 4)
  ; into lane 0 and keeps lanes 1-3 of %v1.
  define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x00
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0x10 (16). The expected shuffle puts element 0 of %v2
  ; (index 4) into lane 1 and keeps lanes 0, 2 and 3 of %v1.
  define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x10
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0x20 (32). The expected shuffle puts element 0 of %v2
  ; (index 4) into lane 2 and keeps lanes 0, 1 and 3 of %v1.
  define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x20
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0x30 (48). The expected shuffle puts element 0 of %v2
  ; (index 4) into lane 3 and keeps lanes 0-2 of %v1.
  define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0x30
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0xc0 (192). The expected shuffle puts element 3 of %v2
  ; (index 7) into lane 0 and keeps lanes 1-3 of %v1.
  define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xc0
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0xd0 (208). The expected shuffle puts element 3 of %v2
  ; (index 7) into lane 1 and keeps lanes 0, 2 and 3 of %v1.
  define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xd0
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0xe0 (224). The expected shuffle puts element 3 of %v2
  ; (index 7) into lane 2 and keeps lanes 0, 1 and 3 of %v1.
  define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xe0
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
  ; CHECK-NEXT: ret <4 x float>
  }
  ; Control byte 0xf0 (240). The expected shuffle puts element 3 of %v2
  ; (index 7) into lane 3 and keeps lanes 0-2 of %v1.
  define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
    %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
    ret <4 x float> %res
  ; CHECK-LABEL: @insertps_0xf0
  ; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ; CHECK-NEXT: ret <4 x float>
  }