; tiny-tree.ll — SLP vectorizer "tiny tree" tests: fully-vectorizable
; consecutive load/store trees versus trees with gathered (non-consecutive)
; loads that must not be vectorized.
  1. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  2. target triple = "x86_64-apple-macosx10.8.0"
  3. ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  4. ; CHECK: tiny_tree_fully_vectorizable
  5. ; CHECK: load <2 x double>
  6. ; CHECK: store <2 x double>
  7. ; CHECK: ret
  8. define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
  9. entry:
  10. %cmp12 = icmp eq i64 %count, 0
  11. br i1 %cmp12, label %for.end, label %for.body
  12. for.body: ; preds = %entry, %for.body
  13. %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  14. %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
  15. %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
  16. %0 = load double, double* %src.addr.013, align 8
  17. store double %0, double* %dst.addr.014, align 8
  18. %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1
  19. %1 = load double, double* %arrayidx2, align 8
  20. %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
  21. store double %1, double* %arrayidx3, align 8
  22. %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
  23. %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
  24. %inc = add i64 %i.015, 1
  25. %exitcond = icmp eq i64 %inc, %count
  26. br i1 %exitcond, label %for.end, label %for.body
  27. for.end: ; preds = %for.body, %entry
  28. ret void
  29. }
  30. ; CHECK: tiny_tree_fully_vectorizable2
  31. ; CHECK: load <4 x float>
  32. ; CHECK: store <4 x float>
  33. ; CHECK: ret
  34. define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
  35. entry:
  36. %cmp20 = icmp eq i64 %count, 0
  37. br i1 %cmp20, label %for.end, label %for.body
  38. for.body: ; preds = %entry, %for.body
  39. %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  40. %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
  41. %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
  42. %0 = load float, float* %src.addr.021, align 4
  43. store float %0, float* %dst.addr.022, align 4
  44. %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1
  45. %1 = load float, float* %arrayidx2, align 4
  46. %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
  47. store float %1, float* %arrayidx3, align 4
  48. %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
  49. %2 = load float, float* %arrayidx4, align 4
  50. %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
  51. store float %2, float* %arrayidx5, align 4
  52. %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
  53. %3 = load float, float* %arrayidx6, align 4
  54. %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
  55. store float %3, float* %arrayidx7, align 4
  56. %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
  57. %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
  58. %inc = add i64 %i.023, 1
  59. %exitcond = icmp eq i64 %inc, %count
  60. br i1 %exitcond, label %for.end, label %for.body
  61. for.end: ; preds = %for.body, %entry
  62. ret void
  63. }
  64. ; We do not vectorize the tiny tree which is not fully vectorizable.
  65. ; CHECK: tiny_tree_not_fully_vectorizable
  66. ; CHECK-NOT: <2 x double>
  67. ; CHECK: ret
  68. define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
  69. entry:
  70. %cmp12 = icmp eq i64 %count, 0
  71. br i1 %cmp12, label %for.end, label %for.body
  72. for.body: ; preds = %entry, %for.body
  73. %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  74. %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
  75. %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
  76. %0 = load double, double* %src.addr.013, align 8
  77. store double %0, double* %dst.addr.014, align 8
  78. %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
  79. %1 = load double, double* %arrayidx2, align 8
  80. %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
  81. store double %1, double* %arrayidx3, align 8
  82. %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
  83. %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
  84. %inc = add i64 %i.015, 1
  85. %exitcond = icmp eq i64 %inc, %count
  86. br i1 %exitcond, label %for.end, label %for.body
  87. for.end: ; preds = %for.body, %entry
  88. ret void
  89. }
  90. ; CHECK: tiny_tree_not_fully_vectorizable2
  91. ; CHECK-NOT: <2 x double>
  92. ; CHECK: ret
  93. define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
  94. entry:
  95. %cmp20 = icmp eq i64 %count, 0
  96. br i1 %cmp20, label %for.end, label %for.body
  97. for.body: ; preds = %entry, %for.body
  98. %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  99. %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
  100. %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
  101. %0 = load float, float* %src.addr.021, align 4
  102. store float %0, float* %dst.addr.022, align 4
  103. %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
  104. %1 = load float, float* %arrayidx2, align 4
  105. %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
  106. store float %1, float* %arrayidx3, align 4
  107. %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
  108. %2 = load float, float* %arrayidx4, align 4
  109. %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
  110. store float %2, float* %arrayidx5, align 4
  111. %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
  112. %3 = load float, float* %arrayidx6, align 4
  113. %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
  114. store float %3, float* %arrayidx7, align 4
  115. %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
  116. %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
  117. %inc = add i64 %i.023, 1
  118. %exitcond = icmp eq i64 %inc, %count
  119. br i1 %exitcond, label %for.end, label %for.body
  120. for.end: ; preds = %for.body, %entry
  121. ret void
  122. }
  123. ; CHECK-LABEL: store_splat
  124. ; CHECK: store <4 x float>
  125. define void @store_splat(float*, float) {
  126. %3 = getelementptr inbounds float, float* %0, i64 0
  127. store float %1, float* %3, align 4
  128. %4 = getelementptr inbounds float, float* %0, i64 1
  129. store float %1, float* %4, align 4
  130. %5 = getelementptr inbounds float, float* %0, i64 2
  131. store float %1, float* %5, align 4
  132. %6 = getelementptr inbounds float, float* %0, i64 3
  133. store float %1, float* %6, align 4
  134. ret void
  135. }
  136. ; CHECK-LABEL: store_const
  137. ; CHECK: store <4 x i32>
  138. define void @store_const(i32* %a) {
  139. entry:
  140. %ptr0 = getelementptr inbounds i32, i32* %a, i64 0
  141. store i32 10, i32* %ptr0, align 4
  142. %ptr1 = getelementptr inbounds i32, i32* %a, i64 1
  143. store i32 30, i32* %ptr1, align 4
  144. %ptr2 = getelementptr inbounds i32, i32* %a, i64 2
  145. store i32 20, i32* %ptr2, align 4
  146. %ptr3 = getelementptr inbounds i32, i32* %a, i64 3
  147. store i32 40, i32* %ptr3, align 4
  148. ret void
  149. }