memdep.ll 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
  2. ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
  3. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  4. ; Vectorization with dependence checks.
  5. ; No plausible dependence - can be vectorized.
  6. ; for (i = 0; i < 1024; ++i)
  7. ; A[i] = A[i + 1] + 1;
  8. ; CHECK-LABEL: @f1_vec(
  9. ; CHECK: <2 x i32>
  ; f1_vec: for i = 0..1023, load A[i+1], add 1, and store the result to A[i].
  ; Every load reads an element that only a later iteration writes, so no
  ; iteration consumes a value produced by an earlier one.
  10. define void @f1_vec(i32* %A) {
  11. entry:
  12. br label %for.body
  13. for.body:
  ; i32 induction variable starting at 0; the incremented value is computed
  ; first because it is also the load index.
  14. %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  15. %indvars.iv.next = add i32 %indvars.iv, 1
  ; Load A[i+1] and add 1.
  16. %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
  17. %0 = load i32, i32* %arrayidx, align 4
  18. %add1 = add nsw i32 %0, 1
  ; Store the result to A[i].
  19. %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  20. store i32 %add1, i32* %arrayidx3, align 4
  ; Keep looping until the incremented index reaches 1024.
  21. %exitcond = icmp ne i32 %indvars.iv.next, 1024
  22. br i1 %exitcond, label %for.body, label %for.end
  23. for.end:
  24. ret void
  25. }
  26. ; Plausible dependence of distance 1 - can't be vectorized.
  27. ; for (i = 0; i < 1024; ++i)
  28. ; A[i+1] = A[i] + 1;
  29. ; CHECK-LABEL: @f2_novec(
  30. ; CHECK-NOT: <2 x i32>
  ; f2_novec: for i = 0..1023, load A[i], add 1, and store the result to A[i+1].
  ; The element stored by one iteration is the element loaded by the next, so
  ; consecutive iterations are linked by a store-to-load dependence of distance 1.
  31. define void @f2_novec(i32* %A) {
  32. entry:
  33. br label %for.body
  34. for.body:
  35. %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Load A[i] and add 1.
  36. %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  37. %0 = load i32, i32* %arrayidx, align 4
  38. %add = add nsw i32 %0, 1
  ; The incremented index is both the store index and the next induction value.
  39. %indvars.iv.next = add i32 %indvars.iv, 1
  ; Store the result to A[i+1].
  40. %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
  41. store i32 %add, i32* %arrayidx3, align 4
  ; Keep looping until the incremented index reaches 1024.
  42. %exitcond = icmp ne i32 %indvars.iv.next, 1024
  43. br i1 %exitcond, label %for.body, label %for.end
  44. for.end:
  45. ret void
  46. }
  47. ; Plausible dependence of distance 2 - can be vectorized with a width of 2.
  48. ; for (i = 0; i < 1024; ++i)
  49. ; A[i+2] = A[i] + 1;
  50. ; CHECK-LABEL: @f3_vec_len(
  51. ; CHECK: <2 x i32>
  52. ; WIDTH: f3_vec_len
  53. ; WIDTH-NOT: <4 x i32>
  ; f3_vec_len: for i = 0..1023, load A[i], add 1, and store the result to
  ; A[i+2]. The stored element is loaded again two iterations later, so the
  ; store-to-load dependence distance is 2.
  54. define void @f3_vec_len(i32* %A) {
  55. entry:
  56. br label %for.body
  57. for.body:
  ; i32 loop counter; it is sign-extended to i64 wherever it is used as an index.
  58. %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Load A[i] and add 1.
  59. %idxprom = sext i32 %i.01 to i64
  60. %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  61. %0 = load i32, i32* %arrayidx, align 4
  62. %add = add nsw i32 %0, 1
  ; Store the result to A[i+2].
  63. %add1 = add nsw i32 %i.01, 2
  64. %idxprom2 = sext i32 %add1 to i64
  65. %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
  66. store i32 %add, i32* %arrayidx3, align 4
  ; Increment and keep looping while the new counter is (signed) below 1024.
  67. %inc = add nsw i32 %i.01, 1
  68. %cmp = icmp slt i32 %inc, 1024
  69. br i1 %cmp, label %for.body, label %for.end
  70. for.end:
  71. ret void
  72. }
  73. ; Plausible dependence of distance 1 - cannot be vectorized (without reordering
  74. ; accesses).
  75. ; for (i = 0; i < 1024; ++i) {
  76. ; B[i] = A[i];
  77. ; A[i] = B[i + 1];
  78. ; }
  79. ; CHECK-LABEL: @f5(
  80. ; CHECK-NOT: <2 x i32>
  ; f5: for i = 0..1023, copy A[i] into B[i], then copy B[i+1] into A[i].
  ; Neither pointer argument carries a noalias attribute. Within one iteration
  ; the store to B[i] precedes the load of B[i+1]; the following iteration then
  ; stores to that same B[i+1] element.
  81. define void @f5(i32* %A, i32* %B) {
  82. entry:
  83. br label %for.body
  84. for.body:
  85. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; B[i] = A[i]
  86. %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  87. %0 = load i32, i32* %arrayidx, align 4
  88. %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  89. store i32 %0, i32* %arrayidx2, align 4
  ; A[i] = B[i+1]; the store reuses the A[i] address computed above.
  90. %indvars.iv.next = add nsw i64 %indvars.iv, 1
  91. %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv.next
  92. %1 = load i32, i32* %arrayidx4, align 4
  93. store i32 %1, i32* %arrayidx, align 4
  ; The exit test compares the i64 induction value truncated to i32 against 1024.
  94. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  95. %exitcond = icmp ne i32 %lftr.wideiv, 1024
  96. br i1 %exitcond, label %for.body, label %for.end
  97. for.end:
  98. ret void
  99. }
  100. ; Dependence through a phi node - must not vectorize.
  101. ; for (i = 0; i < 1024; ++i) {
  102. ; a[i+1] = tmp;
  103. ; tmp = a[i];
  104. ; }
  105. ; CHECK-LABEL: @f6(
  106. ; CHECK-NOT: <2 x i32>
  ; f6: for i = 0..1023, store a carried value to a[i+1], then load a[i] as the
  ; value to carry into the next iteration. The carried value starts as the
  ; %tmp argument and travels between iterations through a phi node rather than
  ; through an address computation. The function returns undef.
  107. define i32 @f6(i32* %a, i32 %tmp) {
  108. entry:
  109. br label %for.body
  110. for.body:
  111. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Carried value: %tmp on entry, afterwards the a[i] loaded by the previous
  ; iteration (%0 is defined further down in this block).
  112. %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]
  ; a[i+1] = carried value
  113. %indvars.iv.next = add nsw i64 %indvars.iv, 1
  114. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
  115. store i32 %tmp.addr.08, i32* %arrayidx, align 4
  ; Load a[i] after the store; this becomes the next iteration's carried value.
  116. %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  117. %0 = load i32, i32* %arrayidx3, align 4
  ; The exit test compares the i64 induction value truncated to i32 against 1024.
  118. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  119. %exitcond = icmp ne i32 %lftr.wideiv, 1024
  120. br i1 %exitcond, label %for.body, label %for.end
  121. for.end:
  122. ret i32 undef
  123. }
  124. ; Don't vectorize true loop-carried dependences whose distance is not a
  125. ; multiple of the vector width.
  126. ; Example:
  127. ; for (int i = ...; ++i) {
  128. ; a[i] = a[i-3] + ...; }
  129. ; It is a bad idea to vectorize this loop because store-load forwarding will not
  130. ; happen.
  131. ;
  132. ; CHECK-LABEL: @nostoreloadforward(
  133. ; CHECK-NOT: <2 x i32>
  ; nostoreloadforward: for i = 16..127, store A[i-3] + A[i+4] to A[i].
  ; The A[i-3] operand is the element stored three iterations earlier, so the
  ; loop has a store-to-load dependence of distance 3.
  134. define void @nostoreloadforward(i32* %A) {
  135. entry:
  136. br label %for.body
  137. for.body:
  ; The induction variable starts at 16, not 0.
  138. %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
  ; Load A[i-3].
  139. %0 = add nsw i64 %indvars.iv, -3
  140. %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
  141. %1 = load i32, i32* %arrayidx, align 4
  ; Load A[i+4].
  142. %2 = add nsw i64 %indvars.iv, 4
  143. %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2
  144. %3 = load i32, i32* %arrayidx2, align 4
  ; A[i] = A[i+4] + A[i-3]
  145. %add3 = add nsw i32 %3, %1
  146. %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  147. store i32 %add3, i32* %arrayidx5, align 4
  ; The exit test compares the i64 induction value truncated to i32 against 128.
  148. %indvars.iv.next = add i64 %indvars.iv, 1
  149. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  150. %exitcond = icmp ne i32 %lftr.wideiv, 128
  151. br i1 %exitcond, label %for.body, label %for.end
  152. for.end:
  153. ret void
  154. }
  155. ; Example:
  156. ; for (int i = ...; ++i) {
  157. ; a[i] = b[i];
  158. ; c[i] = a[i-3]; }
  159. ; It is a bad idea to vectorize this loop because store-load forwarding will not
  160. ; happen.
  161. ;
  162. ; CHECK-LABEL: @nostoreloadforward2(
  163. ; CHECK-NOT: <2 x i32>
  ; nostoreloadforward2: for i = 16..127, copy B[i] into A[i], then copy A[i-3]
  ; into C[i]. All three pointer arguments are marked noalias, so the only
  ; dependence is within A: the A[i-3] load reads the element stored three
  ; iterations earlier.
  164. define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {
  165. entry:
  166. br label %for.body
  167. for.body:
  ; The induction variable starts at 16, not 0.
  168. %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
  ; A[i] = B[i]
  169. %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  170. %0 = load i32, i32* %arrayidx, align 4
  171. %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  172. store i32 %0, i32* %arrayidx2, align 4
  ; C[i] = A[i-3]
  173. %1 = add nsw i64 %indvars.iv, -3
  174. %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
  175. %2 = load i32, i32* %arrayidx4, align 4
  176. %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
  177. store i32 %2, i32* %arrayidx6, align 4
  ; The exit test compares the i64 induction value truncated to i32 against 128.
  178. %indvars.iv.next = add i64 %indvars.iv, 1
  179. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  180. %exitcond = icmp ne i32 %lftr.wideiv, 128
  181. br i1 %exitcond, label %for.body, label %for.end
  182. for.end:
  183. ret void
  184. }