2
0

reductions.ll 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. ; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
  2. @A = common global [500 x [500 x i32]] zeroinitializer
  3. @X = common global i32 0
  4. @B = common global [500 x [500 x i32]] zeroinitializer
  5. @Y = common global i32 0
  6. ;; for( int i=1;i<N;i++)
  7. ;; for( int j=1;j<N;j++)
  8. ;; X+=A[j][i];
  9. define void @reduction_01(i32 %N) {
  10. entry:
  11. %cmp16 = icmp sgt i32 %N, 1
  12. br i1 %cmp16, label %for.body3.lr.ph, label %for.end8
  13. for.body3.lr.ph: ; preds = %entry, %for.cond1.for.inc6_crit_edge
  14. %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]
  15. %X.promoted = load i32, i32* @X
  16. br label %for.body3
  17. for.body3: ; preds = %for.body3, %for.body3.lr.ph
  18. %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
  19. %add15 = phi i32 [ %X.promoted, %for.body3.lr.ph ], [ %add, %for.body3 ]
  20. %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
  21. %0 = load i32, i32* %arrayidx5
  22. %add = add nsw i32 %add15, %0
  23. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  24. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  25. %exitcond = icmp eq i32 %lftr.wideiv, %N
  26. br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3
  27. for.cond1.for.inc6_crit_edge: ; preds = %for.body3
  28. store i32 %add, i32* @X
  29. %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
  30. %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
  31. %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
  32. br i1 %exitcond21, label %for.end8, label %for.body3.lr.ph
  33. for.end8: ; preds = %for.cond1.for.inc6_crit_edge, %entry
  34. ret void
  35. }
  36. ;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
  37. ; CHECK-LABEL: @reduction_01
  38. ; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3.split
  39. ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
  40. ; CHECK: br label %for.body3.lr.ph.preheader
  41. ; CHECK: %add = add nsw i32 %X.promoted
  42. ;; Test for more than 1 reductions inside a loop.
  43. ;; for( int i=1;i<N;i++)
  44. ;; for( int j=1;j<N;j++)
  45. ;; for( int k=1;k<N;k++) {
  46. ;; X+=A[k][j];
  47. ;; Y+=B[k][i];
  48. ;; }
  49. define void @reduction_02(i32 %N) {
  50. entry:
  51. %cmp34 = icmp sgt i32 %N, 1
  52. br i1 %cmp34, label %for.cond4.preheader.preheader, label %for.end19
  53. for.cond4.preheader.preheader: ; preds = %entry, %for.inc17
  54. %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.inc17 ], [ 1, %entry ]
  55. br label %for.body6.lr.ph
  56. for.body6.lr.ph: ; preds = %for.cond4.for.inc14_crit_edge, %for.cond4.preheader.preheader
  57. %indvars.iv36 = phi i64 [ %indvars.iv.next37, %for.cond4.for.inc14_crit_edge ], [ 1, %for.cond4.preheader.preheader ]
  58. %X.promoted = load i32, i32* @X
  59. %Y.promoted = load i32, i32* @Y
  60. br label %for.body6
  61. for.body6: ; preds = %for.body6, %for.body6.lr.ph
  62. %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
  63. %add1331 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add13, %for.body6 ]
  64. %add30 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
  65. %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv36
  66. %0 = load i32, i32* %arrayidx8
  67. %add = add nsw i32 %add30, %0
  68. %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv40
  69. %1 = load i32, i32* %arrayidx12
  70. %add13 = add nsw i32 %add1331, %1
  71. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  72. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  73. %exitcond = icmp eq i32 %lftr.wideiv, %N
  74. br i1 %exitcond, label %for.cond4.for.inc14_crit_edge, label %for.body6
  75. for.cond4.for.inc14_crit_edge: ; preds = %for.body6
  76. store i32 %add, i32* @X
  77. store i32 %add13, i32* @Y
  78. %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, 1
  79. %lftr.wideiv38 = trunc i64 %indvars.iv.next37 to i32
  80. %exitcond39 = icmp eq i32 %lftr.wideiv38, %N
  81. br i1 %exitcond39, label %for.inc17, label %for.body6.lr.ph
  82. for.inc17: ; preds = %for.cond4.for.inc14_crit_edge
  83. %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
  84. %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32
  85. %exitcond43 = icmp eq i32 %lftr.wideiv42, %N
  86. br i1 %exitcond43, label %for.end19, label %for.cond4.preheader.preheader
  87. for.end19: ; preds = %for.inc17, %entry
  88. ret void
  89. }
  90. ;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
  91. ; CHECK-LABEL: @reduction_02
  92. ; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6.split
  93. ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6.split ], [ 1, %for.body6.preheader ]
  94. ; CHECK: br label %for.cond4.preheader.preheader.preheader
  95. ; CHECK: %add13 = add nsw i32 %Y.promoted
  96. ;; Not tightly nested. Do not interchange.
  97. ;; for( int i=1;i<N;i++)
  98. ;; for( int j=1;j<N;j++) {
  99. ;; for( int k=1;k<N;k++) {
  100. ;; X+=A[k][j];
  101. ;; }
  102. ;; Y+=B[j][i];
  103. ;; }
  104. define void @reduction_03(i32 %N) {
  105. entry:
  106. %cmp35 = icmp sgt i32 %N, 1
  107. br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19
  108. for.cond4.preheader.lr.ph: ; preds = %entry, %for.cond1.for.inc17_crit_edge
  109. %indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ]
  110. %Y.promoted = load i32, i32* @Y
  111. br label %for.body6.lr.ph
  112. for.body6.lr.ph: ; preds = %for.cond4.preheader.lr.ph, %for.cond4.for.end_crit_edge
  113. %indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ]
  114. %add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ]
  115. %X.promoted = load i32, i32* @X
  116. br label %for.body6
  117. for.body6: ; preds = %for.body6, %for.body6.lr.ph
  118. %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
  119. %add31 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
  120. %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv37
  121. %0 = load i32, i32* %arrayidx8
  122. %add = add nsw i32 %add31, %0
  123. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  124. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  125. %exitcond = icmp eq i32 %lftr.wideiv, %N
  126. br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6
  127. for.cond4.for.end_crit_edge: ; preds = %for.body6
  128. store i32 %add, i32* @X
  129. %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41
  130. %1 = load i32, i32* %arrayidx12
  131. %add13 = add nsw i32 %add1334, %1
  132. %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1
  133. %lftr.wideiv39 = trunc i64 %indvars.iv.next38 to i32
  134. %exitcond40 = icmp eq i32 %lftr.wideiv39, %N
  135. br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph
  136. for.cond1.for.inc17_crit_edge: ; preds = %for.cond4.for.end_crit_edge
  137. store i32 %add13, i32* @Y
  138. %indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1
  139. %lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32
  140. %exitcond44 = icmp eq i32 %lftr.wideiv43, %N
  141. br i1 %exitcond44, label %for.end19, label %for.cond4.preheader.lr.ph
  142. for.end19: ; preds = %for.cond1.for.inc17_crit_edge, %entry
  143. ret void
  144. }
  145. ;; Not tightly nested. Do not interchange.
  146. ;; Not interchanged hence the phi's in the inner loop will not be split. Check for the same.
  147. ; CHECK-LABEL: @reduction_03
  148. ; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6
  149. ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6 ], [ 1, %for.body6.preheader ]
  150. ; CHECK: %add31 = phi i32 [ %add, %for.body6 ], [ %X.promoted, %for.body6.preheader ]
  151. ;; Multiple use of reduction not safe. Do not interchange.
  152. ;; for( int i=1;i<N;i++)
  153. ;; for( int j=1;j<N;j++)
  154. ;; for( int k=1;k<N;k++) {
  155. ;; X+=A[k][j];
  156. ;; Y+=X;
  157. ;; }
  158. define void @reduction_04(i32 %N) {
  159. entry:
  160. %cmp28 = icmp sgt i32 %N, 1
  161. br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.end15
  162. for.cond4.preheader.preheader: ; preds = %entry, %for.inc13
  163. %i.029 = phi i32 [ %inc14, %for.inc13 ], [ 1, %entry ]
  164. br label %for.body6.lr.ph
  165. for.body6.lr.ph: ; preds = %for.cond4.for.inc10_crit_edge, %for.cond4.preheader.preheader
  166. %indvars.iv30 = phi i64 [ %indvars.iv.next31, %for.cond4.for.inc10_crit_edge ], [ 1, %for.cond4.preheader.preheader ]
  167. %X.promoted = load i32, i32* @X
  168. %Y.promoted = load i32, i32* @Y
  169. br label %for.body6
  170. for.body6: ; preds = %for.body6, %for.body6.lr.ph
  171. %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
  172. %add925 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add9, %for.body6 ]
  173. %add24 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
  174. %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
  175. %0 = load i32, i32* %arrayidx8
  176. %add = add nsw i32 %add24, %0
  177. %add9 = add nsw i32 %add925, %add
  178. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  179. %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  180. %exitcond = icmp eq i32 %lftr.wideiv, %N
  181. br i1 %exitcond, label %for.cond4.for.inc10_crit_edge, label %for.body6
  182. for.cond4.for.inc10_crit_edge: ; preds = %for.body6
  183. store i32 %add, i32* @X
  184. store i32 %add9, i32* @Y
  185. %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
  186. %lftr.wideiv32 = trunc i64 %indvars.iv.next31 to i32
  187. %exitcond33 = icmp eq i32 %lftr.wideiv32, %N
  188. br i1 %exitcond33, label %for.inc13, label %for.body6.lr.ph
  189. for.inc13: ; preds = %for.cond4.for.inc10_crit_edge
  190. %inc14 = add nuw nsw i32 %i.029, 1
  191. %exitcond34 = icmp eq i32 %inc14, %N
  192. br i1 %exitcond34, label %for.end15, label %for.cond4.preheader.preheader
  193. for.end15: ; preds = %for.inc13, %entry
  194. ret void
  195. }
  196. ;; Not interchanged hence the phi's in the inner loop will not be split. Check for the same.
  197. ; CHECK-LABEL: @reduction_04
  198. ; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6
  199. ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6 ], [ 1, %for.body6.preheader ]
  200. ; CHECK: %add925 = phi i32 [ %add9, %for.body6 ], [ %Y.promoted, %for.body6.preheader ]