unroll-pragmas.ll 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. ; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
  2. ; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
  3. ;
  4. ; Run loop unrolling twice to verify that loop unrolling metadata is properly
  5. ; removed and further unrolling is disabled after the pass is run once.
  6. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; module-wide data layout string shared by every function in this test
  7. target triple = "x86_64-unknown-linux-gnu" ; module-wide target triple (x86_64, Linux, GNU environment)
  8. ; loop4 contains a small loop which should be completely unrolled by
  9. ; the default unrolling heuristics. It serves as a control for the
  10. ; unroll(disable) pragma test loop4_with_disable.
  11. ;
  12. ; CHECK-LABEL: @loop4(
  13. ; CHECK-NOT: br i1
  14. define void @loop4(i32* nocapture %a) { ; control case: constant trip count of 4, no loop metadata on the latch
  15. entry:
  16. br label %for.body
  17. for.body: ; preds = %for.body, %entry
  18. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  19. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  20. %0 = load i32, i32* %arrayidx, align 4
  21. %inc = add nsw i32 %0, 1
  22. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1
  23. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  24. %exitcond = icmp eq i64 %indvars.iv.next, 4 ; true once the next index reaches 4
  25. br i1 %exitcond, label %for.end, label %for.body ; latch: the conditional branch the test expects to disappear
  26. for.end: ; preds = %for.body
  27. ret void
  28. }
  29. ; #pragma clang loop unroll(disable)
  30. ;
  31. ; CHECK-LABEL: @loop4_with_disable(
  32. ; CHECK: store i32
  33. ; CHECK-NOT: store i32
  34. ; CHECK: br i1
  35. define void @loop4_with_disable(i32* nocapture %a) { ; same body as @loop4, but the latch carries loop metadata !1
  36. entry:
  37. br label %for.body
  38. for.body: ; preds = %for.body, %entry
  39. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  40. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  41. %0 = load i32, i32* %arrayidx, align 4
  42. %inc = add nsw i32 %0, 1
  43. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects exactly one such store to remain
  44. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  45. %exitcond = icmp eq i64 %indvars.iv.next, 4 ; true once the next index reaches 4
  46. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 ; latch: !1 attaches the unroll-disable hint to this loop
  47. for.end: ; preds = %for.body
  48. ret void
  49. }
  50. !1 = !{!1, !2} ; loop ID node: first operand refers to itself, second operand is the hint !2
  51. !2 = !{!"llvm.loop.unroll.disable"} ; hint corresponding to "#pragma clang loop unroll(disable)"
  52. ; loop64 has a high enough count that it should *not* be unrolled by
  53. ; the default unrolling heuristic. It serves as the control for the
  54. ; unroll(full) and unroll_count(4) pragma tests (loop64_with_.*) below.
  55. ;
  56. ; CHECK-LABEL: @loop64(
  57. ; CHECK: store i32
  58. ; CHECK-NOT: store i32
  59. ; CHECK: br i1
  60. define void @loop64(i32* nocapture %a) { ; control case: constant trip count of 64, no loop metadata on the latch
  61. entry:
  62. br label %for.body
  63. for.body: ; preds = %for.body, %entry
  64. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  65. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  66. %0 = load i32, i32* %arrayidx, align 4
  67. %inc = add nsw i32 %0, 1
  68. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects exactly one such store to remain
  69. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  70. %exitcond = icmp eq i64 %indvars.iv.next, 64 ; true once the next index reaches 64
  71. br i1 %exitcond, label %for.end, label %for.body ; latch: expected to survive because the loop is left alone
  72. for.end: ; preds = %for.body
  73. ret void
  74. }
  75. ; #pragma clang loop unroll(full)
  76. ; Loop should be fully unrolled.
  77. ;
  78. ; CHECK-LABEL: @loop64_with_full(
  79. ; CHECK-NOT: br i1
  80. define void @loop64_with_full(i32* nocapture %a) { ; same body as @loop64, but the latch carries loop metadata !3
  81. entry:
  82. br label %for.body
  83. for.body: ; preds = %for.body, %entry
  84. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  85. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  86. %0 = load i32, i32* %arrayidx, align 4
  87. %inc = add nsw i32 %0, 1
  88. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1
  89. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  90. %exitcond = icmp eq i64 %indvars.iv.next, 64 ; true once the next index reaches 64
  91. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 ; latch: !3 attaches the full-unroll hint; the test expects no conditional branch afterwards
  92. for.end: ; preds = %for.body
  93. ret void
  94. }
  95. !3 = !{!3, !4} ; loop ID node: first operand refers to itself, second operand is the hint !4
  96. !4 = !{!"llvm.loop.unroll.full"} ; hint corresponding to "#pragma clang loop unroll(full)"; also referenced by !8 and !12
  97. ; #pragma clang loop unroll_count(4)
  98. ; Loop should be unrolled 4 times.
  99. ;
  100. ; CHECK-LABEL: @loop64_with_count4(
  101. ; CHECK: store i32
  102. ; CHECK: store i32
  103. ; CHECK: store i32
  104. ; CHECK: store i32
  105. ; CHECK-NOT: store i32
  106. ; CHECK: br i1
  107. define void @loop64_with_count4(i32* nocapture %a) { ; same body as @loop64, but the latch carries loop metadata !5
  108. entry:
  109. br label %for.body
  110. for.body: ; preds = %for.body, %entry
  111. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  112. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  113. %0 = load i32, i32* %arrayidx, align 4
  114. %inc = add nsw i32 %0, 1
  115. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects four copies of this store
  116. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  117. %exitcond = icmp eq i64 %indvars.iv.next, 64 ; true once the next index reaches 64
  118. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 ; latch: !5 attaches the unroll-count hint to this loop
  119. for.end: ; preds = %for.body
  120. ret void
  121. }
  122. !5 = !{!5, !6} ; loop ID node: first operand refers to itself, second operand is the hint !6
  123. !6 = !{!"llvm.loop.unroll.count", i32 4} ; hint corresponding to "#pragma clang loop unroll_count(4)"; also referenced by !9
  124. ; #pragma clang loop unroll(full)
  125. ; Full unrolling is requested, but loop has a runtime trip count so
  126. ; no unrolling should occur.
  127. ;
  128. ; CHECK-LABEL: @runtime_loop_with_full(
  129. ; CHECK: store i32
  130. ; CHECK-NOT: store i32
  131. define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) { ; trip count comes from %b at run time; latch carries the full-unroll hint via !8
  132. entry:
  133. %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is entered only when %b > 0
  134. br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 ; NOTE(review): this guard branch is outside the loop yet also carries !8 - confirm that is intentional
  135. for.body: ; preds = %entry, %for.body
  136. %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; element index: 0 on entry, incremented value on the back edge
  137. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  138. %0 = load i32, i32* %arrayidx, align 4
  139. %inc = add nsw i32 %0, 1
  140. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects exactly one such store to remain
  141. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  142. %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 bound %b
  143. %exitcond = icmp eq i32 %lftr.wideiv, %b ; true once the next index equals %b
  144. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8 ; latch: !8 attaches the full-unroll hint to this loop
  145. for.end: ; preds = %for.body, %entry
  146. ret void
  147. }
  148. !8 = !{!8, !4} ; loop ID node: first operand refers to itself, second operand reuses the full-unroll hint !4
  149. ; #pragma clang loop unroll_count(4)
  150. ; Loop has a runtime trip count. Runtime unrolling should occur and loop
  151. ; should be duplicated (original and 4x unrolled).
  152. ;
  153. ; CHECK-LABEL: @runtime_loop_with_count4(
  154. ; CHECK: for.body.prol:
  155. ; CHECK: store
  156. ; CHECK-NOT: store
  157. ; CHECK: br i1
  158. ; CHECK: for.body
  159. ; CHECK: store
  160. ; CHECK: store
  161. ; CHECK: store
  162. ; CHECK: store
  163. ; CHECK-NOT: store
  164. ; CHECK: br i1
  165. define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) { ; trip count comes from %b at run time; latch carries the count-4 hint via !9
  166. entry:
  167. %cmp3 = icmp sgt i32 %b, 0 ; guard: the loop is entered only when %b > 0
  168. br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9 ; NOTE(review): this guard branch is outside the loop yet also carries !9 - confirm that is intentional
  169. for.body: ; preds = %entry, %for.body
  170. %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; element index: 0 on entry, incremented value on the back edge
  171. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  172. %0 = load i32, i32* %arrayidx, align 4
  173. %inc = add nsw i32 %0, 1
  174. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; expected once in the prologue block and four times in the unrolled body
  175. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  176. %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the 64-bit index so it can be compared with the i32 bound %b
  177. %exitcond = icmp eq i32 %lftr.wideiv, %b ; true once the next index equals %b
  178. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9 ; latch: !9 attaches the unroll-count hint to this loop
  179. for.end: ; preds = %for.body, %entry
  180. ret void
  181. }
  182. !9 = !{!9, !6} ; loop ID node: first operand refers to itself, second operand reuses the count-4 hint !6
  183. ; #pragma clang loop unroll_count(1)
  184. ; Loop should not be unrolled
  185. ;
  186. ; CHECK-LABEL: @unroll_1(
  187. ; CHECK: store i32
  188. ; CHECK-NOT: store i32
  189. ; CHECK: br i1
  190. define void @unroll_1(i32* nocapture %a, i32 %b) { ; constant trip count of 4 with a count-1 hint via !10; %b is not referenced in the body
  191. entry:
  192. br label %for.body
  193. for.body: ; preds = %for.body, %entry
  194. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  195. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  196. %0 = load i32, i32* %arrayidx, align 4
  197. %inc = add nsw i32 %0, 1
  198. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects exactly one such store to remain
  199. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  200. %exitcond = icmp eq i64 %indvars.iv.next, 4 ; same bound as @loop4, which the file expects to be fully unrolled without a hint
  201. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10 ; latch: !10 attaches the count-1 hint to this loop
  202. for.end: ; preds = %for.body
  203. ret void
  204. }
  205. !10 = !{!10, !11} ; loop ID node: first operand refers to itself, second operand is the hint !11
  206. !11 = !{!"llvm.loop.unroll.count", i32 1} ; hint corresponding to "#pragma clang loop unroll_count(1)"
  207. ; #pragma clang loop unroll(full)
  208. ; Loop has very high loop count (1 million) and full unrolling was requested.
  209. ; Loop should be unrolled up to the pragma threshold, but not completely.
  210. ;
  211. ; CHECK-LABEL: @unroll_1M(
  212. ; CHECK: store i32
  213. ; CHECK: store i32
  214. ; CHECK: br i1
  215. define void @unroll_1M(i32* nocapture %a, i32 %b) { ; constant trip count of 1000000 with the full-unroll hint via !12; %b is not referenced in the body
  216. entry:
  217. br label %for.body
  218. for.body: ; preds = %for.body, %entry
  219. %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index: 0 on entry, incremented value on the back edge
  220. %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv ; address of a[index]
  221. %0 = load i32, i32* %arrayidx, align 4
  222. %inc = add nsw i32 %0, 1
  223. store i32 %inc, i32* %arrayidx, align 4 ; a[index] = a[index] + 1; the test expects at least two copies of this store
  224. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  225. %exitcond = icmp eq i64 %indvars.iv.next, 1000000 ; true once the next index reaches one million
  226. br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12 ; latch: the test expects a conditional branch to remain (not fully unrolled)
  227. for.end: ; preds = %for.body
  228. ret void
  229. }
  230. !12 = !{!12, !4} ; loop ID node: first operand refers to itself, second operand reuses the full-unroll hint !4