; basic.ll
;
; Regression tests for loop-idiom recognition: every function in this file is
; run through the pass pipeline on the line below, and the printed IR is
; matched against the FileCheck directives embedded next to each function.
;
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; The pattern constants the pass is expected to create are printed at module
; scope, so they are matched here, ahead of any function body.
; For @test11_pattern
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]
; For @test13_pattern
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]
target triple = "x86_64-apple-darwin10.0.0"
  ; Baseline memset case: a counted loop that stores i8 0 to %Base[0 .. %Size).
  ; The expected output is a single llvm.memset of %Size bytes with no store
  ; remaining in the function.
  define void @test1(i8* %Base, i64 %Size) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    store i8 0, i8* %I.0.014, align 1
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test1(
  ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
  ; CHECK-NOT: store
  }
  ; This is a loop that was rotated but where the blocks weren't merged: the
  ; store sits in %for.body while the exit test lives in a separate latch block,
  ; %for.body.cont. This shouldn't perturb us; the same memset as in @test1 is
  ; expected.
  define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body.cont
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    store i8 0, i8* %I.0.014, align 1
    %indvar.next = add i64 %indvar, 1
    br label %for.body.cont

  for.body.cont:  ; preds = %for.body
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body.cont
    ret void
  ; CHECK-LABEL: @test1a(
  ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
  ; CHECK-NOT: store
  }
  ; Memset from a multi-byte store: 16843009 is 0x01010101, i.e. the byte 0x01
  ; repeated four times, so the i32 store loop is expected to become a memset of
  ; byte value 1. The byte count is %Size * 4, which the expected output
  ; computes as "shl i64 %Size, 2". The loop is guarded by a zero-trip test in
  ; %entry, and that branch must survive.
  define void @test2(i32* %Base, i64 %Size) nounwind ssp {
  entry:
    %cmp10 = icmp eq i64 %Size, 0
    br i1 %cmp10, label %for.end, label %for.body

  for.body:  ; preds = %entry, %for.body
    %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
    %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
    store i32 16843009, i32* %add.ptr.i, align 4
    %inc = add nsw i64 %i.011, 1
    %exitcond = icmp eq i64 %inc, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body, %entry
    ret void
  ; CHECK-LABEL: @test2(
  ; CHECK: br i1 %cmp10,
  ; CHECK: %0 = shl i64 %Size, 2
  ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
  ; CHECK-NOT: store
  }
  ; This is a case where there is an extra may-aliased store in the loop, so we
  ; can't promote the memset: %MayAlias is an unrelated pointer argument that
  ; could point into the range written through %Base. No memset may appear in
  ; the output.
  define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
  entry:
    br label %for.body

  for.body:  ; preds = %entry, %for.body
    %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
    %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
    store i32 16843009, i32* %add.ptr.i, align 4
    ; The store below is what blocks the transformation.
    store i8 42, i8* %MayAlias
    %inc = add nsw i64 %i.011, 1
    %exitcond = icmp eq i64 %inc, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test3(
  ; CHECK-NOT: memset
  ; CHECK: ret void
  }
  ;; TODO: We should be able to promote this memset. Not yet though.
  ;; The loop zeroes %Base[0 .. 100) and additionally stores to a fixed address
  ;; outside that range. Note that, despite its name, %Base100 is %Base + 1000.
  ;; The expectations at the bottom use a TODO-suffixed prefix rather than the
  ;; prefix FileCheck is run with, so they are presumably inert until the
  ;; transformation is implemented.
  define void @test4(i8* %Base) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    %Base100 = getelementptr i8, i8* %Base, i64 1000
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    store i8 0, i8* %I.0.014, align 1
    ;; Store beyond the range of the memset; promotion should be safe.
    store i8 42, i8* %Base100
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, 100
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-TODO-LABEL: @test4(
  ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
  ; CHECK-TODO-NOT: store
  }
  ; This can't be promoted: the stored value is loop variant. %V is the
  ; induction variable truncated to i8, so every iteration writes a different
  ; byte and no single memset value exists.
  define void @test5(i8* %Base, i64 %Size) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    %V = trunc i64 %indvar to i8
    store i8 %V, i8* %I.0.014, align 1
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test5(
  ; CHECK-NOT: memset
  ; CHECK: ret void
  }
  ;; memcpy formation
  ;; A byte-by-byte copy loop between two distinct allocas (%Base -> %Dest).
  ;; The expected output is a single llvm.memcpy of %Size bytes and no store.
  define void @test6(i64 %Size) nounwind ssp {
  bb.nph:
    %Base = alloca i8, i32 10000
    %Dest = alloca i8, i32 10000
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    %DestI = getelementptr i8, i8* %Dest, i64 %indvar
    %V = load i8, i8* %I.0.014, align 1
    store i8 %V, i8* %DestI, align 1
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test6(
  ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
  ; CHECK-NOT: store
  ; CHECK: ret void
  }
  ; This is a loop that was rotated but where the blocks weren't merged. Here
  ; the header %for.body holds only the phi, and the store plus the exit test
  ; live in the latch block %for.body.cont. This shouldn't perturb us; the same
  ; memset as in @test1 is expected.
  define void @test7(i8* %Base, i64 %Size) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body.cont
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
    br label %for.body.cont

  for.body.cont:  ; preds = %for.body
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    store i8 0, i8* %I.0.014, align 1
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body.cont
    ret void
  ; CHECK-LABEL: @test7(
  ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
  ; CHECK-NOT: store
  }
  ; This loop should not be transformed: it only executes one iteration (the
  ; exit test compares the incremented induction variable against 1), so the
  ; original i64 store is expected to remain.
  define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
  bb.nph:  ; entry block (no predecessors)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %PI = getelementptr i64, i64* %Ptr, i64 %indvar
    store i64 0, i64 *%PI
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, 1
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test8(
  ; CHECK: store i64 0, i64* %PI
  }
  ; Opaque external function used by @test9 to obtain a pointer that may alias
  ; its argument.
  declare i8* @external(i8*)

  ;; This cannot be transformed into a memcpy, because the read-from location is
  ;; mutated by the loop: %BaseAlias is returned by an opaque call that was
  ;; handed %Base, so the extra store through it may overwrite bytes the copy
  ;; still has to read.
  define void @test9(i64 %Size) nounwind ssp {
  bb.nph:
    %Base = alloca i8, i32 10000
    %Dest = alloca i8, i32 10000
    %BaseAlias = call i8* @external(i8* %Base)
    br label %for.body

  for.body:  ; preds = %bb.nph, %for.body
    %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
    %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
    %DestI = getelementptr i8, i8* %Dest, i64 %indvar
    %V = load i8, i8* %I.0.014, align 1
    store i8 %V, i8* %DestI, align 1
    ;; This store can clobber the input.
    store i8 4, i8* %BaseAlias
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, %Size
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test9(
  ; CHECK-NOT: llvm.memcpy
  ; CHECK: ret void
  }
  ; Two dimensional nested loop should be promoted to one big memset. The inner
  ; loop zeroes X[i*100 + j] for j in [0, 100) and the outer loop runs i over
  ; [0, 100), so the whole nest covers 100 * 100 = 10000 contiguous bytes. The
  ; memset is expected as the very first instruction of %entry.
  define void @test10(i8* %X) nounwind ssp {
  entry:
    br label %bb.nph

  bb.nph:  ; preds = %entry, %for.inc10
    %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
    br label %for.body5

  for.body5:  ; preds = %for.body5, %bb.nph
    %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
    %mul = mul nsw i32 %i.04, 100
    %add = add nsw i32 %j.02, %mul
    %idxprom = sext i32 %add to i64
    %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
    store i8 0, i8* %arrayidx, align 1
    %inc = add nsw i32 %j.02, 1
    %cmp4 = icmp eq i32 %inc, 100
    br i1 %cmp4, label %for.inc10, label %for.body5

  for.inc10:  ; preds = %for.body5
    %inc12 = add nsw i32 %i.04, 1
    %cmp = icmp eq i32 %inc12, 100
    br i1 %cmp, label %for.end13, label %bb.nph

  for.end13:  ; preds = %for.inc10
    ret void
  ; CHECK-LABEL: @test10(
  ; CHECK: entry:
  ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
  ; CHECK-NOT: store
  ; CHECK: ret void
  }
  ; On darwin10 (which is the triple in this .ll file) this loop can be turned
  ; into a memset_pattern call. The stored value is i32 1, whose bytes are not
  ; all equal, so a plain memset cannot express it; the pattern constant the
  ; pass is expected to emit for it is matched at the top of the file.
  ; rdar://9009151
  define void @test11_pattern(i32* nocapture %P) nounwind ssp {
  entry:
    br label %for.body

  for.body:  ; preds = %entry, %for.body
    %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    %arrayidx = getelementptr i32, i32* %P, i64 %indvar
    store i32 1, i32* %arrayidx, align 4
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, 10000
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test11_pattern(
  ; CHECK-NEXT: entry:
  ; CHECK-NEXT: bitcast
  ; CHECK-NEXT: memset_pattern
  ; CHECK-NOT: store
  ; CHECK: ret void
  }
  ; Store of null should turn into memset of zero. The loop writes 10000 null
  ; pointers; with the 64-bit pointers declared in this module's datalayout
  ; that is 10000 * 8 = 80000 bytes, which is the length the expected memset
  ; carries.
  define void @test12(i32** nocapture %P) nounwind ssp {
  entry:
    br label %for.body

  for.body:  ; preds = %entry, %for.body
    %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
    store i32* null, i32** %arrayidx, align 4
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, 10000
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test12(
  ; CHECK-NEXT: entry:
  ; CHECK-NEXT: bitcast
  ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
  ; CHECK-NOT: store
  ; CHECK: ret void
  }
  ; Global whose address is the value stored by @test13_pattern.
  @G = global i32 5

  ; This store-of-address loop can be turned into a memset_pattern call. The
  ; value written on every iteration is the constant address of @G; the pattern
  ; constant the pass is expected to emit for it is matched at the top of the
  ; file.
  ; rdar://9009151
  define void @test13_pattern(i32** nocapture %P) nounwind ssp {
  entry:
    br label %for.body

  for.body:  ; preds = %entry, %for.body
    %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
    %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
    store i32* @G, i32** %arrayidx, align 4
    %indvar.next = add i64 %indvar, 1
    %exitcond = icmp eq i64 %indvar.next, 10000
    br i1 %exitcond, label %for.end, label %for.body

  for.end:  ; preds = %for.body
    ret void
  ; CHECK-LABEL: @test13_pattern(
  ; CHECK-NEXT: entry:
  ; CHECK-NEXT: bitcast
  ; CHECK-NEXT: memset_pattern
  ; CHECK-NOT: store
  ; CHECK: ret void
  }
  ; PR9815 - This is a partial overlap case that cannot be safely transformed
  ; into a memcpy. The loop runs for i = 0 and i = 1 and copies
  ; g_50[i + 4] into g_50[i + 5], so the second iteration reads the element the
  ; first iteration just wrote. The load/store loop must be left in place.
  @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

  define i32 @test14() nounwind {
  entry:
    br label %for.body

  for.body:  ; preds = %entry, %for.body
    %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
    ; Source element: g_50[%tmp5 + 4].
    %add = add nsw i32 %tmp5, 4
    %idxprom = sext i32 %add to i64
    %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
    %tmp2 = load i32, i32* %arrayidx, align 4
    ; Destination element: g_50[%tmp5 + 5], one slot past the source.
    %add4 = add nsw i32 %tmp5, 5
    %idxprom5 = sext i32 %add4 to i64
    %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
    store i32 %tmp2, i32* %arrayidx6, align 4
    %inc = add nsw i32 %tmp5, 1
    %cmp = icmp slt i32 %inc, 2
    br i1 %cmp, label %for.body, label %for.end

  for.end:  ; preds = %for.body
    ; The result is the last array element, g_50[6].
    %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
    ret i32 %tmp8
  ; CHECK-LABEL: @test14(
  ; CHECK: for.body:
  ; CHECK: load i32
  ; CHECK: store i32
  ; CHECK: br i1 %cmp
  }
  define void @PR14241(i32* %s, i64 %size) {
  ; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
  ; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
  ; instead of a memmove. If we get the memmove transform back, this will catch
  ; regressions.
  ;
  ; The loop walks a pointer from %s up to %s + (%size - 1); each iteration
  ; loads the element one past the current pointer and stores it at the current
  ; pointer, so source and destination ranges overlap.
  ;
  ; CHECK-LABEL: @PR14241(
  entry:
    %end.idx = add i64 %size, -1
    %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
    br label %while.body
  ; CHECK-NOT: memcpy
  ;
  ; FIXME: When we regain the ability to form a memmove here, this test should be
  ; reversed and turned into a positive assertion.
  ; CHECK-NOT: memmove

  while.body:  ; preds = %entry, %while.body
    %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
    %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
    %val = load i32, i32* %src.ptr, align 4
  ; CHECK: load
    %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
    store i32 %val, i32* %dst.ptr, align 4
  ; CHECK: store
    %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
    %cmp = icmp eq i32* %next.ptr, %end.ptr
    br i1 %cmp, label %exit, label %while.body

  exit:  ; preds = %while.body
    ret void
  ; CHECK: ret void
  }
  355. }