; pre-load.ll
  ; Load partial-redundancy elimination (PRE) tests for GVN. Every function
  ; below contains a load whose value is already known on some, but not all,
  ; incoming paths; the FileCheck directives describe where loads are expected
  ; to remain after the pass.
  ; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
  ; Basic diamond. The load of %p in block4 is redundant along the block3 edge
  ; (block3 stores 0 to %p) but not along the block2 edge. The checks expect a
  ; load of %p as the first instruction of block2 and a phi, not a load, at the
  ; top of block4.
  define i32 @test1(i32* %p, i1 %C) {
  ; CHECK-LABEL: @test1(
  block1:
    br i1 %C, label %block2, label %block3

  block2:
    br label %block4
  ; CHECK: block2:
  ; CHECK-NEXT: load i32, i32* %p

  block3:
    store i32 0, i32* %p
    br label %block4

  block4:
    %PRE = load i32, i32* %p
    ret i32 %PRE
  ; CHECK: block4:
  ; CHECK-NEXT: phi i32
  ; CHECK-NEXT: ret i32
  }
  ; This is a simple phi translation case.
  ; The loaded address is itself a phi (%P2): it is %p when coming from block3
  ; and %q when coming from block2. block3 stores to %p, so only the block2 edge
  ; lacks the value; the checks expect a load of %q there and no load left in
  ; block4.
  define i32 @test2(i32* %p, i32* %q, i1 %C) {
  ; CHECK-LABEL: @test2(
  block1:
    br i1 %C, label %block2, label %block3

  block2:
    br label %block4
  ; CHECK: block2:
  ; CHECK-NEXT: load i32, i32* %q

  block3:
    store i32 0, i32* %p
    br label %block4

  block4:
    %P2 = phi i32* [%p, %block3], [%q, %block2]
    %PRE = load i32, i32* %P2
    ret i32 %PRE
  ; CHECK: block4:
  ; CHECK-NEXT: phi i32 [
  ; CHECK-NOT: load
  ; CHECK: ret i32
  }
  ; This is a PRE case that requires phi translation through a GEP.
  ; The loaded address is gep(%P2, 1). Along the block2 edge that translates to
  ; gep(%q, 1), which already exists as %B in block1 (a block that dominates
  ; block2). The checks expect the new load in block2 to use %B directly.
  define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
  ; CHECK-LABEL: @test3(
  block1:
    %B = getelementptr i32, i32* %q, i32 1
    ; Storing %B gives the GEP a use; presumably this keeps it from being
    ; dropped before PRE can reuse it.
    store i32* %B, i32** %Hack
    br i1 %C, label %block2, label %block3

  block2:
    br label %block4
  ; CHECK: block2:
  ; CHECK-NEXT: load i32, i32* %B

  block3:
    %A = getelementptr i32, i32* %p, i32 1
    store i32 0, i32* %A
    br label %block4

  block4:
    %P2 = phi i32* [%p, %block3], [%q, %block2]
    %P3 = getelementptr i32, i32* %P2, i32 1
    %PRE = load i32, i32* %P3
    ret i32 %PRE
  ; CHECK: block4:
  ; CHECK-NOT: load
  ; CHECK: ret i32
  }
  ;; Here the loaded address is available, but the computation is in 'block3'
  ;; which does not dominate 'block2'.
  ;; The existing GEP %B therefore cannot be used from block2, so the checks
  ;; only require that some i32 load appears in block2 before its branch and
  ;; that block4 is left with a phi and no load.
  define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
  ; CHECK-LABEL: @test4(
  block1:
    br i1 %C, label %block2, label %block3

  block2:
    br label %block4
  ; CHECK: block2:
  ; CHECK: load i32, i32*
  ; CHECK: br label %block4

  block3:
    %B = getelementptr i32, i32* %q, i32 1
    store i32* %B, i32** %Hack
    %A = getelementptr i32, i32* %p, i32 1
    store i32 0, i32* %A
    br label %block4

  block4:
    %P2 = phi i32* [%p, %block3], [%q, %block2]
    %P3 = getelementptr i32, i32* %P2, i32 1
    %PRE = load i32, i32* %P3
    ret i32 %PRE
  ; CHECK: block4:
  ; CHECK-NEXT: phi i32 [
  ; CHECK-NOT: load
  ; CHECK: ret i32
  }
  ;void test5(int N, double *G) {
  ;  int j;
  ;  for (j = 0; j < N - 1; j++)
  ;    G[j] = G[j] + G[j+1];
  ;}
  ; Loop-carried case: the G[j+1] value loaded in one iteration is the G[j]
  ; value needed by the next one. The checks expect a load in the preheader
  ; (bb.nph) and exactly one load left inside the loop body.
  define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
  ; CHECK-LABEL: @test5(
  entry:
    %0 = add i32 %N, -1
    %1 = icmp sgt i32 %0, 0
    br i1 %1, label %bb.nph, label %return

  bb.nph:
    %tmp = zext i32 %0 to i64
    br label %bb
  ; CHECK: bb.nph:
  ; CHECK: load double, double*
  ; CHECK: br label %bb

  bb:
    %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
    %tmp6 = add i64 %indvar, 1
    ; %scevgep addresses G[j+1], %scevgep7 addresses G[j].
    %scevgep = getelementptr double, double* %G, i64 %tmp6
    %scevgep7 = getelementptr double, double* %G, i64 %indvar
    %2 = load double, double* %scevgep7, align 8
    %3 = load double, double* %scevgep, align 8
    %4 = fadd double %2, %3
    store double %4, double* %scevgep7, align 8
    %exitcond = icmp eq i64 %tmp6, %tmp
    br i1 %exitcond, label %return, label %bb
  ; Should only be one load in the loop.
  ; CHECK: bb:
  ; CHECK: load double, double*
  ; CHECK-NOT: load double, double*
  ; CHECK: br i1 %exitcond

  return:
    ret void
  }
  ;void test6(int N, double *G) {
  ;  int j;
  ;  for (j = 0; j < N - 1; j++)
  ;    G[j+1] = G[j] + G[j+1];
  ;}
  ; Same loop shape as test5, but the store goes to G[j+1], so the value the
  ; next iteration reads as G[j] is the value just stored. The checks expect a
  ; load in the preheader (bb.nph) and exactly one load left inside the loop.
  define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
  ; CHECK-LABEL: @test6(
  entry:
    %0 = add i32 %N, -1
    %1 = icmp sgt i32 %0, 0
    br i1 %1, label %bb.nph, label %return

  bb.nph:
    %tmp = zext i32 %0 to i64
    br label %bb
  ; CHECK: bb.nph:
  ; CHECK: load double, double*
  ; CHECK: br label %bb

  bb:
    %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
    %tmp6 = add i64 %indvar, 1
    ; %scevgep addresses G[j+1], %scevgep7 addresses G[j].
    %scevgep = getelementptr double, double* %G, i64 %tmp6
    %scevgep7 = getelementptr double, double* %G, i64 %indvar
    %2 = load double, double* %scevgep7, align 8
    %3 = load double, double* %scevgep, align 8
    %4 = fadd double %2, %3
    store double %4, double* %scevgep, align 8
    %exitcond = icmp eq i64 %tmp6, %tmp
    br i1 %exitcond, label %return, label %bb
  ; Should only be one load in the loop.
  ; CHECK: bb:
  ; CHECK: load double, double*
  ; CHECK-NOT: load double, double*
  ; CHECK: br i1 %exitcond

  return:
    ret void
  }
  ;void test7(int N, double* G) {
  ;  long j;
  ;  G[1] = 1;
  ;  for (j = 1; j < N - 1; j++)
  ;    G[j+1] = G[j] + G[j+1];
  ;}
  ; This requires phi translation of the adds.
  ; The loop addresses are G[indvar+1] and G[indvar+2], so translating them
  ; across the loop edges means rewriting the add that feeds each GEP.
  ; There is no preheader check in this test: entry already stores to G[1],
  ; which is the address the first iteration reads (indvar = 0), so presumably
  ; no preheader load is needed. Only the in-loop load count is verified.
  define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
  ; CHECK-LABEL: @test7(
  entry:
    %0 = getelementptr inbounds double, double* %G, i64 1
    store double 1.000000e+00, double* %0, align 8
    %1 = add i32 %N, -1
    %2 = icmp sgt i32 %1, 1
    br i1 %2, label %bb.nph, label %return

  bb.nph:
    %tmp = sext i32 %1 to i64
    %tmp7 = add i64 %tmp, -1
    br label %bb

  bb:
    %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
    %tmp8 = add i64 %indvar, 2
    %scevgep = getelementptr double, double* %G, i64 %tmp8
    %tmp9 = add i64 %indvar, 1
    %scevgep10 = getelementptr double, double* %G, i64 %tmp9
    %3 = load double, double* %scevgep10, align 8
    %4 = load double, double* %scevgep, align 8
    %5 = fadd double %3, %4
    store double %5, double* %scevgep, align 8
    %exitcond = icmp eq i64 %tmp9, %tmp7
    br i1 %exitcond, label %return, label %bb
  ; Should only be one load in the loop.
  ; CHECK: bb:
  ; CHECK: load double, double*
  ; CHECK-NOT: load double, double*
  ; CHECK: br i1 %exitcond

  return:
    ret void
  }
  ;; Here the loaded address isn't available in 'block2' at all, requiring a new
  ;; GEP to be inserted into it.
  ;; Unlike test3 and test4, nothing in this function computes gep(%q, 1). The
  ;; checks only require an i32 load in block2 before its branch, and a phi with
  ;; no remaining load in block4.
  define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
  ; CHECK-LABEL: @test8(
  block1:
    br i1 %C, label %block2, label %block3

  block2:
    br label %block4
  ; CHECK: block2:
  ; CHECK: load i32, i32*
  ; CHECK: br label %block4

  block3:
    %A = getelementptr i32, i32* %p, i32 1
    store i32 0, i32* %A
    br label %block4

  block4:
    %P2 = phi i32* [%p, %block3], [%q, %block2]
    %P3 = getelementptr i32, i32* %P2, i32 1
    %PRE = load i32, i32* %P3
    ret i32 %PRE
  ; CHECK: block4:
  ; CHECK-NEXT: phi i32 [
  ; CHECK-NOT: load
  ; CHECK: ret i32
  }
  ;void test9(int N, double* G) {
  ;  long j;
  ;  for (j = 1; j < N - 1; j++)
  ;    G[j+1] = G[j] + G[j+1];
  ;}
  ; This requires phi translation of the adds.
  ; Same loop as test7 but without the initial store to G[1], so the value for
  ; the first iteration is not already known: the checks expect a load in the
  ; preheader (bb.nph) and exactly one load left inside the loop.
  define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
  ; CHECK-LABEL: @test9(
  entry:
    ; Unnamed placeholder: it takes the implicit name %0 so that the explicitly
    ; numbered %1 and %2 below stay in sequence.
    add i32 0, 0
    %1 = add i32 %N, -1
    %2 = icmp sgt i32 %1, 1
    br i1 %2, label %bb.nph, label %return

  bb.nph:
    %tmp = sext i32 %1 to i64
    %tmp7 = add i64 %tmp, -1
    br label %bb
  ; CHECK: bb.nph:
  ; CHECK: load double, double*
  ; CHECK: br label %bb

  bb:
    %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
    %tmp8 = add i64 %indvar, 2
    %scevgep = getelementptr double, double* %G, i64 %tmp8
    %tmp9 = add i64 %indvar, 1
    %scevgep10 = getelementptr double, double* %G, i64 %tmp9
    %3 = load double, double* %scevgep10, align 8
    %4 = load double, double* %scevgep, align 8
    %5 = fadd double %3, %4
    store double %5, double* %scevgep, align 8
    %exitcond = icmp eq i64 %tmp9, %tmp7
    br i1 %exitcond, label %return, label %bb
  ; Should only be one load in the loop.
  ; CHECK: bb:
  ; CHECK: load double, double*
  ; CHECK-NOT: load double, double*
  ; CHECK: br i1 %exitcond

  return:
    ret void
  }
  ;void test10(int N, double* G) {
  ;  long j;
  ;  for (j = 1; j < N - 1; j++)
  ;    G[j] = G[j] + G[j+1] + G[j-1];
  ;}
  ; PR5501
  ; Three loads per iteration (G[indvar], G[indvar+1], G[indvar+2]) with a
  ; store to G[indvar+1]. The checks expect two loads in the preheader (bb.nph)
  ; and exactly one load left inside the loop.
  define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
  ; CHECK-LABEL: @test10(
  entry:
    %0 = add i32 %N, -1
    %1 = icmp sgt i32 %0, 1
    br i1 %1, label %bb.nph, label %return

  bb.nph:
    %tmp = sext i32 %0 to i64
    %tmp8 = add i64 %tmp, -1
    br label %bb
  ; CHECK: bb.nph:
  ; CHECK: load double, double*
  ; CHECK: load double, double*
  ; CHECK: br label %bb

  bb:
    %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
    %scevgep = getelementptr double, double* %G, i64 %indvar
    %tmp9 = add i64 %indvar, 2
    %scevgep10 = getelementptr double, double* %G, i64 %tmp9
    %tmp11 = add i64 %indvar, 1
    %scevgep12 = getelementptr double, double* %G, i64 %tmp11
    %2 = load double, double* %scevgep12, align 8
    %3 = load double, double* %scevgep10, align 8
    %4 = fadd double %2, %3
    %5 = load double, double* %scevgep, align 8
    %6 = fadd double %4, %5
    store double %6, double* %scevgep12, align 8
    %exitcond = icmp eq i64 %tmp11, %tmp8
    br i1 %exitcond, label %return, label %bb
  ; Should only be one load in the loop.
  ; CHECK: bb:
  ; CHECK: load double, double*
  ; CHECK-NOT: load double, double*
  ; CHECK: br i1 %exitcond

  return:
    ret void
  }
  ; Test critical edge splitting.
  ; block2 has two successors (block4, block5) and block4 has two predecessors
  ; (block2, block3), so the block2 -> block4 edge is critical. The checks
  ; expect the load of %p to be immediately followed by an unconditional branch
  ; to block4, i.e. to sit in a block that leads only to block4, and expect
  ; block4 to begin with a phi.
  define i32 @test11(i32* %p, i1 %C, i32 %N) {
  ; CHECK-LABEL: @test11(
  block1:
    br i1 %C, label %block2, label %block3

  block2:
    %cond = icmp sgt i32 %N, 1
    br i1 %cond, label %block4, label %block5
  ; CHECK: load i32, i32* %p
  ; CHECK-NEXT: br label %block4

  block3:
    store i32 0, i32* %p
    br label %block4

  block4:
    %PRE = load i32, i32* %p
    br label %block5

  block5:
    %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
    ret i32 %ret
  ; CHECK: block4:
  ; CHECK-NEXT: phi i32
  }