; global_alias.ll (39 KB)
  1. ; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
  ; Module-level setup shared by every test function in this file.
  ; Data layout string: little-endian ("e"), 32-bit pointers ("p:32:32:32"),
  ; native integer width of 32 bits ("n32").
  2. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
  ; Two 100-element i32 arrays (fields 0 and 2) separated by a scalar i32 (field 1).
  ; The functions below index field 0 where the C snippets say Foo.A and field 2
  ; where they say Foo.B.
  3. %struct.anon = type { [100 x i32], i32, [100 x i32] }
  ; Same layout with 100x100 two-dimensional arrays (Bar.A / Bar.B in the C snippets).
  4. %struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
  ; Zero-initialized global instances of the two struct types.
  5. @Foo = common global %struct.anon zeroinitializer, align 4
  6. @Bar = common global %struct.anon.0 zeroinitializer, align 4
  ; Externally defined i32 pointers used by the pointer-based tests; their targets
  ; are not visible in this module.
  7. @PB = external global i32*
  8. @PA = external global i32*
  9. ;; === First, the tests that should always vectorize, either statically or by adding run-time checks ===
  10. ; /// Different objects, positive induction, constant distance
  11. ; int noAlias01 (int a) {
  12. ; int i;
  13. ; for (i=0; i<SIZE; i++)
  14. ; Foo.A[i] = Foo.B[i] + a;
  15. ; return Foo.A[a];
  16. ; }
  17. ; CHECK-LABEL: define i32 @noAlias01(
  18. ; CHECK: add nsw <4 x i32>
  19. ; CHECK: ret
  ; Scalar loop over i in [0, 100): reads element i of field 2 of @Foo, adds %a,
  ; and writes the sum to element i of field 0 of @Foo. %a and i live in allocas
  ; and are reloaded before every use. Returns element %a of field 0.
  20. define i32 @noAlias01(i32 %a) nounwind {
  21. entry:
  22. %a.addr = alloca i32, align 4
  23. %i = alloca i32, align 4
  24. store i32 %a, i32* %a.addr, align 4
  25. store i32 0, i32* %i, align 4 ; i = 0
  26. br label %for.cond
  27. for.cond: ; preds = %for.inc, %entry
  28. %0 = load i32, i32* %i, align 4
  29. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  30. br i1 %cmp, label %for.body, label %for.end
  31. for.body: ; preds = %for.cond
  32. %1 = load i32, i32* %i, align 4
  33. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 ; source: field 2, element i
  34. %2 = load i32, i32* %arrayidx, align 4
  35. %3 = load i32, i32* %a.addr, align 4
  36. %add = add nsw i32 %2, %3
  37. %4 = load i32, i32* %i, align 4
  38. %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 ; destination: field 0, element i
  39. store i32 %add, i32* %arrayidx1, align 4
  40. br label %for.inc
  41. for.inc: ; preds = %for.body
  42. %5 = load i32, i32* %i, align 4
  43. %inc = add nsw i32 %5, 1 ; i + 1
  44. store i32 %inc, i32* %i, align 4
  45. br label %for.cond
  46. for.end: ; preds = %for.cond
  47. %6 = load i32, i32* %a.addr, align 4
  48. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  49. %7 = load i32, i32* %arrayidx2, align 4
  50. ret i32 %7
  51. }
  52. ; /// Different objects, positive induction with widening slide
  53. ; int noAlias02 (int a) {
  54. ; int i;
  55. ; for (i=0; i<SIZE-10; i++)
  56. ; Foo.A[i] = Foo.B[i+10] + a;
  57. ; return Foo.A[a];
  58. ; }
  59. ; CHECK-LABEL: define i32 @noAlias02(
  60. ; CHECK: add nsw <4 x i32>
  61. ; CHECK: ret
  ; Scalar loop over i in [0, 90): reads element i+10 of field 2 of @Foo, adds %a,
  ; and writes the sum to element i of field 0 of @Foo. %a and i live in allocas
  ; and are reloaded before every use. Returns element %a of field 0.
  62. define i32 @noAlias02(i32 %a) {
  63. entry:
  64. %a.addr = alloca i32, align 4
  65. %i = alloca i32, align 4
  66. store i32 %a, i32* %a.addr, align 4
  67. store i32 0, i32* %i, align 4 ; i = 0
  68. br label %for.cond
  69. for.cond: ; preds = %for.inc, %entry
  70. %0 = load i32, i32* %i, align 4
  71. %cmp = icmp slt i32 %0, 90 ; keep looping while i < 90
  72. br i1 %cmp, label %for.body, label %for.end
  73. for.body: ; preds = %for.cond
  74. %1 = load i32, i32* %i, align 4
  75. %add = add nsw i32 %1, 10 ; i + 10
  76. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add ; source: field 2, element i+10
  77. %2 = load i32, i32* %arrayidx, align 4
  78. %3 = load i32, i32* %a.addr, align 4
  79. %add1 = add nsw i32 %2, %3
  80. %4 = load i32, i32* %i, align 4
  81. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 ; destination: field 0, element i
  82. store i32 %add1, i32* %arrayidx2, align 4
  83. br label %for.inc
  84. for.inc: ; preds = %for.body
  85. %5 = load i32, i32* %i, align 4
  86. %inc = add nsw i32 %5, 1 ; i + 1
  87. store i32 %inc, i32* %i, align 4
  88. br label %for.cond
  89. for.end: ; preds = %for.cond
  90. %6 = load i32, i32* %a.addr, align 4
  91. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  92. %7 = load i32, i32* %arrayidx3, align 4
  93. ret i32 %7
  94. }
  95. ; /// Different objects, positive induction with shortening slide
  96. ; int noAlias03 (int a) {
  97. ; int i;
  98. ; for (i=0; i<SIZE; i++)
  99. ; Foo.A[i+10] = Foo.B[i] + a;
  100. ; return Foo.A[a];
  101. ; }
  102. ; CHECK-LABEL: define i32 @noAlias03(
  103. ; CHECK: add nsw <4 x i32>
  104. ; CHECK: ret
  ; Scalar loop over i in [0, 100): reads element i of field 2 of @Foo, adds %a,
  ; and writes the sum to element i+10 of field 0 of @Foo. %a and i live in
  ; allocas and are reloaded before every use. Returns element %a of field 0.
  ; NOTE(review): with a bound of 100 the store index i+10 reaches 109, beyond the
  ; 100-element array type of field 0 -- confirm this is intended by the test.
  105. define i32 @noAlias03(i32 %a) {
  106. entry:
  107. %a.addr = alloca i32, align 4
  108. %i = alloca i32, align 4
  109. store i32 %a, i32* %a.addr, align 4
  110. store i32 0, i32* %i, align 4 ; i = 0
  111. br label %for.cond
  112. for.cond: ; preds = %for.inc, %entry
  113. %0 = load i32, i32* %i, align 4
  114. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  115. br i1 %cmp, label %for.body, label %for.end
  116. for.body: ; preds = %for.cond
  117. %1 = load i32, i32* %i, align 4
  118. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 ; source: field 2, element i
  119. %2 = load i32, i32* %arrayidx, align 4
  120. %3 = load i32, i32* %a.addr, align 4
  121. %add = add nsw i32 %2, %3
  122. %4 = load i32, i32* %i, align 4
  123. %add1 = add nsw i32 %4, 10 ; i + 10
  124. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1 ; destination: field 0, element i+10
  125. store i32 %add, i32* %arrayidx2, align 4
  126. br label %for.inc
  127. for.inc: ; preds = %for.body
  128. %5 = load i32, i32* %i, align 4
  129. %inc = add nsw i32 %5, 1 ; i + 1
  130. store i32 %inc, i32* %i, align 4
  131. br label %for.cond
  132. for.end: ; preds = %for.cond
  133. %6 = load i32, i32* %a.addr, align 4
  134. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  135. %7 = load i32, i32* %arrayidx3, align 4
  136. ret i32 %7
  137. }
  138. ; /// Pointer access, positive stride, run-time check added
  139. ; int noAlias04 (int a) {
  140. ; int i;
  141. ; for (i=0; i<SIZE; i++)
  142. ; *(PA+i) = *(PB+i) + a;
  143. ; return *(PA+a);
  144. ; }
  145. ; CHECK-LABEL: define i32 @noAlias04(
  146. ; CHECK-NOT: add nsw <4 x i32>
  147. ; CHECK: ret
  148. ;
  149. ; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
  150. ; Find out why it is not vectorized here even when vectorization is forced.
  ; Pointer-based scalar loop over i in [0, 100): on every iteration reloads the
  ; pointers stored in @PB and @PA, reads element i through the @PB pointer, adds
  ; %a, and writes the sum to element i through the @PA pointer. Returns element
  ; %a read through the @PA pointer. Attribute group #0 is defined outside the
  ; part of the file visible here.
  151. define i32 @noAlias04(i32 %a) #0 {
  152. entry:
  153. %a.addr = alloca i32, align 4
  154. %i = alloca i32, align 4
  155. store i32 %a, i32* %a.addr, align 4
  156. store i32 0, i32* %i, align 4 ; i = 0
  157. br label %for.cond
  158. for.cond: ; preds = %for.inc, %entry
  159. %0 = load i32, i32* %i, align 4
  160. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  161. br i1 %cmp, label %for.body, label %for.end
  162. for.body: ; preds = %for.cond
  163. %1 = load i32*, i32** @PB, align 4 ; current value of the PB pointer
  164. %2 = load i32, i32* %i, align 4
  165. %add.ptr = getelementptr inbounds i32, i32* %1, i32 %2 ; source: PB + i
  166. %3 = load i32, i32* %add.ptr, align 4
  167. %4 = load i32, i32* %a.addr, align 4
  168. %add = add nsw i32 %3, %4
  169. %5 = load i32*, i32** @PA, align 4 ; current value of the PA pointer
  170. %6 = load i32, i32* %i, align 4
  171. %add.ptr1 = getelementptr inbounds i32, i32* %5, i32 %6 ; destination: PA + i
  172. store i32 %add, i32* %add.ptr1, align 4
  173. br label %for.inc
  174. for.inc: ; preds = %for.body
  175. %7 = load i32, i32* %i, align 4
  176. %inc = add nsw i32 %7, 1 ; i + 1
  177. store i32 %inc, i32* %i, align 4
  178. br label %for.cond
  179. for.end: ; preds = %for.cond
  180. %8 = load i32*, i32** @PA, align 4
  181. %9 = load i32, i32* %a.addr, align 4
  182. %add.ptr2 = getelementptr inbounds i32, i32* %8, i32 %9 ; result: PA + %a
  183. %10 = load i32, i32* %add.ptr2, align 4
  184. ret i32 %10
  185. }
  186. ; /// Different objects, positive induction, multi-array
  187. ; int noAlias05 (int a) {
  188. ; int i, N=10;
  189. ; for (i=0; i<SIZE; i++)
  190. ; Bar.A[N][i] = Bar.B[N][i] + a;
  191. ; return Bar.A[N][a];
  192. ; }
  193. ; CHECK-LABEL: define i32 @noAlias05(
  194. ; CHECK: add nsw <4 x i32>
  195. ; CHECK: ret
  ; Two-dimensional variant. N is an alloca initialised to 10. Scalar loop over i
  ; in [0, 100): reads row N, element i of field 2 of @Bar, adds %a, and writes
  ; the sum to row N, element i of field 0 of @Bar. %a, i and N are reloaded from
  ; their allocas before every use. Returns row N, element %a of field 0.
  196. define i32 @noAlias05(i32 %a) #0 {
  197. entry:
  198. %a.addr = alloca i32, align 4
  199. %i = alloca i32, align 4
  200. %N = alloca i32, align 4
  201. store i32 %a, i32* %a.addr, align 4
  202. store i32 10, i32* %N, align 4 ; N = 10
  203. store i32 0, i32* %i, align 4 ; i = 0
  204. br label %for.cond
  205. for.cond: ; preds = %for.inc, %entry
  206. %0 = load i32, i32* %i, align 4
  207. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  208. br i1 %cmp, label %for.body, label %for.end
  209. for.body: ; preds = %for.cond
  210. %1 = load i32, i32* %i, align 4
  211. %2 = load i32, i32* %N, align 4
  212. %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 ; source row: field 2, row N
  213. %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1 ; element i of that row
  214. %3 = load i32, i32* %arrayidx1, align 4
  215. %4 = load i32, i32* %a.addr, align 4
  216. %add = add nsw i32 %3, %4
  217. %5 = load i32, i32* %i, align 4
  218. %6 = load i32, i32* %N, align 4
  219. %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 ; destination row: field 0, row N
  220. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx2, i32 0, i32 %5 ; element i of that row
  221. store i32 %add, i32* %arrayidx3, align 4
  222. br label %for.inc
  223. for.inc: ; preds = %for.body
  224. %7 = load i32, i32* %i, align 4
  225. %inc = add nsw i32 %7, 1 ; i + 1
  226. store i32 %inc, i32* %i, align 4
  227. br label %for.cond
  228. for.end: ; preds = %for.cond
  229. %8 = load i32, i32* %a.addr, align 4
  230. %9 = load i32, i32* %N, align 4
  231. %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
  232. %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx4, i32 0, i32 %8 ; result: field 0, row N, element %a
  233. %10 = load i32, i32* %arrayidx5, align 4
  234. ret i32 %10
  235. }
  236. ; /// Same objects, positive induction, multi-array, different sub-elements
  237. ; int noAlias06 (int a) {
  238. ; int i, N=10;
  239. ; for (i=0; i<SIZE; i++)
  240. ; Bar.A[N][i] = Bar.A[N+1][i] + a;
  241. ; return Bar.A[N][a];
  242. ; }
  243. ; CHECK-LABEL: define i32 @noAlias06(
  244. ; CHECK: add nsw <4 x i32>
  245. ; CHECK: ret
  ; Same array, different rows. N is an alloca initialised to 10. Scalar loop over
  ; i in [0, 100): reads row N+1, element i of field 0 of @Bar, adds %a, and
  ; writes the sum to row N, element i of the same field. %a, i and N are
  ; reloaded from their allocas before every use. Returns row N, element %a of
  ; field 0.
  246. define i32 @noAlias06(i32 %a) #0 {
  247. entry:
  248. %a.addr = alloca i32, align 4
  249. %i = alloca i32, align 4
  250. %N = alloca i32, align 4
  251. store i32 %a, i32* %a.addr, align 4
  252. store i32 10, i32* %N, align 4 ; N = 10
  253. store i32 0, i32* %i, align 4 ; i = 0
  254. br label %for.cond
  255. for.cond: ; preds = %for.inc, %entry
  256. %0 = load i32, i32* %i, align 4
  257. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  258. br i1 %cmp, label %for.body, label %for.end
  259. for.body: ; preds = %for.cond
  260. %1 = load i32, i32* %i, align 4
  261. %2 = load i32, i32* %N, align 4
  262. %add = add nsw i32 %2, 1 ; N + 1
  263. %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add ; source row: field 0, row N+1
  264. %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1 ; element i of that row
  265. %3 = load i32, i32* %arrayidx1, align 4
  266. %4 = load i32, i32* %a.addr, align 4
  267. %add2 = add nsw i32 %3, %4
  268. %5 = load i32, i32* %i, align 4
  269. %6 = load i32, i32* %N, align 4
  270. %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 ; destination row: field 0, row N
  271. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx3, i32 0, i32 %5 ; element i of that row
  272. store i32 %add2, i32* %arrayidx4, align 4
  273. br label %for.inc
  274. for.inc: ; preds = %for.body
  275. %7 = load i32, i32* %i, align 4
  276. %inc = add nsw i32 %7, 1 ; i + 1
  277. store i32 %inc, i32* %i, align 4
  278. br label %for.cond
  279. for.end: ; preds = %for.cond
  280. %8 = load i32, i32* %a.addr, align 4
  281. %9 = load i32, i32* %N, align 4
  282. %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
  283. %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %8 ; result: field 0, row N, element %a
  284. %10 = load i32, i32* %arrayidx6, align 4
  285. ret i32 %10
  286. }
  287. ; /// Different objects, negative induction, constant distance
  288. ; int noAlias07 (int a) {
  289. ; int i;
  290. ; for (i=0; i<SIZE; i++)
  291. ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
  292. ; return Foo.A[a];
  293. ; }
  294. ; CHECK-LABEL: define i32 @noAlias07(
  295. ; CHECK: store <4 x i32>
  296. ; CHECK: ret
  ; Descending-index variant. Scalar loop over i in [0, 100): computes the index
  ; 100-i-1, reads that element of field 2 of @Foo, adds %a, and writes the sum
  ; to the same index of field 0 of @Foo. %a and i live in allocas and are
  ; reloaded before every use. Returns element %a of field 0.
  297. define i32 @noAlias07(i32 %a) #0 {
  298. entry:
  299. %a.addr = alloca i32, align 4
  300. %i = alloca i32, align 4
  301. store i32 %a, i32* %a.addr, align 4
  302. store i32 0, i32* %i, align 4 ; i = 0
  303. br label %for.cond
  304. for.cond: ; preds = %for.inc, %entry
  305. %0 = load i32, i32* %i, align 4
  306. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  307. br i1 %cmp, label %for.body, label %for.end
  308. for.body: ; preds = %for.cond
  309. %1 = load i32, i32* %i, align 4
  310. %sub = sub nsw i32 100, %1 ; 100 - i
  311. %sub1 = sub nsw i32 %sub, 1 ; 100 - i - 1
  312. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 ; source: field 2, element 100-i-1
  313. %2 = load i32, i32* %arrayidx, align 4
  314. %3 = load i32, i32* %a.addr, align 4
  315. %add = add nsw i32 %2, %3
  316. %4 = load i32, i32* %i, align 4
  317. %sub2 = sub nsw i32 100, %4 ; 100 - i
  318. %sub3 = sub nsw i32 %sub2, 1 ; 100 - i - 1
  319. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 ; destination: field 0, element 100-i-1
  320. store i32 %add, i32* %arrayidx4, align 4
  321. br label %for.inc
  322. for.inc: ; preds = %for.body
  323. %5 = load i32, i32* %i, align 4
  324. %inc = add nsw i32 %5, 1 ; i + 1
  325. store i32 %inc, i32* %i, align 4
  326. br label %for.cond
  327. for.end: ; preds = %for.cond
  328. %6 = load i32, i32* %a.addr, align 4
  329. %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  330. %7 = load i32, i32* %arrayidx5, align 4
  331. ret i32 %7
  332. }
  333. ; /// Different objects, negative induction, shortening slide
  334. ; int noAlias08 (int a) {
  335. ; int i;
  336. ; for (i=0; i<SIZE-10; i++)
  337. ; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
  338. ; return Foo.A[a];
  339. ; }
  340. ; CHECK-LABEL: define i32 @noAlias08(
  341. ; CHECK: sub <4 x i32>
  342. ; CHECK: ret
  ; Descending-index variant with different offsets. Scalar loop over i in
  ; [0, 90): reads element 100-i-10 of field 2 of @Foo, adds %a, and writes the
  ; sum to element 100-i-1 of field 0 of @Foo. %a and i live in allocas and are
  ; reloaded before every use. Returns element %a of field 0.
  343. define i32 @noAlias08(i32 %a) #0 {
  344. entry:
  345. %a.addr = alloca i32, align 4
  346. %i = alloca i32, align 4
  347. store i32 %a, i32* %a.addr, align 4
  348. store i32 0, i32* %i, align 4 ; i = 0
  349. br label %for.cond
  350. for.cond: ; preds = %for.inc, %entry
  351. %0 = load i32, i32* %i, align 4
  352. %cmp = icmp slt i32 %0, 90 ; keep looping while i < 90
  353. br i1 %cmp, label %for.body, label %for.end
  354. for.body: ; preds = %for.cond
  355. %1 = load i32, i32* %i, align 4
  356. %sub = sub nsw i32 100, %1 ; 100 - i
  357. %sub1 = sub nsw i32 %sub, 10 ; 100 - i - 10
  358. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 ; source: field 2, element 100-i-10
  359. %2 = load i32, i32* %arrayidx, align 4
  360. %3 = load i32, i32* %a.addr, align 4
  361. %add = add nsw i32 %2, %3
  362. %4 = load i32, i32* %i, align 4
  363. %sub2 = sub nsw i32 100, %4 ; 100 - i
  364. %sub3 = sub nsw i32 %sub2, 1 ; 100 - i - 1
  365. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 ; destination: field 0, element 100-i-1
  366. store i32 %add, i32* %arrayidx4, align 4
  367. br label %for.inc
  368. for.inc: ; preds = %for.body
  369. %5 = load i32, i32* %i, align 4
  370. %inc = add nsw i32 %5, 1 ; i + 1
  371. store i32 %inc, i32* %i, align 4
  372. br label %for.cond
  373. for.end: ; preds = %for.cond
  374. %6 = load i32, i32* %a.addr, align 4
  375. %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  376. %7 = load i32, i32* %arrayidx5, align 4
  377. ret i32 %7
  378. }
  379. ; /// Different objects, negative induction, widening slide
  380. ; int noAlias09 (int a) {
  381. ; int i;
  382. ; for (i=0; i<SIZE; i++)
  383. ; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
  384. ; return Foo.A[a];
  385. ; }
  386. ; CHECK-LABEL: define i32 @noAlias09(
  387. ; CHECK: sub <4 x i32>
  388. ; CHECK: ret
  ; Descending-index variant with different offsets. Scalar loop over i in
  ; [0, 100): reads element 100-i-1 of field 2 of @Foo, adds %a, and writes the
  ; sum to element 100-i-10 of field 0 of @Foo. %a and i live in allocas and are
  ; reloaded before every use. Returns element %a of field 0.
  ; NOTE(review): with a bound of 100 the store index 100-i-10 becomes negative
  ; once i exceeds 90 -- confirm this is intended by the test.
  389. define i32 @noAlias09(i32 %a) #0 {
  390. entry:
  391. %a.addr = alloca i32, align 4
  392. %i = alloca i32, align 4
  393. store i32 %a, i32* %a.addr, align 4
  394. store i32 0, i32* %i, align 4 ; i = 0
  395. br label %for.cond
  396. for.cond: ; preds = %for.inc, %entry
  397. %0 = load i32, i32* %i, align 4
  398. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  399. br i1 %cmp, label %for.body, label %for.end
  400. for.body: ; preds = %for.cond
  401. %1 = load i32, i32* %i, align 4
  402. %sub = sub nsw i32 100, %1 ; 100 - i
  403. %sub1 = sub nsw i32 %sub, 1 ; 100 - i - 1
  404. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 ; source: field 2, element 100-i-1
  405. %2 = load i32, i32* %arrayidx, align 4
  406. %3 = load i32, i32* %a.addr, align 4
  407. %add = add nsw i32 %2, %3
  408. %4 = load i32, i32* %i, align 4
  409. %sub2 = sub nsw i32 100, %4 ; 100 - i
  410. %sub3 = sub nsw i32 %sub2, 10 ; 100 - i - 10
  411. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 ; destination: field 0, element 100-i-10
  412. store i32 %add, i32* %arrayidx4, align 4
  413. br label %for.inc
  414. for.inc: ; preds = %for.body
  415. %5 = load i32, i32* %i, align 4
  416. %inc = add nsw i32 %5, 1 ; i + 1
  417. store i32 %inc, i32* %i, align 4
  418. br label %for.cond
  419. for.end: ; preds = %for.cond
  420. %6 = load i32, i32* %a.addr, align 4
  421. %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  422. %7 = load i32, i32* %arrayidx5, align 4
  423. ret i32 %7
  424. }
  425. ; /// Pointer access, negative stride, run-time check added
  426. ; int noAlias10 (int a) {
  427. ; int i;
  428. ; for (i=0; i<SIZE; i++)
  429. ; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
  430. ; return *(PA+a);
  431. ; }
  432. ; CHECK-LABEL: define i32 @noAlias10(
  433. ; CHECK-NOT: sub {{.*}} <4 x i32>
  434. ; CHECK: ret
  435. ;
  436. ; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
  437. ; Find out why it is not vectorized here even when vectorization is forced.
  ; Pointer-based, descending variant. Scalar loop over i in [0, 100): on every
  ; iteration reloads the pointers stored in @PB and @PA, forms the address
  ; pointer + 100 - i - 1 for each, reads through the @PB-derived address, adds
  ; %a, and writes the sum through the @PA-derived address. Returns element %a
  ; read through the @PA pointer.
  438. define i32 @noAlias10(i32 %a) #0 {
  439. entry:
  440. %a.addr = alloca i32, align 4
  441. %i = alloca i32, align 4
  442. store i32 %a, i32* %a.addr, align 4
  443. store i32 0, i32* %i, align 4 ; i = 0
  444. br label %for.cond
  445. for.cond: ; preds = %for.inc, %entry
  446. %0 = load i32, i32* %i, align 4
  447. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  448. br i1 %cmp, label %for.body, label %for.end
  449. for.body: ; preds = %for.cond
  450. %1 = load i32*, i32** @PB, align 4 ; current value of the PB pointer
  451. %add.ptr = getelementptr inbounds i32, i32* %1, i32 100 ; PB + 100
  452. %2 = load i32, i32* %i, align 4
  453. %idx.neg = sub i32 0, %2 ; -i
  454. %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg ; PB + 100 - i
  455. %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1 ; source: PB + 100 - i - 1
  456. %3 = load i32, i32* %add.ptr2, align 4
  457. %4 = load i32, i32* %a.addr, align 4
  458. %add = add nsw i32 %3, %4
  459. %5 = load i32*, i32** @PA, align 4 ; current value of the PA pointer
  460. %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 100 ; PA + 100
  461. %6 = load i32, i32* %i, align 4
  462. %idx.neg4 = sub i32 0, %6 ; -i
  463. %add.ptr5 = getelementptr inbounds i32, i32* %add.ptr3, i32 %idx.neg4 ; PA + 100 - i
  464. %add.ptr6 = getelementptr inbounds i32, i32* %add.ptr5, i32 -1 ; destination: PA + 100 - i - 1
  465. store i32 %add, i32* %add.ptr6, align 4
  466. br label %for.inc
  467. for.inc: ; preds = %for.body
  468. %7 = load i32, i32* %i, align 4
  469. %inc = add nsw i32 %7, 1 ; i + 1
  470. store i32 %inc, i32* %i, align 4
  471. br label %for.cond
  472. for.end: ; preds = %for.cond
  473. %8 = load i32*, i32** @PA, align 4
  474. %9 = load i32, i32* %a.addr, align 4
  475. %add.ptr7 = getelementptr inbounds i32, i32* %8, i32 %9 ; result: PA + %a
  476. %10 = load i32, i32* %add.ptr7, align 4
  477. ret i32 %10
  478. }
  479. ; /// Different objects, negative induction, multi-array
  480. ; int noAlias11 (int a) {
  481. ; int i, N=10;
  482. ; for (i=0; i<SIZE; i++)
  483. ; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
  484. ; return Bar.A[N][a];
  485. ; }
  486. ; CHECK-LABEL: define i32 @noAlias11(
  487. ; CHECK: store <4 x i32>
  488. ; CHECK: ret
  ; Two-dimensional, descending variant. N is an alloca initialised to 10. Scalar
  ; loop over i in [0, 100): reads row N, element 100-i-1 of field 2 of @Bar,
  ; adds %a, and writes the sum to row N, element 100-i-1 of field 0 of @Bar.
  ; %a, i and N are reloaded from their allocas before every use. Returns row N,
  ; element %a of field 0.
  489. define i32 @noAlias11(i32 %a) #0 {
  490. entry:
  491. %a.addr = alloca i32, align 4
  492. %i = alloca i32, align 4
  493. %N = alloca i32, align 4
  494. store i32 %a, i32* %a.addr, align 4
  495. store i32 10, i32* %N, align 4 ; N = 10
  496. store i32 0, i32* %i, align 4 ; i = 0
  497. br label %for.cond
  498. for.cond: ; preds = %for.inc, %entry
  499. %0 = load i32, i32* %i, align 4
  500. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  501. br i1 %cmp, label %for.body, label %for.end
  502. for.body: ; preds = %for.cond
  503. %1 = load i32, i32* %i, align 4
  504. %sub = sub nsw i32 100, %1 ; 100 - i
  505. %sub1 = sub nsw i32 %sub, 1 ; 100 - i - 1
  506. %2 = load i32, i32* %N, align 4
  507. %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 ; source row: field 2, row N
  508. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1 ; element 100-i-1 of that row
  509. %3 = load i32, i32* %arrayidx2, align 4
  510. %4 = load i32, i32* %a.addr, align 4
  511. %add = add nsw i32 %3, %4
  512. %5 = load i32, i32* %i, align 4
  513. %sub3 = sub nsw i32 100, %5 ; 100 - i
  514. %sub4 = sub nsw i32 %sub3, 1 ; 100 - i - 1
  515. %6 = load i32, i32* %N, align 4
  516. %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 ; destination row: field 0, row N
  517. %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %sub4 ; element 100-i-1 of that row
  518. store i32 %add, i32* %arrayidx6, align 4
  519. br label %for.inc
  520. for.inc: ; preds = %for.body
  521. %7 = load i32, i32* %i, align 4
  522. %inc = add nsw i32 %7, 1 ; i + 1
  523. store i32 %inc, i32* %i, align 4
  524. br label %for.cond
  525. for.end: ; preds = %for.cond
  526. %8 = load i32, i32* %a.addr, align 4
  527. %9 = load i32, i32* %N, align 4
  528. %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
  529. %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx7, i32 0, i32 %8 ; result: field 0, row N, element %a
  530. %10 = load i32, i32* %arrayidx8, align 4
  531. ret i32 %10
  532. }
  533. ; /// Same objects, negative induction, multi-array, different sub-elements
  534. ; int noAlias12 (int a) {
  535. ; int i, N=10;
  536. ; for (i=0; i<SIZE; i++)
  537. ; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
  538. ; return Bar.A[N][a];
  539. ; }
  540. ; CHECK-LABEL: define i32 @noAlias12(
  541. ; CHECK: store <4 x i32>
  542. ; CHECK: ret
  ; Same array, different rows, descending index. N is an alloca initialised to
  ; 10. Scalar loop over i in [0, 100): reads row N+1, element 100-i-1 of field 0
  ; of @Bar, adds %a, and writes the sum to row N, element 100-i-1 of the same
  ; field. %a, i and N are reloaded from their allocas before every use. Returns
  ; row N, element %a of field 0.
  543. define i32 @noAlias12(i32 %a) #0 {
  544. entry:
  545. %a.addr = alloca i32, align 4
  546. %i = alloca i32, align 4
  547. %N = alloca i32, align 4
  548. store i32 %a, i32* %a.addr, align 4
  549. store i32 10, i32* %N, align 4 ; N = 10
  550. store i32 0, i32* %i, align 4 ; i = 0
  551. br label %for.cond
  552. for.cond: ; preds = %for.inc, %entry
  553. %0 = load i32, i32* %i, align 4
  554. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  555. br i1 %cmp, label %for.body, label %for.end
  556. for.body: ; preds = %for.cond
  557. %1 = load i32, i32* %i, align 4
  558. %sub = sub nsw i32 100, %1 ; 100 - i
  559. %sub1 = sub nsw i32 %sub, 1 ; 100 - i - 1
  560. %2 = load i32, i32* %N, align 4
  561. %add = add nsw i32 %2, 1 ; N + 1
  562. %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add ; source row: field 0, row N+1
  563. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1 ; element 100-i-1 of that row
  564. %3 = load i32, i32* %arrayidx2, align 4
  565. %4 = load i32, i32* %a.addr, align 4
  566. %add3 = add nsw i32 %3, %4
  567. %5 = load i32, i32* %i, align 4
  568. %sub4 = sub nsw i32 100, %5 ; 100 - i
  569. %sub5 = sub nsw i32 %sub4, 1 ; 100 - i - 1
  570. %6 = load i32, i32* %N, align 4
  571. %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 ; destination row: field 0, row N
  572. %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx6, i32 0, i32 %sub5 ; element 100-i-1 of that row
  573. store i32 %add3, i32* %arrayidx7, align 4
  574. br label %for.inc
  575. for.inc: ; preds = %for.body
  576. %7 = load i32, i32* %i, align 4
  577. %inc = add nsw i32 %7, 1 ; i + 1
  578. store i32 %inc, i32* %i, align 4
  579. br label %for.cond
  580. for.end: ; preds = %for.cond
  581. %8 = load i32, i32* %a.addr, align 4
  582. %9 = load i32, i32* %N, align 4
  583. %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
  584. %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx8, i32 0, i32 %8 ; result: field 0, row N, element %a
  585. %10 = load i32, i32* %arrayidx9, align 4
  586. ret i32 %10
  587. }
  588. ; /// Same objects, positive induction, constant distance, just enough for vector size
  589. ; int noAlias13 (int a) {
  590. ; int i;
  591. ; for (i=0; i<SIZE; i++)
  592. ; Foo.A[i] = Foo.A[i+4] + a;
  593. ; return Foo.A[a];
  594. ; }
  595. ; CHECK-LABEL: define i32 @noAlias13(
  596. ; CHECK: add nsw <4 x i32>
  597. ; CHECK: ret
  ; Same array, load four elements ahead of the store. Scalar loop over i in
  ; [0, 100): reads element i+4 of field 0 of @Foo, adds %a, and writes the sum
  ; to element i of the same field. %a and i live in allocas and are reloaded
  ; before every use. Returns element %a of field 0.
  ; NOTE(review): with a bound of 100 the load index i+4 reaches 103, beyond the
  ; 100-element array type of field 0 -- confirm this is intended by the test.
  598. define i32 @noAlias13(i32 %a) #0 {
  599. entry:
  600. %a.addr = alloca i32, align 4
  601. %i = alloca i32, align 4
  602. store i32 %a, i32* %a.addr, align 4
  603. store i32 0, i32* %i, align 4 ; i = 0
  604. br label %for.cond
  605. for.cond: ; preds = %for.inc, %entry
  606. %0 = load i32, i32* %i, align 4
  607. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  608. br i1 %cmp, label %for.body, label %for.end
  609. for.body: ; preds = %for.cond
  610. %1 = load i32, i32* %i, align 4
  611. %add = add nsw i32 %1, 4 ; i + 4
  612. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add ; source: field 0, element i+4
  613. %2 = load i32, i32* %arrayidx, align 4
  614. %3 = load i32, i32* %a.addr, align 4
  615. %add1 = add nsw i32 %2, %3
  616. %4 = load i32, i32* %i, align 4
  617. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 ; destination: field 0, element i
  618. store i32 %add1, i32* %arrayidx2, align 4
  619. br label %for.inc
  620. for.inc: ; preds = %for.body
  621. %5 = load i32, i32* %i, align 4
  622. %inc = add nsw i32 %5, 1 ; i + 1
  623. store i32 %inc, i32* %i, align 4
  624. br label %for.cond
  625. for.end: ; preds = %for.cond
  626. %6 = load i32, i32* %a.addr, align 4
  627. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  628. %7 = load i32, i32* %arrayidx3, align 4
  629. ret i32 %7
  630. }
  631. ; /// Same objects, negative induction, constant distance, just enough for vector size
  632. ; int noAlias14 (int a) {
  633. ; int i;
  634. ; for (i=0; i<SIZE; i++)
  635. ; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
  636. ; return Foo.A[a];
  637. ; }
  638. ; CHECK-LABEL: define i32 @noAlias14(
  639. ; CHECK: sub <4 x i32>
  640. ; CHECK: ret
  ; Same array, descending index, load four elements below the store. Scalar loop
  ; over i in [0, 100): reads element 100-i-5 of field 0 of @Foo, adds %a, and
  ; writes the sum to element 100-i-1 of the same field. %a and i live in allocas
  ; and are reloaded before every use. Returns element %a of field 0.
  ; NOTE(review): with a bound of 100 the load index 100-i-5 becomes negative
  ; once i exceeds 95 -- confirm this is intended by the test.
  641. define i32 @noAlias14(i32 %a) #0 {
  642. entry:
  643. %a.addr = alloca i32, align 4
  644. %i = alloca i32, align 4
  645. store i32 %a, i32* %a.addr, align 4
  646. store i32 0, i32* %i, align 4 ; i = 0
  647. br label %for.cond
  648. for.cond: ; preds = %for.inc, %entry
  649. %0 = load i32, i32* %i, align 4
  650. %cmp = icmp slt i32 %0, 100 ; keep looping while i < 100
  651. br i1 %cmp, label %for.body, label %for.end
  652. for.body: ; preds = %for.cond
  653. %1 = load i32, i32* %i, align 4
  654. %sub = sub nsw i32 100, %1 ; 100 - i
  655. %sub1 = sub nsw i32 %sub, 5 ; 100 - i - 5
  656. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 ; source: field 0, element 100-i-5
  657. %2 = load i32, i32* %arrayidx, align 4
  658. %3 = load i32, i32* %a.addr, align 4
  659. %add = add nsw i32 %2, %3
  660. %4 = load i32, i32* %i, align 4
  661. %sub2 = sub nsw i32 100, %4 ; 100 - i
  662. %sub3 = sub nsw i32 %sub2, 1 ; 100 - i - 1
  663. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 ; destination: field 0, element 100-i-1
  664. store i32 %add, i32* %arrayidx4, align 4
  665. br label %for.inc
  666. for.inc: ; preds = %for.body
  667. %5 = load i32, i32* %i, align 4
  668. %inc = add nsw i32 %5, 1 ; i + 1
  669. store i32 %inc, i32* %i, align 4
  670. br label %for.cond
  671. for.end: ; preds = %for.cond
  672. %6 = load i32, i32* %a.addr, align 4
  673. %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 ; result: field 0, element %a
  674. %7 = load i32, i32* %arrayidx5, align 4
  675. ret i32 %7
  676. }
  677. ;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
  678. ; /// Different objects, swapped induction, alias at the end
  679. ; int mayAlias01 (int a) {
  680. ; int i;
  681. ; for (i=0; i<SIZE; i++)
  682. ; Foo.A[i] = Foo.B[SIZE-i-1] + a;
  683. ; return Foo.A[a];
  684. ; }
  685. ; CHECK-LABEL: define i32 @mayAlias01(
  686. ; CHECK-NOT: add nsw <4 x i32>
  687. ; CHECK: ret
  688. define i32 @mayAlias01(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Each iteration
  ; reads field 2 of @Foo at index (100 - i - 1), adds a, and stores the sum to
  ; field 0 of @Foo at index i, so the load index counts down while the store
  ; index counts up.
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  689. entry:
  690. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  691. %i = alloca i32, align 4 ; stack slot for the loop counter i
  692. store i32 %a, i32* %a.addr, align 4
  693. store i32 0, i32* %i, align 4 ; i = 0
  694. br label %for.cond
  695. for.cond: ; preds = %for.inc, %entry
  696. %0 = load i32, i32* %i, align 4
  697. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  698. br i1 %cmp, label %for.body, label %for.end
  699. for.body: ; preds = %for.cond
  700. %1 = load i32, i32* %i, align 4
  701. %sub = sub nsw i32 100, %1 ; 100 - i
  702. %sub1 = sub nsw i32 %sub, 1 ; read index 100 - i - 1
  703. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
  704. %2 = load i32, i32* %arrayidx, align 4 ; element read from field 2 of @Foo
  705. %3 = load i32, i32* %a.addr, align 4
  706. %add = add nsw i32 %2, %3 ; element read + a
  707. %4 = load i32, i32* %i, align 4 ; write index is i itself
  708. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
  709. store i32 %add, i32* %arrayidx2, align 4 ; write into field 0 of @Foo
  710. br label %for.inc
  711. for.inc: ; preds = %for.body
  712. %5 = load i32, i32* %i, align 4
  713. %inc = add nsw i32 %5, 1 ; i + 1
  714. store i32 %inc, i32* %i, align 4
  715. br label %for.cond
  716. for.end: ; preds = %for.cond
  717. %6 = load i32, i32* %a.addr, align 4
  718. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
  719. %7 = load i32, i32* %arrayidx3, align 4 ; return value: element a of field 0 of @Foo
  720. ret i32 %7
  721. }
  722. ; /// Different objects, swapped induction, alias at the beginning
  723. ; int mayAlias02 (int a) {
  724. ; int i;
  725. ; for (i=0; i<SIZE; i++)
  726. ; Foo.A[SIZE-i-1] = Foo.B[i] + a;
  727. ; return Foo.A[a];
  728. ; }
  729. ; CHECK-LABEL: define i32 @mayAlias02(
  730. ; CHECK-NOT: add nsw <4 x i32>
  731. ; CHECK: ret
  732. define i32 @mayAlias02(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Each iteration
  ; reads field 2 of @Foo at index i, adds a, and stores the sum to field 0 of
  ; @Foo at index (100 - i - 1), so the load index counts up while the store
  ; index counts down.
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  733. entry:
  734. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  735. %i = alloca i32, align 4 ; stack slot for the loop counter i
  736. store i32 %a, i32* %a.addr, align 4
  737. store i32 0, i32* %i, align 4 ; i = 0
  738. br label %for.cond
  739. for.cond: ; preds = %for.inc, %entry
  740. %0 = load i32, i32* %i, align 4
  741. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  742. br i1 %cmp, label %for.body, label %for.end
  743. for.body: ; preds = %for.cond
  744. %1 = load i32, i32* %i, align 4 ; read index is i itself
  745. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
  746. %2 = load i32, i32* %arrayidx, align 4 ; element read from field 2 of @Foo
  747. %3 = load i32, i32* %a.addr, align 4
  748. %add = add nsw i32 %2, %3 ; element read + a
  749. %4 = load i32, i32* %i, align 4
  750. %sub = sub nsw i32 100, %4 ; 100 - i
  751. %sub1 = sub nsw i32 %sub, 1 ; write index 100 - i - 1
  752. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
  753. store i32 %add, i32* %arrayidx2, align 4 ; write into field 0 of @Foo
  754. br label %for.inc
  755. for.inc: ; preds = %for.body
  756. %5 = load i32, i32* %i, align 4
  757. %inc = add nsw i32 %5, 1 ; i + 1
  758. store i32 %inc, i32* %i, align 4
  759. br label %for.cond
  760. for.end: ; preds = %for.cond
  761. %6 = load i32, i32* %a.addr, align 4
  762. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
  763. %7 = load i32, i32* %arrayidx3, align 4 ; return value: element a of field 0 of @Foo
  764. ret i32 %7
  765. }
  766. ; /// Pointer access, run-time check added
  767. ; int mayAlias03 (int a) {
  768. ; int i;
  769. ; for (i=0; i<SIZE; i++)
  770. ; *(PA+i) = *(PB+SIZE-i-1) + a;
  771. ; return *(PA+a);
  772. ; }
  773. ; CHECK-LABEL: define i32 @mayAlias03(
  774. ; CHECK-NOT: add nsw <4 x i32>
  775. ; CHECK: ret
  776. define i32 @mayAlias03(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Unlike the
  ; @Foo-based tests, both addresses come from pointers that are reloaded from
  ; the globals @PB and @PA on every iteration: the loop reads
  ; *(PB + 100 - i - 1), adds a, and stores the sum to *(PA + i).
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  777. entry:
  778. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  779. %i = alloca i32, align 4 ; stack slot for the loop counter i
  780. store i32 %a, i32* %a.addr, align 4
  781. store i32 0, i32* %i, align 4 ; i = 0
  782. br label %for.cond
  783. for.cond: ; preds = %for.inc, %entry
  784. %0 = load i32, i32* %i, align 4
  785. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  786. br i1 %cmp, label %for.body, label %for.end
  787. for.body: ; preds = %for.cond
  788. %1 = load i32*, i32** @PB, align 4 ; current value of the global pointer PB
  789. %add.ptr = getelementptr inbounds i32, i32* %1, i32 100 ; PB + 100
  790. %2 = load i32, i32* %i, align 4
  791. %idx.neg = sub i32 0, %2 ; -i
  792. %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg ; PB + 100 - i
  793. %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1 ; PB + 100 - i - 1
  794. %3 = load i32, i32* %add.ptr2, align 4 ; value read through PB
  795. %4 = load i32, i32* %a.addr, align 4
  796. %add = add nsw i32 %3, %4 ; value read + a
  797. %5 = load i32*, i32** @PA, align 4 ; current value of the global pointer PA
  798. %6 = load i32, i32* %i, align 4
  799. %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6 ; PA + i
  800. store i32 %add, i32* %add.ptr3, align 4 ; write through PA
  801. br label %for.inc
  802. for.inc: ; preds = %for.body
  803. %7 = load i32, i32* %i, align 4
  804. %inc = add nsw i32 %7, 1 ; i + 1
  805. store i32 %inc, i32* %i, align 4
  806. br label %for.cond
  807. for.end: ; preds = %for.cond
  808. %8 = load i32*, i32** @PA, align 4
  809. %9 = load i32, i32* %a.addr, align 4
  810. %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9 ; PA + a
  811. %10 = load i32, i32* %add.ptr4, align 4 ; return value: *(PA + a)
  812. ret i32 %10
  813. }
  814. ;; === Finally, the tests that should only be vectorized with care (or if we ignore undefined behaviour altogether) ===
  815. ; int mustAlias01 (int a) {
  816. ; int i;
  817. ; for (i=0; i<SIZE; i++)
  818. ; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
  819. ; return Foo.A[a];
  820. ; }
  821. ; CHECK-LABEL: define i32 @mustAlias01(
  822. ; CHECK-NOT: add nsw <4 x i32>
  823. ; CHECK: ret
  824. define i32 @mustAlias01(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Each iteration
  ; reads field 2 of @Foo at index (100 - i - 1), adds a, and stores the sum to
  ; field 0 of @Foo at index (i + 10).
  ; For i >= 90 the store index is 100..109, i.e. past the last element of the
  ; [100 x i32] array being indexed.
  ; NOTE(review): presumably the overrun is meant to reach the neighbouring
  ; fields of %struct.anon; the struct layout is not visible here, confirm.
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  825. entry:
  826. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  827. %i = alloca i32, align 4 ; stack slot for the loop counter i
  828. store i32 %a, i32* %a.addr, align 4
  829. store i32 0, i32* %i, align 4 ; i = 0
  830. br label %for.cond
  831. for.cond: ; preds = %for.inc, %entry
  832. %0 = load i32, i32* %i, align 4
  833. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  834. br i1 %cmp, label %for.body, label %for.end
  835. for.body: ; preds = %for.cond
  836. %1 = load i32, i32* %i, align 4
  837. %sub = sub nsw i32 100, %1 ; 100 - i
  838. %sub1 = sub nsw i32 %sub, 1 ; read index 100 - i - 1
  839. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
  840. %2 = load i32, i32* %arrayidx, align 4 ; element read from field 2 of @Foo
  841. %3 = load i32, i32* %a.addr, align 4
  842. %add = add nsw i32 %2, %3 ; element read + a
  843. %4 = load i32, i32* %i, align 4
  844. %add2 = add nsw i32 %4, 10 ; write index i + 10
  845. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
  846. store i32 %add, i32* %arrayidx3, align 4 ; write relative to field 0 of @Foo
  847. br label %for.inc
  848. for.inc: ; preds = %for.body
  849. %5 = load i32, i32* %i, align 4
  850. %inc = add nsw i32 %5, 1 ; i + 1
  851. store i32 %inc, i32* %i, align 4
  852. br label %for.cond
  853. for.end: ; preds = %for.cond
  854. %6 = load i32, i32* %a.addr, align 4
  855. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
  856. %7 = load i32, i32* %arrayidx4, align 4 ; return value: element a of field 0 of @Foo
  857. ret i32 %7
  858. }
  859. ; int mustAlias02 (int a) {
  860. ; int i;
  861. ; for (i=0; i<SIZE; i++)
  862. ; Foo.A[i] = Foo.B[SIZE-i-10] + a;
  863. ; return Foo.A[a];
  864. ; }
  865. ; CHECK-LABEL: define i32 @mustAlias02(
  866. ; CHECK-NOT: add nsw <4 x i32>
  867. ; CHECK: ret
  868. define i32 @mustAlias02(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Each iteration
  ; reads field 2 of @Foo at index (100 - i - 10), adds a, and stores the sum
  ; to field 0 of @Foo at index i.
  ; For i >= 91 the load index is -1..-9, i.e. before the first element of the
  ; [100 x i32] array being indexed.
  ; NOTE(review): presumably the underrun is meant to reach the fields that
  ; precede field 2 in %struct.anon; the struct layout is not visible here,
  ; confirm.
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  869. entry:
  870. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  871. %i = alloca i32, align 4 ; stack slot for the loop counter i
  872. store i32 %a, i32* %a.addr, align 4
  873. store i32 0, i32* %i, align 4 ; i = 0
  874. br label %for.cond
  875. for.cond: ; preds = %for.inc, %entry
  876. %0 = load i32, i32* %i, align 4
  877. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  878. br i1 %cmp, label %for.body, label %for.end
  879. for.body: ; preds = %for.cond
  880. %1 = load i32, i32* %i, align 4
  881. %sub = sub nsw i32 100, %1 ; 100 - i
  882. %sub1 = sub nsw i32 %sub, 10 ; read index 100 - i - 10
  883. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
  884. %2 = load i32, i32* %arrayidx, align 4 ; element read relative to field 2 of @Foo
  885. %3 = load i32, i32* %a.addr, align 4
  886. %add = add nsw i32 %2, %3 ; element read + a
  887. %4 = load i32, i32* %i, align 4 ; write index is i itself
  888. %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
  889. store i32 %add, i32* %arrayidx2, align 4 ; write into field 0 of @Foo
  890. br label %for.inc
  891. for.inc: ; preds = %for.body
  892. %5 = load i32, i32* %i, align 4
  893. %inc = add nsw i32 %5, 1 ; i + 1
  894. store i32 %inc, i32* %i, align 4
  895. br label %for.cond
  896. for.end: ; preds = %for.cond
  897. %6 = load i32, i32* %a.addr, align 4
  898. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
  899. %7 = load i32, i32* %arrayidx3, align 4 ; return value: element a of field 0 of @Foo
  900. ret i32 %7
  901. }
  902. ; int mustAlias03 (int a) {
  903. ; int i;
  904. ; for (i=0; i<SIZE; i++)
  905. ; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
  906. ; return Foo.A[a];
  907. ; }
  908. ; CHECK-LABEL: define i32 @mustAlias03(
  909. ; CHECK-NOT: add nsw <4 x i32>
  910. ; CHECK: ret
  911. define i32 @mustAlias03(i32 %a) nounwind {
  ; IR for the C loop quoted in the comment above this function. Each iteration
  ; reads field 2 of @Foo at index (100 - i - 10), adds a, and stores the sum
  ; to field 0 of @Foo at index (i + 10).
  ; Both indices leave the 0..99 range of the [100 x i32] arrays being indexed:
  ; the store index is 100..109 for i >= 90 and the load index is -1..-9 for
  ; i >= 91.
  ; NOTE(review): presumably both accesses are meant to run into neighbouring
  ; fields of %struct.anon; the struct layout is not visible here, confirm.
  ; The CHECK-NOT directive preceding this function requires that no
  ; `add nsw <4 x i32>` shows up before `ret` in the checked output.
  912. entry:
  913. %a.addr = alloca i32, align 4 ; stack slot holding a copy of the argument a
  914. %i = alloca i32, align 4 ; stack slot for the loop counter i
  915. store i32 %a, i32* %a.addr, align 4
  916. store i32 0, i32* %i, align 4 ; i = 0
  917. br label %for.cond
  918. for.cond: ; preds = %for.inc, %entry
  919. %0 = load i32, i32* %i, align 4
  920. %cmp = icmp slt i32 %0, 100 ; signed test i < 100
  921. br i1 %cmp, label %for.body, label %for.end
  922. for.body: ; preds = %for.cond
  923. %1 = load i32, i32* %i, align 4
  924. %sub = sub nsw i32 100, %1 ; 100 - i
  925. %sub1 = sub nsw i32 %sub, 10 ; read index 100 - i - 10
  926. %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
  927. %2 = load i32, i32* %arrayidx, align 4 ; element read relative to field 2 of @Foo
  928. %3 = load i32, i32* %a.addr, align 4
  929. %add = add nsw i32 %2, %3 ; element read + a
  930. %4 = load i32, i32* %i, align 4
  931. %add2 = add nsw i32 %4, 10 ; write index i + 10
  932. %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
  933. store i32 %add, i32* %arrayidx3, align 4 ; write relative to field 0 of @Foo
  934. br label %for.inc
  935. for.inc: ; preds = %for.body
  936. %5 = load i32, i32* %i, align 4
  937. %inc = add nsw i32 %5, 1 ; i + 1
  938. store i32 %inc, i32* %i, align 4
  939. br label %for.cond
  940. for.end: ; preds = %for.cond
  941. %6 = load i32, i32* %a.addr, align 4
  942. %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
  943. %7 = load i32, i32* %arrayidx4, align 4 ; return value: element a of field 0 of @Foo
  944. ret i32 %7
  945. }