; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check vectorization on an interleaved load group of factor 2 and an
; interleaved store group of factor 2.

;   int AB[1024];
;   int CD[1024];
;   void test_array_load2_store2(int C, int D) {
;     for (int i = 0; i < 1024; i+=2) {
;       int A = AB[i];
;       int B = AB[i+1];
;       CD[i] = A + C;
;       CD[i+1] = B * D;
;     }
;   }
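
; Conceptually, each vector iteration below handles 4 consecutive (A, B) pairs:
; one wide load of AB[i..i+7], two shuffles to split even/odd lanes, the
; lane-wise add/mul, and one shuffle plus a wide store to re-interleave into CD.
; A hedged C sketch of that per-iteration work (the helper name and the
; even/odd arrays are illustrative only, not part of the test):
;
;   static void one_vec_iter_load2_store2(int i, int C, int D,
;                                         const int AB[1024], int CD[1024]) {
;     int even[4], odd[4];
;     for (int lane = 0; lane < 4; ++lane) {   // de-interleave the wide load
;       even[lane] = AB[i + 2*lane];
;       odd[lane]  = AB[i + 2*lane + 1];
;     }
;     for (int lane = 0; lane < 4; ++lane) {   // compute and re-interleave
;       CD[i + 2*lane]     = even[lane] + C;
;       CD[i + 2*lane + 1] = odd[lane] * D;
;     }
;   }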

; CHECK-LABEL: @test_array_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: add nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

@AB = common global [1024 x i32] zeroinitializer, align 4
@CD = common global [1024 x i32] zeroinitializer, align 4

define void @test_array_load2_store2(i32 %C, i32 %D) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx0, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %tmp, %C
  %mul = mul nsw i32 %tmp2, %D
  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
  store i32 %mul, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp slt i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 3 and an
; interleaved store group of factor 3.

;   int A[3072];
;   struct ST3 S[1024];
;   void test_struct_array_load3_store3() {
;     int *ptr = A;
;     for (int i = 0; i < 1024; i++) {
;       int X1 = *ptr++;
;       int X2 = *ptr++;
;       int X3 = *ptr++;
;       S[i].x = X1 + 1;
;       S[i].y = X2 + 2;
;       S[i].z = X3 + 3;
;     }
;   }
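
; Illustrative only: with VF=4 and factor 3, one vector iteration reads 12
; consecutive ints from A, de-interleaves them into the X1/X2/X3 lanes, and
; re-interleaves the results into 4 consecutive ST3 records. A hedged C sketch
; (the helper name is made up for this comment):
;
;   struct ST3 { int x, y, z; };
;   static void one_vec_iter_load3_store3(int i, const int *A, struct ST3 *S) {
;     for (int lane = 0; lane < 4; ++lane) {
;       const int *rec = A + 3*(i + lane);   // lanes at strides 0, 1, 2
;       S[i + lane].x = rec[0] + 1;
;       S[i + lane].y = rec[1] + 2;
;       S[i + lane].z = rec[2] + 3;
;     }
;   }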

; CHECK-LABEL: @test_struct_array_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4

%struct.ST3 = type { i32, i32, i32 }
@A = common global [3072 x i32] zeroinitializer, align 4
@S = common global [1024 x %struct.ST3] zeroinitializer, align 4

define void @test_struct_array_load3_store3() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
  %tmp = load i32, i32* %ptr.016, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %add3 = add nsw i32 %tmp1, 2
  %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
  store i32 %add3, i32* %y, align 4
  %add6 = add nsw i32 %tmp2, 3
  %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
  store i32 %add6, i32* %z, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 4.

;   struct ST4 {
;     int x;
;     int y;
;     int z;
;     int w;
;   };
;
;   int test_struct_load4(struct ST4 *S) {
;     int r = 0;
;     for (int i = 0; i < 1024; i++) {
;       r += S[i].x;
;       r -= S[i].y;
;       r += S[i].z;
;       r -= S[i].w;
;     }
;     return r;
;   }
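
; Illustrative only: the four struct fields form a single load group of
; factor 4; the wide <16 x i32> load is split into four <4 x i32> lanes that
; feed a vector reduction. A hedged C sketch of one vector iteration (the
; lane-accumulator array stands in for the vector PHI and is made up for this
; comment):
;
;   struct ST4 { int x, y, z, w; };
;   static void one_vec_iter_load4(int i, const struct ST4 *S, int r_lanes[4]) {
;     for (int lane = 0; lane < 4; ++lane) {
;       r_lanes[lane] += S[i + lane].x;
;       r_lanes[lane] -= S[i + lane].y;
;       r_lanes[lane] += S[i + lane].z;
;       r_lanes[lane] -= S[i + lane].w;
;     }
;     // the four partial sums are combined into the scalar r after the loop
;   }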

; CHECK-LABEL: @test_struct_load4(
; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>

%struct.ST4 = type { i32, i32, i32, i32 }

define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %add = add nsw i32 %tmp, %r.022
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
  %tmp1 = load i32, i32* %y, align 4
  %sub = sub i32 %add, %tmp1
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
  %tmp2 = load i32, i32* %z, align 4
  %add5 = add nsw i32 %sub, %tmp2
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
  %tmp3 = load i32, i32* %w, align 4
  %sub8 = sub i32 %add5, %tmp3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %sub8
}

; Check vectorization on an interleaved store group of factor 4.

;   void test_struct_store4(int *A, struct ST4 *B) {
;     int *ptr = A;
;     for (int i = 0; i < 1024; i++) {
;       int X = *ptr++;
;       B[i].x = X + 1;
;       B[i].y = X * 2;
;       B[i].z = X + 3;
;       B[i].w = X + 4;
;     }
;   }
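
; Illustrative only: the input is a plain unit-stride load, so only the four
; stores form an interleave group (factor 4); the four <4 x i32> results are
; shuffled together into one <16 x i32> store. A hedged C sketch of one vector
; iteration, assuming the ST4 layout shown above (helper name made up for this
; comment):
;
;   static void one_vec_iter_store4(int i, const int *A, struct ST4 *B) {
;     for (int lane = 0; lane < 4; ++lane) {
;       int X = A[i + lane];            // single contiguous load
;       B[i + lane].x = X + 1;          // four interleaved store lanes
;       B[i + lane].y = X * 2;
;       B[i + lane].z = X + 3;
;       B[i + lane].w = X + 4;
;     }
;   }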

; CHECK-LABEL: @test_struct_store4(
; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4

define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
  %tmp = load i32, i32* %ptr.024, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %mul = shl nsw i32 %tmp, 1
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
  store i32 %mul, i32* %y, align 4
  %add3 = add nsw i32 %tmp, 3
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
  store i32 %add3, i32* %z, align 4
  %add6 = add nsw i32 %tmp, 4
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
  store i32 %add6, i32* %w, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on a reverse interleaved load group of factor 2 and
; a reverse interleaved store group of factor 2.

;   struct ST2 {
;     int x;
;     int y;
;   };
;
;   void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
;     for (int i = 1023; i >= 0; i--) {
;       int a = A[i].x + i;  // interleaved load of index 0
;       int b = A[i].y - i;  // interleaved load of index 1
;       B[i].x = a;          // interleaved store of index 0
;       B[i].y = b;          // interleaved store of index 1
;     }
;   }
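
; Illustrative only: because the loop counts down, each de-interleaved lane is
; additionally reversed (the <i32 3, i32 2, i32 1, i32 0> shuffles below), and
; the results are reversed again before being re-interleaved for the store.
; A hedged C sketch of one vector iteration, assuming the ST2 layout shown
; above (helper name made up for this comment; i decreases by 4 per vector
; iteration):
;
;   static void one_vec_iter_reversed(int i, const struct ST2 *A, struct ST2 *B) {
;     for (int lane = 0; lane < 4; ++lane) {
;       int idx = i - lane;                // reversed access order
;       B[idx].x = A[idx].x + idx;
;       B[idx].y = A[idx].y - idx;
;     }
;   }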

; CHECK-LABEL: @test_reversed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

%struct.ST2 = type { i32, i32 }

define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
  %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %tmp1 = trunc i64 %indvars.iv to i32
  %add = add nsw i32 %tmp, %tmp1
  %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
  %tmp2 = load i32, i32* %y, align 4
  %sub = sub nsw i32 %tmp2, %tmp1
  %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x5, align 4
  %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
  store i32 %sub, i32* %y8, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements).

;   void even_load(int *A, int *B) {
;     for (unsigned i = 0; i < 1024; i+=2)
;       B[i/2] = A[i] * 2;
;   }
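
; Illustrative only: only the even elements of A are used, so the load group
; has a gap at index 1. The wide <8 x i32> load is still emitted, but only the
; even-lane shuffle should be extracted (the CHECK-NOT below guards against an
; odd-lane shuffle). A hedged C sketch of one vector iteration (helper name
; made up for this comment):
;
;   static void one_vec_iter_even_load(unsigned i, const int *A, int *B) {
;     for (int lane = 0; lane < 4; ++lane)
;       B[i/2 + lane] = A[i + 2*lane] * 2;   // odd elements A[i + 2*lane + 1] unused
;   }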

; CHECK-LABEL: @even_load(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>

define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

;   void mixed_load2_store2(int *A, int *B) {
;     for (unsigned i = 0; i < 1024; i+=2) {
;       B[i] = A[i] * A[i+1];
;       B[i+1] = A[i] + A[i+1];
;     }
;   }
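
; Illustrative only: A[i] and A[i+1] are each loaded twice in the scalar loop,
; but all four loads fold into a single factor-2 load group, and the two stores
; into a single factor-2 store group. A hedged C sketch of one vector iteration
; (helper name made up for this comment):
;
;   static void one_vec_iter_mixed2(unsigned i, const int *A, int *B) {
;     for (int lane = 0; lane < 4; ++lane) {
;       int a0 = A[i + 2*lane], a1 = A[i + 2*lane + 1];   // loaded once
;       B[i + 2*lane]     = a0 * a1;
;       B[i + 2*lane + 1] = a0 + a1;
;     }
;   }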

; CHECK-LABEL: @mixed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec

define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %mul, i32* %arrayidx4, align 4
  %tmp3 = load i32, i32* %arrayidx, align 4
  %tmp4 = load i32, i32* %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %add10, i32* %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

;   void mixed_load3_store3(int *A) {
;     for (unsigned i = 0; i < 1024; i++) {
;       *A++ += i;
;       *A++ += i;
;       *A++ += i;
;     }
;   }
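
; Illustrative only: each iteration reads and updates three consecutive ints
; through a moving pointer, so the three loads and three stores form one
; factor-3 load group and one factor-3 store group over the same memory.
; A hedged C sketch of one vector iteration (helper name made up for this
; comment):
;
;   static void one_vec_iter_mixed3(unsigned i, int *A) {
;     for (int lane = 0; lane < 4; ++lane) {
;       int *rec = A + 3*(i + lane);
;       rec[0] += i + lane;
;       rec[1] += i + lane;
;       rec[2] += i + lane;
;     }
;   }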

; CHECK-LABEL: @mixed_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4

define void @mixed_load3_store3(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
  %tmp = load i32, i32* %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, i32* %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, i32* %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, i32* %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on interleaved access groups whose members have
; different types.

;   struct IntFloat {
;     int a;
;     float b;
;   };
;
;   int SA;
;   float SB;
;
;   void int_float_struct(struct IntFloat *A) {
;     int SumA;
;     float SumB;
;     for (unsigned i = 0; i < 1024; i++) {
;       SumA += A[i].a;
;       SumB += A[i].b;
;     }
;     SA = SumA;
;     SB = SumB;
;   }
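
; Illustrative only: the group mixes an i32 and a float member, so the wide
; load is done as <8 x i32> and the float lane is recovered with a bitcast
; (checked below). A hedged C sketch of one vector iteration, assuming the
; IntFloat layout shown above (helper name and lane accumulators are made up
; for this comment):
;
;   static void one_vec_iter_int_float(int i, const struct IntFloat *A,
;                                      int sumA_lanes[4], float sumB_lanes[4]) {
;     for (int lane = 0; lane < 4; ++lane) {
;       sumA_lanes[lane] += A[i + lane].a;   // integer lane
;       sumB_lanes[lane] += A[i + lane].b;   // float lane, via bitcast in the IR
;     }
;   }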

; CHECK-LABEL: @int_float_struct(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
; CHECK: add nsw <4 x i32>
; CHECK: fadd fast <4 x float>

%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, i32* @SA, align 4
  store float %add3, float* @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, float* %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

attributes #0 = { "unsafe-fp-math"="true" }