basic.ll 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. ; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
  ; The command above runs opt on this file with the scalarizer (load/store
  ; scalarization enabled) followed by dce, and pipes the textual IR into
  ; FileCheck, which matches it against the expectations embedded below.
  ;
  ; Data layout used by every function in this file. Note v128:128:128: 128-bit
  ; vectors get 128-bit ABI alignment, which presumably is where the "align 16"
  ; expected on lane 0 of the <4 x float> / <4 x i32> accesses comes from.
  2. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  ; External function taking and returning <4 x float>. It is called from @f1
  ; and @f14 with a whole-vector argument.
  3. declare <4 x float> @ext(<4 x float>)
  ; Zero-initialized <4 x float> global. No function visible in this file
  ; references it.
  4. @g = global <4 x float> zeroinitializer
  ; @f1: floating-point loop mixing a vector phi, load/store, bitcasts,
  ; shuffles, fadd, a call, fcmp and select.
  ;
  ; Inputs: %init seeds the vector accumulator, %base is indexed by the loop
  ; counter, %count is the initial counter value.
  ;
  ; What the expectations pin down:
  ;  - %init is split into four scalars in the entry block and %acc becomes
  ;    four scalar phis.
  ;  - The vector load becomes four scalar loads with alignments 16/4/8/4.
  ;  - The <4 x float> -> <2 x double> bitcasts and the two shuffles disappear;
  ;    the fadd lanes read %val.i0+%val.i2, %val.i1+%val.i3, %acc.i0+%acc.i2
  ;    and %acc.i1+%acc.i3 directly.
  ;  - The call to @ext stays a vector call, so %add is rebuilt with
  ;    insertelement and %call is taken apart with extractelement.
  ;  - fcmp/select/store are emitted once per lane.
  5. define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
  6. ; CHECK-LABEL: @f1(
  7. ; CHECK: entry:
  8. ; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0
  9. ; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1
  10. ; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2
  11. ; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3
  12. ; CHECK: br label %loop
  13. ; CHECK: loop:
  14. ; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  15. ; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
  16. ; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
  17. ; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
  18. ; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
  19. ; CHECK: %nexti = sub i32 %i, 1
  20. ; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
  21. ; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
  22. ; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
  23. ; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
  24. ; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
  25. ; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
  26. ; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
  27. ; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
  28. ; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
  29. ; CHECK: %add.i0 = fadd float %val.i0, %val.i2
  30. ; CHECK: %add.i1 = fadd float %val.i1, %val.i3
  31. ; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2
  32. ; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3
  33. ; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
  34. ; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
  35. ; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
  36. ; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
  37. ; CHECK: %call = call <4 x float> @ext(<4 x float> %add)
  38. ; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
  39. ; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0
  40. ; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
  41. ; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0
  42. ; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
  43. ; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0
  44. ; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
  45. ; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0
  46. ; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
  47. ; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
  48. ; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
  49. ; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
  50. ; CHECK: store float %sel.i0, float* %ptr.i0
  51. ; CHECK: store float %sel.i1, float* %ptr.i1
  52. ; CHECK: store float %sel.i2, float* %ptr.i2
  53. ; CHECK: store float %sel.i3, float* %ptr.i3
  54. ; CHECK: %test = icmp eq i32 %nexti, 0
  55. ; CHECK: br i1 %test, label %loop, label %exit
  56. ; CHECK: exit:
  57. ; CHECK: ret void
  58. entry:
  59. br label %loop
  60. loop:
  61. %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  62. %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  63. %nexti = sub i32 %i, 1
  64. %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  65. %val = load <4 x float> , <4 x float> *%ptr
  ; Reinterpret both vectors as two doubles so that each shuffle below moves
  ; pairs of floats at a time.
  66. %dval = bitcast <4 x float> %val to <2 x double>
  67. %dacc = bitcast <4 x float> %acc to <2 x double>
  ; %shuffle1 = <%dval[0], %dacc[0]>, %shuffle2 = <%dval[1], %dacc[1]>.
  68. %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  69. <2 x i32> <i32 0, i32 2>
  70. %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
  71. <2 x i32> <i32 1, i32 3>
  72. %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  73. %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  74. %add = fadd <4 x float> %f1, %f2
  ; The call consumes and produces a whole vector.
  75. %call = call <4 x float> @ext(<4 x float> %add)
  76. %cmp = fcmp ogt <4 x float> %call,
  77. <float 1.0, float 2.0, float 3.0, float 4.0>
  78. %sel = select <4 x i1> %cmp, <4 x float> %call,
  79. <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  80. store <4 x float> %sel, <4 x float> *%ptr
  ; The back edge is taken when %nexti equals 0; the expectations require the
  ; branch to be preserved exactly as written.
  81. %test = icmp eq i32 %nexti, 0
  82. br i1 %test, label %loop, label %exit
  83. exit:
  84. ret void
  85. }
  ; @f2: integer counterpart of @f1, additionally covering sext, trunc and a
  ; splat built from a scalar.
  ;
  ; Inputs: %init seeds the <4 x i32> accumulator, %base points at <4 x i8>
  ; elements indexed by the loop counter, %count is the initial counter value.
  ;
  ; What the expectations pin down:
  ;  - The <4 x i8> load/store become four i8 accesses with alignments 4/1/2/1.
  ;  - sext, add, icmp, select and trunc are each emitted once per lane.
  ;  - The insertelement + zero-mask shufflevector splat of %i disappears; each
  ;    scalar select uses %i directly as its false operand.
  86. define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
  87. ; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
  88. ; CHECK: entry:
  89. ; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0
  90. ; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1
  91. ; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2
  92. ; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3
  93. ; CHECK: br label %loop
  94. ; CHECK: loop:
  95. ; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  96. ; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
  97. ; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
  98. ; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
  99. ; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
  100. ; CHECK: %nexti = sub i32 %i, 1
  101. ; CHECK: %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
  102. ; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
  103. ; CHECK: %val.i0 = load i8, i8* %ptr.i0, align 4
  104. ; CHECK: %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
  105. ; CHECK: %val.i1 = load i8, i8* %ptr.i1, align 1
  106. ; CHECK: %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
  107. ; CHECK: %val.i2 = load i8, i8* %ptr.i2, align 2
  108. ; CHECK: %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
  109. ; CHECK: %val.i3 = load i8, i8* %ptr.i3, align 1
  110. ; CHECK: %ext.i0 = sext i8 %val.i0 to i32
  111. ; CHECK: %ext.i1 = sext i8 %val.i1 to i32
  112. ; CHECK: %ext.i2 = sext i8 %val.i2 to i32
  113. ; CHECK: %ext.i3 = sext i8 %val.i3 to i32
  114. ; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0
  115. ; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1
  116. ; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2
  117. ; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3
  118. ; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10
  119. ; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11
  120. ; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12
  121. ; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13
  122. ; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
  123. ; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
  124. ; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
  125. ; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
  126. ; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8
  127. ; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8
  128. ; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8
  129. ; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8
  130. ; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4
  131. ; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1
  132. ; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2
  133. ; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1
  134. ; CHECK: %test = icmp eq i32 %nexti, 0
  135. ; CHECK: br i1 %test, label %loop, label %exit
  136. ; CHECK: exit:
  137. ; CHECK: ret void
  138. entry:
  139. br label %loop
  140. loop:
  141. %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  142. %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  143. %nexti = sub i32 %i, 1
  144. %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  145. %val = load <4 x i8> , <4 x i8> *%ptr
  146. %ext = sext <4 x i8> %val to <4 x i32>
  147. %add = add <4 x i32> %ext, %acc
  148. %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Splat %i across all four lanes: put it in lane 0, then broadcast lane 0
  ; with an all-zero shuffle mask.
  149. %single = insertelement <4 x i32> undef, i32 %i, i32 0
  150. %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
  151. <4 x i32> zeroinitializer
  152. %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  153. %trunc = trunc <4 x i32> %sel to <4 x i8>
  154. store <4 x i8> %trunc, <4 x i8> *%ptr
  155. %test = icmp eq i32 %nexti, 0
  156. br i1 %test, label %loop, label %exit
  157. exit:
  158. ret void
  159. }
  160. ; Check that !tbaa information is preserved.
  ; The vector load carries !tbaa !1 and the vector store carries !tbaa !2.
  ; Every scalar load must carry one and the same tag, and every scalar store
  ; must carry one and the same tag. The first store expectation captures the
  ; TAG variable afresh, so the store tag is matched independently of the load
  ; tag rather than being required to equal it.
  161. define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
  162. ; CHECK-LABEL: @f3(
  163. ; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
  164. ; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
  165. ; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
  166. ; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
  167. ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
  168. ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
  169. ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
  170. ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
  171. ; CHECK: ret void
  172. %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  173. %add = add <4 x i32> %val, %val
  174. store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  175. ret void
  176. }
  177. ; Check that !tbaa.struct information is preserved.
  ; Both the vector load and the vector store carry !tbaa.struct !5. The tag is
  ; captured on the first scalar load and all seven remaining scalar accesses
  ; (loads and stores alike) must reference that same node.
  178. define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
  179. ; CHECK-LABEL: @f4(
  180. ; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
  181. ; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
  182. ; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
  183. ; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
  184. ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
  185. ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
  186. ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
  187. ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
  188. ; CHECK: ret void
  189. %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  190. %add = add <4 x i32> %val, %val
  191. store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  192. ret void
  193. }
  194. ; Check that llvm.mem.parallel_loop_access information is preserved.
  ; The loop's load and store are both tagged with
  ; !llvm.mem.parallel_loop_access !3, and the back-edge branch names the same
  ; node as its !llvm.loop. All eight scalar accesses must carry the
  ; annotation, and all must reference the one captured tag.
  195. define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
  196. ; CHECK-LABEL: @f5(
  197. ; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
  198. ; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
  199. ; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
  200. ; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
  201. ; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
  202. ; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
  203. ; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
  204. ; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
  205. ; CHECK: ret void
  206. entry:
  207. br label %loop
  208. loop:
  209. %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  210. %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  211. %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  212. %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
  213. %add = add <4 x i32> %val, %val
  214. store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
  ; The index starts at 0 and steps by -1 until it equals %count.
  215. %next_index = add i32 %index, -1
  216. %continue = icmp ne i32 %next_index, %count
  217. br i1 %continue, label %loop, label %end, !llvm.loop !3
  218. end:
  219. ret void
  220. }
  221. ; Check that fpmath information is preserved.
  ; A single vector fadd annotated with !fpmath !4 must become four scalar
  ; fadds that each carry the same !fpmath node. Because the function returns
  ; the vector, the result is reassembled with an insertelement chain. The
  ; constant operands are matched with a pattern that tolerates an exponent
  ; suffix after "N.0".
  222. define <4 x float> @f6(<4 x float> %x) {
  223. ; CHECK-LABEL: @f6(
  224. ; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
  225. ; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
  226. ; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
  227. ; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
  228. ; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
  229. ; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
  230. ; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
  231. ; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
  232. ; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
  233. ; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
  234. ; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
  235. ; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
  236. ; CHECK: ret <4 x float> %res
  237. %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
  238. !fpmath !4
  239. ret <4 x float> %res
  240. }
  241. ; Check that random metadata isn't kept.
  ; The load and store are tagged with the metadata kind !foo (node !5). The
  ; negative expectation requires that the text "!foo" does not occur anywhere
  ; between the function header and its "ret void" in the output.
  242. define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
  243. ; CHECK-LABEL: @f7(
  244. ; CHECK-NOT: !foo
  245. ; CHECK: ret void
  246. %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  247. %add = add <4 x i32> %val, %val
  248. store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  249. ret void
  250. }
  251. ; Test GEP with vectors.
  ; The index vector is %i0 with lanes 0 and 2 overwritten by the constant 100,
  ; and the pointer vector is %ptr0 with lane 1 overwritten by %other. The
  ; expectations require one scalar GEP per lane that looks through those
  ; inserts:
  ;   lane 0: %ptr0.i0 indexed by 100
  ;   lane 1: %other   indexed by %i0.i1
  ;   lane 2: %ptr0.i2 indexed by 100
  ;   lane 3: %ptr0.i3 indexed by %i0.i3
  ; Only the lanes that are actually used are extracted from %i0 (1 and 3) and
  ; from %ptr0 (0, 2 and 3). The pointer-vector store is split into four
  ; stores with alignments 32/8/16/8.
  252. define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
  253. float *%other) {
  254. ; CHECK-LABEL: @f8(
  255. ; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
  256. ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
  257. ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
  258. ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
  259. ; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
  260. ; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
  261. ; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
  262. ; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
  263. ; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
  264. ; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
  265. ; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
  266. ; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
  267. ; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
  268. ; CHECK: store float* %val.i0, float** %dest.i0, align 32
  269. ; CHECK: store float* %val.i1, float** %dest.i1, align 8
  270. ; CHECK: store float* %val.i2, float** %dest.i2, align 16
  271. ; CHECK: store float* %val.i3, float** %dest.i3, align 8
  272. ; CHECK: ret void
  273. %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  274. %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  275. %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  276. %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  277. store <4 x float *> %val, <4 x float *> *%dest
  278. ret void
  279. }
  280. ; Test the handling of unaligned loads.
  ; The vector load is under-aligned at 4 bytes and the vector store at 8
  ; bytes. Expected per-lane alignments: every scalar load is align 4, while
  ; the scalar stores alternate 8/4/8/4 (lanes 0 and 2 sit at offsets 0 and 8).
  ;
  ; The function header is matched with the label form of the directive, as in
  ; @f1-@f8, so these expectations cannot match output of another function.
  281. define void @f9(<4 x float> *%dest, <4 x float> *%src) {
  282. ; CHECK-LABEL: @f9(
  283. ; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
  284. ; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
  285. ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
  286. ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
  287. ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
  288. ; CHECK: %val.i0 = load float, float* %src.i0, align 4
  289. ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
  290. ; CHECK: %val.i1 = load float, float* %src.i1, align 4
  291. ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
  292. ; CHECK: %val.i2 = load float, float* %src.i2, align 4
  293. ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
  294. ; CHECK: %val.i3 = load float, float* %src.i3, align 4
  295. ; CHECK: store float %val.i0, float* %dest.i0, align 8
  296. ; CHECK: store float %val.i1, float* %dest.i1, align 4
  297. ; CHECK: store float %val.i2, float* %dest.i2, align 8
  298. ; CHECK: store float %val.i3, float* %dest.i3, align 4
  299. ; CHECK: ret void
  300. %val = load <4 x float> , <4 x float> *%src, align 4
  301. store <4 x float> %val, <4 x float> *%dest, align 8
  302. ret void
  303. }
  304. ; ...and again with subelement alignment.
  ; Same shape as @f9, but the vector load is align 1 and the vector store is
  ; align 2, both smaller than the 4-byte element size. Expected result: all
  ; four scalar loads keep align 1 and all four scalar stores keep align 2.
  ;
  ; The function header is matched with the label form of the directive, as in
  ; @f1-@f8, so these expectations cannot match output of another function.
  305. define void @f10(<4 x float> *%dest, <4 x float> *%src) {
  306. ; CHECK-LABEL: @f10(
  307. ; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
  308. ; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
  309. ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
  310. ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
  311. ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
  312. ; CHECK: %val.i0 = load float, float* %src.i0, align 1
  313. ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
  314. ; CHECK: %val.i1 = load float, float* %src.i1, align 1
  315. ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
  316. ; CHECK: %val.i2 = load float, float* %src.i2, align 1
  317. ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
  318. ; CHECK: %val.i3 = load float, float* %src.i3, align 1
  319. ; CHECK: store float %val.i0, float* %dest.i0, align 2
  320. ; CHECK: store float %val.i1, float* %dest.i1, align 2
  321. ; CHECK: store float %val.i2, float* %dest.i2, align 2
  322. ; CHECK: store float %val.i3, float* %dest.i3, align 2
  323. ; CHECK: ret void
  324. %val = load <4 x float> , <4 x float> *%src, align 1
  325. store <4 x float> %val, <4 x float> *%dest, align 2
  326. ret void
  327. }
  328. ; Test that sub-byte loads aren't scalarized.
  ; The element type is i1, so individual lanes are not byte-addressable. The
  ; expectations require both <32 x i1> loads and the <32 x i1> store to remain
  ; whole-vector memory operations in the output.
  ;
  ; The function header is matched with the label form of the directive, as in
  ; @f1-@f8, so these expectations cannot match output of another function.
  329. define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
  330. ; CHECK-LABEL: @f11(
  331. ; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
  332. ; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
  333. ; CHECK: store <32 x i1> %and, <32 x i1>* %dest
  334. ; CHECK: ret void
  335. %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
  336. %val0 = load <32 x i1> , <32 x i1> *%src0
  337. %val1 = load <32 x i1> , <32 x i1> *%src1
  338. %and = and <32 x i1> %val0, %val1
  339. store <32 x i1> %and, <32 x i1> *%dest
  340. ret void
  341. }
  342. ; Test that variable inserts aren't scalarized.
  ; %index is not a constant, so the insertelement into %val0 must survive
  ; as a vector instruction. Its user, the vector shl, is still scalarized: the
  ; four lanes are extracted from %val1 and each feeds a scalar shl whose
  ; left operand is the matching constant 1/2/3/4. The extract and shl
  ; expectations are order-independent.
  ;
  ; The function header is matched with the label form of the directive, as in
  ; @f1-@f8, so these expectations cannot match output of another function.
  343. define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
  344. ; CHECK-LABEL: @f12(
  345. ; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
  346. ; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
  347. ; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
  348. ; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
  349. ; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
  350. ; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
  351. ; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
  352. ; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
  353. ; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
  354. ; CHECK: ret void
  355. %val0 = load <4 x i32> , <4 x i32> *%src
  356. %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
  357. %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
  358. store <4 x i32> %val2, <4 x i32> *%dest
  359. ret void
  360. }
  361. ; Test vector GEPs with more than one index.
  ; The vector GEP has a vector-of-pointers base (%ptr, pointing at
  ; [4 x float] arrays), a constant first index vector <0, 1, 2, 3> and a
  ; variable second index vector %i. Each lane N must become a scalar
  ; "getelementptr inbounds" on %ptr.iN with first index N and second index
  ; %i.iN; the inbounds flag has to be kept. The resulting pointer vector is
  ; stored as four scalar stores with alignments 32/8/16/8.
  ; The %other parameter is not used by the body.
  362. define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
  363. float *%other) {
  364. ; CHECK-LABEL: @f13(
  365. ; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
  366. ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
  367. ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
  368. ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
  369. ; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
  370. ; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
  371. ; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
  372. ; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
  373. ; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
  374. ; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
  375. ; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
  376. ; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
  377. ; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
  378. ; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
  379. ; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
  380. ; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
  381. ; CHECK: store float* %val.i0, float** %dest.i0, align 32
  382. ; CHECK: store float* %val.i1, float** %dest.i1, align 8
  383. ; CHECK: store float* %val.i2, float** %dest.i2, align 16
  384. ; CHECK: store float* %val.i3, float** %dest.i3, align 8
  385. ; CHECK: ret void
  386. %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
  387. <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
  388. <4 x i32> %i
  389. store <4 x float *> %val, <4 x float *> *%dest
  390. ret void
  391. }
  392. ; Test combinations of vector and non-vector PHIs.
  ; The loop header holds a vector phi (%this_acc) followed by a scalar phi
  ; (%this_count). The expectations require:
  ;  - four scalar phis for the accumulator lanes, then the untouched scalar
  ;    phi for the counter, with nothing else in between;
  ;  - only after all phis, an insertelement chain that rebuilds %this_acc as
  ;    a vector (the body passes %this_acc to @ext as a whole vector);
  ;  - the function still returns the vector %next_acc.
  393. define <4 x float> @f14(<4 x float> %acc, i32 %count) {
  394. ; CHECK-LABEL: @f14(
  395. ; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
  396. ; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
  397. ; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
  398. ; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
  399. ; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  400. ; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
  401. ; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
  402. ; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
  403. ; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
  404. ; CHECK: ret <4 x float> %next_acc
  405. entry:
  406. br label %loop
  407. loop:
  408. %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  409. %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  410. %foo = call <4 x float> @ext(<4 x float> %this_acc)
  411. %next_acc = fadd <4 x float> %this_acc, %foo
  412. %next_count = sub i32 %this_count, 1
  413. %cmp = icmp eq i32 %next_count, 0
  414. br i1 %cmp, label %loop, label %exit
  415. exit:
  416. ret <4 x float> %next_acc
  417. }
  ; Metadata nodes referenced by the functions above.
  ; !0: root node that !1 and !2 both name as their second operand.
  418. !0 = !{ !"root" }
  ; !1: "set1", attached as !tbaa to the load in @f3.
  419. !1 = !{ !"set1", !0 }
  ; !2: "set2", attached as !tbaa to the store in @f3.
  420. !2 = !{ !"set2", !0 }
  ; !3: self-referential node used in @f5 both as the !llvm.loop of the back
  ; edge and as the !llvm.mem.parallel_loop_access tag on the load and store.
  421. !3 = !{ !3 }
  ; !4: operand of the !fpmath attachment on the fadd in @f6.
  422. !4 = !{ float 4.0 }
  ; !5: used as !tbaa.struct in @f4 and as the unrecognized kind !foo in @f7.
  423. !5 = !{ i64 0, i64 8, null }