operandorder.ll 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  ; Test driver: runs opt with basic alias analysis, the SLP vectorizer (cost
  ; threshold forced to -100), instcombine and dce, targeting an i386 macOS
  ; triple with the corei7-avx CPU, and pipes the resulting IR into FileCheck.
  1. ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  ; Data layout: little-endian ('e'), 32-bit pointers, native integer widths of
  ; 8/16/32 bits, and a 128-bit natural stack alignment ('S128').
  2. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
  3. ; Make sure we order the operands of commutative operations so that we get
  4. ; bigger vectorizable trees.
  ; In @shuffle_operands1 the two fadds feed adjacent stores into %to, but the
  ; loaded value is the first operand in lane 0 and the second operand in lane 1.
  ; The FileCheck directives below expect one <2 x double> load and one
  ; <2 x double> fadd in the output.
  5. ; CHECK-LABEL: shuffle_operands1
  6. ; CHECK: load <2 x double>
  7. ; CHECK: fadd <2 x double>
  8. define void @shuffle_operands1(double * noalias %from, double * noalias %to,
  9. double %v1, double %v2) {
  10. %from_1 = getelementptr double, double *%from, i64 1
  11. %v0_1 = load double , double * %from
  12. %v0_2 = load double , double * %from_1
  13. %v1_1 = fadd double %v0_1, %v1 ; lane 0: load is the first operand
  14. %v1_2 = fadd double %v2, %v0_2 ; lane 1: load is the second operand
  15. %to_2 = getelementptr double, double * %to, i64 1
  16. store double %v1_1, double *%to
  17. store double %v1_2, double *%to_2
  18. ret void
  19. }
  ; Loop body in which %v0_1 is the first operand of both fadds; the remaining
  ; operands are the phi %p (lane 0) and the load %v0_2 (lane 1). The FileCheck
  ; directives below expect two chained insertelement instructions that both
  ; insert %v0_1 into the same <2 x double>.
  20. ; CHECK-LABEL: shuffle_preserve_broadcast
  21. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  22. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  23. define void @shuffle_preserve_broadcast(double * noalias %from,
  24. double * noalias %to,
  25. double %v1, double %v2) {
  26. entry:
  27. br label %lp
  28. lp:
  29. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  30. %from_1 = getelementptr double, double *%from, i64 1
  31. %v0_1 = load double , double * %from
  32. %v0_2 = load double , double * %from_1
  33. %v1_1 = fadd double %v0_1, %p ; lane 0: %v0_1 first, phi second
  34. %v1_2 = fadd double %v0_1, %v0_2 ; lane 1: %v0_1 first, other load second
  35. %to_2 = getelementptr double, double * %to, i64 1
  36. store double %v1_1, double *%to
  37. store double %v1_2, double *%to_2
  38. br i1 undef, label %lp, label %ext
  39. ext:
  40. ret void
  41. }
  ; Loop body in which %v0_1 is the second operand of both fadds; the first
  ; operands are the phi %p (lane 0) and the load %v0_2 (lane 1). The FileCheck
  ; directives below expect two chained insertelement instructions that both
  ; insert %v0_1 into the same <2 x double>.
  42. ; CHECK-LABEL: shuffle_preserve_broadcast2
  43. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  44. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  45. define void @shuffle_preserve_broadcast2(double * noalias %from,
  46. double * noalias %to,
  47. double %v1, double %v2) {
  48. entry:
  49. br label %lp
  50. lp:
  51. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  52. %from_1 = getelementptr double, double *%from, i64 1
  53. %v0_1 = load double , double * %from
  54. %v0_2 = load double , double * %from_1
  55. %v1_1 = fadd double %p, %v0_1 ; lane 0: phi first, %v0_1 second
  56. %v1_2 = fadd double %v0_2, %v0_1 ; lane 1: other load first, %v0_1 second
  57. %to_2 = getelementptr double, double * %to, i64 1
  58. store double %v1_1, double *%to
  59. store double %v1_2, double *%to_2
  60. br i1 undef, label %lp, label %ext
  61. ext:
  62. ret void
  63. }
  ; Loop body in which %v0_1 sits in different operand positions per lane:
  ; second operand in lane 0 (after the phi %p) and first operand in lane 1
  ; (before the load %v0_2). The FileCheck directives below expect two chained
  ; insertelement instructions that both insert %v0_1 into the same <2 x double>.
  64. ; CHECK-LABEL: shuffle_preserve_broadcast3
  65. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  66. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  67. define void @shuffle_preserve_broadcast3(double * noalias %from,
  68. double * noalias %to,
  69. double %v1, double %v2) {
  70. entry:
  71. br label %lp
  72. lp:
  73. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  74. %from_1 = getelementptr double, double *%from, i64 1
  75. %v0_1 = load double , double * %from
  76. %v0_2 = load double , double * %from_1
  77. %v1_1 = fadd double %p, %v0_1 ; lane 0: phi first, %v0_1 second
  78. %v1_2 = fadd double %v0_1, %v0_2 ; lane 1: %v0_1 first, other load second
  79. %to_2 = getelementptr double, double * %to, i64 1
  80. store double %v1_1, double *%to
  81. store double %v1_2, double *%to_2
  82. br i1 undef, label %lp, label %ext
  83. ext:
  84. ret void
  85. }
  ; Loop body in which %v0_1 is the second operand of both fadds, with the load
  ; %v0_2 as the first operand in lane 0 and the phi %p as the first operand in
  ; lane 1. The FileCheck directives below expect two chained insertelement
  ; instructions that both insert %v0_1 into the same <2 x double>.
  86. ; CHECK-LABEL: shuffle_preserve_broadcast4
  87. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  88. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  89. define void @shuffle_preserve_broadcast4(double * noalias %from,
  90. double * noalias %to,
  91. double %v1, double %v2) {
  92. entry:
  93. br label %lp
  94. lp:
  95. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  96. %from_1 = getelementptr double, double *%from, i64 1
  97. %v0_1 = load double , double * %from
  98. %v0_2 = load double , double * %from_1
  99. %v1_1 = fadd double %v0_2, %v0_1 ; lane 0: other load first, %v0_1 second
  100. %v1_2 = fadd double %p, %v0_1 ; lane 1: phi first, %v0_1 second
  101. %to_2 = getelementptr double, double * %to, i64 1
  102. store double %v1_1, double *%to
  103. store double %v1_2, double *%to_2
  104. br i1 undef, label %lp, label %ext
  105. ext:
  106. ret void
  107. }
  ; Loop body in which %v0_1 sits in different operand positions per lane:
  ; first operand in lane 0 (before the load %v0_2) and second operand in lane 1
  ; (after the phi %p). The FileCheck directives below expect two chained
  ; insertelement instructions that both insert %v0_1 into the same <2 x double>.
  108. ; CHECK-LABEL: shuffle_preserve_broadcast5
  109. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  110. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  111. define void @shuffle_preserve_broadcast5(double * noalias %from,
  112. double * noalias %to,
  113. double %v1, double %v2) {
  114. entry:
  115. br label %lp
  116. lp:
  117. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  118. %from_1 = getelementptr double, double *%from, i64 1
  119. %v0_1 = load double , double * %from
  120. %v0_2 = load double , double * %from_1
  121. %v1_1 = fadd double %v0_1, %v0_2 ; lane 0: %v0_1 first, other load second
  122. %v1_2 = fadd double %p, %v0_1 ; lane 1: phi first, %v0_1 second
  123. %to_2 = getelementptr double, double * %to, i64 1
  124. store double %v1_1, double *%to
  125. store double %v1_2, double *%to_2
  126. br i1 undef, label %lp, label %ext
  127. ext:
  128. ret void
  129. }
  ; Loop body in which %v0_1 is the first operand of both fadds, with the load
  ; %v0_2 as the second operand in lane 0 and the phi %p as the second operand
  ; in lane 1. The FileCheck directives below expect two chained insertelement
  ; instructions that both insert %v0_1 into the same <2 x double>.
  130. ; CHECK-LABEL: shuffle_preserve_broadcast6
  131. ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
  132. ; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1
  133. define void @shuffle_preserve_broadcast6(double * noalias %from,
  134. double * noalias %to,
  135. double %v1, double %v2) {
  136. entry:
  137. br label %lp
  138. lp:
  139. %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] ; 0.0 on entry, 1.0 on the back edge
  140. %from_1 = getelementptr double, double *%from, i64 1
  141. %v0_1 = load double , double * %from
  142. %v0_2 = load double , double * %from_1
  143. %v1_1 = fadd double %v0_1, %v0_2 ; lane 0: %v0_1 first, other load second
  144. %v1_2 = fadd double %v0_1, %p ; lane 1: %v0_1 first, phi second
  145. %to_2 = getelementptr double, double * %to, i64 1
  146. store double %v1_1, double *%to
  147. store double %v1_2, double *%to_2
  148. br i1 undef, label %lp, label %ext
  149. ext:
  150. ret void
  151. }
  152. ; Make sure we don't scramble operands when we reorder them and destroy
  153. ; 'good' source order.
  ; The loop below multiplies each element of @a by its predecessor's
  ; pre-store value, five elements per iteration. The FileCheck directives
  ; expect: a <4 x float> load (V1); %1 inserted into lane 0 of a fresh vector
  ; (V2); a shufflevector with mask <0, 4, 5, 6>, i.e. lane 0 of V2 followed by
  ; lanes 0-2 of V1 (V3); and a <4 x float> fmul of V1 by V3.
  154. ; CHECK-LABEL: good_load_order
  155. ; CHECK: %[[V1:[0-9]+]] = load <4 x float>, <4 x float>*
  156. ; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
  157. ; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  158. ; CHECK: = fmul <4 x float> %[[V1]], %[[V3]]
  159. @a = common global [32000 x float] zeroinitializer, align 16
  160. define void @good_load_order() {
  161. entry:
  162. br label %for.cond1.preheader
  163. for.cond1.preheader:
  164. %0 = load float, float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), align 16 ; a[0], seeds the loop-carried value
  165. br label %for.body3
  166. for.body3:
  167. %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ] ; loop-carried multiplicand: %0 on entry, %10 from the previous iteration
  168. %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] ; iv, advanced by 5 per iteration
  169. %2 = add nsw i64 %indvars.iv, 1
  170. %arrayidx = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %2
  171. %3 = load float, float* %arrayidx, align 4 ; a[iv+1]
  172. %arrayidx5 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
  173. %mul6 = fmul float %3, %1 ; a[iv+1] * carried value
  174. store float %mul6, float* %arrayidx5, align 4 ; -> a[iv]
  175. %4 = add nsw i64 %indvars.iv, 2
  176. %arrayidx11 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %4
  177. %5 = load float, float* %arrayidx11, align 4 ; a[iv+2]
  178. %mul15 = fmul float %5, %3 ; a[iv+2] * a[iv+1]
  179. store float %mul15, float* %arrayidx, align 4 ; -> a[iv+1]
  180. %6 = add nsw i64 %indvars.iv, 3
  181. %arrayidx21 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %6
  182. %7 = load float, float* %arrayidx21, align 4 ; a[iv+3]
  183. %mul25 = fmul float %7, %5 ; a[iv+3] * a[iv+2]
  184. store float %mul25, float* %arrayidx11, align 4 ; -> a[iv+2]
  185. %8 = add nsw i64 %indvars.iv, 4
  186. %arrayidx31 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %8
  187. %9 = load float, float* %arrayidx31, align 4 ; a[iv+4]
  188. %mul35 = fmul float %9, %7 ; a[iv+4] * a[iv+3]
  189. store float %mul35, float* %arrayidx21, align 4 ; -> a[iv+3]
  190. %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  191. %arrayidx41 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.next
  192. %10 = load float, float* %arrayidx41, align 4 ; a[iv+5], becomes %1 on the next iteration
  193. %mul45 = fmul float %10, %9 ; a[iv+5] * a[iv+4]
  194. store float %mul45, float* %arrayidx31, align 4 ; -> a[iv+4]
  195. %11 = trunc i64 %indvars.iv.next to i32
  196. %cmp2 = icmp slt i32 %11, 31995 ; keep looping while the truncated next index is < 31995 (signed)
  197. br i1 %cmp2, label %for.body3, label %for.end
  198. for.end:
  199. ret void
  200. }
  201. ; Check vectorization of the following code for the double data type:
  202. ; c[0] = a[0]+b[0];
  203. ; c[1] = b[1]+a[1]; // swapped b[1] and a[1]
  ; The FileCheck directives below expect a <2 x double> load and a
  ; <2 x double> fadd in the output.
  204. ; CHECK-LABEL: load_reorder_double
  205. ; CHECK: load <2 x double>, <2 x double>*
  206. ; CHECK: fadd <2 x double>
  207. define void @load_reorder_double(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b){
  208. %1 = load double, double* %a
  209. %2 = load double, double* %b
  210. %3 = fadd double %1, %2 ; lane 0: a[0] + b[0]
  211. store double %3, double* %c
  212. %4 = getelementptr inbounds double, double* %b, i64 1
  213. %5 = load double, double* %4
  214. %6 = getelementptr inbounds double, double* %a, i64 1
  215. %7 = load double, double* %6
  216. %8 = fadd double %5, %7 ; lane 1: b[1] + a[1], operands swapped relative to lane 0
  217. %9 = getelementptr inbounds double, double* %c, i64 1
  218. store double %8, double* %9
  219. ret void
  220. }
  221. ; Check vectorization of the following code for the float data type:
  222. ; c[0] = a[0]+b[0];
  223. ; c[1] = b[1]+a[1]; // swapped b[1] and a[1]
  224. ; c[2] = a[2]+b[2];
  225. ; c[3] = a[3]+b[3];
  ; The FileCheck directives below expect a <4 x float> load and a
  ; <4 x float> fadd in the output.
  226. ; CHECK-LABEL: load_reorder_float
  227. ; CHECK: load <4 x float>, <4 x float>*
  228. ; CHECK: fadd <4 x float>
  229. define void @load_reorder_float(float* nocapture %c, float* noalias nocapture readonly %a, float* noalias nocapture readonly %b){
  230. %1 = load float, float* %a
  231. %2 = load float, float* %b
  232. %3 = fadd float %1, %2 ; lane 0: a[0] + b[0]
  233. store float %3, float* %c
  234. %4 = getelementptr inbounds float, float* %b, i64 1
  235. %5 = load float, float* %4
  236. %6 = getelementptr inbounds float, float* %a, i64 1
  237. %7 = load float, float* %6
  238. %8 = fadd float %5, %7 ; lane 1: b[1] + a[1], operands swapped relative to the other lanes
  239. %9 = getelementptr inbounds float, float* %c, i64 1
  240. store float %8, float* %9
  241. %10 = getelementptr inbounds float, float* %a, i64 2
  242. %11 = load float, float* %10
  243. %12 = getelementptr inbounds float, float* %b, i64 2
  244. %13 = load float, float* %12
  245. %14 = fadd float %11, %13 ; lane 2: a[2] + b[2]
  246. %15 = getelementptr inbounds float, float* %c, i64 2
  247. store float %14, float* %15
  248. %16 = getelementptr inbounds float, float* %a, i64 3
  249. %17 = load float, float* %16
  250. %18 = getelementptr inbounds float, float* %b, i64 3
  251. %19 = load float, float* %18
  252. %20 = fadd float %17, %19 ; lane 3: a[3] + b[3]
  253. %21 = getelementptr inbounds float, float* %c, i64 3
  254. store float %20, float* %21
  255. ret void
  256. }
  257. ; Check that we properly reorder the code below so that it gets vectorized optimally:
  258. ; a[0] = (b[0]+c[0])+d[0];
  259. ; a[1] = d[1]+(b[1]+c[1]);
  260. ; a[2] = (b[2]+c[2])+d[2];
  261. ; a[3] = (b[3]+c[3])+d[3];
  ; The FileCheck directives below expect a <4 x float> load and a
  ; <4 x float> fadd in the output.
  262. ; CHECK-LABEL: opcode_reorder
  263. ; CHECK: load <4 x float>, <4 x float>*
  264. ; CHECK: fadd <4 x float>
  265. define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b,
  266. float* noalias nocapture readonly %c,float* noalias nocapture readonly %d){
  267. %1 = load float, float* %b
  268. %2 = load float, float* %c
  269. %3 = fadd float %1, %2 ; b[0] + c[0]
  270. %4 = load float, float* %d
  271. %5 = fadd float %3, %4 ; lane 0: inner sum is the first operand, d[0] the second
  272. store float %5, float* %a
  273. %6 = getelementptr inbounds float, float* %d, i64 1
  274. %7 = load float, float* %6
  275. %8 = getelementptr inbounds float, float* %b, i64 1
  276. %9 = load float, float* %8
  277. %10 = getelementptr inbounds float, float* %c, i64 1
  278. %11 = load float, float* %10
  279. %12 = fadd float %9, %11 ; b[1] + c[1]
  280. %13 = fadd float %7, %12 ; lane 1: d[1] is the first operand, inner sum the second
  281. %14 = getelementptr inbounds float, float* %a, i64 1
  282. store float %13, float* %14
  283. %15 = getelementptr inbounds float, float* %b, i64 2
  284. %16 = load float, float* %15
  285. %17 = getelementptr inbounds float, float* %c, i64 2
  286. %18 = load float, float* %17
  287. %19 = fadd float %16, %18 ; b[2] + c[2]
  288. %20 = getelementptr inbounds float, float* %d, i64 2
  289. %21 = load float, float* %20
  290. %22 = fadd float %19, %21 ; lane 2: inner sum is the first operand, d[2] the second
  291. %23 = getelementptr inbounds float, float* %a, i64 2
  292. store float %22, float* %23
  293. %24 = getelementptr inbounds float, float* %b, i64 3
  294. %25 = load float, float* %24
  295. %26 = getelementptr inbounds float, float* %c, i64 3
  296. %27 = load float, float* %26
  297. %28 = fadd float %25, %27 ; b[3] + c[3]
  298. %29 = getelementptr inbounds float, float* %d, i64 3
  299. %30 = load float, float* %29
  300. %31 = fadd float %28, %30 ; lane 3: inner sum is the first operand, d[3] the second
  301. %32 = getelementptr inbounds float, float* %a, i64 3
  302. store float %31, float* %32
  303. ret void
  304. }