cse.ll 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  ; Module-wide target description. The layout string starts with "e"
  ; (little-endian) and "p:32:32:32" (32-bit pointers); the triple is the same
  ; i386-apple-macosx10.8.0 value passed via -mtriple in the opt invocation at
  ; the top of this file.
  2. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
  3. target triple = "i386-apple-macosx10.8.0"
  4. ;int test(double *G) {
  5. ; G[0] = 1+G[5]*4;
  6. ; G[1] = 6+G[6]*3;
  7. ; G[2] = 7+G[5]*4;
  8. ; G[3] = 8+G[6]*4;
  9. ;}
  10. ;CHECK-LABEL: @test(
  11. ;CHECK: load <2 x double>
  12. ;CHECK: fadd <4 x double>
  13. ;CHECK: store <4 x double>
  14. ;CHECK: ret i32
  ; Straight-line, single-block kernel over %G: reads G[5] and G[6] and writes
  ; G[0]..G[3]. The i32 result is undef, so the four stores are the only
  ; observable effect. The expectations preceding this function look for one
  ; <2 x double> load feeding a <4 x double> fadd and a <4 x double> store.
  15. define i32 @test(double* nocapture %G) {
  16. entry:
  ; %mul = G[5] * 4.0. It is computed once and reused for both G[0] and G[2].
  17. %arrayidx = getelementptr inbounds double, double* %G, i64 5
  18. %0 = load double, double* %arrayidx, align 8
  19. %mul = fmul double %0, 4.000000e+00
  ; G[0] = %mul + 1.0
  20. %add = fadd double %mul, 1.000000e+00
  21. store double %add, double* %G, align 8
  ; G[1] = G[6] * 3.0 + 6.0. The loaded G[6] value (%1) is reused for G[3].
  22. %arrayidx2 = getelementptr inbounds double, double* %G, i64 6
  23. %1 = load double, double* %arrayidx2, align 8
  24. %mul3 = fmul double %1, 3.000000e+00
  25. %add4 = fadd double %mul3, 6.000000e+00
  26. %arrayidx5 = getelementptr inbounds double, double* %G, i64 1
  27. store double %add4, double* %arrayidx5, align 8
  ; G[2] = %mul + 7.0 (same product as G[0], different addend).
  28. %add8 = fadd double %mul, 7.000000e+00
  29. %arrayidx9 = getelementptr inbounds double, double* %G, i64 2
  30. store double %add8, double* %arrayidx9, align 8
  ; G[3] = G[6] * 4.0 + 8.0. Note the multiplier is 4.0 here, not the 3.0
  ; used for G[1], so this product is distinct from %mul3.
  31. %mul11 = fmul double %1, 4.000000e+00
  32. %add12 = fadd double %mul11, 8.000000e+00
  33. %arrayidx13 = getelementptr inbounds double, double* %G, i64 3
  34. store double %add12, double* %arrayidx13, align 8
  35. ret i32 undef
  36. }
  37. ;int foo(double *A, int n) {
  38. ; A[0] = A[0] * 7.9 * n + 6.0;
  39. ; A[1] = A[1] * 7.7 * n + 2.0;
  40. ; A[2] = A[2] * 7.6 * n + 3.0;
  41. ; A[3] = A[3] * 7.4 * n + 4.0;
  42. ;}
  43. ; CHECK-LABEL: @foo(
  44. ; CHECK: load <4 x double>
  45. ; CHECK: fmul <4 x double>
  46. ; CHECK: fmul <4 x double>
  47. ; CHECK: fadd <4 x double>
  48. ; CHECK: store <4 x double>
  ; Single-block kernel that updates A[0]..A[3] in place. Each element i becomes
  ; (double)n * (A[i] * c_i) + k_i with per-element constants
  ; c = 7.9, 7.7, 7.6, 7.4 and k = 6.0, 2.0, 3.0, 4.0. The i32 result is undef.
  49. define i32 @foo(double* nocapture %A, i32 %n) {
  50. entry:
  ; Element 0. %conv (n converted to double) is computed here once and reused
  ; as the first fmul operand for all four elements.
  51. %0 = load double, double* %A, align 8
  52. %mul = fmul double %0, 7.900000e+00
  53. %conv = sitofp i32 %n to double
  54. %mul1 = fmul double %conv, %mul
  55. %add = fadd double %mul1, 6.000000e+00
  56. store double %add, double* %A, align 8
  ; Element 1: A[1] = %conv * (A[1] * 7.7) + 2.0
  57. %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
  58. %1 = load double, double* %arrayidx3, align 8
  59. %mul4 = fmul double %1, 7.700000e+00
  60. %mul6 = fmul double %conv, %mul4
  61. %add7 = fadd double %mul6, 2.000000e+00
  62. store double %add7, double* %arrayidx3, align 8
  ; Element 2: A[2] = %conv * (A[2] * 7.6) + 3.0
  63. %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
  64. %2 = load double, double* %arrayidx9, align 8
  65. %mul10 = fmul double %2, 7.600000e+00
  66. %mul12 = fmul double %conv, %mul10
  67. %add13 = fadd double %mul12, 3.000000e+00
  68. store double %add13, double* %arrayidx9, align 8
  ; Element 3: A[3] = %conv * (A[3] * 7.4) + 4.0
  69. %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
  70. %3 = load double, double* %arrayidx15, align 8
  71. %mul16 = fmul double %3, 7.400000e+00
  72. %mul18 = fmul double %conv, %mul16
  73. %add19 = fadd double %mul18, 4.000000e+00
  74. store double %add19, double* %arrayidx15, align 8
  75. ret i32 undef
  76. }
  77. ; int test2(double *G, int k) {
  78. ; if (k) {
  79. ; G[0] = 1+G[5]*4;
  80. ; G[1] = 6+G[6]*3;
  81. ; } else {
  82. ; G[2] = 7+G[5]*4;
  83. ; G[3] = 8+G[6]*3;
  84. ; }
  85. ; }
  86. ; We can't merge the gather sequences because one does not dominate the other.
  87. ; CHECK-LABEL: @test2(
  88. ; CHECK: insertelement
  89. ; CHECK: insertelement
  90. ; CHECK: insertelement
  91. ; CHECK: insertelement
  92. ; CHECK: ret
  ; Two-armed variant of the G[5]/G[6] kernel. The entry block computes
  ; G[5] * 4.0 once (%4) before branching; one arm writes G[0] and G[1], the
  ; other writes G[2] and G[3]. The arms are siblings, so neither block
  ; dominates the other. All blocks are unnamed and use implicit numbering.
  ; The i32 result is undef.
  93. define i32 @test2(double* nocapture %G, i32 %k) {
  ; %1 is true when k == 0; that case branches to block %12, otherwise to %5.
  94. %1 = icmp eq i32 %k, 0
  95. %2 = getelementptr inbounds double, double* %G, i64 5
  96. %3 = load double, double* %2, align 8
  97. %4 = fmul double %3, 4.000000e+00
  98. br i1 %1, label %12, label %5
  ; k != 0 arm: G[0] = %4 + 1.0 and G[1] = G[6] * 3.0 + 6.0.
  99. ; <label>:5 ; preds = %0
  100. %6 = fadd double %4, 1.000000e+00
  101. store double %6, double* %G, align 8
  102. %7 = getelementptr inbounds double, double* %G, i64 6
  103. %8 = load double, double* %7, align 8
  104. %9 = fmul double %8, 3.000000e+00
  105. %10 = fadd double %9, 6.000000e+00
  106. %11 = getelementptr inbounds double, double* %G, i64 1
  107. store double %10, double* %11, align 8
  108. br label %20
  ; k == 0 arm: G[2] = %4 + 7.0 and G[3] = G[6] * 3.0 + 8.0. G[6] is loaded
  ; again here (%16) rather than shared with the other arm.
  109. ; <label>:12 ; preds = %0
  110. %13 = fadd double %4, 7.000000e+00
  111. %14 = getelementptr inbounds double, double* %G, i64 2
  112. store double %13, double* %14, align 8
  113. %15 = getelementptr inbounds double, double* %G, i64 6
  114. %16 = load double, double* %15, align 8
  115. %17 = fmul double %16, 3.000000e+00
  116. %18 = fadd double %17, 8.000000e+00
  117. %19 = getelementptr inbounds double, double* %G, i64 3
  118. store double %18, double* %19, align 8
  119. br label %20
  ; Join point for both arms.
  120. ; <label>:20 ; preds = %12, %5
  121. ret i32 undef
  122. }
  123. ;int foo4(double *A, int n) {
  124. ; A[0] = A[0] * 7.9 * n + 6.0;
  125. ; A[1] = A[1] * 7.9 * n + 6.0;
  126. ; A[2] = A[2] * 7.9 * n + 6.0;
  127. ; A[3] = A[3] * 7.9 * n + 6.0;
  128. ;}
  129. ; CHECK-LABEL: @foo4(
  130. ; CHECK: load <4 x double>
  131. ; CHECK: fmul <4 x double>
  132. ; CHECK: fmul <4 x double>
  133. ; CHECK: fadd <4 x double>
  134. ; CHECK: store <4 x double>
  ; Same shape as @foo, but every element uses the same constants:
  ; A[i] = (double)n * (A[i] * 7.9) + 6.0 for i = 0..3, updated in place.
  ; The i32 result is undef.
  135. define i32 @foo4(double* nocapture %A, i32 %n) {
  136. entry:
  ; Element 0. %conv (n converted to double) is computed here once and reused
  ; as the first fmul operand for all four elements.
  137. %0 = load double, double* %A, align 8
  138. %mul = fmul double %0, 7.900000e+00
  139. %conv = sitofp i32 %n to double
  140. %mul1 = fmul double %conv, %mul
  141. %add = fadd double %mul1, 6.000000e+00
  142. store double %add, double* %A, align 8
  ; Element 1.
  143. %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
  144. %1 = load double, double* %arrayidx3, align 8
  145. %mul4 = fmul double %1, 7.900000e+00
  146. %mul6 = fmul double %conv, %mul4
  147. %add7 = fadd double %mul6, 6.000000e+00
  148. store double %add7, double* %arrayidx3, align 8
  ; Element 2.
  149. %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
  150. %2 = load double, double* %arrayidx9, align 8
  151. %mul10 = fmul double %2, 7.900000e+00
  152. %mul12 = fmul double %conv, %mul10
  153. %add13 = fadd double %mul12, 6.000000e+00
  154. store double %add13, double* %arrayidx9, align 8
  ; Element 3.
  155. %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
  156. %3 = load double, double* %arrayidx15, align 8
  157. %mul16 = fmul double %3, 7.900000e+00
  158. %mul18 = fmul double %conv, %mul16
  159. %add19 = fadd double %mul18, 6.000000e+00
  160. store double %add19, double* %arrayidx15, align 8
  161. ret i32 undef
  162. }
  163. ;int partial_mrg(double *A, int n) {
  164. ; A[0] = A[0] * n;
  165. ; A[1] = A[1] * n;
  166. ; if (n < 4) return 0;
  167. ; A[2] = A[2] * n;
  168. ; A[3] = A[3] * (n+4);
  169. ;}
  170. ;CHECK-LABEL: @partial_mrg(
  171. ;CHECK: insertelement <2 x double>
  172. ;CHECK: insertelement <2 x double>
  173. ;CHECK: insertelement <2 x double>
  174. ;CHECK-NOT: insertelement <2 x double>
  175. ;CHECK: ret
  ; Scales A[0] and A[1] by (double)n in the entry block, returns early when
  ; n < 4, and otherwise scales A[2] by (double)n and A[3] by (double)(n + 4)
  ; in if.end. Both paths return 0. The expectations preceding this function
  ; allow exactly three "insertelement <2 x double>" before the ret.
  176. define i32 @partial_mrg(double* nocapture %A, i32 %n) {
  177. entry:
  ; A[0] = (double)n * A[0]. %conv is defined here and reused for A[1] and,
  ; in if.end, for A[2].
  178. %0 = load double, double* %A, align 8
  179. %conv = sitofp i32 %n to double
  180. %mul = fmul double %conv, %0
  181. store double %mul, double* %A, align 8
  ; A[1] = (double)n * A[1]
  182. %arrayidx2 = getelementptr inbounds double, double* %A, i64 1
  183. %1 = load double, double* %arrayidx2, align 8
  184. %mul4 = fmul double %conv, %1
  185. store double %mul4, double* %arrayidx2, align 8
  ; Early exit: skip the A[2]/A[3] updates when n < 4 (signed compare).
  186. %cmp = icmp slt i32 %n, 4
  187. br i1 %cmp, label %return, label %if.end
  188. if.end: ; preds = %entry
  ; A[2] = (double)n * A[2], using %conv from the entry block.
  189. %arrayidx7 = getelementptr inbounds double, double* %A, i64 2
  190. %2 = load double, double* %arrayidx7, align 8
  191. %mul9 = fmul double %conv, %2
  192. store double %mul9, double* %arrayidx7, align 8
  ; A[3] = (double)(n + 4) * A[3]. This scale factor (%conv12) differs from
  ; %conv and is only defined in this block.
  193. %arrayidx11 = getelementptr inbounds double, double* %A, i64 3
  194. %3 = load double, double* %arrayidx11, align 8
  195. %add = add nsw i32 %n, 4
  196. %conv12 = sitofp i32 %add to double
  197. %mul13 = fmul double %conv12, %3
  198. store double %mul13, double* %arrayidx11, align 8
  199. br label %return
  200. return: ; preds = %entry, %if.end
  201. ret i32 0
  202. }
  ; Declarations used by @PR19646 below: %class.B.53.55 is a %class.A.52.54
  ; followed by one double, %class.A.52.54 is three doubles, and @a is an
  ; external double global (declared here, defined elsewhere).
  203. %class.B.53.55 = type { %class.A.52.54, double }
  204. %class.A.52.54 = type { double, double, double }
  205. @a = external global double, align 8
  ; Reduced control-flow case. The entry block branches on undef with both
  ; successors being if.end13; sw.epilog7 has no predecessors, and the function
  ; ends in unreachable. No expectations for this function are visible in this
  ; file view.
  ; NOTE(review): the name suggests a regression test for bug 19646 that only
  ; needs to run without crashing -- confirm against the bug report.
  206. define void @PR19646(%class.B.53.55* %this) {
  207. entry:
  ; Both edges of this conditional branch go to if.end13, which is why the
  ; phis there list %entry twice.
  208. br i1 undef, label %if.end13, label %if.end13
  209. sw.epilog7: ; No predecessors!
  ; %add6 = (undef + 0.0) + this->field0.field1
  210. %.in = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 1
  211. %0 = load double, double* %.in, align 8
  212. %add = fadd double undef, 0.000000e+00
  213. %add6 = fadd double %add, %0
  ; %add10 = (@a + 0.0) + this->field0.field2
  214. %1 = load double, double* @a, align 8
  215. %add8 = fadd double %1, 0.000000e+00
  216. %_dy = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 2
  217. %2 = load double, double* %_dy, align 8
  218. %add10 = fadd double %add8, %2
  219. br i1 undef, label %if.then12, label %if.end13
  220. if.then12: ; preds = %sw.epilog7
  ; Load through an undef pointer; the result only feeds the %b.0 phi.
  221. %3 = load double, double* undef, align 8
  222. br label %if.end13
  223. if.end13: ; preds = %if.then12, %sw.epilog7, %entry
  ; The two phis pair up %add6/%add10 from sw.epilog7 and 0.0/%3 from
  ; if.then12; neither phi result is used before the unreachable.
  224. %x.1 = phi double [ 0.000000e+00, %if.then12 ], [ %add6, %sw.epilog7 ], [ undef, %entry ], [ undef, %entry ]
  225. %b.0 = phi double [ %3, %if.then12 ], [ %add10, %sw.epilog7 ], [ undef, %entry], [ undef, %entry ]
  226. unreachable
  227. }