simple-int.ll 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
  2. ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
  3. declare double @llvm.fma.f64(double, double, double)
  4. declare double @llvm.fmuladd.f64(double, double, double)
  5. declare double @llvm.cos.f64(double)
  6. declare double @llvm.powi.f64(double, i32)
  7. declare double @llvm.round.f64(double)
  8. declare double @llvm.copysign.f64(double, double)
  9. declare double @llvm.ceil.f64(double)
  10. declare double @llvm.nearbyint.f64(double)
  11. declare double @llvm.rint.f64(double)
  12. declare double @llvm.trunc.f64(double)
  13. declare double @llvm.floor.f64(double)
  14. declare double @llvm.fabs.f64(double)
  15. declare i64 @llvm.bswap.i64(i64)
  16. declare i64 @llvm.ctpop.i64(i64)
  17. declare i64 @llvm.ctlz.i64(i64, i1)
  18. declare i64 @llvm.cttz.i64(i64, i1)
  19. ; Basic depth-3 chain with fma: two parallel fsub -> llvm.fma -> fadd chains (suffixes 1 and 2); the expected output below fuses each stage into one <2 x double> operation
  20. define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  21. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  22. %X2 = fsub double %A2, %B2 ; chain 2, depth 1 (same opcode and type as %X1)
  23. %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1) ; chain 1, depth 2
  24. %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2) ; chain 2, depth 2
  25. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  26. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  27. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  28. ret double %R
  29. ; CHECK-LABEL: @test1(
  30. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  31. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  32. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  33. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  34. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  35. ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
  36. ; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
  37. ; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
  38. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  39. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  40. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  41. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  42. ; CHECK: ret double %R
  43. }
  44. ; Basic depth-3 chain with fmuladd: same shape as @test1 but the middle stage calls llvm.fmuladd; the expected output below uses the v2f64 form of that intrinsic
  45. define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  46. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  47. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  48. %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1) ; chain 1, depth 2
  49. %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2) ; chain 2, depth 2
  50. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  51. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  52. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  53. ret double %R
  54. ; CHECK-LABEL: @test1a(
  55. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  56. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  57. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  58. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  59. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  60. ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
  61. ; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
  62. ; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
  63. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  64. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  65. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  66. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  67. ; CHECK: ret double %R
  68. }
  69. ; Basic depth-3 chain with cos: two parallel fsub -> llvm.cos -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  70. define double @test2(double %A1, double %A2, double %B1, double %B2) {
  71. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  72. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  73. %Y1 = call double @llvm.cos.f64(double %X1) ; chain 1, depth 2
  74. %Y2 = call double @llvm.cos.f64(double %X2) ; chain 2, depth 2
  75. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  76. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  77. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  78. ret double %R
  79. ; CHECK-LABEL: @test2(
  80. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  81. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  82. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  83. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  84. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  85. ; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
  86. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  87. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  88. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  89. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  90. ; CHECK: ret double %R
  91. }
  92. ; Basic depth-3 chain with powi: both calls pass the same i32 %P, and the expected output below keeps %P as a scalar operand of the v2f64 call
  93. define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
  94. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  95. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  96. %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) ; chain 1, depth 2
  97. %Y2 = call double @llvm.powi.f64(double %X2, i32 %P) ; chain 2, depth 2 (same exponent operand as %Y1)
  98. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  99. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  100. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  101. ret double %R
  102. ; CHECK-LABEL: @test3(
  103. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  104. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  105. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  106. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  107. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  108. ; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
  109. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  110. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  111. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  112. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  113. ; CHECK: ret double %R
  114. }
  115. ; Basic depth-3 chain with powi (different powers: should not vectorize); the expected output below forbids any <2 x double> before the return
  116. define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
  117. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  118. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  119. %P2 = add i32 %P, 1 ; second exponent, deliberately not the same value as %P
  120. %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) ; chain 1, depth 2, exponent %P
  121. %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2) ; chain 2, depth 2, exponent %P2
  122. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  123. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  124. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  125. ret double %R
  126. ; CHECK-LABEL: @test4(
  127. ; CHECK-NOT: <2 x double>
  128. ; CHECK: ret double %R
  129. }
  130. ; Basic depth-3 chain with round: two parallel fsub -> llvm.round -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  131. define double @testround(double %A1, double %A2, double %B1, double %B2) {
  132. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  133. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  134. %Y1 = call double @llvm.round.f64(double %X1) ; chain 1, depth 2
  135. %Y2 = call double @llvm.round.f64(double %X2) ; chain 2, depth 2
  136. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  137. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  138. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  139. ret double %R
  140. ; CHECK-LABEL: @testround(
  141. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  142. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  143. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  144. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  145. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  146. ; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
  147. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  148. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  149. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  150. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  151. ; CHECK: ret double %R
  152. }
  153. ; Basic depth-3 chain with copysign: two parallel fsub -> llvm.copysign -> fadd chains; both calls take %A1 as the sign operand, which the expected %Y1.v.i1.2 vector below reflects
  154. define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
  155. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  156. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  157. %Y1 = call double @llvm.copysign.f64(double %X1, double %A1) ; chain 1, depth 2
  158. %Y2 = call double @llvm.copysign.f64(double %X2, double %A1) ; chain 2, depth 2 (sign operand is %A1 here too, not %A2)
  159. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  160. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  161. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  162. ret double %R
  163. ; CHECK-LABEL: @testcopysign(
  164. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  165. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  166. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  167. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  168. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  169. ; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
  170. ; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
  171. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  172. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  173. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  174. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  175. ; CHECK: ret double %R
  176. }
  177. ; Basic depth-3 chain with ceil: two parallel fsub -> llvm.ceil -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  178. define double @testceil(double %A1, double %A2, double %B1, double %B2) {
  179. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  180. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  181. %Y1 = call double @llvm.ceil.f64(double %X1) ; chain 1, depth 2
  182. %Y2 = call double @llvm.ceil.f64(double %X2) ; chain 2, depth 2
  183. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  184. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  185. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  186. ret double %R
  187. ; CHECK-LABEL: @testceil(
  188. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  189. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  190. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  191. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  192. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  193. ; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
  194. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  195. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  196. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  197. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  198. ; CHECK: ret double %R
  199. }
  200. ; Basic depth-3 chain with nearbyint: two parallel fsub -> llvm.nearbyint -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  201. define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
  202. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  203. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  204. %Y1 = call double @llvm.nearbyint.f64(double %X1) ; chain 1, depth 2
  205. %Y2 = call double @llvm.nearbyint.f64(double %X2) ; chain 2, depth 2
  206. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  207. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  208. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  209. ret double %R
  210. ; CHECK-LABEL: @testnearbyint(
  211. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  212. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  213. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  214. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  215. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  216. ; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
  217. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  218. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  219. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  220. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  221. ; CHECK: ret double %R
  222. }
  223. ; Basic depth-3 chain with rint: two parallel fsub -> llvm.rint -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  224. define double @testrint(double %A1, double %A2, double %B1, double %B2) {
  225. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  226. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  227. %Y1 = call double @llvm.rint.f64(double %X1) ; chain 1, depth 2
  228. %Y2 = call double @llvm.rint.f64(double %X2) ; chain 2, depth 2
  229. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  230. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  231. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  232. ret double %R
  233. ; CHECK-LABEL: @testrint(
  234. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  235. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  236. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  237. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  238. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  239. ; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
  240. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  241. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  242. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  243. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  244. ; CHECK: ret double %R
  245. }
  246. ; Basic depth-3 chain with trunc: two parallel fsub -> llvm.trunc -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  247. define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
  248. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  249. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  250. %Y1 = call double @llvm.trunc.f64(double %X1) ; chain 1, depth 2
  251. %Y2 = call double @llvm.trunc.f64(double %X2) ; chain 2, depth 2
  252. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  253. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  254. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  255. ret double %R
  256. ; CHECK-LABEL: @testtrunc(
  257. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  258. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  259. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  260. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  261. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  262. ; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
  263. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  264. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  265. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  266. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  267. ; CHECK: ret double %R
  268. }
  269. ; Basic depth-3 chain with floor: two parallel fsub -> llvm.floor -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  270. define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
  271. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  272. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  273. %Y1 = call double @llvm.floor.f64(double %X1) ; chain 1, depth 2
  274. %Y2 = call double @llvm.floor.f64(double %X2) ; chain 2, depth 2
  275. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  276. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  277. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  278. ret double %R
  279. ; CHECK-LABEL: @testfloor(
  280. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  281. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  282. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  283. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  284. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  285. ; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
  286. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  287. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  288. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  289. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  290. ; CHECK: ret double %R
  291. }
  292. ; Basic depth-3 chain with fabs: two parallel fsub -> llvm.fabs -> fadd chains; the expected output below fuses each stage into one <2 x double> operation
  293. define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
  294. %X1 = fsub double %A1, %B1 ; chain 1, depth 1
  295. %X2 = fsub double %A2, %B2 ; chain 2, depth 1
  296. %Y1 = call double @llvm.fabs.f64(double %X1) ; chain 1, depth 2
  297. %Y2 = call double @llvm.fabs.f64(double %X2) ; chain 2, depth 2
  298. %Z1 = fadd double %Y1, %B1 ; chain 1, depth 3
  299. %Z2 = fadd double %Y2, %B2 ; chain 2, depth 3
  300. %R = fmul double %Z1, %Z2 ; combines both chains into the scalar return value
  301. ret double %R
  302. ; CHECK-LABEL: @testfabs(
  303. ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
  304. ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
  305. ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
  306. ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
  307. ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
  308. ; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
  309. ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
  310. ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
  311. ; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
  312. ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
  313. ; CHECK: ret double %R
  314. }
  315. ; Basic depth-3 chain with bswap: two parallel sub -> llvm.bswap -> add chains on i64; the expected output below fuses each stage into one <2 x i64> operation
  316. define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  317. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  318. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  319. %Y1 = call i64 @llvm.bswap.i64(i64 %X1) ; chain 1, depth 2
  320. %Y2 = call i64 @llvm.bswap.i64(i64 %X2) ; chain 2, depth 2
  321. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  322. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  323. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  324. ret i64 %R
  325. ; CHECK-LABEL: @testbswap(
  326. ; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
  327. ; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
  328. ; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
  329. ; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
  330. ; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
  331. ; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
  332. ; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
  333. ; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
  334. ; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
  335. ; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
  336. ; CHECK: ret i64 %R
  337. }
  338. ; Basic depth-3 chain with ctpop: two parallel sub -> llvm.ctpop -> add chains on i64; the expected output below fuses each stage into one <2 x i64> operation
  339. define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  340. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  341. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  342. %Y1 = call i64 @llvm.ctpop.i64(i64 %X1) ; chain 1, depth 2
  343. %Y2 = call i64 @llvm.ctpop.i64(i64 %X2) ; chain 2, depth 2
  344. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  345. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  346. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  347. ret i64 %R
  348. ; CHECK-LABEL: @testctpop(
  349. ; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
  350. ; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
  351. ; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
  352. ; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
  353. ; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
  354. ; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
  355. ; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
  356. ; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
  357. ; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
  358. ; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
  359. ; CHECK: ret i64 %R
  360. }
  361. ; Basic depth-3 chain with ctlz: both calls pass the same i1 flag (true), and the expected output below keeps that flag as a scalar operand of the v2i64 call
  362. define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  363. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  364. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  365. %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) ; chain 1, depth 2
  366. %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true) ; chain 2, depth 2 (same i1 flag as %Y1)
  367. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  368. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  369. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  370. ret i64 %R
  371. ; CHECK-LABEL: @testctlz(
  372. ; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
  373. ; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
  374. ; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
  375. ; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
  376. ; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
  377. ; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
  378. ; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
  379. ; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
  380. ; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
  381. ; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
  382. ; CHECK: ret i64 %R
  383. }
  384. ; Basic depth-3 chain with ctlz (different i1 flag operands, true vs. false: should not vectorize); the expected output below is the unchanged scalar code
  385. define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  386. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  387. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  388. %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) ; chain 1, depth 2, flag true
  389. %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) ; chain 2, depth 2, flag false (deliberately differs from %Y1)
  390. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  391. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  392. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  393. ret i64 %R
  394. ; CHECK-LABEL: @testctlzneg(
  395. ; CHECK: %X1 = sub i64 %A1, %B1
  396. ; CHECK: %X2 = sub i64 %A2, %B2
  397. ; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
  398. ; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
  399. ; CHECK: %Z1 = add i64 %Y1, %B1
  400. ; CHECK: %Z2 = add i64 %Y2, %B2
  401. ; CHECK: %R = mul i64 %Z1, %Z2
  402. ; CHECK: ret i64 %R
  403. }
  404. ; Basic depth-3 chain with cttz: both calls pass the same i1 flag (true), and the expected output below keeps that flag as a scalar operand of the v2i64 call
  405. define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  406. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  407. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  408. %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) ; chain 1, depth 2
  409. %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true) ; chain 2, depth 2 (same i1 flag as %Y1)
  410. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  411. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  412. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  413. ret i64 %R
  414. ; CHECK-LABEL: @testcttz(
  415. ; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
  416. ; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
  417. ; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
  418. ; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
  419. ; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
  420. ; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
  421. ; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
  422. ; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
  423. ; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
  424. ; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
  425. ; CHECK: ret i64 %R
  426. }
  427. ; Basic depth-3 chain with cttz (different i1 flag operands, true vs. false: should not vectorize); the expected output below is the unchanged scalar code
  428. define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  429. %X1 = sub i64 %A1, %B1 ; chain 1, depth 1
  430. %X2 = sub i64 %A2, %B2 ; chain 2, depth 1
  431. %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) ; chain 1, depth 2, flag true
  432. %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) ; chain 2, depth 2, flag false (deliberately differs from %Y1)
  433. %Z1 = add i64 %Y1, %B1 ; chain 1, depth 3
  434. %Z2 = add i64 %Y2, %B2 ; chain 2, depth 3
  435. %R = mul i64 %Z1, %Z2 ; combines both chains into the scalar return value
  436. ret i64 %R
  437. ; CHECK-LABEL: @testcttzneg(
  438. ; CHECK: %X1 = sub i64 %A1, %B1
  439. ; CHECK: %X2 = sub i64 %A2, %B2
  440. ; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
  441. ; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
  442. ; CHECK: %Z1 = add i64 %Y1, %B1
  443. ; CHECK: %Z2 = add i64 %Y2, %B2
  444. ; CHECK: %R = mul i64 %Z1, %Z2
  445. ; CHECK: ret i64 %R
  446. }
  447. ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
  448. ; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
  449. ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
  450. ; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
  451. ; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
  452. ; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
  453. ; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
  454. ; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
  455. ; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
  456. ; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
  457. ; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
  458. ; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
  459. ; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
  460. ; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
  461. ; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
  462. ; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
  463. ; CHECK: attributes #0 = { nounwind readnone }