; testshiftashr.ll (15 KB)


  ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
  ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
  ; ashr of <2 x i16> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 20; llc run expects `sarq %cl`.
  %shifttype = type <2 x i16>
  define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
  entry:
    ; SSE2: shift2i16
    ; SSE2: cost of 20 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i16
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype %a, %b
    ret %shifttype %0
  }
  ; ashr of <4 x i16> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 16; llc run expects `psrad`.
  %shifttype4i16 = type <4 x i16>
  define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
  entry:
    ; SSE2: shift4i16
    ; SSE2: cost of 16 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i16
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype4i16 %a, %b
    ret %shifttype4i16 %0
  }
  ; ashr of <8 x i16> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 32; llc run expects `psraw`.
  %shifttype8i16 = type <8 x i16>
  define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
  entry:
    ; SSE2: shift8i16
    ; SSE2: cost of 32 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i16
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype8i16 %a, %b
    ret %shifttype8i16 %0
  }
  ; ashr of <16 x i16> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 64; llc run expects `psraw`.
  %shifttype16i16 = type <16 x i16>
  define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
  entry:
    ; SSE2: shift16i16
    ; SSE2: cost of 64 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i16
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype16i16 %a, %b
    ret %shifttype16i16 %0
  }
  ; ashr of <32 x i16> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 128; llc run expects `psraw`.
  %shifttype32i16 = type <32 x i16>
  define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
  entry:
    ; SSE2: shift32i16
    ; SSE2: cost of 128 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i16
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype32i16 %a, %b
    ret %shifttype32i16 %0
  }
  ; ashr of <2 x i32> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 20; llc run expects `sarq %cl`.
  %shifttype2i32 = type <2 x i32>
  define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
  entry:
    ; SSE2: shift2i32
    ; SSE2: cost of 20 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i32
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype2i32 %a, %b
    ret %shifttype2i32 %0
  }
  ; ashr of <4 x i32> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 16; llc run expects `psrad`.
  %shifttype4i32 = type <4 x i32>
  define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
  entry:
    ; SSE2: shift4i32
    ; SSE2: cost of 16 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i32
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype4i32 %a, %b
    ret %shifttype4i32 %0
  }
  ; ashr of <8 x i32> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 32; llc run expects `psrad`.
  %shifttype8i32 = type <8 x i32>
  define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
  entry:
    ; SSE2: shift8i32
    ; SSE2: cost of 32 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i32
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype8i32 %a, %b
    ret %shifttype8i32 %0
  }
  ; ashr of <16 x i32> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 64; llc run expects `psrad`.
  %shifttype16i32 = type <16 x i32>
  define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
  entry:
    ; SSE2: shift16i32
    ; SSE2: cost of 64 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i32
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype16i32 %a, %b
    ret %shifttype16i32 %0
  }
  ; ashr of <32 x i32> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 128; llc run expects `psrad`.
  %shifttype32i32 = type <32 x i32>
  define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
  entry:
    ; SSE2: shift32i32
    ; SSE2: cost of 128 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i32
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype32i32 %a, %b
    ret %shifttype32i32 %0
  }
  ; ashr of <2 x i64> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 20; llc run expects `sarq %cl`.
  %shifttype2i64 = type <2 x i64>
  define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
  entry:
    ; SSE2: shift2i64
    ; SSE2: cost of 20 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i64
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype2i64 %a, %b
    ret %shifttype2i64 %0
  }
  ; ashr of <4 x i64> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 40; llc run expects `sarq %cl`.
  %shifttype4i64 = type <4 x i64>
  define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
  entry:
    ; SSE2: shift4i64
    ; SSE2: cost of 40 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i64
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype4i64 %a, %b
    ret %shifttype4i64 %0
  }
  ; ashr of <8 x i64> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 80; llc run expects `sarq %cl`.
  %shifttype8i64 = type <8 x i64>
  define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
  entry:
    ; SSE2: shift8i64
    ; SSE2: cost of 80 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i64
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype8i64 %a, %b
    ret %shifttype8i64 %0
  }
  ; ashr of <16 x i64> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 160; llc run expects `sarq %cl`.
  %shifttype16i64 = type <16 x i64>
  define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
  entry:
    ; SSE2: shift16i64
    ; SSE2: cost of 160 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i64
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype16i64 %a, %b
    ret %shifttype16i64 %0
  }
  ; ashr of <32 x i64> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 320; llc run expects `sarq %cl`.
  %shifttype32i64 = type <32 x i64>
  define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
  entry:
    ; SSE2: shift32i64
    ; SSE2: cost of 320 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i64
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype32i64 %a, %b
    ret %shifttype32i64 %0
  }
  ; ashr of <2 x i8> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 20; llc run expects `sarq %cl`.
  %shifttype2i8 = type <2 x i8>
  define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
  entry:
    ; SSE2: shift2i8
    ; SSE2: cost of 20 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i8
    ; SSE2-CODEGEN: sarq %cl
    %0 = ashr %shifttype2i8 %a, %b
    ret %shifttype2i8 %0
  }
  ; ashr of <4 x i8> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 16; llc run expects `psrad`.
  %shifttype4i8 = type <4 x i8>
  define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
  entry:
    ; SSE2: shift4i8
    ; SSE2: cost of 16 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i8
    ; SSE2-CODEGEN: psrad
    %0 = ashr %shifttype4i8 %a, %b
    ret %shifttype4i8 %0
  }
  ; ashr of <8 x i8> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 32; llc run expects `psraw`.
  %shifttype8i8 = type <8 x i8>
  define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
  entry:
    ; SSE2: shift8i8
    ; SSE2: cost of 32 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i8
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype8i8 %a, %b
    ret %shifttype8i8 %0
  }
  ; ashr of <16 x i8> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 54; llc run expects `psraw`.
  %shifttype16i8 = type <16 x i8>
  define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
  entry:
    ; SSE2: shift16i8
    ; SSE2: cost of 54 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i8
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype16i8 %a, %b
    ret %shifttype16i8 %0
  }
  ; ashr of <32 x i8> by a variable per-lane amount (%b).
  ; Cost-model run expects an estimated cost of 108; llc run expects `psraw`.
  %shifttype32i8 = type <32 x i8>
  define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
  entry:
    ; SSE2: shift32i8
    ; SSE2: cost of 108 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i8
    ; SSE2-CODEGEN: psraw
    %0 = ashr %shifttype32i8 %a, %b
    ret %shifttype32i8 %0
  }
  ; Test shift by a constant value.
  ; ashr of <2 x i16> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrad $3`.
  %shifttypec = type <2 x i16>
  define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
  entry:
    ; SSE2: shift2i16const
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i16const
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec %a, <i16 3, i16 3>
    ret %shifttypec %0
  }
  ; ashr of <4 x i16> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 1; llc run expects `psrad $3`.
  %shifttypec4i16 = type <4 x i16>
  define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
  entry:
    ; SSE2: shift4i16const
    ; SSE2: cost of 1 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i16const
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec4i16 %0
  }
  ; ashr of <8 x i16> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 1; llc run expects `psraw $3`.
  %shifttypec8i16 = type <8 x i16>
  define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
  entry:
    ; SSE2: shift8i16const
    ; SSE2: cost of 1 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i16const
    ; SSE2-CODEGEN: psraw $3
    %0 = ashr %shifttypec8i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                   i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec8i16 %0
  }
  ; ashr of <16 x i16> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 2; llc run expects `psraw $3`.
  %shifttypec16i16 = type <16 x i16>
  define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
                                           %shifttypec16i16 %b) {
  entry:
    ; SSE2: shift16i16const
    ; SSE2: cost of 2 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i16const
    ; SSE2-CODEGEN: psraw $3
    %0 = ashr %shifttypec16i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec16i16 %0
  }
  ; ashr of <32 x i16> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psraw $3`.
  %shifttypec32i16 = type <32 x i16>
  define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
                                           %shifttypec32i16 %b) {
  entry:
    ; SSE2: shift32i16const
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i16const
    ; SSE2-CODEGEN: psraw $3
    %0 = ashr %shifttypec32i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec32i16 %0
  }
  ; ashr of <2 x i32> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrad $3`.
  %shifttypec2i32 = type <2 x i32>
  define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
  entry:
    ; SSE2: shift2i32c
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i32c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec2i32 %a, <i32 3, i32 3>
    ret %shifttypec2i32 %0
  }
  ; ashr of <4 x i32> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 1; llc run expects `psrad $3`.
  %shifttypec4i32 = type <4 x i32>
  define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
  entry:
    ; SSE2: shift4i32c
    ; SSE2: cost of 1 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i32c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec4i32 %0
  }
  ; ashr of <8 x i32> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 2; llc run expects `psrad $3`.
  %shifttypec8i32 = type <8 x i32>
  define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
  entry:
    ; SSE2: shift8i32c
    ; SSE2: cost of 2 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i32c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec8i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                   i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec8i32 %0
  }
  ; ashr of <16 x i32> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrad $3`.
  %shifttypec16i32 = type <16 x i32>
  define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
  entry:
    ; SSE2: shift16i32c
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i32c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec16i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec16i32 %0
  }
  ; ashr of <32 x i32> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 8; llc run expects `psrad $3`.
  %shifttypec32i32 = type <32 x i32>
  define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
  entry:
    ; SSE2: shift32i32c
    ; getTypeConversion fails here and promotes this to an i64.
    ; SSE2: cost of 8 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i32c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec32i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec32i32 %0
  }
  ; ashr of <2 x i64> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrad $3`.
  %shifttypec2i64 = type <2 x i64>
  define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
  entry:
    ; SSE2: shift2i64c
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i64c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec2i64 %a, <i64 3, i64 3>
    ret %shifttypec2i64 %0
  }
  ; ashr of <4 x i64> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 8; llc run expects `psrad $3`.
  %shifttypec4i64 = type <4 x i64>
  define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
  entry:
    ; SSE2: shift4i64c
    ; SSE2: cost of 8 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i64c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec4i64 %0
  }
  ; ashr of <8 x i64> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 16; llc run expects `psrad $3`.
  %shifttypec8i64 = type <8 x i64>
  define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
  entry:
    ; SSE2: shift8i64c
    ; SSE2: cost of 16 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i64c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec8i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                   i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec8i64 %0
  }
  ; ashr of <16 x i64> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 32; llc run expects `psrad $3`.
  %shifttypec16i64 = type <16 x i64>
  define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
  entry:
    ; SSE2: shift16i64c
    ; SSE2: cost of 32 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i64c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec16i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec16i64 %0
  }
  ; ashr of <32 x i64> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 64; llc run expects `psrad $3`.
  %shifttypec32i64 = type <32 x i64>
  define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
  entry:
    ; SSE2: shift32i64c
    ; SSE2: cost of 64 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i64c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec32i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec32i64 %0
  }
  ; ashr of <2 x i8> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrad $3`.
  %shifttypec2i8 = type <2 x i8>
  define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
  entry:
    ; SSE2: shift2i8c
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift2i8c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec2i8 %a, <i8 3, i8 3>
    ret %shifttypec2i8 %0
  }
  ; ashr of <4 x i8> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 1; llc run expects `psrad $3`.
  %shifttypec4i8 = type <4 x i8>
  define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
  entry:
    ; SSE2: shift4i8c
    ; SSE2: cost of 1 {{.*}} ashr
    ; SSE2-CODEGEN: shift4i8c
    ; SSE2-CODEGEN: psrad $3
    %0 = ashr %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec4i8 %0
  }
  ; ashr of <8 x i8> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 1; llc run expects `psraw $3`.
  %shifttypec8i8 = type <8 x i8>
  define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
  entry:
    ; SSE2: shift8i8c
    ; SSE2: cost of 1 {{.*}} ashr
    ; SSE2-CODEGEN: shift8i8c
    ; SSE2-CODEGEN: psraw $3
    %0 = ashr %shifttypec8i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                  i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec8i8 %0
  }
  ; ashr of <16 x i8> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 4; llc run expects `psrlw $3`.
  %shifttypec16i8 = type <16 x i8>
  define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
  entry:
    ; SSE2: shift16i8c
    ; SSE2: cost of 4 {{.*}} ashr
    ; SSE2-CODEGEN: shift16i8c
    ; SSE2-CODEGEN: psrlw $3
    %0 = ashr %shifttypec16i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec16i8 %0
  }
  ; ashr of <32 x i8> by the constant splat 3; the %b argument is unused.
  ; Cost-model run expects an estimated cost of 8; llc run expects `psrlw $3`.
  %shifttypec32i8 = type <32 x i8>
  define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
  entry:
    ; SSE2: shift32i8c
    ; SSE2: cost of 8 {{.*}} ashr
    ; SSE2-CODEGEN: shift32i8c
    ; SSE2-CODEGEN: psrlw $3
    %0 = ashr %shifttypec32i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec32i8 %0
  }