; testshiftlshr.ll 15 KB


  ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
  ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
  ; Per-lane logical right shift of a <2 x i16> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 20 for the lshr and
  ; psrlq in the llc output.
  %shifttype = type <2 x i16>
  define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
  entry:
    ; SSE2: shift2i16
    ; SSE2: cost of 20 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i16
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype %a, %b
    ret %shifttype %0
  }
  ; Per-lane logical right shift of a <4 x i16> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 16 for the lshr and
  ; psrld in the llc output.
  %shifttype4i16 = type <4 x i16>
  define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
  entry:
    ; SSE2: shift4i16
    ; SSE2: cost of 16 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i16
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype4i16 %a, %b
    ret %shifttype4i16 %0
  }
  ; Per-lane logical right shift of an <8 x i16> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 32 for the lshr and
  ; psrlw in the llc output.
  %shifttype8i16 = type <8 x i16>
  define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
  entry:
    ; SSE2: shift8i16
    ; SSE2: cost of 32 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i16
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype8i16 %a, %b
    ret %shifttype8i16 %0
  }
  ; Per-lane logical right shift of a <16 x i16> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 64 for the lshr and
  ; psrlw in the llc output.
  %shifttype16i16 = type <16 x i16>
  define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
  entry:
    ; SSE2: shift16i16
    ; SSE2: cost of 64 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i16
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype16i16 %a, %b
    ret %shifttype16i16 %0
  }
  ; Per-lane logical right shift of a <32 x i16> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 128 for the lshr and
  ; psrlw in the llc output.
  %shifttype32i16 = type <32 x i16>
  define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
  entry:
    ; SSE2: shift32i16
    ; SSE2: cost of 128 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i16
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype32i16 %a, %b
    ret %shifttype32i16 %0
  }
  ; Per-lane logical right shift of a <2 x i32> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 20 for the lshr and
  ; psrlq in the llc output.
  %shifttype2i32 = type <2 x i32>
  define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
  entry:
    ; SSE2: shift2i32
    ; SSE2: cost of 20 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i32
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype2i32 %a, %b
    ret %shifttype2i32 %0
  }
  ; Per-lane logical right shift of a <4 x i32> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 16 for the lshr and
  ; psrld in the llc output.
  %shifttype4i32 = type <4 x i32>
  define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
  entry:
    ; SSE2: shift4i32
    ; SSE2: cost of 16 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i32
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype4i32 %a, %b
    ret %shifttype4i32 %0
  }
  ; Per-lane logical right shift of an <8 x i32> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 32 for the lshr and
  ; psrld in the llc output.
  %shifttype8i32 = type <8 x i32>
  define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
  entry:
    ; SSE2: shift8i32
    ; SSE2: cost of 32 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i32
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype8i32 %a, %b
    ret %shifttype8i32 %0
  }
  ; Per-lane logical right shift of a <16 x i32> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 64 for the lshr and
  ; psrld in the llc output.
  %shifttype16i32 = type <16 x i32>
  define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
  entry:
    ; SSE2: shift16i32
    ; SSE2: cost of 64 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i32
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype16i32 %a, %b
    ret %shifttype16i32 %0
  }
  ; Per-lane logical right shift of a <32 x i32> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 128 for the lshr and
  ; psrld in the llc output.
  %shifttype32i32 = type <32 x i32>
  define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
  entry:
    ; SSE2: shift32i32
    ; SSE2: cost of 128 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i32
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype32i32 %a, %b
    ret %shifttype32i32 %0
  }
  ; Per-lane logical right shift of a <2 x i64> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 20 for the lshr and
  ; psrlq in the llc output.
  %shifttype2i64 = type <2 x i64>
  define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
  entry:
    ; SSE2: shift2i64
    ; SSE2: cost of 20 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i64
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype2i64 %a, %b
    ret %shifttype2i64 %0
  }
  ; Per-lane logical right shift of a <4 x i64> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 40 for the lshr and
  ; psrlq in the llc output.
  %shifttype4i64 = type <4 x i64>
  define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
  entry:
    ; SSE2: shift4i64
    ; SSE2: cost of 40 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i64
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype4i64 %a, %b
    ret %shifttype4i64 %0
  }
  ; Per-lane logical right shift of an <8 x i64> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 80 for the lshr and
  ; psrlq in the llc output.
  %shifttype8i64 = type <8 x i64>
  define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
  entry:
    ; SSE2: shift8i64
    ; SSE2: cost of 80 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i64
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype8i64 %a, %b
    ret %shifttype8i64 %0
  }
  ; Per-lane logical right shift of a <16 x i64> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 160 for the lshr and
  ; psrlq in the llc output.
  %shifttype16i64 = type <16 x i64>
  define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
  entry:
    ; SSE2: shift16i64
    ; SSE2: cost of 160 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i64
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype16i64 %a, %b
    ret %shifttype16i64 %0
  }
  ; Per-lane logical right shift of a <32 x i64> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 320 for the lshr and
  ; psrlq in the llc output.
  %shifttype32i64 = type <32 x i64>
  define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
  entry:
    ; SSE2: shift32i64
    ; SSE2: cost of 320 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i64
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype32i64 %a, %b
    ret %shifttype32i64 %0
  }
  ; Per-lane logical right shift of a <2 x i8> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 20 for the lshr and
  ; psrlq in the llc output.
  %shifttype2i8 = type <2 x i8>
  define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
  entry:
    ; SSE2: shift2i8
    ; SSE2: cost of 20 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i8
    ; SSE2-CODEGEN: psrlq
    %0 = lshr %shifttype2i8 %a, %b
    ret %shifttype2i8 %0
  }
  ; Per-lane logical right shift of a <4 x i8> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 16 for the lshr and
  ; psrld in the llc output.
  %shifttype4i8 = type <4 x i8>
  define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
  entry:
    ; SSE2: shift4i8
    ; SSE2: cost of 16 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i8
    ; SSE2-CODEGEN: psrld
    %0 = lshr %shifttype4i8 %a, %b
    ret %shifttype4i8 %0
  }
  ; Per-lane logical right shift of an <8 x i8> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 32 for the lshr and
  ; psrlw in the llc output.
  %shifttype8i8 = type <8 x i8>
  define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
  entry:
    ; SSE2: shift8i8
    ; SSE2: cost of 32 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i8
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype8i8 %a, %b
    ret %shifttype8i8 %0
  }
  ; Per-lane logical right shift of a <16 x i8> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 26 for the lshr and
  ; psrlw in the llc output.
  %shifttype16i8 = type <16 x i8>
  define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
  entry:
    ; SSE2: shift16i8
    ; SSE2: cost of 26 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i8
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype16i8 %a, %b
    ret %shifttype16i8 %0
  }
  ; Per-lane logical right shift of a <32 x i8> vector by a variable amount.
  ; The checks below expect a cost-model estimate of 52 for the lshr and
  ; psrlw in the llc output.
  %shifttype32i8 = type <32 x i8>
  define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
  entry:
    ; SSE2: shift32i8
    ; SSE2: cost of 52 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i8
    ; SSE2-CODEGEN: psrlw
    %0 = lshr %shifttype32i8 %a, %b
    ret %shifttype32i8 %0
  }
  ; Test shift by a constant vector.

  ; Logical right shift of a <2 x i16> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec = type <2 x i16>
  define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
  entry:
    ; SSE2: shift2i16const
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i16const
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec %a, <i16 3, i16 3>
    ret %shifttypec %0
  }
  ; Logical right shift of a <4 x i16> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec4i16 = type <4 x i16>
  define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
  entry:
    ; SSE2: shift4i16const
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i16const
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec4i16 %0
  }
  ; Logical right shift of an <8 x i16> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec8i16 = type <8 x i16>
  define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
  entry:
    ; SSE2: shift8i16const
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i16const
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec8i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                   i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec8i16 %0
  }
  ; Logical right shift of a <16 x i16> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 2 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec16i16 = type <16 x i16>
  define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
                                           %shifttypec16i16 %b) {
  entry:
    ; SSE2: shift16i16const
    ; SSE2: cost of 2 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i16const
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec16i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec16i16 %0
  }
  ; Logical right shift of a <32 x i16> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 4 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec32i16 = type <32 x i16>
  define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
                                           %shifttypec32i16 %b) {
  entry:
    ; SSE2: shift32i16const
    ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i16const
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec32i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3,
                                    i16 3, i16 3, i16 3, i16 3>
    ret %shifttypec32i16 %0
  }
  ; Logical right shift of a <2 x i32> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec2i32 = type <2 x i32>
  define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
  entry:
    ; SSE2: shift2i32c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i32c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec2i32 %a, <i32 3, i32 3>
    ret %shifttypec2i32 %0
  }
  ; Logical right shift of a <4 x i32> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec4i32 = type <4 x i32>
  define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
  entry:
    ; SSE2: shift4i32c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i32c
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec4i32 %0
  }
  ; Logical right shift of an <8 x i32> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 2 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec8i32 = type <8 x i32>
  define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
  entry:
    ; SSE2: shift8i32c
    ; SSE2: cost of 2 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i32c
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec8i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                   i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec8i32 %0
  }
  ; Logical right shift of a <16 x i32> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 4 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec16i32 = type <16 x i32>
  define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
  entry:
    ; SSE2: shift16i32c
    ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i32c
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec16i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec16i32 %0
  }
  ; Logical right shift of a <32 x i32> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 8 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec32i32 = type <32 x i32>
  define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
  entry:
    ; SSE2: shift32i32c
    ; SSE2: cost of 8 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i32c
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec32i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3,
                                    i32 3, i32 3, i32 3, i32 3>
    ret %shifttypec32i32 %0
  }
  ; Logical right shift of a <2 x i64> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec2i64 = type <2 x i64>
  define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
  entry:
    ; SSE2: shift2i64c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i64c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec2i64 %a, <i64 3, i64 3>
    ret %shifttypec2i64 %0
  }
  ; Logical right shift of a <4 x i64> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 2 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec4i64 = type <4 x i64>
  define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
  entry:
    ; SSE2: shift4i64c
    ; SSE2: cost of 2 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i64c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec4i64 %0
  }
  ; Logical right shift of an <8 x i64> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 4 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec8i64 = type <8 x i64>
  define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
  entry:
    ; SSE2: shift8i64c
    ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i64c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec8i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                   i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec8i64 %0
  }
  ; Logical right shift of a <16 x i64> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 8 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec16i64 = type <16 x i64>
  define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
  entry:
    ; SSE2: shift16i64c
    ; SSE2: cost of 8 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i64c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec16i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec16i64 %0
  }
  ; Logical right shift of a <32 x i64> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 16 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec32i64 = type <32 x i64>
  define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
  entry:
    ; SSE2: shift32i64c
    ; SSE2: cost of 16 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i64c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec32i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3,
                                    i64 3, i64 3, i64 3, i64 3>
    ret %shifttypec32i64 %0
  }
  ; Logical right shift of a <2 x i8> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlq $3 in the llc output.
  %shifttypec2i8 = type <2 x i8>
  define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
  entry:
    ; SSE2: shift2i8c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i8c
    ; SSE2-CODEGEN: psrlq $3
    %0 = lshr %shifttypec2i8 %a, <i8 3, i8 3>
    ret %shifttypec2i8 %0
  }
  ; Logical right shift of a <4 x i8> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrld $3 in the llc output.
  %shifttypec4i8 = type <4 x i8>
  define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
  entry:
    ; SSE2: shift4i8c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i8c
    ; SSE2-CODEGEN: psrld $3
    %0 = lshr %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec4i8 %0
  }
  ; Logical right shift of an <8 x i8> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec8i8 = type <8 x i8>
  define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
  entry:
    ; SSE2: shift8i8c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i8c
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec8i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                  i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec8i8 %0
  }
  ; Logical right shift of a <16 x i8> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 1 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec16i8 = type <16 x i8>
  define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
  entry:
    ; SSE2: shift16i8c
    ; SSE2: cost of 1 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i8c
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec16i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec16i8 %0
  }
  ; Logical right shift of a <32 x i8> vector by the constant splat 3.
  ; %b is accepted but not used by the body.
  ; The checks below expect a cost-model estimate of 2 for the lshr and
  ; an immediate-form psrlw $3 in the llc output.
  %shifttypec32i8 = type <32 x i8>
  define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
  entry:
    ; SSE2: shift32i8c
    ; SSE2: cost of 2 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i8c
    ; SSE2-CODEGEN: psrlw $3
    %0 = lshr %shifttypec32i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3,
                                   i8 3, i8 3, i8 3, i8 3>
    ret %shifttypec32i8 %0
  }