; sitofp.ll
  ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
  ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
  ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
  ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
  ; sitofp <2 x i8> -> <2 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
    ; SSE2-LABEL: sitofpv2i8v2double
    ; SSE2: cost of 20 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i8v2double
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i8v2double
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i8v2double
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i8> %a to <2 x double>
    ret <2 x double> %1
  }
  ; sitofp <4 x i8> -> <4 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
    ; SSE2-LABEL: sitofpv4i8v4double
    ; SSE2: cost of 40 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i8v4double
    ; AVX1: cost of 3 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i8v4double
    ; AVX2: cost of 3 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i8v4double
    ; AVX512F: cost of 3 {{.*}} sitofp
    %1 = sitofp <4 x i8> %a to <4 x double>
    ret <4 x double> %1
  }
  ; sitofp <8 x i8> -> <8 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
    ; SSE2-LABEL: sitofpv8i8v8double
    ; SSE2: cost of 80 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i8v8double
    ; AVX1: cost of 20 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i8v8double
    ; AVX2: cost of 20 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i8v8double
    ; AVX512F: cost of 2 {{.*}} sitofp
    %1 = sitofp <8 x i8> %a to <8 x double>
    ret <8 x double> %1
  }
  ; sitofp <16 x i8> -> <16 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
    ; SSE2-LABEL: sitofpv16i8v16double
    ; SSE2: cost of 160 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i8v16double
    ; AVX1: cost of 40 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i8v16double
    ; AVX2: cost of 40 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i8v16double
    ; AVX512F: cost of 44 {{.*}} sitofp
    %1 = sitofp <16 x i8> %a to <16 x double>
    ret <16 x double> %1
  }
  ; sitofp <32 x i8> -> <32 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
    ; SSE2-LABEL: sitofpv32i8v32double
    ; SSE2: cost of 320 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i8v32double
    ; AVX1: cost of 80 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i8v32double
    ; AVX2: cost of 80 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i8v32double
    ; AVX512F: cost of 88 {{.*}} sitofp
    %1 = sitofp <32 x i8> %a to <32 x double>
    ret <32 x double> %1
  }
  ; sitofp <2 x i16> -> <2 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
    ; SSE2-LABEL: sitofpv2i16v2double
    ; SSE2: cost of 20 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i16v2double
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i16v2double
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i16v2double
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i16> %a to <2 x double>
    ret <2 x double> %1
  }
  ; sitofp <4 x i16> -> <4 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
    ; SSE2-LABEL: sitofpv4i16v4double
    ; SSE2: cost of 40 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i16v4double
    ; AVX1: cost of 3 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i16v4double
    ; AVX2: cost of 3 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i16v4double
    ; AVX512F: cost of 3 {{.*}} sitofp
    %1 = sitofp <4 x i16> %a to <4 x double>
    ret <4 x double> %1
  }
  ; sitofp <8 x i16> -> <8 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
    ; SSE2-LABEL: sitofpv8i16v8double
    ; SSE2: cost of 80 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i16v8double
    ; AVX1: cost of 20 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i16v8double
    ; AVX2: cost of 20 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i16v8double
    ; AVX512F: cost of 2 {{.*}} sitofp
    %1 = sitofp <8 x i16> %a to <8 x double>
    ret <8 x double> %1
  }
  ; sitofp <16 x i16> -> <16 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
    ; SSE2-LABEL: sitofpv16i16v16double
    ; SSE2: cost of 160 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i16v16double
    ; AVX1: cost of 40 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i16v16double
    ; AVX2: cost of 40 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i16v16double
    ; AVX512F: cost of 44 {{.*}} sitofp
    %1 = sitofp <16 x i16> %a to <16 x double>
    ret <16 x double> %1
  }
  ; sitofp <32 x i16> -> <32 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
    ; SSE2-LABEL: sitofpv32i16v32double
    ; SSE2: cost of 320 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i16v32double
    ; AVX1: cost of 80 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i16v32double
    ; AVX2: cost of 80 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i16v32double
    ; AVX512F: cost of 88 {{.*}} sitofp
    %1 = sitofp <32 x i16> %a to <32 x double>
    ret <32 x double> %1
  }
  ; sitofp <2 x i32> -> <2 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
    ; SSE2-LABEL: sitofpv2i32v2double
    ; SSE2: cost of 20 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i32v2double
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i32v2double
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i32v2double
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i32> %a to <2 x double>
    ret <2 x double> %1
  }
  ; sitofp <4 x i32> -> <4 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
    ; SSE2-LABEL: sitofpv4i32v4double
    ; SSE2: cost of 40 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i32v4double
    ; AVX1: cost of 1 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i32v4double
    ; AVX2: cost of 1 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i32v4double
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <4 x i32> %a to <4 x double>
    ret <4 x double> %1
  }
  ; sitofp <8 x i32> -> <8 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
    ; SSE2-LABEL: sitofpv8i32v8double
    ; SSE2: cost of 80 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i32v8double
    ; AVX1: cost of 20 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i32v8double
    ; AVX2: cost of 20 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i32v8double
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <8 x i32> %a to <8 x double>
    ret <8 x double> %1
  }
  ; sitofp <16 x i32> -> <16 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
    ; SSE2-LABEL: sitofpv16i32v16double
    ; SSE2: cost of 160 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i32v16double
    ; AVX1: cost of 40 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i32v16double
    ; AVX2: cost of 40 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i32v16double
    ; AVX512F: cost of 44 {{.*}} sitofp
    %1 = sitofp <16 x i32> %a to <16 x double>
    ret <16 x double> %1
  }
  ; sitofp <32 x i32> -> <32 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
    ; SSE2-LABEL: sitofpv32i32v32double
    ; SSE2: cost of 320 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i32v32double
    ; AVX1: cost of 80 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i32v32double
    ; AVX2: cost of 80 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i32v32double
    ; AVX512F: cost of 88 {{.*}} sitofp
    %1 = sitofp <32 x i32> %a to <32 x double>
    ret <32 x double> %1
  }
  ; sitofp <2 x i64> -> <2 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
    ; SSE2-LABEL: sitofpv2i64v2double
    ; SSE2: cost of 20 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i64v2double
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i64v2double
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i64v2double
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i64> %a to <2 x double>
    ret <2 x double> %1
  }
  ; sitofp <4 x i64> -> <4 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
    ; SSE2-LABEL: sitofpv4i64v4double
    ; SSE2: cost of 40 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i64v4double
    ; AVX1: cost of 10 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i64v4double
    ; AVX2: cost of 10 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i64v4double
    ; AVX512F: cost of 10 {{.*}} sitofp
    %1 = sitofp <4 x i64> %a to <4 x double>
    ret <4 x double> %1
  }
  ; sitofp <8 x i64> -> <8 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
    ; SSE2-LABEL: sitofpv8i64v8double
    ; SSE2: cost of 80 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i64v8double
    ; AVX1: cost of 20 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i64v8double
    ; AVX2: cost of 20 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i64v8double
    ; AVX512F: cost of 22 {{.*}} sitofp
    %1 = sitofp <8 x i64> %a to <8 x double>
    ret <8 x double> %1
  }
  ; sitofp <16 x i64> -> <16 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
    ; SSE2-LABEL: sitofpv16i64v16double
    ; SSE2: cost of 160 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i64v16double
    ; AVX1: cost of 40 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i64v16double
    ; AVX2: cost of 40 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i64v16double
    ; AVX512F: cost of 44 {{.*}} sitofp
    %1 = sitofp <16 x i64> %a to <16 x double>
    ret <16 x double> %1
  }
  ; sitofp <32 x i64> -> <32 x double>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
    ; SSE2-LABEL: sitofpv32i64v32double
    ; SSE2: cost of 320 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i64v32double
    ; AVX1: cost of 80 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i64v32double
    ; AVX2: cost of 80 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i64v32double
    ; AVX512F: cost of 88 {{.*}} sitofp
    %1 = sitofp <32 x i64> %a to <32 x double>
    ret <32 x double> %1
  }
  ; sitofp <2 x i8> -> <2 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
    ; SSE2-LABEL: sitofpv2i8v2float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i8v2float
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i8v2float
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i8v2float
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i8> %a to <2 x float>
    ret <2 x float> %1
  }
  ; sitofp <4 x i8> -> <4 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
    ; SSE2-LABEL: sitofpv4i8v4float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i8v4float
    ; AVX1: cost of 3 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i8v4float
    ; AVX2: cost of 3 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i8v4float
    ; AVX512F: cost of 3 {{.*}} sitofp
    %1 = sitofp <4 x i8> %a to <4 x float>
    ret <4 x float> %1
  }
  ; sitofp <8 x i8> -> <8 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
    ; SSE2-LABEL: sitofpv8i8v8float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i8v8float
    ; AVX1: cost of 8 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i8v8float
    ; AVX2: cost of 8 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i8v8float
    ; AVX512F: cost of 8 {{.*}} sitofp
    %1 = sitofp <8 x i8> %a to <8 x float>
    ret <8 x float> %1
  }
  ; sitofp <16 x i8> -> <16 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
    ; SSE2-LABEL: sitofpv16i8v16float
    ; SSE2: cost of 8 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i8v16float
    ; AVX1: cost of 44 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i8v16float
    ; AVX2: cost of 44 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i8v16float
    ; AVX512F: cost of 2 {{.*}} sitofp
    %1 = sitofp <16 x i8> %a to <16 x float>
    ret <16 x float> %1
  }
  ; sitofp <32 x i8> -> <32 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
    ; SSE2-LABEL: sitofpv32i8v32float
    ; SSE2: cost of 16 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i8v32float
    ; AVX1: cost of 88 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i8v32float
    ; AVX2: cost of 88 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i8v32float
    ; AVX512F: cost of 92 {{.*}} sitofp
    %1 = sitofp <32 x i8> %a to <32 x float>
    ret <32 x float> %1
  }
  ; sitofp <2 x i16> -> <2 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
    ; SSE2-LABEL: sitofpv2i16v2float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i16v2float
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i16v2float
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i16v2float
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i16> %a to <2 x float>
    ret <2 x float> %1
  }
  ; sitofp <4 x i16> -> <4 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
    ; SSE2-LABEL: sitofpv4i16v4float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i16v4float
    ; AVX1: cost of 3 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i16v4float
    ; AVX2: cost of 3 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i16v4float
    ; AVX512F: cost of 3 {{.*}} sitofp
    %1 = sitofp <4 x i16> %a to <4 x float>
    ret <4 x float> %1
  }
  ; sitofp <8 x i16> -> <8 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
    ; SSE2-LABEL: sitofpv8i16v8float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i16v8float
    ; AVX1: cost of 5 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i16v8float
    ; AVX2: cost of 5 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i16v8float
    ; AVX512F: cost of 5 {{.*}} sitofp
    %1 = sitofp <8 x i16> %a to <8 x float>
    ret <8 x float> %1
  }
  ; sitofp <16 x i16> -> <16 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
    ; SSE2-LABEL: sitofpv16i16v16float
    ; SSE2: cost of 30 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i16v16float
    ; AVX1: cost of 44 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i16v16float
    ; AVX2: cost of 44 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i16v16float
    ; AVX512F: cost of 2 {{.*}} sitofp
    %1 = sitofp <16 x i16> %a to <16 x float>
    ret <16 x float> %1
  }
  ; sitofp <32 x i16> -> <32 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
    ; SSE2-LABEL: sitofpv32i16v32float
    ; SSE2: cost of 60 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i16v32float
    ; AVX1: cost of 88 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i16v32float
    ; AVX2: cost of 88 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i16v32float
    ; AVX512F: cost of 2 {{.*}} sitofp
    %1 = sitofp <32 x i16> %a to <32 x float>
    ret <32 x float> %1
  }
  ; sitofp <2 x i32> -> <2 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
    ; SSE2-LABEL: sitofpv2i32v2float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i32v2float
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i32v2float
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i32v2float
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i32> %a to <2 x float>
    ret <2 x float> %1
  }
  ; sitofp <4 x i32> -> <4 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
    ; SSE2-LABEL: sitofpv4i32v4float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i32v4float
    ; AVX1: cost of 1 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i32v4float
    ; AVX2: cost of 1 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i32v4float
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <4 x i32> %a to <4 x float>
    ret <4 x float> %1
  }
  ; sitofp <8 x i32> -> <8 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
    ; SSE2-LABEL: sitofpv8i32v8float
    ; SSE2: cost of 30 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i32v8float
    ; AVX1: cost of 1 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i32v8float
    ; AVX2: cost of 1 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i32v8float
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <8 x i32> %a to <8 x float>
    ret <8 x float> %1
  }
  ; sitofp <16 x i32> -> <16 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
    ; SSE2-LABEL: sitofpv16i32v16float
    ; SSE2: cost of 60 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i32v16float
    ; AVX1: cost of 44 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i32v16float
    ; AVX2: cost of 44 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i32v16float
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <16 x i32> %a to <16 x float>
    ret <16 x float> %1
  }
  ; sitofp <32 x i32> -> <32 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
    ; SSE2-LABEL: sitofpv32i32v32float
    ; SSE2: cost of 120 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i32v32float
    ; AVX1: cost of 88 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i32v32float
    ; AVX2: cost of 88 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i32v32float
    ; AVX512F: cost of 1 {{.*}} sitofp
    %1 = sitofp <32 x i32> %a to <32 x float>
    ret <32 x float> %1
  }
  ; sitofp <2 x i64> -> <2 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
    ; SSE2-LABEL: sitofpv2i64v2float
    ; SSE2: cost of 15 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv2i64v2float
    ; AVX1: cost of 4 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv2i64v2float
    ; AVX2: cost of 4 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv2i64v2float
    ; AVX512F: cost of 4 {{.*}} sitofp
    %1 = sitofp <2 x i64> %a to <2 x float>
    ret <2 x float> %1
  }
  ; sitofp <4 x i64> -> <4 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
    ; SSE2-LABEL: sitofpv4i64v4float
    ; SSE2: cost of 30 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv4i64v4float
    ; AVX1: cost of 10 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv4i64v4float
    ; AVX2: cost of 10 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv4i64v4float
    ; AVX512F: cost of 10 {{.*}} sitofp
    %1 = sitofp <4 x i64> %a to <4 x float>
    ret <4 x float> %1
  }
  ; sitofp <8 x i64> -> <8 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
    ; SSE2-LABEL: sitofpv8i64v8float
    ; SSE2: cost of 60 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i64v8float
    ; AVX1: cost of 22 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i64v8float
    ; AVX2: cost of 22 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i64v8float
    ; AVX512F: cost of 22 {{.*}} sitofp
    %1 = sitofp <8 x i64> %a to <8 x float>
    ret <8 x float> %1
  }
  ; sitofp <16 x i64> -> <16 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
    ; SSE2-LABEL: sitofpv16i64v16float
    ; SSE2: cost of 120 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i64v16float
    ; AVX1: cost of 44 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i64v16float
    ; AVX2: cost of 44 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i64v16float
    ; AVX512F: cost of 46 {{.*}} sitofp
    %1 = sitofp <16 x i64> %a to <16 x float>
    ret <16 x float> %1
  }
  ; sitofp <32 x i64> -> <32 x float>: expected cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
    ; SSE2-LABEL: sitofpv32i64v32float
    ; SSE2: cost of 240 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv32i64v32float
    ; AVX1: cost of 88 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv32i64v32float
    ; AVX2: cost of 88 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv32i64v32float
    ; AVX512F: cost of 92 {{.*}} sitofp
    %1 = sitofp <32 x i64> %a to <32 x float>
    ret <32 x float> %1
  }
  ; sitofp <8 x i1> -> <8 x double>, where the i1 vector is produced by an
  ; fcmp ogt against zero; expected sitofp cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <8 x double> @sitofpv8i1v8double(<8 x double> %a) {
    ; SSE2-LABEL: sitofpv8i1v8double
    ; SSE2: cost of 80 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv8i1v8double
    ; AVX1: cost of 20 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv8i1v8double
    ; AVX2: cost of 20 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv8i1v8double
    ; AVX512F: cost of 4 {{.*}} sitofp
    %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
    %1 = sitofp <8 x i1> %cmpres to <8 x double>
    ret <8 x double> %1
  }
  ; sitofp <16 x i1> -> <16 x float>, where the i1 vector is produced by an
  ; fcmp ogt against zero; expected sitofp cost per FileCheck prefix.
  ; The -LABEL lines anchor each cost check to this function's output.
  define <16 x float> @sitofpv16i1v16float(<16 x float> %a) {
    ; SSE2-LABEL: sitofpv16i1v16float
    ; SSE2: cost of 8 {{.*}} sitofp
    ;
    ; AVX1-LABEL: sitofpv16i1v16float
    ; AVX1: cost of 44 {{.*}} sitofp
    ;
    ; AVX2-LABEL: sitofpv16i1v16float
    ; AVX2: cost of 44 {{.*}} sitofp
    ;
    ; AVX512F-LABEL: sitofpv16i1v16float
    ; AVX512F: cost of 3 {{.*}} sitofp
    %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
    %1 = sitofp <16 x i1> %cmpres to <16 x float>
    ret <16 x float> %1
  }