/* asm-bits.c */
/*
 * Copyright 2008  Veselin Georgiev,
 * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "libcpuid.h"
#include "asm-bits.h"
  28. int cpuid_exists_by_eflags(void)
  29. {
  30. #if defined(PLATFORM_X64)
  31. return 1; /* CPUID is always present on the x86_64 */
  32. #elif defined(PLATFORM_X86)
  33. # if defined(COMPILER_GCC)
  34. int result;
  35. __asm __volatile(
  36. " pushfl\n"
  37. " pop %%eax\n"
  38. " mov %%eax, %%ecx\n"
  39. " xor $0x200000, %%eax\n"
  40. " push %%eax\n"
  41. " popfl\n"
  42. " pushfl\n"
  43. " pop %%eax\n"
  44. " xor %%ecx, %%eax\n"
  45. " mov %%eax, %0\n"
  46. " push %%ecx\n"
  47. " popfl\n"
  48. : "=m"(result)
  49. : :"eax", "ecx", "memory");
  50. return (result != 0);
  51. # elif defined(COMPILER_MICROSOFT)
  52. int result;
  53. __asm {
  54. pushfd
  55. pop eax
  56. mov ecx, eax
  57. xor eax, 0x200000
  58. push eax
  59. popfd
  60. pushfd
  61. pop eax
  62. xor eax, ecx
  63. mov result, eax
  64. push ecx
  65. popfd
  66. };
  67. return (result != 0);
  68. # else
  69. return 0;
  70. # endif /* COMPILER_MICROSOFT */
  71. #else
  72. return 0;
  73. #endif /* PLATFORM_X86 */
  74. }
  75. #ifdef INLINE_ASM_SUPPORTED
  76. /*
  77. * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
  78. * are implemented in separate .asm files. Otherwise, use inline assembly
  79. */
  80. void exec_cpuid(uint32_t *regs)
  81. {
  82. #ifdef COMPILER_GCC
  83. # ifdef PLATFORM_X64
  84. __asm __volatile(
  85. " mov %0, %%rdi\n"
  86. " push %%rbx\n"
  87. " push %%rcx\n"
  88. " push %%rdx\n"
  89. " mov (%%rdi), %%eax\n"
  90. " mov 4(%%rdi), %%ebx\n"
  91. " mov 8(%%rdi), %%ecx\n"
  92. " mov 12(%%rdi), %%edx\n"
  93. " cpuid\n"
  94. " movl %%eax, (%%rdi)\n"
  95. " movl %%ebx, 4(%%rdi)\n"
  96. " movl %%ecx, 8(%%rdi)\n"
  97. " movl %%edx, 12(%%rdi)\n"
  98. " pop %%rdx\n"
  99. " pop %%rcx\n"
  100. " pop %%rbx\n"
  101. :
  102. :"m"(regs)
  103. :"memory", "eax", "rdi"
  104. );
  105. # else
  106. __asm __volatile(
  107. " mov %0, %%edi\n"
  108. " push %%ebx\n"
  109. " push %%ecx\n"
  110. " push %%edx\n"
  111. " mov (%%edi), %%eax\n"
  112. " mov 4(%%edi), %%ebx\n"
  113. " mov 8(%%edi), %%ecx\n"
  114. " mov 12(%%edi), %%edx\n"
  115. " cpuid\n"
  116. " mov %%eax, (%%edi)\n"
  117. " mov %%ebx, 4(%%edi)\n"
  118. " mov %%ecx, 8(%%edi)\n"
  119. " mov %%edx, 12(%%edi)\n"
  120. " pop %%edx\n"
  121. " pop %%ecx\n"
  122. " pop %%ebx\n"
  123. :
  124. :"m"(regs)
  125. :"memory", "eax", "edi"
  126. );
  127. # endif /* COMPILER_GCC */
  128. #else
  129. # ifdef COMPILER_MICROSOFT
  130. __asm {
  131. push ebx
  132. push ecx
  133. push edx
  134. push edi
  135. mov edi, regs
  136. mov eax, [edi]
  137. mov ebx, [edi+4]
  138. mov ecx, [edi+8]
  139. mov edx, [edi+12]
  140. cpuid
  141. mov [edi], eax
  142. mov [edi+4], ebx
  143. mov [edi+8], ecx
  144. mov [edi+12], edx
  145. pop edi
  146. pop edx
  147. pop ecx
  148. pop ebx
  149. }
  150. # else
  151. # error "Unsupported compiler"
  152. # endif /* COMPILER_MICROSOFT */
  153. #endif
  154. }
  155. #endif /* INLINE_ASSEMBLY_SUPPORTED */
  156. #ifdef INLINE_ASM_SUPPORTED
  157. void cpu_rdtsc(uint64_t* result)
  158. {
  159. uint32_t low_part, hi_part;
  160. #ifdef COMPILER_GCC
  161. __asm __volatile (
  162. " rdtsc\n"
  163. " mov %%eax, %0\n"
  164. " mov %%edx, %1\n"
  165. :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
  166. );
  167. #else
  168. # ifdef COMPILER_MICROSOFT
  169. __asm {
  170. rdtsc
  171. mov low_part, eax
  172. mov hi_part, edx
  173. };
  174. # else
  175. # error "Unsupported compiler"
  176. # endif /* COMPILER_MICROSOFT */
  177. #endif /* COMPILER_GCC */
  178. *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32);
  179. }
  180. #endif /* INLINE_ASM_SUPPORTED */
  181. #ifdef INLINE_ASM_SUPPORTED
  182. void busy_sse_loop(int cycles)
  183. {
  184. #ifdef COMPILER_GCC
  185. #ifndef __APPLE__
  186. # define XALIGN ".balign 16\n"
  187. #else
  188. # define XALIGN ".align 4\n"
  189. #endif
  190. __asm __volatile (
  191. " xorps %%xmm0, %%xmm0\n"
  192. " xorps %%xmm1, %%xmm1\n"
  193. " xorps %%xmm2, %%xmm2\n"
  194. " xorps %%xmm3, %%xmm3\n"
  195. " xorps %%xmm4, %%xmm4\n"
  196. " xorps %%xmm5, %%xmm5\n"
  197. " xorps %%xmm6, %%xmm6\n"
  198. " xorps %%xmm7, %%xmm7\n"
  199. XALIGN
  200. ".bsLoop:\n"
  201. // 0:
  202. " addps %%xmm1, %%xmm0\n"
  203. " addps %%xmm2, %%xmm1\n"
  204. " addps %%xmm3, %%xmm2\n"
  205. " addps %%xmm4, %%xmm3\n"
  206. " addps %%xmm5, %%xmm4\n"
  207. " addps %%xmm6, %%xmm5\n"
  208. " addps %%xmm7, %%xmm6\n"
  209. " addps %%xmm0, %%xmm7\n"
  210. // 1:
  211. " addps %%xmm1, %%xmm0\n"
  212. " addps %%xmm2, %%xmm1\n"
  213. " addps %%xmm3, %%xmm2\n"
  214. " addps %%xmm4, %%xmm3\n"
  215. " addps %%xmm5, %%xmm4\n"
  216. " addps %%xmm6, %%xmm5\n"
  217. " addps %%xmm7, %%xmm6\n"
  218. " addps %%xmm0, %%xmm7\n"
  219. // 2:
  220. " addps %%xmm1, %%xmm0\n"
  221. " addps %%xmm2, %%xmm1\n"
  222. " addps %%xmm3, %%xmm2\n"
  223. " addps %%xmm4, %%xmm3\n"
  224. " addps %%xmm5, %%xmm4\n"
  225. " addps %%xmm6, %%xmm5\n"
  226. " addps %%xmm7, %%xmm6\n"
  227. " addps %%xmm0, %%xmm7\n"
  228. // 3:
  229. " addps %%xmm1, %%xmm0\n"
  230. " addps %%xmm2, %%xmm1\n"
  231. " addps %%xmm3, %%xmm2\n"
  232. " addps %%xmm4, %%xmm3\n"
  233. " addps %%xmm5, %%xmm4\n"
  234. " addps %%xmm6, %%xmm5\n"
  235. " addps %%xmm7, %%xmm6\n"
  236. " addps %%xmm0, %%xmm7\n"
  237. // 4:
  238. " addps %%xmm1, %%xmm0\n"
  239. " addps %%xmm2, %%xmm1\n"
  240. " addps %%xmm3, %%xmm2\n"
  241. " addps %%xmm4, %%xmm3\n"
  242. " addps %%xmm5, %%xmm4\n"
  243. " addps %%xmm6, %%xmm5\n"
  244. " addps %%xmm7, %%xmm6\n"
  245. " addps %%xmm0, %%xmm7\n"
  246. // 5:
  247. " addps %%xmm1, %%xmm0\n"
  248. " addps %%xmm2, %%xmm1\n"
  249. " addps %%xmm3, %%xmm2\n"
  250. " addps %%xmm4, %%xmm3\n"
  251. " addps %%xmm5, %%xmm4\n"
  252. " addps %%xmm6, %%xmm5\n"
  253. " addps %%xmm7, %%xmm6\n"
  254. " addps %%xmm0, %%xmm7\n"
  255. // 6:
  256. " addps %%xmm1, %%xmm0\n"
  257. " addps %%xmm2, %%xmm1\n"
  258. " addps %%xmm3, %%xmm2\n"
  259. " addps %%xmm4, %%xmm3\n"
  260. " addps %%xmm5, %%xmm4\n"
  261. " addps %%xmm6, %%xmm5\n"
  262. " addps %%xmm7, %%xmm6\n"
  263. " addps %%xmm0, %%xmm7\n"
  264. // 7:
  265. " addps %%xmm1, %%xmm0\n"
  266. " addps %%xmm2, %%xmm1\n"
  267. " addps %%xmm3, %%xmm2\n"
  268. " addps %%xmm4, %%xmm3\n"
  269. " addps %%xmm5, %%xmm4\n"
  270. " addps %%xmm6, %%xmm5\n"
  271. " addps %%xmm7, %%xmm6\n"
  272. " addps %%xmm0, %%xmm7\n"
  273. // 8:
  274. " addps %%xmm1, %%xmm0\n"
  275. " addps %%xmm2, %%xmm1\n"
  276. " addps %%xmm3, %%xmm2\n"
  277. " addps %%xmm4, %%xmm3\n"
  278. " addps %%xmm5, %%xmm4\n"
  279. " addps %%xmm6, %%xmm5\n"
  280. " addps %%xmm7, %%xmm6\n"
  281. " addps %%xmm0, %%xmm7\n"
  282. // 9:
  283. " addps %%xmm1, %%xmm0\n"
  284. " addps %%xmm2, %%xmm1\n"
  285. " addps %%xmm3, %%xmm2\n"
  286. " addps %%xmm4, %%xmm3\n"
  287. " addps %%xmm5, %%xmm4\n"
  288. " addps %%xmm6, %%xmm5\n"
  289. " addps %%xmm7, %%xmm6\n"
  290. " addps %%xmm0, %%xmm7\n"
  291. //10:
  292. " addps %%xmm1, %%xmm0\n"
  293. " addps %%xmm2, %%xmm1\n"
  294. " addps %%xmm3, %%xmm2\n"
  295. " addps %%xmm4, %%xmm3\n"
  296. " addps %%xmm5, %%xmm4\n"
  297. " addps %%xmm6, %%xmm5\n"
  298. " addps %%xmm7, %%xmm6\n"
  299. " addps %%xmm0, %%xmm7\n"
  300. //11:
  301. " addps %%xmm1, %%xmm0\n"
  302. " addps %%xmm2, %%xmm1\n"
  303. " addps %%xmm3, %%xmm2\n"
  304. " addps %%xmm4, %%xmm3\n"
  305. " addps %%xmm5, %%xmm4\n"
  306. " addps %%xmm6, %%xmm5\n"
  307. " addps %%xmm7, %%xmm6\n"
  308. " addps %%xmm0, %%xmm7\n"
  309. //12:
  310. " addps %%xmm1, %%xmm0\n"
  311. " addps %%xmm2, %%xmm1\n"
  312. " addps %%xmm3, %%xmm2\n"
  313. " addps %%xmm4, %%xmm3\n"
  314. " addps %%xmm5, %%xmm4\n"
  315. " addps %%xmm6, %%xmm5\n"
  316. " addps %%xmm7, %%xmm6\n"
  317. " addps %%xmm0, %%xmm7\n"
  318. //13:
  319. " addps %%xmm1, %%xmm0\n"
  320. " addps %%xmm2, %%xmm1\n"
  321. " addps %%xmm3, %%xmm2\n"
  322. " addps %%xmm4, %%xmm3\n"
  323. " addps %%xmm5, %%xmm4\n"
  324. " addps %%xmm6, %%xmm5\n"
  325. " addps %%xmm7, %%xmm6\n"
  326. " addps %%xmm0, %%xmm7\n"
  327. //14:
  328. " addps %%xmm1, %%xmm0\n"
  329. " addps %%xmm2, %%xmm1\n"
  330. " addps %%xmm3, %%xmm2\n"
  331. " addps %%xmm4, %%xmm3\n"
  332. " addps %%xmm5, %%xmm4\n"
  333. " addps %%xmm6, %%xmm5\n"
  334. " addps %%xmm7, %%xmm6\n"
  335. " addps %%xmm0, %%xmm7\n"
  336. //15:
  337. " addps %%xmm1, %%xmm0\n"
  338. " addps %%xmm2, %%xmm1\n"
  339. " addps %%xmm3, %%xmm2\n"
  340. " addps %%xmm4, %%xmm3\n"
  341. " addps %%xmm5, %%xmm4\n"
  342. " addps %%xmm6, %%xmm5\n"
  343. " addps %%xmm7, %%xmm6\n"
  344. " addps %%xmm0, %%xmm7\n"
  345. //16:
  346. " addps %%xmm1, %%xmm0\n"
  347. " addps %%xmm2, %%xmm1\n"
  348. " addps %%xmm3, %%xmm2\n"
  349. " addps %%xmm4, %%xmm3\n"
  350. " addps %%xmm5, %%xmm4\n"
  351. " addps %%xmm6, %%xmm5\n"
  352. " addps %%xmm7, %%xmm6\n"
  353. " addps %%xmm0, %%xmm7\n"
  354. //17:
  355. " addps %%xmm1, %%xmm0\n"
  356. " addps %%xmm2, %%xmm1\n"
  357. " addps %%xmm3, %%xmm2\n"
  358. " addps %%xmm4, %%xmm3\n"
  359. " addps %%xmm5, %%xmm4\n"
  360. " addps %%xmm6, %%xmm5\n"
  361. " addps %%xmm7, %%xmm6\n"
  362. " addps %%xmm0, %%xmm7\n"
  363. //18:
  364. " addps %%xmm1, %%xmm0\n"
  365. " addps %%xmm2, %%xmm1\n"
  366. " addps %%xmm3, %%xmm2\n"
  367. " addps %%xmm4, %%xmm3\n"
  368. " addps %%xmm5, %%xmm4\n"
  369. " addps %%xmm6, %%xmm5\n"
  370. " addps %%xmm7, %%xmm6\n"
  371. " addps %%xmm0, %%xmm7\n"
  372. //19:
  373. " addps %%xmm1, %%xmm0\n"
  374. " addps %%xmm2, %%xmm1\n"
  375. " addps %%xmm3, %%xmm2\n"
  376. " addps %%xmm4, %%xmm3\n"
  377. " addps %%xmm5, %%xmm4\n"
  378. " addps %%xmm6, %%xmm5\n"
  379. " addps %%xmm7, %%xmm6\n"
  380. " addps %%xmm0, %%xmm7\n"
  381. //20:
  382. " addps %%xmm1, %%xmm0\n"
  383. " addps %%xmm2, %%xmm1\n"
  384. " addps %%xmm3, %%xmm2\n"
  385. " addps %%xmm4, %%xmm3\n"
  386. " addps %%xmm5, %%xmm4\n"
  387. " addps %%xmm6, %%xmm5\n"
  388. " addps %%xmm7, %%xmm6\n"
  389. " addps %%xmm0, %%xmm7\n"
  390. //21:
  391. " addps %%xmm1, %%xmm0\n"
  392. " addps %%xmm2, %%xmm1\n"
  393. " addps %%xmm3, %%xmm2\n"
  394. " addps %%xmm4, %%xmm3\n"
  395. " addps %%xmm5, %%xmm4\n"
  396. " addps %%xmm6, %%xmm5\n"
  397. " addps %%xmm7, %%xmm6\n"
  398. " addps %%xmm0, %%xmm7\n"
  399. //22:
  400. " addps %%xmm1, %%xmm0\n"
  401. " addps %%xmm2, %%xmm1\n"
  402. " addps %%xmm3, %%xmm2\n"
  403. " addps %%xmm4, %%xmm3\n"
  404. " addps %%xmm5, %%xmm4\n"
  405. " addps %%xmm6, %%xmm5\n"
  406. " addps %%xmm7, %%xmm6\n"
  407. " addps %%xmm0, %%xmm7\n"
  408. //23:
  409. " addps %%xmm1, %%xmm0\n"
  410. " addps %%xmm2, %%xmm1\n"
  411. " addps %%xmm3, %%xmm2\n"
  412. " addps %%xmm4, %%xmm3\n"
  413. " addps %%xmm5, %%xmm4\n"
  414. " addps %%xmm6, %%xmm5\n"
  415. " addps %%xmm7, %%xmm6\n"
  416. " addps %%xmm0, %%xmm7\n"
  417. //24:
  418. " addps %%xmm1, %%xmm0\n"
  419. " addps %%xmm2, %%xmm1\n"
  420. " addps %%xmm3, %%xmm2\n"
  421. " addps %%xmm4, %%xmm3\n"
  422. " addps %%xmm5, %%xmm4\n"
  423. " addps %%xmm6, %%xmm5\n"
  424. " addps %%xmm7, %%xmm6\n"
  425. " addps %%xmm0, %%xmm7\n"
  426. //25:
  427. " addps %%xmm1, %%xmm0\n"
  428. " addps %%xmm2, %%xmm1\n"
  429. " addps %%xmm3, %%xmm2\n"
  430. " addps %%xmm4, %%xmm3\n"
  431. " addps %%xmm5, %%xmm4\n"
  432. " addps %%xmm6, %%xmm5\n"
  433. " addps %%xmm7, %%xmm6\n"
  434. " addps %%xmm0, %%xmm7\n"
  435. //26:
  436. " addps %%xmm1, %%xmm0\n"
  437. " addps %%xmm2, %%xmm1\n"
  438. " addps %%xmm3, %%xmm2\n"
  439. " addps %%xmm4, %%xmm3\n"
  440. " addps %%xmm5, %%xmm4\n"
  441. " addps %%xmm6, %%xmm5\n"
  442. " addps %%xmm7, %%xmm6\n"
  443. " addps %%xmm0, %%xmm7\n"
  444. //27:
  445. " addps %%xmm1, %%xmm0\n"
  446. " addps %%xmm2, %%xmm1\n"
  447. " addps %%xmm3, %%xmm2\n"
  448. " addps %%xmm4, %%xmm3\n"
  449. " addps %%xmm5, %%xmm4\n"
  450. " addps %%xmm6, %%xmm5\n"
  451. " addps %%xmm7, %%xmm6\n"
  452. " addps %%xmm0, %%xmm7\n"
  453. //28:
  454. " addps %%xmm1, %%xmm0\n"
  455. " addps %%xmm2, %%xmm1\n"
  456. " addps %%xmm3, %%xmm2\n"
  457. " addps %%xmm4, %%xmm3\n"
  458. " addps %%xmm5, %%xmm4\n"
  459. " addps %%xmm6, %%xmm5\n"
  460. " addps %%xmm7, %%xmm6\n"
  461. " addps %%xmm0, %%xmm7\n"
  462. //29:
  463. " addps %%xmm1, %%xmm0\n"
  464. " addps %%xmm2, %%xmm1\n"
  465. " addps %%xmm3, %%xmm2\n"
  466. " addps %%xmm4, %%xmm3\n"
  467. " addps %%xmm5, %%xmm4\n"
  468. " addps %%xmm6, %%xmm5\n"
  469. " addps %%xmm7, %%xmm6\n"
  470. " addps %%xmm0, %%xmm7\n"
  471. //30:
  472. " addps %%xmm1, %%xmm0\n"
  473. " addps %%xmm2, %%xmm1\n"
  474. " addps %%xmm3, %%xmm2\n"
  475. " addps %%xmm4, %%xmm3\n"
  476. " addps %%xmm5, %%xmm4\n"
  477. " addps %%xmm6, %%xmm5\n"
  478. " addps %%xmm7, %%xmm6\n"
  479. " addps %%xmm0, %%xmm7\n"
  480. //31:
  481. " addps %%xmm1, %%xmm0\n"
  482. " addps %%xmm2, %%xmm1\n"
  483. " addps %%xmm3, %%xmm2\n"
  484. " addps %%xmm4, %%xmm3\n"
  485. " addps %%xmm5, %%xmm4\n"
  486. " addps %%xmm6, %%xmm5\n"
  487. " addps %%xmm7, %%xmm6\n"
  488. " addps %%xmm0, %%xmm7\n"
  489. " dec %%eax\n"
  490. " jnz .bsLoop\n"
  491. ::"a"(cycles)
  492. );
  493. #else
  494. # ifdef COMPILER_MICROSOFT
  495. __asm {
  496. mov eax, cycles
  497. xorps xmm0, xmm0
  498. xorps xmm1, xmm1
  499. xorps xmm2, xmm2
  500. xorps xmm3, xmm3
  501. xorps xmm4, xmm4
  502. xorps xmm5, xmm5
  503. xorps xmm6, xmm6
  504. xorps xmm7, xmm7
  505. //--
  506. align 16
  507. bsLoop:
  508. // 0:
  509. addps xmm0, xmm1
  510. addps xmm1, xmm2
  511. addps xmm2, xmm3
  512. addps xmm3, xmm4
  513. addps xmm4, xmm5
  514. addps xmm5, xmm6
  515. addps xmm6, xmm7
  516. addps xmm7, xmm0
  517. // 1:
  518. addps xmm0, xmm1
  519. addps xmm1, xmm2
  520. addps xmm2, xmm3
  521. addps xmm3, xmm4
  522. addps xmm4, xmm5
  523. addps xmm5, xmm6
  524. addps xmm6, xmm7
  525. addps xmm7, xmm0
  526. // 2:
  527. addps xmm0, xmm1
  528. addps xmm1, xmm2
  529. addps xmm2, xmm3
  530. addps xmm3, xmm4
  531. addps xmm4, xmm5
  532. addps xmm5, xmm6
  533. addps xmm6, xmm7
  534. addps xmm7, xmm0
  535. // 3:
  536. addps xmm0, xmm1
  537. addps xmm1, xmm2
  538. addps xmm2, xmm3
  539. addps xmm3, xmm4
  540. addps xmm4, xmm5
  541. addps xmm5, xmm6
  542. addps xmm6, xmm7
  543. addps xmm7, xmm0
  544. // 4:
  545. addps xmm0, xmm1
  546. addps xmm1, xmm2
  547. addps xmm2, xmm3
  548. addps xmm3, xmm4
  549. addps xmm4, xmm5
  550. addps xmm5, xmm6
  551. addps xmm6, xmm7
  552. addps xmm7, xmm0
  553. // 5:
  554. addps xmm0, xmm1
  555. addps xmm1, xmm2
  556. addps xmm2, xmm3
  557. addps xmm3, xmm4
  558. addps xmm4, xmm5
  559. addps xmm5, xmm6
  560. addps xmm6, xmm7
  561. addps xmm7, xmm0
  562. // 6:
  563. addps xmm0, xmm1
  564. addps xmm1, xmm2
  565. addps xmm2, xmm3
  566. addps xmm3, xmm4
  567. addps xmm4, xmm5
  568. addps xmm5, xmm6
  569. addps xmm6, xmm7
  570. addps xmm7, xmm0
  571. // 7:
  572. addps xmm0, xmm1
  573. addps xmm1, xmm2
  574. addps xmm2, xmm3
  575. addps xmm3, xmm4
  576. addps xmm4, xmm5
  577. addps xmm5, xmm6
  578. addps xmm6, xmm7
  579. addps xmm7, xmm0
  580. // 8:
  581. addps xmm0, xmm1
  582. addps xmm1, xmm2
  583. addps xmm2, xmm3
  584. addps xmm3, xmm4
  585. addps xmm4, xmm5
  586. addps xmm5, xmm6
  587. addps xmm6, xmm7
  588. addps xmm7, xmm0
  589. // 9:
  590. addps xmm0, xmm1
  591. addps xmm1, xmm2
  592. addps xmm2, xmm3
  593. addps xmm3, xmm4
  594. addps xmm4, xmm5
  595. addps xmm5, xmm6
  596. addps xmm6, xmm7
  597. addps xmm7, xmm0
  598. // 10:
  599. addps xmm0, xmm1
  600. addps xmm1, xmm2
  601. addps xmm2, xmm3
  602. addps xmm3, xmm4
  603. addps xmm4, xmm5
  604. addps xmm5, xmm6
  605. addps xmm6, xmm7
  606. addps xmm7, xmm0
  607. // 11:
  608. addps xmm0, xmm1
  609. addps xmm1, xmm2
  610. addps xmm2, xmm3
  611. addps xmm3, xmm4
  612. addps xmm4, xmm5
  613. addps xmm5, xmm6
  614. addps xmm6, xmm7
  615. addps xmm7, xmm0
  616. // 12:
  617. addps xmm0, xmm1
  618. addps xmm1, xmm2
  619. addps xmm2, xmm3
  620. addps xmm3, xmm4
  621. addps xmm4, xmm5
  622. addps xmm5, xmm6
  623. addps xmm6, xmm7
  624. addps xmm7, xmm0
  625. // 13:
  626. addps xmm0, xmm1
  627. addps xmm1, xmm2
  628. addps xmm2, xmm3
  629. addps xmm3, xmm4
  630. addps xmm4, xmm5
  631. addps xmm5, xmm6
  632. addps xmm6, xmm7
  633. addps xmm7, xmm0
  634. // 14:
  635. addps xmm0, xmm1
  636. addps xmm1, xmm2
  637. addps xmm2, xmm3
  638. addps xmm3, xmm4
  639. addps xmm4, xmm5
  640. addps xmm5, xmm6
  641. addps xmm6, xmm7
  642. addps xmm7, xmm0
  643. // 15:
  644. addps xmm0, xmm1
  645. addps xmm1, xmm2
  646. addps xmm2, xmm3
  647. addps xmm3, xmm4
  648. addps xmm4, xmm5
  649. addps xmm5, xmm6
  650. addps xmm6, xmm7
  651. addps xmm7, xmm0
  652. // 16:
  653. addps xmm0, xmm1
  654. addps xmm1, xmm2
  655. addps xmm2, xmm3
  656. addps xmm3, xmm4
  657. addps xmm4, xmm5
  658. addps xmm5, xmm6
  659. addps xmm6, xmm7
  660. addps xmm7, xmm0
  661. // 17:
  662. addps xmm0, xmm1
  663. addps xmm1, xmm2
  664. addps xmm2, xmm3
  665. addps xmm3, xmm4
  666. addps xmm4, xmm5
  667. addps xmm5, xmm6
  668. addps xmm6, xmm7
  669. addps xmm7, xmm0
  670. // 18:
  671. addps xmm0, xmm1
  672. addps xmm1, xmm2
  673. addps xmm2, xmm3
  674. addps xmm3, xmm4
  675. addps xmm4, xmm5
  676. addps xmm5, xmm6
  677. addps xmm6, xmm7
  678. addps xmm7, xmm0
  679. // 19:
  680. addps xmm0, xmm1
  681. addps xmm1, xmm2
  682. addps xmm2, xmm3
  683. addps xmm3, xmm4
  684. addps xmm4, xmm5
  685. addps xmm5, xmm6
  686. addps xmm6, xmm7
  687. addps xmm7, xmm0
  688. // 20:
  689. addps xmm0, xmm1
  690. addps xmm1, xmm2
  691. addps xmm2, xmm3
  692. addps xmm3, xmm4
  693. addps xmm4, xmm5
  694. addps xmm5, xmm6
  695. addps xmm6, xmm7
  696. addps xmm7, xmm0
  697. // 21:
  698. addps xmm0, xmm1
  699. addps xmm1, xmm2
  700. addps xmm2, xmm3
  701. addps xmm3, xmm4
  702. addps xmm4, xmm5
  703. addps xmm5, xmm6
  704. addps xmm6, xmm7
  705. addps xmm7, xmm0
  706. // 22:
  707. addps xmm0, xmm1
  708. addps xmm1, xmm2
  709. addps xmm2, xmm3
  710. addps xmm3, xmm4
  711. addps xmm4, xmm5
  712. addps xmm5, xmm6
  713. addps xmm6, xmm7
  714. addps xmm7, xmm0
  715. // 23:
  716. addps xmm0, xmm1
  717. addps xmm1, xmm2
  718. addps xmm2, xmm3
  719. addps xmm3, xmm4
  720. addps xmm4, xmm5
  721. addps xmm5, xmm6
  722. addps xmm6, xmm7
  723. addps xmm7, xmm0
  724. // 24:
  725. addps xmm0, xmm1
  726. addps xmm1, xmm2
  727. addps xmm2, xmm3
  728. addps xmm3, xmm4
  729. addps xmm4, xmm5
  730. addps xmm5, xmm6
  731. addps xmm6, xmm7
  732. addps xmm7, xmm0
  733. // 25:
  734. addps xmm0, xmm1
  735. addps xmm1, xmm2
  736. addps xmm2, xmm3
  737. addps xmm3, xmm4
  738. addps xmm4, xmm5
  739. addps xmm5, xmm6
  740. addps xmm6, xmm7
  741. addps xmm7, xmm0
  742. // 26:
  743. addps xmm0, xmm1
  744. addps xmm1, xmm2
  745. addps xmm2, xmm3
  746. addps xmm3, xmm4
  747. addps xmm4, xmm5
  748. addps xmm5, xmm6
  749. addps xmm6, xmm7
  750. addps xmm7, xmm0
  751. // 27:
  752. addps xmm0, xmm1
  753. addps xmm1, xmm2
  754. addps xmm2, xmm3
  755. addps xmm3, xmm4
  756. addps xmm4, xmm5
  757. addps xmm5, xmm6
  758. addps xmm6, xmm7
  759. addps xmm7, xmm0
  760. // 28:
  761. addps xmm0, xmm1
  762. addps xmm1, xmm2
  763. addps xmm2, xmm3
  764. addps xmm3, xmm4
  765. addps xmm4, xmm5
  766. addps xmm5, xmm6
  767. addps xmm6, xmm7
  768. addps xmm7, xmm0
  769. // 29:
  770. addps xmm0, xmm1
  771. addps xmm1, xmm2
  772. addps xmm2, xmm3
  773. addps xmm3, xmm4
  774. addps xmm4, xmm5
  775. addps xmm5, xmm6
  776. addps xmm6, xmm7
  777. addps xmm7, xmm0
  778. // 30:
  779. addps xmm0, xmm1
  780. addps xmm1, xmm2
  781. addps xmm2, xmm3
  782. addps xmm3, xmm4
  783. addps xmm4, xmm5
  784. addps xmm5, xmm6
  785. addps xmm6, xmm7
  786. addps xmm7, xmm0
  787. // 31:
  788. addps xmm0, xmm1
  789. addps xmm1, xmm2
  790. addps xmm2, xmm3
  791. addps xmm3, xmm4
  792. addps xmm4, xmm5
  793. addps xmm5, xmm6
  794. addps xmm6, xmm7
  795. addps xmm7, xmm0
  796. //----------------------
  797. dec eax
  798. jnz bsLoop
  799. }
  800. # else
  801. # error "Unsupported compiler"
  802. # endif /* COMPILER_MICROSOFT */
  803. #endif /* COMPILER_GCC */
  804. }
  805. #endif /* INLINE_ASSEMBLY_SUPPORTED */