- /*
- * Copyright 2008 Veselin Georgiev,
- * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include "libcpuid.h"
- #include "asm-bits.h"
- int cpuid_exists_by_eflags(void)
- {
- #if defined(PLATFORM_X64)
- return 1; /* CPUID is always present on x86_64 */
- #elif defined(PLATFORM_X86)
- # if defined(COMPILER_GCC)
- int result;
- __asm __volatile(
- " pushfl\n"
- " pop %%eax\n"
- " mov %%eax, %%ecx\n"
- " xor $0x200000, %%eax\n"
- " push %%eax\n"
- " popfl\n"
- " pushfl\n"
- " pop %%eax\n"
- " xor %%ecx, %%eax\n"
- " mov %%eax, %0\n"
- " push %%ecx\n"
- " popfl\n"
- : "=m"(result)
- : :"eax", "ecx", "memory");
- return (result != 0);
- # elif defined(COMPILER_MICROSOFT)
- int result;
- __asm {
- pushfd
- pop eax
- mov ecx, eax
- xor eax, 0x200000
- push eax
- popfd
- pushfd
- pop eax
- xor eax, ecx
- mov result, eax
- push ecx
- popfd
- };
- return (result != 0);
- # else
- return 0;
- # endif /* COMPILER_MICROSOFT */
- #else
- return 0;
- #endif /* PLATFORM_X86 */
- }
- #ifdef INLINE_ASM_SUPPORTED
- /*
- * With MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
- * are implemented in separate .asm files. Otherwise, inline assembly is used.
- */
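- /*
- * exec_cpuid: regs[0..3] hold the EAX, EBX, ECX and EDX values passed to
- * CPUID (the requested leaf/sub-leaf) and receive the corresponding output
- * registers on return.
- *
- * Minimal usage sketch (illustrative only, not part of this file):
- *     uint32_t regs[4] = {0, 0, 0, 0};   // leaf 0: vendor string
- *     exec_cpuid(regs);
- *     // regs[1], regs[3], regs[2] now hold the 12-byte vendor ID
- */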
- void exec_cpuid(uint32_t *regs)
- {
- #ifdef COMPILER_GCC
- # ifdef PLATFORM_X64
- __asm __volatile(
- " mov %0, %%rdi\n"
- " push %%rbx\n"
- " push %%rcx\n"
- " push %%rdx\n"
-
- " mov (%%rdi), %%eax\n"
- " mov 4(%%rdi), %%ebx\n"
- " mov 8(%%rdi), %%ecx\n"
- " mov 12(%%rdi), %%edx\n"
-
- " cpuid\n"
-
- " movl %%eax, (%%rdi)\n"
- " movl %%ebx, 4(%%rdi)\n"
- " movl %%ecx, 8(%%rdi)\n"
- " movl %%edx, 12(%%rdi)\n"
- " pop %%rdx\n"
- " pop %%rcx\n"
- " pop %%rbx\n"
- :
- :"m"(regs)
- :"memory", "eax", "rdi"
- );
- # else
- __asm __volatile(
- " mov %0, %%edi\n"
- " push %%ebx\n"
- " push %%ecx\n"
- " push %%edx\n"
-
- " mov (%%edi), %%eax\n"
- " mov 4(%%edi), %%ebx\n"
- " mov 8(%%edi), %%ecx\n"
- " mov 12(%%edi), %%edx\n"
-
- " cpuid\n"
-
- " mov %%eax, (%%edi)\n"
- " mov %%ebx, 4(%%edi)\n"
- " mov %%ecx, 8(%%edi)\n"
- " mov %%edx, 12(%%edi)\n"
- " pop %%edx\n"
- " pop %%ecx\n"
- " pop %%ebx\n"
- :
- :"m"(regs)
- :"memory", "eax", "edi"
- );
- # endif /* PLATFORM_X64 */
- #else
- # ifdef COMPILER_MICROSOFT
- __asm {
- push ebx
- push ecx
- push edx
- push edi
- mov edi, regs
-
- mov eax, [edi]
- mov ebx, [edi+4]
- mov ecx, [edi+8]
- mov edx, [edi+12]
-
- cpuid
-
- mov [edi], eax
- mov [edi+4], ebx
- mov [edi+8], ecx
- mov [edi+12], edx
-
- pop edi
- pop edx
- pop ecx
- pop ebx
- }
- # else
- # error "Unsupported compiler"
- # endif /* COMPILER_MICROSOFT */
- #endif /* COMPILER_GCC */
- }
- #endif /* INLINE_ASM_SUPPORTED */
- #ifdef INLINE_ASM_SUPPORTED
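- /*
- * cpu_rdtsc: reads the time-stamp counter. RDTSC returns the low 32 bits of
- * the counter in EAX and the high 32 bits in EDX; the two halves are combined
- * into a single 64-bit value. A typical (illustrative) use is to read the
- * counter twice and treat the difference as elapsed TSC ticks.
- */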
- void cpu_rdtsc(uint64_t* result)
- {
- uint32_t low_part, hi_part;
- #ifdef COMPILER_GCC
- __asm __volatile (
- " rdtsc\n"
- " mov %%eax, %0\n"
- " mov %%edx, %1\n"
- :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
- );
- #else
- # ifdef COMPILER_MICROSOFT
- __asm {
- rdtsc
- mov low_part, eax
- mov hi_part, edx
- };
- # else
- # error "Unsupported compiler"
- # endif /* COMPILER_MICROSOFT */
- #endif /* COMPILER_GCC */
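- /* combine the EDX:EAX halves into one 64-bit counter value */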
- *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32);
- }
- #endif /* INLINE_ASM_SUPPORTED */
- #ifdef INLINE_ASM_SUPPORTED
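- /*
- * busy_sse_loop: zeroes XMM0-XMM7, then runs `cycles` iterations of a fully
- * unrolled body of 256 dependent ADDPS instructions (32 groups of 8), so the
- * loop's duration scales linearly with `cycles`.
- */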
- void busy_sse_loop(int cycles)
- {
- #ifdef COMPILER_GCC
- #ifndef __APPLE__
- # define XALIGN ".balign 16\n"
- #else
- # define XALIGN ".align 4\n"
- #endif
- __asm __volatile (
- " xorps %%xmm0, %%xmm0\n"
- " xorps %%xmm1, %%xmm1\n"
- " xorps %%xmm2, %%xmm2\n"
- " xorps %%xmm3, %%xmm3\n"
- " xorps %%xmm4, %%xmm4\n"
- " xorps %%xmm5, %%xmm5\n"
- " xorps %%xmm6, %%xmm6\n"
- " xorps %%xmm7, %%xmm7\n"
- XALIGN
- ".bsLoop:\n"
- // 0:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 1:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 2:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 3:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 4:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 5:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 6:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 7:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 8:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 9:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //10:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //11:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //12:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //13:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //14:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //15:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //16:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //17:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //18:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //19:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //20:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //21:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //22:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //23:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //24:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //25:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //26:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //27:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //28:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //29:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //30:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //31:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
-
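- /* EAX holds the loop counter; it is loaded from `cycles` via the "a" input constraint below */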
- " dec %%eax\n"
- " jnz .bsLoop\n"
- ::"a"(cycles)
- );
- #else
- # ifdef COMPILER_MICROSOFT
- __asm {
- mov eax, cycles
- xorps xmm0, xmm0
- xorps xmm1, xmm1
- xorps xmm2, xmm2
- xorps xmm3, xmm3
- xorps xmm4, xmm4
- xorps xmm5, xmm5
- xorps xmm6, xmm6
- xorps xmm7, xmm7
- //--
- align 16
- bsLoop:
- // 0:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 1:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 2:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 3:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 4:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 5:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 6:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 7:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 8:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 9:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 10:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 11:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 12:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 13:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 14:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 15:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 16:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 17:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 18:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 19:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 20:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 21:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 22:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 23:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 24:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 25:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 26:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 27:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 28:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 29:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 30:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 31:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- //----------------------
- dec eax
- jnz bsLoop
- }
- # else
- # error "Unsupported compiler"
- # endif /* COMPILER_MICROSOFT */
- #endif /* COMPILER_GCC */
- }
- #endif /* INLINE_ASM_SUPPORTED */