| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- #ifndef __asm_math_H__
- #define __asm_math_H__
- #include "OgrePrerequisites.h"
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC
- # pragma warning (push)
- // disable "instruction may be inaccurate on some Pentiums"
- # pragma warning (disable : 4725)
- #endif
- namespace Ogre
- {
- /*=============================================================================
- ASM math routines posted by davepermen et al on flipcode forums
- =============================================================================*/
// Single-precision pi and pi/2.
// Both are spelled as literals so that they are constant-initialised: no
// call into the math library runs during static initialisation, which means
// the values are already correct for code that executes before main().
// Each literal rounds to the float that a correctly rounded
// 4.0f * atan( 1.0f ) (respectively half of it) evaluates to.
const float pi = 3.14159265358979323846f;
const float half_pi = 1.57079632679489661923f;
- /*=============================================================================
- NO EXPLICIT RETURN REQUIRED FROM THESE METHODS!!
- =============================================================================*/
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- # pragma warning( push )
- # pragma warning( disable: 4035 )
- #endif
- float asm_arccos( float r ) {
- // return half_pi + arctan( r / -sqr( 1.f - r * r ) );
-
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- float asm_one = 1.f;
- float asm_half_pi = half_pi;
- __asm {
- fld r // r0 = r
- fld r // r1 = r0, r0 = r
- fmul r // r0 = r0 * r
- fsubr asm_one // r0 = r0 - 1.f
- fsqrt // r0 = sqrtf( r0 )
- fchs // r0 = - r0
- fdiv // r0 = r1 / r0
- fld1 // {{ r0 = atan( r0 )
- fpatan // }}
- fadd asm_half_pi // r0 = r0 + pi / 2
- } // returns r0
- #else
- return float( acos( r ) );
- #endif
- }
- float asm_arcsin( float r ) {
- // return arctan( r / sqr( 1.f - r * r ) );
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- const float asm_one = 1.f;
- __asm {
- fld r // r0 = r
- fld r // r1 = r0, r0 = r
- fmul r // r0 = r0 * r
- fsubr asm_one // r0 = r0 - 1.f
- fsqrt // r0 = sqrtf( r0 )
- fdiv // r0 = r1 / r0
- fld1 // {{ r0 = atan( r0 )
- fpatan // }}
- } // returns r0
- #else
- return float( asin( r ) );
- #endif
- }
- float asm_arctan( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __asm {
- fld r // r0 = r
- fld1 // {{ r0 = atan( r0 )
- fpatan // }}
- } // returns r0
- #else
- return float( atan( r ) );
- #endif
- }
- float asm_sin( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __asm {
- fld r // r0 = r
- fsin // r0 = sinf( r0 )
- } // returns r0
- #else
- return sin( r );
- #endif
- }
- float asm_cos( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __asm {
- fld r // r0 = r
- fcos // r0 = cosf( r0 )
- } // returns r0
- #else
-
- return cos( r );
- #endif
- }
- float asm_tan( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- // return sin( r ) / cos( r );
- __asm {
- fld r // r0 = r
- fsin // r0 = sinf( r0 )
- fld r // r1 = r0, r0 = r
- fcos // r0 = cosf( r0 )
- fdiv // r0 = r1 / r0
- } // returns r0
- #else
-
- return tan( r );
- #endif
- }
- // returns a for a * a = r
- float asm_sqrt( float r )
- {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __asm {
- fld r // r0 = r
- fsqrt // r0 = sqrtf( r0 )
- } // returns r0
- #else
- return sqrt( r );
- #endif
- }
- // returns 1 / a for a * a = r
- // -- Use this for Vector normalisation!!!
- float asm_rsq( float r )
- {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __asm {
- fld1 // r0 = 1.f
- fld r // r1 = r0, r0 = r
- fsqrt // r0 = sqrtf( r0 )
- fdiv // r0 = r1 / r0
- } // returns r0
- #else
- return 1. / sqrt( r );
- #endif
- }
- // returns 1 / a for a * a = r
- // Another version
- float apx_rsq( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- const float asm_dot5 = 0.5f;
- const float asm_1dot5 = 1.5f;
- __asm {
- fld r // r0 = r
- fmul asm_dot5 // r0 = r0 * .5f
- mov eax, r // eax = r
- shr eax, 0x1 // eax = eax >> 1
- neg eax // eax = -eax
- add eax, 0x5F400000 // eax = eax & MAGICAL NUMBER
- mov r, eax // r = eax
- fmul r // r0 = r0 * r
- fmul r // r0 = r0 * r
- fsubr asm_1dot5 // r0 = 1.5f - r0
- fmul r // r0 = r0 * r
- } // returns r0
- #else
- return 1. / sqrt( r );
- #endif
- }
- /* InvSqrt -- approximate 1 / sqrt( fValue ); very MS-specific.
- Finally the best InvSqrt implementation?
- Use for vector normalisation instead of 1/length() * x,y,z
-
- The function is compiled only when the #if below selects MSVC on a 32-bit
- architecture; that #if has no #else, so nothing is defined here for other
- builds.
-
- It is a naked function, so the asm addresses everything relative to esp
- itself: the argument is read (and overwritten) at [esp+4] and removed by
- "ret 4"; [esp-8] and [esp-12] are used as scratch slots. The result is
- left in st(0).
-
- Method, with y the integer-derived first guess and h the argument after
- its exponent field has been decremented: three passes of
- y = y * ( 1.5 - h * y * y ), interleaved on the x87 stack.
-
- NOTE(review): the scratch slots lie below esp and esp is never adjusted --
- confirm that nothing can overwrite that area while the function runs.
- */
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- __declspec(naked) float __fastcall InvSqrt(float fValue)
- {
- __asm
- {
- mov eax, 0be6eb508h // magic constant for the first guess
- mov dword ptr[esp-12],03fc00000h // scratch[esp-12] = bit pattern of 1.5f
- sub eax, dword ptr[esp + 4] // eax = magic - bits( fValue )
- sub dword ptr[esp+4], 800000h // exponent field of fValue - 1: h (fValue / 2 for normal values)
- shr eax, 1 // eax = ( magic - bits ) >> 1: bits of the first guess y
- mov dword ptr[esp - 8], eax // scratch[esp-8] = y
- fld dword ptr[esp - 8] // st0 = y
- fmul st, st // st0 = y*y
- fld dword ptr[esp - 8] // st0 = y, st1 = y*y
- fxch st(1) // st0 = y*y, st1 = y
- fmul dword ptr[esp + 4] // st0 = t = h*y*y, st1 = y
- fld dword ptr[esp - 12] // st0 = 1.5, st1 = t, st2 = y
- fld st(0) // st0 = 1.5, st1 = 1.5, st2 = t, st3 = y
- fsub st,st(2) // st0 = a = 1.5 - t (first correction factor)
- fld st(1) // st0 = 1.5, st1 = a, st2 = 1.5, st3 = t, st4 = y
- fxch st(1) // st0 = a, st1 = 1.5
- fmul st(3),st // st3 = t*a
- fmul st(3),st // st3 = t*a*a
- fmulp st(4),st // st4 = y*a, pop: st0 = 1.5, st1 = 1.5, st2 = t*a*a, st3 = y*a
- fsub st,st(2) // st0 = b = 1.5 - t*a*a (second correction factor)
- fmul st(2),st // st2 = t*a*a*b
- fmul st(3),st // st3 = y*a*b
- fmulp st(2),st // st2 = t*a*a*b*b, pop: st0 = 1.5, st1 = t*a*a*b*b, st2 = y*a*b
- fxch st(1) // st0 = t*a*a*b*b, st1 = 1.5
- fsubp st(1),st // st0 = c = 1.5 - t*a*a*b*b (third correction factor), st1 = y*a*b
- fmulp st(1), st // st0 = y*a*b*c: the result
- ret 4 // return, removing the 4-byte argument from the stack
- }
- }
- #endif
- // returns a random number
- FORCEINLINE float asm_rand()
- {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- #if 0
- #if OGRE_COMP_VER >= 1300
- static unsigned __int64 q = time( NULL );
- _asm {
- movq mm0, q
- // do the magic MMX thing
- pshufw mm1, mm0, 0x1E
- paddd mm0, mm1
- // move to integer memory location and free MMX
- movq q, mm0
- emms
- }
- return float( q );
- #endif
- #else
- // VC6 does not support pshufw
- return float( rand() );
- #endif
- #else
- // GCC etc
- return float( rand() );
- #endif
- }
- // returns the maximum random number
- FORCEINLINE float asm_rand_max()
- {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- #if 0
- #if OGRE_COMP_VER >= 1300
- return (std::numeric_limits< unsigned __int64 >::max)();
- return 9223372036854775807.0f;
- #endif
- #else
- // VC6 does not support unsigned __int64
- return float( RAND_MAX );
- #endif
- #else
- // GCC etc
- return float( RAND_MAX );
- #endif
- }
- // returns log2( r ) / log2( e )
- float asm_ln( float r ) {
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- const float asm_1_div_log2_e = .693147180559f;
- const float asm_neg1_div_3 = -.33333333333333333333333333333f;
- const float asm_neg2_div_3 = -.66666666666666666666666666667f;
- const float asm_2 = 2.f;
- int log_2 = 0;
- __asm {
- // log_2 = ( ( r >> 0x17 ) & 0xFF ) - 0x80;
- mov eax, r
- sar eax, 0x17
- and eax, 0xFF
- sub eax, 0x80
- mov log_2, eax
- // r = ( r & 0x807fffff ) + 0x3f800000;
- mov ebx, r
- and ebx, 0x807FFFFF
- add ebx, 0x3F800000
- mov r, ebx
- // r = ( asm_neg1_div_3 * r + asm_2 ) * r + asm_neg2_div_3; // (1)
- fld r
- fmul asm_neg1_div_3
- fadd asm_2
- fmul r
- fadd asm_neg2_div_3
- fild log_2
- fadd
- fmul asm_1_div_log2_e
- }
- #else
- return log( r );
- #endif
- }
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
- # pragma warning( pop )
- #endif
- } // namespace
- #if OGRE_COMPILER == OGRE_COMPILER_MSVC
- # pragma warning (pop)
- #endif
- #endif
|