123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- /**
- * Contains FPU related code.
- * \file IceFPU.h
- * \author Pierre Terdiman
- * \date April, 4, 2000
- */
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- // Include Guard
- #ifndef __ICEFPU_H__
- #define __ICEFPU_H__
- #if defined( PLATFORM_XENON )
- # include <PPCIntrinsics.h>
- #endif
//! Mask selecting the sign bit in the integer representation of a float.
#define	SIGN_BITMASK			0x80000000

//! Integer representation of a floating-point value: the storage of x reinterpreted as an udword (no numeric conversion).
//! The cast is a reference cast, so x must be an lvalue and the result is itself an lvalue.
#define IR(x)					((udword&)(x))

//! Signed integer representation of a floating-point value (same reinterpretation, as an sdword).
#define SIR(x)					((sdword&)(x))

//! Absolute integer representation of a floating-point value: IR(x) with the sign bit cleared.
#define AIR(x)					(IR(x)&0x7fffffff)

//! Floating-point representation of an integer value: the storage of x reinterpreted as a float.
#define FR(x)					((float&)(x))

//! Integer-based sign test of a floating-point value: non-zero when the sign bit of x is set (so -0.0f counts as negative).
//! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
#define IS_NEGATIVE_FLOAT(x)	(IR(x)&SIGN_BITMASK)
- //! Fast fabs for floating-point values. It just clears the sign bit.
- //! Don't use it blindy, it can be faster or slower than the FPU comparison, depends on the context.
- inline_ float FastFabs(float x)
- {
- udword FloatBits = IR(x)&0x7fffffff;
- return FR(FloatBits);
- }
- //! Fast square root for floating-point values.
- inline_ float FastSqrt(float square)
- {
- #if defined( PLATFORM_WINDOWS ) && !defined ( _WIN64 )
- float retval;
- __asm {
- mov eax, square
- sub eax, 0x3F800000
- sar eax, 1
- add eax, 0x3F800000
- mov [retval], eax
- }
- return retval;
- #elif defined( PLATFORM_XENON )
- return __fsqrts( square );
- #else
- return( sqrtf( square ) );
- #endif
- }
- //! Saturates positive to zero.
- inline_ float fsat(float f)
- {
- udword y = (udword&)f & ~((sdword&)f >>31);
- return (float&)y;
- }
- //! Computes 1.0f / sqrtf(x).
- inline_ float frsqrt(float f)
- {
- float x = f * 0.5f;
- udword y = 0x5f3759df - ((udword&)f >> 1);
- // Iteration...
- (float&)y = (float&)y * ( 1.5f - ( x * (float&)y * (float&)y ) );
- // Result
- return (float&)y;
- }
- //! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
- inline_ float InvSqrt(const float& x)
- {
- udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1;
- float y = *(float*)&tmp;
- return y * (1.47f - 0.47f * x * y * y);
- }
- //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
- //! See http://www.magic-software.com/3DGEDInvSqrt.html
- inline_ float RSqrt(float number)
- {
- long i;
- float x2, y;
- const float threehalfs = 1.5f;
- x2 = number * 0.5f;
- y = number;
- i = * (long *) &y;
- i = 0x5f3759df - (i >> 1);
- y = * (float *) &i;
- y = y * (threehalfs - (x2 * y * y));
- return y;
- }
- //! TO BE DOCUMENTED
- inline_ float fsqrt(float f)
- {
- udword y = ( ( (sdword&)f - 0x3f800000 ) >> 1 ) + 0x3f800000;
- // Iteration...?
- // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
- // Result
- return (float&)y;
- }
- //! Returns the float ranged espilon value.
- inline_ float fepsilon(float f)
- {
- udword b = (udword&)f & 0xff800000;
- udword a = b | 0x00000001;
- (float&)a -= (float&)b;
- // Result
- return (float&)a;
- }
- //! Is the float valid ?
- inline_ bool IsNAN(float value) { return (IR(value)&0x7f800000) == 0x7f800000; }
- inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; }
- inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; }
- inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; }
- inline_ bool IsValidFloat(float value)
- {
- if(IsNAN(value)) return false;
- if(IsIndeterminate(value)) return false;
- if(IsPlusInf(value)) return false;
- if(IsMinusInf(value)) return false;
- return true;
- }
- #define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x));
- /*
- //! FPU precision setting function.
- inline_ void SetFPU()
- {
- // This function evaluates whether the floating-point
- // control word is set to single precision/round to nearest/
- // exceptions disabled. If these conditions don't hold, the
- // function changes the control word to set them and returns
- // TRUE, putting the old control word value in the passback
- // location pointed to by pwOldCW.
- {
- uword wTemp, wSave;
-
- __asm fstcw wSave
- if (wSave & 0x300 || // Not single mode
- 0x3f != (wSave & 0x3f) || // Exceptions enabled
- wSave & 0xC00) // Not round to nearest mode
- {
- __asm
- {
- mov ax, wSave
- and ax, not 300h ;; single mode
- or ax, 3fh ;; disable all exceptions
- and ax, not 0xC00 ;; round to nearest mode
- mov wTemp, ax
- fldcw wTemp
- }
- }
- }
- }
- */
- //! This function computes the slowest possible floating-point value (you can also directly use FLT_EPSILON)
- inline_ float ComputeFloatEpsilon()
- {
- float f = 1.0f;
- ((udword&)f)^=1;
- return f - 1.0f; // You can check it's the same as FLT_EPSILON
- }
- inline_ bool IsFloatZero(float x, float epsilon=1e-6f)
- {
- return x*x < epsilon;
- }
- #if defined( PLATFORM_WINDOWS ) && !defined ( _WIN64 )
- #define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0
- #define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0
- #define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0
- #define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0
- #define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1
- #define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1
- #define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1
- #define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1
- #define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2
- #define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2
- #define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2
- #define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2
- #define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3
- #define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3
- #define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3
- #define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3
- #define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4
- #define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4
- #define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4
- #define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4
- #define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5
- #define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5
- #define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5
- #define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5
- #define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6
- #define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6
- #define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6
- #define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6
- #define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7
- #define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7
- #define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7
- #define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7
- //! A global function to find MAX(a,b) using FCOMI/FCMOV
- inline_ float FCMax2(float a, float b)
- {
- float Res;
- _asm fld [a]
- _asm fld [b]
- FCOMI_ST1
- FCMOVB_ST1
- _asm fstp [Res]
- _asm fcomp
- return Res;
- }
- //! A global function to find MIN(a,b) using FCOMI/FCMOV
- inline_ float FCMin2(float a, float b)
- {
- float Res;
- _asm fld [a]
- _asm fld [b]
- FCOMI_ST1
- FCMOVNB_ST1
- _asm fstp [Res]
- _asm fcomp
- return Res;
- }
- //! A global function to find MAX(a,b,c) using FCOMI/FCMOV
- inline_ float FCMax3(float a, float b, float c)
- {
- float Res;
- _asm fld [a]
- _asm fld [b]
- _asm fld [c]
- FCOMI_ST1
- FCMOVB_ST1
- FCOMI_ST2
- FCMOVB_ST2
- _asm fstp [Res]
- _asm fcompp
- return Res;
- }
- //! A global function to find MIN(a,b,c) using FCOMI/FCMOV
- inline_ float FCMin3(float a, float b, float c)
- {
- float Res;
- _asm fld [a]
- _asm fld [b]
- _asm fld [c]
- FCOMI_ST1
- FCMOVNB_ST1
- FCOMI_ST2
- FCMOVNB_ST2
- _asm fstp [Res]
- _asm fcompp
- return Res;
- }
- #elif defined( PLATFORM_XENON )
- inline float FCMax2(float a, float b) { return (float)__fsel((a) - (b), a, b ); }
- inline float FCMin2(float a, float b) { return (float)__fsel((a) - (b), b, a ); }
- inline float FCMax3(float a, float b, float c)
- {
- return (float)__fsel( (a) - (b), __fsel( (a) - (c), a, c ), __fsel( (b) - (c), b, c ) );
- }
- inline float FCMin3(float a, float b, float c)
- {
- return (float)__fsel( (a) - (b), __fsel( (b) - (c), c, b ), __fsel( (a) - (c), c, a ) );
- }
- #else
- inline float FCMax2(float a, float b) { return a>b ? a : b; }
- inline float FCMin2(float a, float b) { return a>b ? b : a; }
- inline float FCMax3(float a, float b, float c)
- {
- if (a >= b)
- return( FCMax2( a, c ) );
-
- return( FCMax2( b, c ) );
- }
- inline float FCMin3(float a, float b, float c)
- {
- if (a >= b)
- return( FCMin2( b, c ) );
-
- return( FCMin2( a, c ) );
- }
- #endif
- inline_ int ConvertToSortable(float f)
- {
- int& Fi = (int&)f;
- int Fmask = (Fi>>31);
- Fi ^= Fmask;
- Fmask &= ~(1<<31);
- Fi -= Fmask;
- return Fi;
- }
//! FPU modes, as returned by GetFPUMode().
//! NOTE(review): presumably these match the SetFPUFloorMode / SetFPUCeilMode / SetFPUBestMode setters - confirm against the implementation.
enum FPUMode
{
	FPU_FLOOR		= 0,
	FPU_CEIL		= 1,
	FPU_BEST		= 2,

	FPU_FORCE_DWORD	= 0x7fffffff	//!< Not a mode; presumably only there to force a 32-bit enum - confirm
};
- FUNCTION ICECORE_API FPUMode GetFPUMode();
- FUNCTION ICECORE_API void SaveFPU();
- FUNCTION ICECORE_API void RestoreFPU();
- FUNCTION ICECORE_API void SetFPUFloorMode();
- FUNCTION ICECORE_API void SetFPUCeilMode();
- FUNCTION ICECORE_API void SetFPUBestMode();
- FUNCTION ICECORE_API void SetFPUPrecision24();
- FUNCTION ICECORE_API void SetFPUPrecision53();
- FUNCTION ICECORE_API void SetFPUPrecision64();
- FUNCTION ICECORE_API void SetFPURoundingChop();
- FUNCTION ICECORE_API void SetFPURoundingUp();
- FUNCTION ICECORE_API void SetFPURoundingDown();
- FUNCTION ICECORE_API void SetFPURoundingNear();
- FUNCTION ICECORE_API int intChop(const float& f);
- FUNCTION ICECORE_API int intFloor(const float& f);
- FUNCTION ICECORE_API int intCeil(const float& f);
- #endif // __ICEFPU_H__
|