IceFPU.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  2. /**
  3. * Contains FPU related code.
  4. * \file IceFPU.h
  5. * \author Pierre Terdiman
  6. * \date April, 4, 2000
  7. */
  8. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  9. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  10. // Include Guard
  11. #ifndef __ICEFPU_H__
  12. #define __ICEFPU_H__
  13. #define SIGN_BITMASK 0x80000000
  14. namespace {
  15. union float_udword { float f; udword u; };
  16. union float_sdword { float f; sdword s; };
  17. }
  18. //! Integer representation of a floating-point value.
  19. //#define IR(x) ((udword&)(x))
  20. static inline udword IR(float x) { float_udword fu; fu.f = x; return fu.u; }
  21. //! Signed integer representation of a floating-point value.
  22. //#define SIR(x) ((sdword&)(x))
  23. static inline sdword SIR(float x) { float_sdword fs; fs.f = x; return fs.s; }
  //! Absolute integer representation of a floating-point value:
  //! the raw bits of x with the sign bit cleared.
  #define AIR(x) (IR(x) & ~SIGN_BITMASK)
  26. //! Floating-point representation of an integer value.
  27. //#define FR(x) ((float&)(x))
  28. static inline float FR(unsigned x) { float_udword fu; fu.u = x; return fu.f; }
  //! Integer-based sign test of a floating-point value.
  //! Evaluates to a non-zero value when the sign bit of x is set; note that this
  //! is also the case for -0.0f. The result is the masked bit, not a bool.
  //! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
  #define IS_NEGATIVE_FLOAT(x) (IR(x) & SIGN_BITMASK)
  32. //! Fast fabs for floating-point values. It just clears the sign bit.
  33. //! Don't use it blindy, it can be faster or slower than the FPU comparison, depends on the context.
  34. inline_ float FastFabs(float x)
  35. {
  36. udword FloatBits = IR(x)&0x7fffffff;
  37. return FR(FloatBits);
  38. }
  39. //! Fast square root for floating-point values.
  40. inline_ float FastSqrt(float square)
  41. {
  42. return sqrtf(square);
  43. }
  44. //! Saturates positive to zero.
  45. inline_ float fsat(float f)
  46. {
  47. udword y = IR(f) & ~(SIR(f) >>31);
  48. return FR(y);
  49. }
  50. //! Computes 1.0f / sqrtf(x).
  51. inline_ float frsqrt(float f)
  52. {
  53. float x = f * 0.5f;
  54. udword y = 0x5f3759df - (IR(f) >> 1);
  55. // Iteration...
  56. const float fy = FR(y);
  57. const float result = fy * ( 1.5f - ( x * fy * fy ) );
  58. // Result
  59. return result;
  60. }
  61. //! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
  62. inline_ float InvSqrt(const float& x)
  63. {
  64. const udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - IR(x)) >> 1;
  65. const float y = FR(tmp);
  66. return y * (1.47f - 0.47f * x * y * y);
  67. }
  68. //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
  69. //! See http://www.magic-software.com/3DGEDInvSqrt.html
  70. inline_ float RSqrt(float number)
  71. {
  72. int i;
  73. float x2, y;
  74. const float threehalfs = 1.5f;
  75. x2 = number * 0.5f;
  76. y = number;
  77. i = IR(y);
  78. i = 0x5f3759df - (i >> 1);
  79. y = FR(i);
  80. y = y * (threehalfs - (x2 * y * y));
  81. return y;
  82. }
  83. //! TO BE DOCUMENTED
  84. inline_ float fsqrt(float f)
  85. {
  86. udword y = ( ( SIR(f) - 0x3f800000 ) >> 1 ) + 0x3f800000;
  87. // Iteration...?
  88. // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
  89. // Result
  90. return FR(y);
  91. }
  92. //! Returns the float ranged espilon value.
  93. inline_ float fepsilon(float f)
  94. {
  95. udword b = IR(f) & 0xff800000;
  96. udword a = b | 0x00000001;
  97. // Result
  98. return FR(a) - FR(b);
  99. }
  100. //! Is the float valid ?
  101. inline_ bool IsNAN(float value) { return (IR(value)&0x7f800000) == 0x7f800000; }
  102. inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; }
  103. inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; }
  104. inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; }
  105. inline_ bool IsValidFloat(float value)
  106. {
  107. if(IsNAN(value)) return false;
  108. if(IsIndeterminate(value)) return false;
  109. if(IsPlusInf(value)) return false;
  110. if(IsMinusInf(value)) return false;
  111. return true;
  112. }
  //! Asserts (through the project's ASSERT macro) that x passes IsValidFloat().
  //! NOTE(review): the expansion already ends with ';', so "CHECK_VALID_FLOAT(x);"
  //! produces an extra empty statement - be careful inside unbraced if/else.
  113. #define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x));
  114. /*
  115. //! FPU precision setting function.
  116. inline_ void SetFPU()
  117. {
  118. // This function evaluates whether the floating-point
  119. // control word is set to single precision/round to nearest/
  120. // exceptions disabled. If these conditions don't hold, the
  121. // function changes the control word to set them and returns
  122. // TRUE, putting the old control word value in the passback
  123. // location pointed to by pwOldCW.
  124. {
  125. uword wTemp, wSave;
  126. __asm fstcw wSave
  127. if (wSave & 0x300 || // Not single mode
  128. 0x3f != (wSave & 0x3f) || // Exceptions enabled
  129. wSave & 0xC00) // Not round to nearest mode
  130. {
  131. __asm
  132. {
  133. mov ax, wSave
  134. and ax, not 300h ;; single mode
  135. or ax, 3fh ;; disable all exceptions
  136. and ax, not 0xC00 ;; round to nearest mode
  137. mov wTemp, ax
  138. fldcw wTemp
  139. }
  140. }
  141. }
  142. }
  143. */
  144. //! This function computes the slowest possible floating-point value (you can also directly use FLT_EPSILON)
  145. inline_ float ComputeFloatEpsilon()
  146. {
  147. const float f = FR( IR(1.0f) ^ 1 );
  148. return f - 1.0f; // You can check it's the same as FLT_EPSILON
  149. }
  150. inline_ bool IsFloatZero(float x, float epsilon=1e-6f)
  151. {
  152. return x*x < epsilon;
  153. }
  // Hand-encoded instruction macros: each one emits two raw opcode bytes through
  // inline assembly ("_asm _emit"). The names indicate FCOMI / FCOMIP / FCMOVB /
  // FCMOVNB with operand ST(0)..ST(7); the second byte grows by one per register
  // index. NOTE(review): "_asm _emit" looks like MSVC-style 32-bit inline
  // assembly - confirm before using these with another toolchain.
  154. #define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0
  155. #define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0
  156. #define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0
  157. #define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0
  158. #define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1
  159. #define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1
  160. #define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1
  161. #define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1
  162. #define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2
  163. #define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2
  164. #define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2
  165. #define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2
  166. #define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3
  167. #define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3
  168. #define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3
  169. #define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3
  170. #define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4
  171. #define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4
  172. #define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4
  173. #define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4
  174. #define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5
  175. #define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5
  176. #define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5
  177. #define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5
  178. #define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6
  179. #define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6
  180. #define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6
  181. #define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6
  182. #define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7
  183. #define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7
  184. #define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7
  185. #define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7
  186. //! A global function to find MAX(a,b) using FCOMI/FCMOV
  187. inline_ float FCMax2(float a, float b)
  188. {
  189. return (a > b) ? a : b;
  190. }
  191. //! A global function to find MIN(a,b) using FCOMI/FCMOV
  192. inline_ float FCMin2(float a, float b)
  193. {
  194. return (a < b) ? a : b;
  195. }
  196. //! A global function to find MAX(a,b,c) using FCOMI/FCMOV
  197. inline_ float FCMax3(float a, float b, float c)
  198. {
  199. return (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c);
  200. }
  201. //! A global function to find MIN(a,b,c) using FCOMI/FCMOV
  202. inline_ float FCMin3(float a, float b, float c)
  203. {
  204. return (a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c);
  205. }
  206. inline_ int ConvertToSortable(float f)
  207. {
  208. int Fi = SIR(f);
  209. int Fmask = (Fi>>31);
  210. Fi ^= Fmask;
  211. Fmask &= ~(1<<31);
  212. Fi -= Fmask;
  213. return Fi;
  214. }
  //! FPU mode identifiers, as returned by GetFPUMode().
  //! NOTE(review): judging by the names these correspond to the modes selected by
  //! SetFPUFloorMode() / SetFPUCeilMode() / SetFPUBestMode() - confirm against the
  //! implementation, which is not part of this header.
  215. enum FPUMode
  216. {
  217. FPU_FLOOR = 0,
  218. FPU_CEIL = 1,
  219. FPU_BEST = 2,
  // Large sentinel value; presumably there to force the enum to a 32-bit size.
  220. FPU_FORCE_DWORD = 0x7fffffff
  221. };
  // Exported FPU control routines. Only the declarations live here; the
  // implementations are outside this header, so the notes below are inferred
  // from the names and should be confirmed against the source file.

  // Query / save / restore (presumably of the FPU control state).
  222. FUNCTION ICECORE_API FPUMode GetFPUMode();
  223. FUNCTION ICECORE_API void SaveFPU();
  224. FUNCTION ICECORE_API void RestoreFPU();
  // Mode selection matching the FPUMode enumerators.
  225. FUNCTION ICECORE_API void SetFPUFloorMode();
  226. FUNCTION ICECORE_API void SetFPUCeilMode();
  227. FUNCTION ICECORE_API void SetFPUBestMode();
  // Precision control (the suffix presumably is the mantissa width in bits).
  228. FUNCTION ICECORE_API void SetFPUPrecision24();
  229. FUNCTION ICECORE_API void SetFPUPrecision53();
  230. FUNCTION ICECORE_API void SetFPUPrecision64();
  // Rounding control.
  231. FUNCTION ICECORE_API void SetFPURoundingChop();
  232. FUNCTION ICECORE_API void SetFPURoundingUp();
  233. FUNCTION ICECORE_API void SetFPURoundingDown();
  234. FUNCTION ICECORE_API void SetFPURoundingNear();
  // Float-to-int conversions (presumably truncation, floor and ceiling respectively).
  235. FUNCTION ICECORE_API int intChop(const float& f);
  236. FUNCTION ICECORE_API int intFloor(const float& f);
  237. FUNCTION ICECORE_API int intCeil(const float& f);
  238. #endif // __ICEFPU_H__