IceFPU.h 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  2. /**
  3. * Contains FPU related code.
  4. * \file IceFPU.h
  5. * \author Pierre Terdiman
  6. * \date April, 4, 2000
  7. */
  8. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  9. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  10. // Include Guard
  11. #ifndef __ICEFPU_H__
  12. #define __ICEFPU_H__
  13. #if defined( PLATFORM_XENON )
  14. # include <PPCIntrinsics.h>
  15. #endif
  //! Mask selecting the sign bit (bit 31) of a 32-bit value.
  #define SIGN_BITMASK 0x80000000
  //! Integer representation of a floating-point value.
  //! Reinterprets the storage of x as a udword through a reference cast: no numeric
  //! conversion takes place, and x has to be an lvalue.
  #define IR(x) ((udword&)(x))
  //! Signed integer representation of a floating-point value.
  //! Same reinterpretation as IR(), but viewed as an sdword.
  #define SIR(x) ((sdword&)(x))
  //! Absolute integer representation of a floating-point value:
  //! the bits of x with the sign bit (bit 31) cleared.
  #define AIR(x) (IR(x)&0x7fffffff)
  //! Floating-point representation of an integer value.
  //! Reinterprets the storage of x as a float (no numeric conversion).
  #define FR(x) ((float&)(x))
  //! Integer-based comparison of a floating point value.
  //! Evaluates to 0x80000000 (non-zero, not 1) when the sign bit of x is set, and to 0 otherwise.
  //! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
  #define IS_NEGATIVE_FLOAT(x) (IR(x)&0x80000000)
  28. //! Fast fabs for floating-point values. It just clears the sign bit.
  29. //! Don't use it blindy, it can be faster or slower than the FPU comparison, depends on the context.
  30. inline_ float FastFabs(float x)
  31. {
  32. udword FloatBits = IR(x)&0x7fffffff;
  33. return FR(FloatBits);
  34. }
  //! Fast square root for floating-point values.
  //! The implementation is selected per platform and the precision is NOT the same everywhere:
  //! - 32-bit Windows: integer approximation on the raw float bits (inexact result).
  //! - Xenon: forwards to the __fsqrts intrinsic.
  //! - anything else: forwards to sqrtf().
  //! \param square  value whose square root is wanted
  //! \return        the (possibly approximate) square root
  inline_ float FastSqrt(float square)
  {
  #if defined( PLATFORM_WINDOWS ) && !defined ( _WIN64 )
      float retval;
      __asm {
          // Load the raw bit pattern of the input.
          mov eax, square
          // Remove 0x3F800000 (the bit pattern of 1.0f)...
          sub eax, 0x3F800000
          // ...halve what is left with an arithmetic shift...
          sar eax, 1
          // ...and add 0x3F800000 back.
          add eax, 0x3F800000
          // Store the resulting bits as the float result.
          mov [retval], eax
      }
      return retval;
  #elif defined( PLATFORM_XENON )
      return __fsqrts( square );
  #else
      return( sqrtf( square ) );
  #endif
  }
  54. //! Saturates positive to zero.
  55. inline_ float fsat(float f)
  56. {
  57. udword y = (udword&)f & ~((sdword&)f >>31);
  58. return (float&)y;
  59. }
  60. //! Computes 1.0f / sqrtf(x).
  61. inline_ float frsqrt(float f)
  62. {
  63. float x = f * 0.5f;
  64. udword y = 0x5f3759df - ((udword&)f >> 1);
  65. // Iteration...
  66. (float&)y = (float&)y * ( 1.5f - ( x * (float&)y * (float&)y ) );
  67. // Result
  68. return (float&)y;
  69. }
  70. //! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
  71. inline_ float InvSqrt(const float& x)
  72. {
  73. udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1;
  74. float y = *(float*)&tmp;
  75. return y * (1.47f - 0.47f * x * y * y);
  76. }
  77. //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
  78. //! See http://www.magic-software.com/3DGEDInvSqrt.html
  79. inline_ float RSqrt(float number)
  80. {
  81. long i;
  82. float x2, y;
  83. const float threehalfs = 1.5f;
  84. x2 = number * 0.5f;
  85. y = number;
  86. i = * (long *) &y;
  87. i = 0x5f3759df - (i >> 1);
  88. y = * (float *) &i;
  89. y = y * (threehalfs - (x2 * y * y));
  90. return y;
  91. }
  92. //! TO BE DOCUMENTED
  93. inline_ float fsqrt(float f)
  94. {
  95. udword y = ( ( (sdword&)f - 0x3f800000 ) >> 1 ) + 0x3f800000;
  96. // Iteration...?
  97. // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
  98. // Result
  99. return (float&)y;
  100. }
  101. //! Returns the float ranged espilon value.
  102. inline_ float fepsilon(float f)
  103. {
  104. udword b = (udword&)f & 0xff800000;
  105. udword a = b | 0x00000001;
  106. (float&)a -= (float&)b;
  107. // Result
  108. return (float&)a;
  109. }
  110. //! Is the float valid ?
  111. inline_ bool IsNAN(float value) { return (IR(value)&0x7f800000) == 0x7f800000; }
  112. inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; }
  113. inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; }
  114. inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; }
  115. inline_ bool IsValidFloat(float value)
  116. {
  117. if(IsNAN(value)) return false;
  118. if(IsIndeterminate(value)) return false;
  119. if(IsPlusInf(value)) return false;
  120. if(IsMinusInf(value)) return false;
  121. return true;
  122. }
  //! Asserts, through the ASSERT macro (defined elsewhere), that x passes IsValidFloat().
  //! NOTE(review): the expansion already ends with ';', so writing "CHECK_VALID_FLOAT(x);" produces
  //! an extra empty statement - wrap it in braces when used as the body of an if/else.
  #define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x));
  124. /*
  125. //! FPU precision setting function.
  126. inline_ void SetFPU()
  127. {
  128. // This function evaluates whether the floating-point
  129. // control word is set to single precision/round to nearest/
  130. // exceptions disabled. If these conditions don't hold, the
  131. // function changes the control word to set them and returns
  132. // TRUE, putting the old control word value in the passback
  133. // location pointed to by pwOldCW.
  134. {
  135. uword wTemp, wSave;
  136. __asm fstcw wSave
  137. if (wSave & 0x300 || // Not single mode
  138. 0x3f != (wSave & 0x3f) || // Exceptions enabled
  139. wSave & 0xC00) // Not round to nearest mode
  140. {
  141. __asm
  142. {
  143. mov ax, wSave
  144. and ax, not 300h ;; single mode
  145. or ax, 3fh ;; disable all exceptions
  146. and ax, not 0xC00 ;; round to nearest mode
  147. mov wTemp, ax
  148. fldcw wTemp
  149. }
  150. }
  151. }
  152. }
  153. */
  154. //! This function computes the slowest possible floating-point value (you can also directly use FLT_EPSILON)
  155. inline_ float ComputeFloatEpsilon()
  156. {
  157. float f = 1.0f;
  158. ((udword&)f)^=1;
  159. return f - 1.0f; // You can check it's the same as FLT_EPSILON
  160. }
  161. inline_ bool IsFloatZero(float x, float epsilon=1e-6f)
  162. {
  163. return x*x < epsilon;
  164. }
  165. #if defined( PLATFORM_WINDOWS ) && !defined ( _WIN64 )
  //! Raw opcode emitters for the x87 FCOMI / FCOMIP / FCMOVB / FCMOVNB instructions, one set per
  //! stack register ST(0)..ST(7). Each macro emits two bytes with _emit instead of using a mnemonic:
  //!   FCOMI_STi   -> DB F0+i
  //!   FCOMIP_STi  -> DF F0+i
  //!   FCMOVB_STi  -> DA C0+i
  //!   FCMOVNB_STi -> DB C0+i
  //! Each macro expands to two "_asm _emit" statements on one logical line, so it must be used as a
  //! statement of its own (no trailing semicolon needed, no assembly ';' comments after it).
  #define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0
  #define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0
  #define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0
  #define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0
  #define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1
  #define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1
  #define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1
  #define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1
  #define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2
  #define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2
  #define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2
  #define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2
  #define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3
  #define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3
  #define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3
  #define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3
  #define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4
  #define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4
  #define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4
  #define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4
  #define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5
  #define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5
  #define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5
  #define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5
  #define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6
  #define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6
  #define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6
  #define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6
  #define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7
  #define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7
  #define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7
  #define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7
  //! A global function to find MAX(a,b) using FCOMI/FCMOV
  //! (32-bit Windows build only: inline x87 assembly, branch-free).
  //! \return the larger of a and b
  inline_ float FCMax2(float a, float b)
  {
      float Res;
      // Push a, then b: ST(0) = b, ST(1) = a.
      _asm fld [a]
      _asm fld [b]
      // Compare ST(0) with ST(1); the result lands in EFLAGS.
      FCOMI_ST1
      // If ST(0) is below ST(1) (b < a), overwrite ST(0) with a.
      FCMOVB_ST1
      // Store the winner and pop it.
      _asm fstp [Res]
      // Pop the value still on the x87 stack (the comparison result itself is unused).
      _asm fcomp
      return Res;
  }
  //! A global function to find MIN(a,b) using FCOMI/FCMOV
  //! (32-bit Windows build only: inline x87 assembly, branch-free).
  //! \return the smaller of a and b
  inline_ float FCMin2(float a, float b)
  {
      float Res;
      // Push a, then b: ST(0) = b, ST(1) = a.
      _asm fld [a]
      _asm fld [b]
      // Compare ST(0) with ST(1); the result lands in EFLAGS.
      FCOMI_ST1
      // If ST(0) is NOT below ST(1) (b >= a), overwrite ST(0) with a.
      FCMOVNB_ST1
      // Store the winner and pop it.
      _asm fstp [Res]
      // Pop the value still on the x87 stack (the comparison result itself is unused).
      _asm fcomp
      return Res;
  }
  //! A global function to find MAX(a,b,c) using FCOMI/FCMOV
  //! (32-bit Windows build only: inline x87 assembly, branch-free).
  //! \return the largest of a, b and c
  inline_ float FCMax3(float a, float b, float c)
  {
      float Res;
      // Push a, b, c: ST(0) = c, ST(1) = b, ST(2) = a.
      _asm fld [a]
      _asm fld [b]
      _asm fld [c]
      // ST(0) = max(c, b)
      FCOMI_ST1
      FCMOVB_ST1
      // ST(0) = max(ST(0), a)
      FCOMI_ST2
      FCMOVB_ST2
      // Store the winner and pop it.
      _asm fstp [Res]
      // Pop the two values still on the x87 stack (the comparison result itself is unused).
      _asm fcompp
      return Res;
  }
  //! A global function to find MIN(a,b,c) using FCOMI/FCMOV
  //! (32-bit Windows build only: inline x87 assembly, branch-free).
  //! \return the smallest of a, b and c
  inline_ float FCMin3(float a, float b, float c)
  {
      float Res;
      // Push a, b, c: ST(0) = c, ST(1) = b, ST(2) = a.
      _asm fld [a]
      _asm fld [b]
      _asm fld [c]
      // ST(0) = min(c, b)
      FCOMI_ST1
      FCMOVNB_ST1
      // ST(0) = min(ST(0), a)
      FCOMI_ST2
      FCMOVNB_ST2
      // Store the winner and pop it.
      _asm fstp [Res]
      // Pop the two values still on the x87 stack (the comparison result itself is unused).
      _asm fcompp
      return Res;
  }
  252. #elif defined( PLATFORM_XENON )
  253. inline float FCMax2(float a, float b) { return (float)__fsel((a) - (b), a, b ); }
  254. inline float FCMin2(float a, float b) { return (float)__fsel((a) - (b), b, a ); }
  255. inline float FCMax3(float a, float b, float c)
  256. {
  257. return (float)__fsel( (a) - (b), __fsel( (a) - (c), a, c ), __fsel( (b) - (c), b, c ) );
  258. }
  259. inline float FCMin3(float a, float b, float c)
  260. {
  261. return (float)__fsel( (a) - (b), __fsel( (b) - (c), c, b ), __fsel( (a) - (c), c, a ) );
  262. }
  263. #else
  //! Portable MAX(a,b): returns a when a > b, otherwise b.
  inline float FCMax2(float a, float b)
  {
      if (a > b)
          return a;
      return b;
  }
  //! Portable MIN(a,b): returns b when a > b, otherwise a.
  inline float FCMin2(float a, float b)
  {
      if (a > b)
          return b;
      return a;
  }
  266. inline float FCMax3(float a, float b, float c)
  267. {
  268. if (a >= b)
  269. return( FCMax2( a, c ) );
  270. return( FCMax2( b, c ) );
  271. }
  272. inline float FCMin3(float a, float b, float c)
  273. {
  274. if (a >= b)
  275. return( FCMin2( b, c ) );
  276. return( FCMin2( a, c ) );
  277. }
  278. #endif
  279. inline_ int ConvertToSortable(float f)
  280. {
  281. int& Fi = (int&)f;
  282. int Fmask = (Fi>>31);
  283. Fi ^= Fmask;
  284. Fmask &= ~(1<<31);
  285. Fi -= Fmask;
  286. return Fi;
  287. }
  //! FPU mode identifiers, as returned by GetFPUMode().
  //! NOTE(review): the exact meaning of each mode is defined by the out-of-line implementation;
  //! the names suggest the rounding behaviour selected by SetFPUFloorMode / SetFPUCeilMode / SetFPUBestMode - confirm.
  enum FPUMode
  {
      FPU_FLOOR = 0,
      FPU_CEIL = 1,
      FPU_BEST = 2,
      // Presumably only there to force the enum to be stored on 32 bits - confirm.
      FPU_FORCE_DWORD = 0x7fffffff
  };
  // Exported FPU control API. Only the declarations live in this header; FUNCTION and ICECORE_API
  // are defined elsewhere, and the behaviour described below is inferred from the names only
  // (NOTE(review): confirm against the implementation).

  // Current mode query, and save/restore of the FPU state.
  FUNCTION ICECORE_API FPUMode GetFPUMode();
  FUNCTION ICECORE_API void SaveFPU();
  FUNCTION ICECORE_API void RestoreFPU();
  // Mode selection, matching the FPUMode enumerators.
  FUNCTION ICECORE_API void SetFPUFloorMode();
  FUNCTION ICECORE_API void SetFPUCeilMode();
  FUNCTION ICECORE_API void SetFPUBestMode();
  // Precision control (presumably mantissa width in bits).
  FUNCTION ICECORE_API void SetFPUPrecision24();
  FUNCTION ICECORE_API void SetFPUPrecision53();
  FUNCTION ICECORE_API void SetFPUPrecision64();
  // Rounding control.
  FUNCTION ICECORE_API void SetFPURoundingChop();
  FUNCTION ICECORE_API void SetFPURoundingUp();
  FUNCTION ICECORE_API void SetFPURoundingDown();
  FUNCTION ICECORE_API void SetFPURoundingNear();
  // Float-to-int conversions; the float is taken by const reference.
  FUNCTION ICECORE_API int intChop(const float& f);
  FUNCTION ICECORE_API int intFloor(const float& f);
  FUNCTION ICECORE_API int intCeil(const float& f);
  311. #endif // __ICEFPU_H__