EAMathHelp.html 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head>
  2. <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
  3. <Title>EAMathHelp</title>
  4. <link type="text/css" rel="stylesheet" href="UTFDoc.css">
  5. <meta name="author" content="Paul Pedriana">
  6. </head>
  7. <body bgcolor="#FFFFFF">
  8. <h1>EAMathHelp</h1>
  9. <h2>Introduction</h2>
  10. <p>EAMathHelp provides fast floating point math primitives. It is not a vector/matrix math library such as those in use around Electronic Arts, but rather is a base
  11. for doing floating point characterizations and for doing fast floating point conversions. The former often serve to help implement the latter.</p>
  12. <p>The constants listed below may look odd if you aren't familiar with the standardized IEEE floating point format. A description of this format is outside the scope
  13. of this document, but you can find plenty of documentation about it by looking it up on the Internet. Suffice it to say that floating point numbers are essentially
  14. bitfields that specify a sign, an integer value (mantissa), and a power-of-two exponent by which that value is scaled.</p>
  15. <h2>Constants</h2>
  16. <pre class="code-example"><span class="code-example-comment">// 32 bit float bits
  17. </span>const uint32_t kFloat32SignMask = UINT32_C(0x80000000);
  18. const uint32_t kFloat32ExponentMask = UINT32_C(0x7F800000);
  19. const uint32_t kFloat32MantissaMask = UINT32_C(0x007FFFFF);
  20. const uint32_t kFloat32SignAndExponentMask = UINT32_C(0xFF800000);
  21. const uint32_t kFloat32SignAndMantissaMask = UINT32_C(0x807FFFFF);
  22. const uint32_t kFloat32ExponentAndMantissaMask = UINT32_C(0x7FFFFFFF);
  23. const uint32_t kFloat32PositiveInfinityBits = UINT32_C(0x7F800000);
  24. const unsigned kFloat32SignBits = 1;
  25. const unsigned kFloat32ExponentBits = 8;
  26. const unsigned kFloat32MantissaBits = 23;
  27. const unsigned kFloat32BiasValue = 127;
  28. <span class="code-example-comment">// 64 bit float bits
  29. </span>const uint64_t kFloat64SignMask = UINT64_C(0x8000000000000000);
  30. const uint64_t kFloat64ExponentMask = UINT64_C(0x7FF0000000000000);
  31. const uint64_t kFloat64MantissaMask = UINT64_C(0x000FFFFFFFFFFFFF);
  32. const uint64_t kFloat64SignAndExponentMask = UINT64_C(0xFFF0000000000000);
  33. const uint64_t kFloat64SignAndMantissaMask = UINT64_C(0x800FFFFFFFFFFFFF);
  34. const uint64_t kFloat64ExponentAndMantissaMask = UINT64_C(0x7FFFFFFFFFFFFFFF);
  35. const uint64_t kFloat64PositiveInfinityBits = UINT64_C(0x7FF0000000000000);
  36. const unsigned kFloat64SignBits = 1;
  37. const unsigned kFloat64ExponentBits = 11;
  38. const unsigned kFloat64MantissaBits = 52;
  39. const unsigned kFloat64BiasValue = 1023;
  40. const float32_t kFloat32Infinity = kFloat32PositiveInfinityBits;
  41. const float64_t kFloat64Infinity = kFloat64PositiveInfinityBits;
  42. <span class="code-example-comment">// bias to integer
  43. </span>const float32_t kFToIBiasF32 = uint32_t(3) << 22;
  44. const int32_t kFToIBiasS32 = 0x4B400000; <span class="code-example-comment">// Same as ((int32_t&) kFToIBiasF32), but known to optimizer at compile time.</span>
  45. const float64_t kFToIBiasF64 = uint64_t(3) << 52;
  46. <span class="code-example-comment">// bias to 8-bit fraction
  47. </span>const float32_t kFToI8BiasF32 = uint32_t(3) << 14;
  48. const int32_t kFToI8BiasS32 = 0x47400000; <span class="code-example-comment">// Same as ((int32_t&) kFToI8BiasF32), but known to optimizer at compile time.</span>
  49. <span class="code-example-comment">// bias to 16-bit fraction
  50. </span>const float32_t kFToI16BiasF32 = uint32_t(3) << 6;
  51. const int32_t kFToI16BiasS32 = 0x43400000; <span class="code-example-comment">// Same as ((int32_t&) kFToI16BiasF32), but known to optimizer at compile time.</span>
  52. </pre>
  53. <h2>Functions</h2>
  54. <p>Float conversion functions</p>
  55. <pre class="code-example"><span class="code-example-comment">///////////////////////////////////////////////////////////////////////
  56. // Full range conversion functions
  57. //
  58. // These are good for floats within the full range of a float. Remember
  59. // that a single-precision float only has a 24-bit significand so
  60. // most integers |x| > 2^24 cannot be represented exactly.
  61. //
  62. // The result of converting an out-of-range number, infinity, or NaN
  63. // is undefined.
  64. //
  65. </span>inline uint32_t RoundToUint32(float fValue);
  66. inline int32_t RoundToInt32(float fValue);
  67. inline int32_t FloorToInt32(float fValue);
  68. inline int32_t CeilToInt32(float fValue);
  69. inline int32_t TruncateToInt32(float fValue);
  70. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////
  71. // Partial range conversion functions.
  72. //
  73. // These are only good for |x| &lt;= 2^23. The result of converting an
  74. // out-of-range number, infinity, or NaN is undefined.
  75. //
  76. </span>inline int32_t FastRoundToInt23(float fValue);
  77. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////
  78. // Unit-to-byte functions.
  79. //
  80. // Converts real values in the range |x| &lt;= 1 to unsigned 8-bit values
  81. // [0, 255]. The result of calling UnitFloatToUint8() with |x|>1 is
  82. // undefined.
  83. //
  84. </span>inline uint8_t UnitFloatToUint8(float fValue);
  85. inline uint8_t ClampUnitFloatToUint8(float fValue);
  86. </pre>
  87. <p>Float characterization functions</p>
  88. <pre class="code-example"><span class="code-example-comment">///////////////////////////////////////////////////////////////////////
  89. // IsInvalid
  90. //
  91. // Returns true if a value does not obey normal arithmetic rules;
  92. // specifically, x != x. In the case of Visual C++ 2003, this is true
  93. // for NaNs and indefinites, and not for normalized finite values,
  94. // denormals, and infinities. Other compilers may return different
  95. // results or even false for all values.
  96. //
  97. // IsInvalid() is useful as a fast assert check that floats are
  98. // sane and won't poison computations as NaNs can with masked
  99. // exceptions.
  100. //
  101. </span>inline bool IsInvalid(float32_t fValue);
  102. inline bool IsInvalid(float64_t fValue);
  103. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////////////
  104. // IsNormal
  105. //
  106. // Returns true if the value is a normalized finite number. That is, it is neither
  107. // an infinite, nor a NaN (including indefinite NaN), nor a denormalized number.
  108. // You generally want to write math operation checking code that asserts for
  109. // IsNormal() as opposed to checking specifically for IsNaN, etc.
  110. //
  111. // Normal values are defined as any floating point value with an exponent in
  112. // the range of [1, 254] for Float32 or [1, 2046] for Float64, as 0 is reserved
  113. // for denormalized (underflow) values and the maximum exponent (255 or 2047) is reserved for infinite (overflow) and NaN values.
  114. //
  115. </span>inline bool IsNormal(float32_t fValue);
  116. inline bool IsNormal(float64_t fValue);
  117. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////////////
  118. // IsNAN
  119. //
  120. // A NaN is a special kind of number that is neither finite nor infinite.
  121. // It is the result of doing things like the following:
  122. // float x = 1 * NaN;
  123. // float x = NaN + NaN;
  124. // float x = 0 / 0;
  125. // float x = 0 * infinite;
  126. // float x = infinite - infinite;
  127. // float x = sqrt(-1);
  128. // float x = cos(infinite);
  129. // Under the VC++ debugger, x will be displayed as 1.#QNAN00 or 1.#IND00 and
  130. // the bit representation of x will be 0x7fc00001 (in the case of 1 * NaN).
  131. // The 'Q' in front of NAN stands for "quiet" and means that use of that value
  132. // in expressions won't generate exceptions. A signaling NaN (SNAN) means that
  133. // use of the value would generate exceptions.
  134. //
  135. // NaNs are frequently generated in physics simulations and similar mathematical
  136. // situations when you are simulating an object moving or turning over time but
  137. // the time or distance differential in the calculation is very small.
  138. // Also, floating point roundoff error can generate NaNs if you do things
  139. // like call acos(x) where you didn't take care to clamp x to the range [-1, 1]. You can
  140. // also get a NaN when memory used to store a floating point value is written
  141. // with random data.
  142. //
  143. // A curious property of NaNs is that all comparisons between NaNs return
  144. // false except the expression: NaN != NaN. This is so even if the bit
  145. // representations of the two compared NaNs are identical. Thus, with NaNs,
  146. // the following holds:
  147. // x == x is always false
  148. // x < y is always false
  149. // x > y is always false
  150. //
  151. // As a result, one simple way to test for a NaN without fiddling with bits is
  152. // to simply test for x == x. If this returns false, then you have a NaN.
  153. // Unfortunately, many C and C++ compilers don't obey this, so you are usually
  154. // stuck fiddling with bits.
  155. //
  156. // With a NaN, all exponent bits are 1 and the mantissa is not zero.
  157. // If the highest fraction bit is 1, the NAN is "quiet" -- it represents
  158. // an indeterminate operation rather than an invalid one.
  159. //</span>
  160. inline bool IsNAN(float32_t fValue);
  161. inline bool IsNAN(float64_t fValue);
  162. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////////////
  163. // IsInfinite
  164. //
  165. // A value is infinity if the exponent bits are all 1 and all the bits of the
  166. // mantissa (significand) are 0. The sign bit indicates positive or negative
  167. // infinity. Thus, for Float32, 0x7f800000 is positive infinity and 0xff800000
  168. // is negative infinity.
  169. //
  170. </span>inline bool IsInfinite(float32_t fValue);
  171. inline bool IsInfinite(float64_t fValue);
  172. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////////////
  173. // IsIndefinite
  174. //
  175. // An indefinite is a special kind of NaN that is used to signify that an
  176. // operation between non-NaNs generated a NaN. Other than that, it really is
  177. // simply another NaN.
  178. //
  179. </span>inline bool IsIndefinite(float32_t fValue);
  180. inline bool IsIndefinite(float64_t fValue);
  181. <span class="code-example-comment">///////////////////////////////////////////////////////////////////////////////
  182. // IsDenormalized
  183. //
  184. // Much in the same way that infinite numbers represent an overflow,
  185. // denormalized numbers represent an underflow. A denormalized number is
  186. // indicated by an exponent with a value of zero. You get a denormalized
  187. // number when you do operations such as this:
  188. // float x = 1e-10 / 1e35;
  189. // Under the VC++ debugger, x will be displayed as 1.4e-045#DEN and the
  190. // bit representation of x will be 0x00000001. Unlike infinites and NaNs,
  191. // you can still do math with denormalized numbers. However, the results
  192. // of your math will likely have a lot of imprecision. You can also get a
  193. // denormalized value when memory used to store a floating point value is
  194. // written with random data.
  195. //
  196. </span>inline bool IsDenormalized(float32_t fValue);
  197. inline bool IsDenormalized(float64_t fValue);</pre>
  198. <hr>
  199. <p>&nbsp;</p>
  200. <p>&nbsp;</p>
  201. <p>&nbsp;</p>
  202. <p>&nbsp;</p>
  203. <p>&nbsp;</p>
  204. <p>&nbsp;</p>
  205. <p>&nbsp;</p>
  206. <p>&nbsp;</p>
  207. <p>&nbsp;</p>
  208. <p>&nbsp;</p>
  209. <p>&nbsp;</p>
  210. <p>&nbsp;</p>
  211. <p>&nbsp;</p>
  212. </body></html>