floattypes.cpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /**
  2. * Copyright (c) 2006-2024 LOVE Development Team
  3. *
  4. * This software is provided 'as-is', without any express or implied
  5. * warranty. In no event will the authors be held liable for any damages
  6. * arising from the use of this software.
  7. *
  8. * Permission is granted to anyone to use this software for any purpose,
  9. * including commercial applications, and to alter it and redistribute it
  10. * freely, subject to the following restrictions:
  11. *
  12. * 1. The origin of this software must not be misrepresented; you must not
  13. * claim that you wrote the original software. If you use this software
  14. * in a product, an acknowledgment in the product documentation would be
  15. * appreciated but is not required.
  16. * 2. Altered source versions must be plainly marked as such, and must not be
  17. * misrepresented as being the original software.
  18. * 3. This notice may not be removed or altered from any source distribution.
  19. **/
  #include "floattypes.h"
  #include <cmath>
  #include <cstring>
  #include <limits>
  23. namespace love
  24. {
  25. // Code from ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf
  26. static bool halfInitialized = false;
  27. // tables for half -> float conversions
  28. static uint32 mantissatable[2048];
  29. static uint16 offsettable[64];
  30. static uint32 exponenttable[64];
  31. // tables for float -> half conversions
  32. static uint16 basetable[512];
  33. static uint8 shifttable[512];
  34. static uint32 convertMantissa(uint32 i)
  35. {
  36. uint32 m = i << 13; // Zero pad mantissa bits
  37. uint32 e = 0; // Zero exponent
  38. while (!(m & 0x00800000)) // While not normalized
  39. {
  40. e -= 0x00800000; // Decrement exponent (1<<23)
  41. m <<= 1; // Shift mantissa
  42. }
  43. m &= ~(0x00800000); // Clear leading 1 bit
  44. e += 0x38800000; // Adjust bias ((127-14)<<23)
  45. return m | e; // Return combined number
  46. }
  47. void float16Init()
  48. {
  49. if (halfInitialized)
  50. return;
  51. halfInitialized = true;
  52. // tables for float16 -> float32 conversions.
  53. mantissatable[0] = 0;
  54. for (uint32 i = 1; i < 1024; i++)
  55. mantissatable[i] = convertMantissa(i);
  56. for (uint32 i = 1024; i < 2048; i++)
  57. mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
  58. exponenttable[0] = 0;
  59. exponenttable[32] = 0x80000000;
  60. for (uint32 i = 0; i < 31; i++)
  61. exponenttable[i] = i << 23;
  62. for (uint32 i = 33; i < 63; i++)
  63. exponenttable[i] = 0x80000000 + ((i - 32) << 23);
  64. exponenttable[31] = 0x47800000;
  65. exponenttable[63] = 0xC7800000;
  66. for (int i = 0; i < 64; i++)
  67. {
  68. if (i == 0 || i == 32)
  69. offsettable[i] = 0;
  70. else
  71. offsettable[i] = 1024;
  72. }
  73. // tables for float32 -> float16 conversions.
  74. for (uint32 i = 0; i < 256; i++)
  75. {
  76. int e = (int) i - 127;
  77. if (e < -24) // Very small numbers map to zero
  78. {
  79. basetable[i | 0x000] = 0x0000;
  80. basetable[i | 0x100] = 0x8000;
  81. shifttable[i | 0x000] = 24;
  82. shifttable[i | 0x100] = 24;
  83. }
  84. else if (e < -14) // Small numbers map to denorms
  85. {
  86. basetable[i | 0x000] = (0x0400 >> (-e - 14));
  87. basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
  88. shifttable[i | 0x000] = -e - 1;
  89. shifttable[i | 0x100] = -e - 1;
  90. }
  91. else if (e <= 15) // Normal numbers just lose precision
  92. {
  93. basetable[i | 0x000] = ((e + 15) << 10);
  94. basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
  95. shifttable[i | 0x000] = 13;
  96. shifttable[i | 0x100] = 13;
  97. }
  98. else if (e < 128) // Large numbers map to Infinity
  99. {
  100. basetable[i | 0x000] = 0x7C00;
  101. basetable[i | 0x100] = 0xFC00;
  102. shifttable[i | 0x000] = 24;
  103. shifttable[i | 0x100] = 24;
  104. }
  105. else // Infinity and NaN's stay Infinity and NaN's
  106. {
  107. basetable[i | 0x000] = 0x7C00;
  108. basetable[i | 0x100] = 0xFC00;
  109. shifttable[i | 0x000] = 13;
  110. shifttable[i | 0x100] = 13;
  111. }
  112. }
  113. }
  114. static inline uint32 asuint32(float f)
  115. {
  116. union { float f; uint32 u; } conv;
  117. conv.f = f;
  118. return conv.u;
  119. }
  120. static inline float asfloat32(uint32 u)
  121. {
  122. union { float f; uint32 u; } conv;
  123. conv.u = u;
  124. return conv.f;
  125. }
  126. float float16to32(float16 f)
  127. {
  128. return asfloat32(mantissatable[offsettable[f >> 10] + (f & 0x3FF)] + exponenttable[f >> 10]);
  129. }
  130. float16 float32to16(float f)
  131. {
  132. uint32 u = asuint32(f);
  133. return basetable[(u >> 23) & 0x1FF] + ((u & 0x007FFFFF) >> shifttable[(u >> 23) & 0x1FF]);
  134. }
  135. // Adapted from https://stackoverflow.com/questions/41532085/how-to-pack-unpack-11-and-10-bit-floats-in-javascript-for-webgl2
  136. float float11to32(float11 f)
  137. {
  138. uint16 exponent = f >> 6;
  139. uint16 mantissa = f & 0x3F;
  140. if (exponent == 0)
  141. return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 64.0f);
  142. if (exponent < 31)
  143. return powf(2.0f, exponent - 15) * (1.0f + mantissa / 64.0f);
  144. return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
  145. }
  146. float11 float32to11(float f)
  147. {
  148. const uint16 EXPONENT_BITS = 0x1F;
  149. const uint16 EXPONENT_SHIFT = 6;
  150. const uint16 EXPONENT_BIAS = 15;
  151. const uint16 MANTISSA_BITS = 0x3F;
  152. const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
  153. const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
  154. uint32 u = asuint32(f);
  155. if (u & 0x80000000)
  156. return 0; // Negative values go to 0.
  157. // Map exponent to the range [-127,128]
  158. int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
  159. uint32 mantissa = u & 0x007FFFFF;
  160. if (exponent > 15) // Infinity or NaN
  161. return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
  162. else if (exponent <= -15)
  163. return 0;
  164. exponent += EXPONENT_BIAS;
  165. return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
  166. }
  167. float float10to32(float10 f)
  168. {
  169. uint16 exponent = f >> 5;
  170. uint16 mantissa = f & 0x1F;
  171. if (exponent == 0)
  172. return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 32.0f);
  173. if (exponent < 31)
  174. return powf(2.0f, exponent - 15) * (1.0f + mantissa / 32.0f);
  175. return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
  176. }
  177. float10 float32to10(float f)
  178. {
  179. const uint16 EXPONENT_BITS = 0x1F;
  180. const uint16 EXPONENT_SHIFT = 5;
  181. const uint16 EXPONENT_BIAS = 15;
  182. const uint16 MANTISSA_BITS = 0x1F;
  183. const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
  184. const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
  185. uint32 u = asuint32(f);
  186. if (u & 0x80000000)
  187. return 0; // Negative values go to 0.
  188. // Map exponent to the range [-127,128]
  189. int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
  190. uint32 mantissa = u & 0x007FFFFF;
  191. if (exponent > 15) // Infinity or NaN
  192. return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
  193. else if (exponent <= -15)
  194. return 0;
  195. exponent += EXPONENT_BIAS;
  196. return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
  197. }
  198. } // love