123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- /**
- * Copyright (c) 2006-2024 LOVE Development Team
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- **/
#include "floattypes.h"

#include <cmath>
#include <cstring>
#include <limits>
- namespace love
- {
- // Code from ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf
- static bool halfInitialized = false;
- // tables for half -> float conversions
- static uint32 mantissatable[2048];
- static uint16 offsettable[64];
- static uint32 exponenttable[64];
- // tables for float -> half conversions
- static uint16 basetable[512];
- static uint8 shifttable[512];
- static uint32 convertMantissa(uint32 i)
- {
- uint32 m = i << 13; // Zero pad mantissa bits
- uint32 e = 0; // Zero exponent
- while (!(m & 0x00800000)) // While not normalized
- {
- e -= 0x00800000; // Decrement exponent (1<<23)
- m <<= 1; // Shift mantissa
- }
- m &= ~(0x00800000); // Clear leading 1 bit
- e += 0x38800000; // Adjust bias ((127-14)<<23)
- return m | e; // Return combined number
- }
- void float16Init()
- {
- if (halfInitialized)
- return;
- halfInitialized = true;
- // tables for float16 -> float32 conversions.
- mantissatable[0] = 0;
- for (uint32 i = 1; i < 1024; i++)
- mantissatable[i] = convertMantissa(i);
- for (uint32 i = 1024; i < 2048; i++)
- mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
- exponenttable[0] = 0;
- exponenttable[32] = 0x80000000;
- for (uint32 i = 0; i < 31; i++)
- exponenttable[i] = i << 23;
- for (uint32 i = 33; i < 63; i++)
- exponenttable[i] = 0x80000000 + ((i - 32) << 23);
- exponenttable[31] = 0x47800000;
- exponenttable[63] = 0xC7800000;
- for (int i = 0; i < 64; i++)
- {
- if (i == 0 || i == 32)
- offsettable[i] = 0;
- else
- offsettable[i] = 1024;
- }
- // tables for float32 -> float16 conversions.
- for (uint32 i = 0; i < 256; i++)
- {
- int e = (int) i - 127;
- if (e < -24) // Very small numbers map to zero
- {
- basetable[i | 0x000] = 0x0000;
- basetable[i | 0x100] = 0x8000;
- shifttable[i | 0x000] = 24;
- shifttable[i | 0x100] = 24;
- }
- else if (e < -14) // Small numbers map to denorms
- {
- basetable[i | 0x000] = (0x0400 >> (-e - 14));
- basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
- shifttable[i | 0x000] = -e - 1;
- shifttable[i | 0x100] = -e - 1;
- }
- else if (e <= 15) // Normal numbers just lose precision
- {
- basetable[i | 0x000] = ((e + 15) << 10);
- basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
- shifttable[i | 0x000] = 13;
- shifttable[i | 0x100] = 13;
- }
- else if (e < 128) // Large numbers map to Infinity
- {
- basetable[i | 0x000] = 0x7C00;
- basetable[i | 0x100] = 0xFC00;
- shifttable[i | 0x000] = 24;
- shifttable[i | 0x100] = 24;
- }
- else // Infinity and NaN's stay Infinity and NaN's
- {
- basetable[i | 0x000] = 0x7C00;
- basetable[i | 0x100] = 0xFC00;
- shifttable[i | 0x000] = 13;
- shifttable[i | 0x100] = 13;
- }
- }
- }
- static inline uint32 asuint32(float f)
- {
- union { float f; uint32 u; } conv;
- conv.f = f;
- return conv.u;
- }
- static inline float asfloat32(uint32 u)
- {
- union { float f; uint32 u; } conv;
- conv.u = u;
- return conv.f;
- }
- float float16to32(float16 f)
- {
- return asfloat32(mantissatable[offsettable[f >> 10] + (f & 0x3FF)] + exponenttable[f >> 10]);
- }
- float16 float32to16(float f)
- {
- uint32 u = asuint32(f);
- return basetable[(u >> 23) & 0x1FF] + ((u & 0x007FFFFF) >> shifttable[(u >> 23) & 0x1FF]);
- }
- // Adapted from https://stackoverflow.com/questions/41532085/how-to-pack-unpack-11-and-10-bit-floats-in-javascript-for-webgl2
- float float11to32(float11 f)
- {
- uint16 exponent = f >> 6;
- uint16 mantissa = f & 0x3F;
- if (exponent == 0)
- return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 64.0f);
- if (exponent < 31)
- return powf(2.0f, exponent - 15) * (1.0f + mantissa / 64.0f);
- return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
- }
- float11 float32to11(float f)
- {
- const uint16 EXPONENT_BITS = 0x1F;
- const uint16 EXPONENT_SHIFT = 6;
- const uint16 EXPONENT_BIAS = 15;
- const uint16 MANTISSA_BITS = 0x3F;
- const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
- const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
- uint32 u = asuint32(f);
- if (u & 0x80000000)
- return 0; // Negative values go to 0.
- // Map exponent to the range [-127,128]
- int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
- uint32 mantissa = u & 0x007FFFFF;
- if (exponent > 15) // Infinity or NaN
- return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
- else if (exponent <= -15)
- return 0;
- exponent += EXPONENT_BIAS;
- return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
- }
- float float10to32(float10 f)
- {
- uint16 exponent = f >> 5;
- uint16 mantissa = f & 0x1F;
- if (exponent == 0)
- return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 32.0f);
- if (exponent < 31)
- return powf(2.0f, exponent - 15) * (1.0f + mantissa / 32.0f);
- return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
- }
- float10 float32to10(float f)
- {
- const uint16 EXPONENT_BITS = 0x1F;
- const uint16 EXPONENT_SHIFT = 5;
- const uint16 EXPONENT_BIAS = 15;
- const uint16 MANTISSA_BITS = 0x1F;
- const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
- const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
- uint32 u = asuint32(f);
- if (u & 0x80000000)
- return 0; // Negative values go to 0.
- // Map exponent to the range [-127,128]
- int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
- uint32 mantissa = u & 0x007FFFFF;
- if (exponent > 15) // Infinity or NaN
- return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
- else if (exponent <= -15)
- return 0;
- exponent += EXPONENT_BIAS;
- return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
- }
- } // love
|