hex_float.h 42 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174
  1. // Copyright (c) 2015-2016 The Khronos Group Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
  15. #define LIBSPIRV_UTIL_HEX_FLOAT_H_
  16. #include <cassert>
  17. #include <cctype>
  18. #include <cmath>
  19. #include <cstdint>
  20. #include <iomanip>
  21. #include <limits>
  22. #include <sstream>
  23. #include "bitutils.h"
  24. namespace spvutils {
  25. class Float16 {
  26. public:
  27. Float16(uint16_t v) : val(v) {}
  28. Float16() {}
  29. static bool isNan(const Float16& val) {
  30. return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
  31. }
  32. // Returns true if the given value is any kind of infinity.
  33. static bool isInfinity(const Float16& val) {
  34. return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
  35. }
  36. Float16(const Float16& other) { val = other.val; }
  37. uint16_t get_value() const { return val; }
  38. // Returns the maximum normal value.
  39. static Float16 max() { return Float16(0x7bff); }
  40. // Returns the lowest normal value.
  41. static Float16 lowest() { return Float16(0xfbff); }
  42. private:
  43. uint16_t val;
  44. };
  45. class FloatE5M2 {
  46. public:
  47. FloatE5M2(uint8_t v) : val(v) {}
  48. FloatE5M2() {}
  49. static bool isNan(const FloatE5M2& val) {
  50. return ((val.val & 0x7C) == 0x7C) && ((val.val & 0x3) != 0);
  51. }
  52. // Returns true if the given value is any kind of infinity.
  53. static bool isInfinity(const FloatE5M2& val) {
  54. return ((val.val & 0x7C) == 0x7C) && ((val.val & 0x3) == 0);
  55. }
  56. FloatE5M2(const FloatE5M2& other) { val = other.val; }
  57. uint8_t get_value() const { return val; }
  58. // Returns the maximum normal value.
  59. static FloatE5M2 max() { return FloatE5M2(0x7B); }
  60. // Returns the lowest normal value.
  61. static FloatE5M2 lowest() { return FloatE5M2(0xFB); }
  62. private:
  63. uint8_t val;
  64. };
  65. class FloatE4M3 {
  66. public:
  67. FloatE4M3(uint8_t v) : val(v) {}
  68. FloatE4M3() {}
  69. static bool isNan(const FloatE4M3& val) {
  70. return (val.val & 0x7F) == 0x7F;
  71. }
  72. // Returns true if the given value is any kind of infinity.
  73. static bool isInfinity(const FloatE4M3&) {
  74. return false;
  75. }
  76. FloatE4M3(const FloatE4M3& other) { val = other.val; }
  77. uint8_t get_value() const { return val; }
  78. // Returns the maximum normal value.
  79. static FloatE4M3 max() { return FloatE4M3(0x7E); }
  80. // Returns the lowest normal value.
  81. static FloatE4M3 lowest() { return FloatE4M3(0xFE); }
  82. private:
  83. uint8_t val;
  84. };
  85. // To specialize this type, you must override uint_type to define
  86. // an unsigned integer that can fit your floating point type.
  87. // You must also add a isNan function that returns true if
  88. // a value is Nan.
  89. template <typename T>
  90. struct FloatProxyTraits {
  91. typedef void uint_type;
  92. };
  93. template <>
  94. struct FloatProxyTraits<float> {
  95. typedef uint32_t uint_type;
  96. static bool isNan(float f) { return std::isnan(f); }
  97. // Returns true if the given value is any kind of infinity.
  98. static bool isInfinity(float f) { return std::isinf(f); }
  99. // Returns the maximum normal value.
  100. static float max() { return std::numeric_limits<float>::max(); }
  101. // Returns the lowest normal value.
  102. static float lowest() { return std::numeric_limits<float>::lowest(); }
  103. };
  104. template <>
  105. struct FloatProxyTraits<double> {
  106. typedef uint64_t uint_type;
  107. static bool isNan(double f) { return std::isnan(f); }
  108. // Returns true if the given value is any kind of infinity.
  109. static bool isInfinity(double f) { return std::isinf(f); }
  110. // Returns the maximum normal value.
  111. static double max() { return std::numeric_limits<double>::max(); }
  112. // Returns the lowest normal value.
  113. static double lowest() { return std::numeric_limits<double>::lowest(); }
  114. };
  115. template <>
  116. struct FloatProxyTraits<Float16> {
  117. typedef uint16_t uint_type;
  118. static bool isNan(Float16 f) { return Float16::isNan(f); }
  119. // Returns true if the given value is any kind of infinity.
  120. static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
  121. // Returns the maximum normal value.
  122. static Float16 max() { return Float16::max(); }
  123. // Returns the lowest normal value.
  124. static Float16 lowest() { return Float16::lowest(); }
  125. };
  126. template <>
  127. struct FloatProxyTraits<FloatE5M2> {
  128. typedef uint8_t uint_type;
  129. static bool isNan(FloatE5M2 f) { return FloatE5M2::isNan(f); }
  130. // Returns true if the given value is any kind of infinity.
  131. static bool isInfinity(FloatE5M2 f) { return FloatE5M2::isInfinity(f); }
  132. // Returns the maximum normal value.
  133. static FloatE5M2 max() { return FloatE5M2::max(); }
  134. // Returns the lowest normal value.
  135. static FloatE5M2 lowest() { return FloatE5M2::lowest(); }
  136. };
  137. template <>
  138. struct FloatProxyTraits<FloatE4M3> {
  139. typedef uint8_t uint_type;
  140. static bool isNan(FloatE4M3 f) { return FloatE4M3::isNan(f); }
  141. // Returns true if the given value is any kind of infinity.
  142. static bool isInfinity(FloatE4M3 f) { return FloatE4M3::isInfinity(f); }
  143. // Returns the maximum normal value.
  144. static FloatE4M3 max() { return FloatE4M3::max(); }
  145. // Returns the lowest normal value.
  146. static FloatE4M3 lowest() { return FloatE4M3::lowest(); }
  147. };
  148. // Since copying a floating point number (especially if it is NaN)
  149. // does not guarantee that bits are preserved, this class lets us
  150. // store the type and use it as a float when necessary.
  151. template <typename T>
  152. class FloatProxy {
  153. public:
  154. typedef typename FloatProxyTraits<T>::uint_type uint_type;
  155. // Since this is to act similar to the normal floats,
  156. // do not initialize the data by default.
  157. FloatProxy() {}
  158. // Intentionally non-explicit. This is a proxy type so
  159. // implicit conversions allow us to use it more transparently.
  160. FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); }
  161. // Intentionally non-explicit. This is a proxy type so
  162. // implicit conversions allow us to use it more transparently.
  163. FloatProxy(uint_type val) { data_ = val; }
  164. // This is helpful to have and is guaranteed not to stomp bits.
  165. FloatProxy<T> operator-() const {
  166. return static_cast<uint_type>(data_ ^
  167. (uint_type(0x1) << (sizeof(T) * 8 - 1)));
  168. }
  169. // Returns the data as a floating point value.
  170. T getAsFloat() const { return BitwiseCast<T>(data_); }
  171. // Returns the raw data.
  172. uint_type data() const { return data_; }
  173. // Returns true if the value represents any type of NaN.
  174. bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); }
  175. // Returns true if the value represents any type of infinity.
  176. bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); }
  177. // Returns the maximum normal value.
  178. static FloatProxy<T> max() {
  179. return FloatProxy<T>(FloatProxyTraits<T>::max());
  180. }
  181. // Returns the lowest normal value.
  182. static FloatProxy<T> lowest() {
  183. return FloatProxy<T>(FloatProxyTraits<T>::lowest());
  184. }
  185. private:
  186. uint_type data_;
  187. };
  188. template <typename T>
  189. bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) {
  190. return first.data() == second.data();
  191. }
  192. // Reads a FloatProxy value as a normal float from a stream.
  193. template <typename T>
  194. std::istream& operator>>(std::istream& is, FloatProxy<T>& value) {
  195. T float_val;
  196. is >> float_val;
  197. value = FloatProxy<T>(float_val);
  198. return is;
  199. }
  200. // This is an example traits. It is not meant to be used in practice, but will
  201. // be the default for any non-specialized type.
  202. template <typename T>
  203. struct HexFloatTraits {
  204. // Integer type that can store this hex-float.
  205. typedef void uint_type;
  206. // Signed integer type that can store this hex-float.
  207. typedef void int_type;
  208. // The numerical type that this HexFloat represents.
  209. typedef void underlying_type;
  210. // The type needed to construct the underlying type.
  211. typedef void native_type;
  212. // The number of bits that are actually relevant in the uint_type.
  213. // This allows us to deal with, for example, 24-bit values in a 32-bit
  214. // integer.
  215. static const uint32_t num_used_bits = 0;
  216. // Number of bits that represent the exponent.
  217. static const uint32_t num_exponent_bits = 0;
  218. // Number of bits that represent the fractional part.
  219. static const uint32_t num_fraction_bits = 0;
  220. // The bias of the exponent. (How much we need to subtract from the stored
  221. // value to get the correct value.)
  222. static const uint32_t exponent_bias = 0;
  223. static bool supportsInfinity() { return true; }
  224. };
  225. // Traits for IEEE float.
  226. // 1 sign bit, 8 exponent bits, 23 fractional bits.
  227. template <>
  228. struct HexFloatTraits<FloatProxy<float>> {
  229. typedef uint32_t uint_type;
  230. typedef int32_t int_type;
  231. typedef FloatProxy<float> underlying_type;
  232. typedef float native_type;
  233. static const uint_type num_used_bits = 32;
  234. static const uint_type num_exponent_bits = 8;
  235. static const uint_type num_fraction_bits = 23;
  236. static const uint_type exponent_bias = 127;
  237. static bool supportsInfinity() { return true; }
  238. };
  239. // Traits for IEEE double.
  240. // 1 sign bit, 11 exponent bits, 52 fractional bits.
  241. template <>
  242. struct HexFloatTraits<FloatProxy<double>> {
  243. typedef uint64_t uint_type;
  244. typedef int64_t int_type;
  245. typedef FloatProxy<double> underlying_type;
  246. typedef double native_type;
  247. static const uint_type num_used_bits = 64;
  248. static const uint_type num_exponent_bits = 11;
  249. static const uint_type num_fraction_bits = 52;
  250. static const uint_type exponent_bias = 1023;
  251. static bool supportsInfinity() { return true; }
  252. };
  253. // Traits for IEEE half.
  254. // 1 sign bit, 5 exponent bits, 10 fractional bits.
  255. template <>
  256. struct HexFloatTraits<FloatProxy<Float16>> {
  257. typedef uint16_t uint_type;
  258. typedef int16_t int_type;
  259. typedef uint16_t underlying_type;
  260. typedef uint16_t native_type;
  261. static const uint_type num_used_bits = 16;
  262. static const uint_type num_exponent_bits = 5;
  263. static const uint_type num_fraction_bits = 10;
  264. static const uint_type exponent_bias = 15;
  265. static bool supportsInfinity() { return true; }
  266. };
  267. template <>
  268. struct HexFloatTraits<FloatProxy<FloatE5M2>> {
  269. typedef uint8_t uint_type;
  270. typedef int8_t int_type;
  271. typedef uint8_t underlying_type;
  272. typedef uint8_t native_type;
  273. static const uint_type num_used_bits = 8;
  274. static const uint_type num_exponent_bits = 5;
  275. static const uint_type num_fraction_bits = 2;
  276. static const uint_type exponent_bias = 15;
  277. static bool supportsInfinity() { return true; }
  278. };
  279. template <>
  280. struct HexFloatTraits<FloatProxy<FloatE4M3>> {
  281. typedef uint8_t uint_type;
  282. typedef int8_t int_type;
  283. typedef uint8_t underlying_type;
  284. typedef uint8_t native_type;
  285. static const uint_type num_used_bits = 8;
  286. static const uint_type num_exponent_bits = 4;
  287. static const uint_type num_fraction_bits = 3;
  288. static const uint_type exponent_bias = 7;
  289. static bool supportsInfinity() { return false; }
  290. };
  291. enum round_direction {
  292. kRoundToZero,
  293. kRoundToNearestEven,
  294. kRoundToPositiveInfinity,
  295. kRoundToNegativeInfinity
  296. };
  297. // Template class that houses a floating pointer number.
  298. // It exposes a number of constants based on the provided traits to
  299. // assist in interpreting the bits of the value.
  300. template <typename T, typename Traits = HexFloatTraits<T>>
  301. class HexFloat {
  302. public:
  303. typedef typename Traits::uint_type uint_type;
  304. typedef typename Traits::int_type int_type;
  305. typedef typename Traits::underlying_type underlying_type;
  306. typedef typename Traits::native_type native_type;
  307. using Traits_T = Traits;
  308. explicit HexFloat(T f) : value_(f) {}
  309. T value() const { return value_; }
  310. void set_value(T f) { value_ = f; }
  311. // These are all written like this because it is convenient to have
  312. // compile-time constants for all of these values.
  313. // Pass-through values to save typing.
  314. static const uint32_t num_used_bits = Traits::num_used_bits;
  315. static const uint32_t exponent_bias = Traits::exponent_bias;
  316. static const uint32_t num_exponent_bits = Traits::num_exponent_bits;
  317. static const uint32_t num_fraction_bits = Traits::num_fraction_bits;
  318. // Number of bits to shift left to set the highest relevant bit.
  319. static const uint32_t top_bit_left_shift = num_used_bits - 1;
  320. // How many nibbles (hex characters) the fractional part takes up.
  321. static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4;
  322. // If the fractional part does not fit evenly into a hex character (4-bits)
  323. // then we have to left-shift to get rid of leading 0s. This is the amount
  324. // we have to shift (might be 0).
  325. static const uint32_t num_overflow_bits =
  326. fraction_nibbles * 4 - num_fraction_bits;
  327. // The representation of the fraction, not the actual bits. This
  328. // includes the leading bit that is usually implicit.
  329. static const uint_type fraction_represent_mask =
  330. spvutils::SetBits<uint_type, 0,
  331. num_fraction_bits + num_overflow_bits>::get;
  332. // The topmost bit in the nibble-aligned fraction.
  333. static const uint_type fraction_top_bit =
  334. uint_type(1) << (num_fraction_bits + num_overflow_bits - 1);
  335. // The least significant bit in the exponent, which is also the bit
  336. // immediately to the left of the significand.
  337. static const uint_type first_exponent_bit = uint_type(1)
  338. << (num_fraction_bits);
  339. // The mask for the encoded fraction. It does not include the
  340. // implicit bit.
  341. static const uint_type fraction_encode_mask =
  342. spvutils::SetBits<uint_type, 0, num_fraction_bits>::get;
  343. // The bit that is used as a sign.
  344. static const uint_type sign_mask = uint_type(1) << top_bit_left_shift;
  345. // The bits that represent the exponent.
  346. static const uint_type exponent_mask =
  347. spvutils::SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get;
  348. // How far left the exponent is shifted.
  349. static const uint32_t exponent_left_shift = num_fraction_bits;
  350. // How far from the right edge the fraction is shifted.
  351. static const uint32_t fraction_right_shift =
  352. static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits;
  353. // The maximum representable unbiased exponent.
  354. static const int_type max_exponent =
  355. (exponent_mask >> num_fraction_bits) - exponent_bias;
  356. // The minimum representable exponent for normalized numbers.
  357. static const int_type min_exponent = -static_cast<int_type>(exponent_bias);
  358. // Returns the bits associated with the value.
  359. uint_type getBits() const { return spvutils::BitwiseCast<uint_type>(value_); }
  360. // Returns the bits associated with the value, without the leading sign bit.
  361. uint_type getUnsignedBits() const {
  362. return static_cast<uint_type>(spvutils::BitwiseCast<uint_type>(value_) &
  363. ~sign_mask);
  364. }
  365. // Returns the bits associated with the exponent, shifted to start at the
  366. // lsb of the type.
  367. const uint_type getExponentBits() const {
  368. return static_cast<uint_type>((getBits() & exponent_mask) >>
  369. num_fraction_bits);
  370. }
  371. // Returns the exponent in unbiased form. This is the exponent in the
  372. // human-friendly form.
  373. const int_type getUnbiasedExponent() const {
  374. return static_cast<int_type>(getExponentBits() - exponent_bias);
  375. }
  376. // Returns just the significand bits from the value.
  377. const uint_type getSignificandBits() const {
  378. return getBits() & fraction_encode_mask;
  379. }
  380. // If the number was normalized, returns the unbiased exponent.
  381. // If the number was denormal, normalize the exponent first.
  382. const int_type getUnbiasedNormalizedExponent() const {
  383. if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0
  384. return 0;
  385. }
  386. int_type exp = getUnbiasedExponent();
  387. if (exp == min_exponent) { // We are in denorm land.
  388. uint_type significand_bits = getSignificandBits();
  389. while ((significand_bits & (first_exponent_bit >> 1)) == 0) {
  390. significand_bits = static_cast<uint_type>(significand_bits << 1);
  391. exp = static_cast<int_type>(exp - 1);
  392. }
  393. significand_bits &= fraction_encode_mask;
  394. }
  395. return exp;
  396. }
  397. // Returns the signficand after it has been normalized.
  398. const uint_type getNormalizedSignificand() const {
  399. int_type unbiased_exponent = getUnbiasedNormalizedExponent();
  400. uint_type significand = getSignificandBits();
  401. for (int_type i = unbiased_exponent; i <= min_exponent; ++i) {
  402. significand = static_cast<uint_type>(significand << 1);
  403. }
  404. significand &= fraction_encode_mask;
  405. return significand;
  406. }
  407. // Returns true if this number represents a negative value.
  408. bool isNegative() const { return (getBits() & sign_mask) != 0; }
  409. // Sets this HexFloat from the individual components.
  410. // Note this assumes EVERY significand is normalized, and has an implicit
  411. // leading one. This means that the only way that this method will set 0,
  412. // is if you set a number so denormalized that it underflows.
  413. // Do not use this method with raw bits extracted from a subnormal number,
  414. // since subnormals do not have an implicit leading 1 in the significand.
  415. // The significand is also expected to be in the
  416. // lowest-most num_fraction_bits of the uint_type.
  417. // The exponent is expected to be unbiased, meaning an exponent of
  418. // 0 actually means 0.
  419. // If underflow_round_up is set, then on underflow, if a number is non-0
  420. // and would underflow, we round up to the smallest denorm.
  421. void setFromSignUnbiasedExponentAndNormalizedSignificand(
  422. bool negative, int_type exponent, uint_type significand,
  423. bool round_denorm_up) {
  424. bool significand_is_zero = significand == 0;
  425. if (exponent <= min_exponent) {
  426. // If this was denormalized, then we have to shift the bit on, meaning
  427. // the significand is not zero.
  428. significand_is_zero = false;
  429. significand |= first_exponent_bit;
  430. significand = static_cast<uint_type>(significand >> 1);
  431. }
  432. while (exponent < min_exponent) {
  433. significand = static_cast<uint_type>(significand >> 1);
  434. ++exponent;
  435. }
  436. if (exponent == min_exponent) {
  437. if (significand == 0 && !significand_is_zero && round_denorm_up) {
  438. significand = static_cast<uint_type>(0x1);
  439. }
  440. }
  441. uint_type new_value = 0;
  442. if (negative) {
  443. new_value = static_cast<uint_type>(new_value | sign_mask);
  444. }
  445. exponent = static_cast<int_type>(exponent + exponent_bias);
  446. assert(exponent >= 0);
  447. // put it all together
  448. exponent = static_cast<uint_type>((exponent << exponent_left_shift) &
  449. exponent_mask);
  450. significand = static_cast<uint_type>(significand & fraction_encode_mask);
  451. new_value = static_cast<uint_type>(new_value | (exponent | significand));
  452. value_ = BitwiseCast<T>(new_value);
  453. }
  454. // Increments the significand of this number by the given amount.
  455. // If this would spill the significand into the implicit bit,
  456. // carry is set to true and the significand is shifted to fit into
  457. // the correct location, otherwise carry is set to false.
  458. // All significands and to_increment are assumed to be within the bounds
  459. // for a valid significand.
  460. static uint_type incrementSignificand(uint_type significand,
  461. uint_type to_increment, bool* carry) {
  462. significand = static_cast<uint_type>(significand + to_increment);
  463. *carry = false;
  464. if (significand & first_exponent_bit) {
  465. *carry = true;
  466. // The implicit 1-bit will have carried, so we should zero-out the
  467. // top bit and shift back.
  468. significand = static_cast<uint_type>(significand & ~first_exponent_bit);
  469. significand = static_cast<uint_type>(significand >> 1);
  470. }
  471. return significand;
  472. }
  473. // These exist because MSVC throws warnings on negative right-shifts
  474. // even if they are not going to be executed. Eg:
  475. // constant_number < 0? 0: constant_number
  476. // These convert the negative left-shifts into right shifts.
  477. template <typename int_type>
  478. uint_type negatable_left_shift(int_type N, uint_type val)
  479. {
  480. if(N >= 0)
  481. return val << N;
  482. return val >> -N;
  483. }
  484. template <typename int_type>
  485. uint_type negatable_right_shift(int_type N, uint_type val)
  486. {
  487. if(N >= 0)
  488. return val >> N;
  489. return val << -N;
  490. }
  491. // Returns the significand, rounded to fit in a significand in
  492. // other_T. This is shifted so that the most significant
  493. // bit of the rounded number lines up with the most significant bit
  494. // of the returned significand.
  495. template <typename other_T>
  496. typename other_T::uint_type getRoundedNormalizedSignificand(
  497. round_direction dir, bool* carry_bit) {
  498. typedef typename other_T::uint_type other_uint_type;
  499. static const int_type num_throwaway_bits =
  500. static_cast<int_type>(num_fraction_bits) -
  501. static_cast<int_type>(other_T::num_fraction_bits);
  502. static const uint_type last_significant_bit =
  503. (num_throwaway_bits < 0)
  504. ? 0
  505. : negatable_left_shift(num_throwaway_bits, 1u);
  506. static const uint_type first_rounded_bit =
  507. (num_throwaway_bits < 1)
  508. ? 0
  509. : negatable_left_shift(num_throwaway_bits - 1, 1u);
  510. static const uint_type throwaway_mask_bits =
  511. num_throwaway_bits > 0 ? num_throwaway_bits : 0;
  512. static const uint_type throwaway_mask =
  513. spvutils::SetBits<uint_type, 0, throwaway_mask_bits>::get;
  514. *carry_bit = false;
  515. other_uint_type out_val = 0;
  516. uint_type significand = getNormalizedSignificand();
  517. // If we are up-casting, then we just have to shift to the right location.
  518. if (num_throwaway_bits <= 0) {
  519. out_val = static_cast<other_uint_type>(significand);
  520. uint_type shift_amount = static_cast<uint_type>(-num_throwaway_bits);
  521. out_val = static_cast<other_uint_type>(out_val << shift_amount);
  522. return out_val;
  523. }
  524. // If every non-representable bit is 0, then we don't have any casting to
  525. // do.
  526. if ((significand & throwaway_mask) == 0) {
  527. return static_cast<other_uint_type>(
  528. negatable_right_shift(num_throwaway_bits, significand));
  529. }
  530. bool round_away_from_zero = false;
  531. // We actually have to narrow the significand here, so we have to follow the
  532. // rounding rules.
  533. switch (dir) {
  534. case kRoundToZero:
  535. break;
  536. case kRoundToPositiveInfinity:
  537. round_away_from_zero = !isNegative();
  538. break;
  539. case kRoundToNegativeInfinity:
  540. round_away_from_zero = isNegative();
  541. break;
  542. case kRoundToNearestEven:
  543. // Have to round down, round bit is 0
  544. if ((first_rounded_bit & significand) == 0) {
  545. break;
  546. }
  547. if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) {
  548. // If any subsequent bit of the rounded portion is non-0 then we round
  549. // up.
  550. round_away_from_zero = true;
  551. break;
  552. }
  553. // We are exactly half-way between 2 numbers, pick even.
  554. if ((significand & last_significant_bit) != 0) {
  555. // 1 for our last bit, round up.
  556. round_away_from_zero = true;
  557. break;
  558. }
  559. break;
  560. }
  561. if (round_away_from_zero) {
  562. return static_cast<other_uint_type>(
  563. negatable_right_shift(num_throwaway_bits, incrementSignificand(
  564. significand, last_significant_bit, carry_bit)));
  565. } else {
  566. return static_cast<other_uint_type>(
  567. negatable_right_shift(num_throwaway_bits, significand));
  568. }
  569. }
  570. // Casts this value to another HexFloat. If the cast is widening,
  571. // then round_dir is ignored. If the cast is narrowing, then
  572. // the result is rounded in the direction specified.
  573. // This number will retain Nan and Inf values.
  574. // It will also saturate to Inf if the number overflows, and
  575. // underflow to (0 or min depending on rounding) if the number underflows.
  576. template <typename other_T>
  577. void castTo(other_T& other, round_direction round_dir) {
  578. other = other_T(static_cast<typename other_T::native_type>(0));
  579. bool negate = isNegative();
  580. if (getUnsignedBits() == 0) {
  581. if (negate) {
  582. other.set_value(-other.value());
  583. }
  584. return;
  585. }
  586. uint_type significand = getSignificandBits();
  587. bool carried = false;
  588. typename other_T::uint_type rounded_significand =
  589. getRoundedNormalizedSignificand<other_T>(round_dir, &carried);
  590. int_type exponent = getUnbiasedExponent();
  591. if (exponent == min_exponent) {
  592. // If we are denormal, normalize the exponent, so that we can encode
  593. // easily.
  594. exponent = static_cast<int_type>(exponent + 1);
  595. for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0;
  596. check_bit = static_cast<uint_type>(check_bit >> 1)) {
  597. exponent = static_cast<int_type>(exponent - 1);
  598. if (check_bit & significand) break;
  599. }
  600. }
  601. bool is_nan =
  602. (getBits() & exponent_mask) == exponent_mask && significand != 0;
  603. bool is_inf =
  604. !is_nan &&
  605. (((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) && other_T::Traits_T::supportsInfinity()) ||
  606. ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias + 1) && !other_T::Traits_T::supportsInfinity()) ||
  607. (significand == 0 && (getBits() & exponent_mask) == exponent_mask));
  608. // If we are Nan or Inf we should pass that through.
  609. if (is_inf) {
  610. if (other_T::Traits_T::supportsInfinity()) {
  611. // encode as +/-inf
  612. other.set_value(BitwiseCast<typename other_T::underlying_type>(
  613. static_cast<typename other_T::uint_type>(
  614. (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
  615. } else {
  616. // encode as +/-nan
  617. other.set_value(BitwiseCast<typename other_T::underlying_type>(
  618. static_cast<typename other_T::uint_type>(negate ? ~0 : ~other_T::sign_mask)));
  619. }
  620. return;
  621. }
  622. if (is_nan) {
  623. typename other_T::uint_type shifted_significand;
  624. shifted_significand = static_cast<typename other_T::uint_type>(
  625. negatable_left_shift(
  626. static_cast<int_type>(other_T::num_fraction_bits) -
  627. static_cast<int_type>(num_fraction_bits), significand));
  628. // We are some sort of Nan. We try to keep the bit-pattern of the Nan
  629. // as close as possible. If we had to shift off bits so we are 0, then we
  630. // just set the last bit.
  631. other.set_value(BitwiseCast<typename other_T::underlying_type>(
  632. static_cast<typename other_T::uint_type>(
  633. (negate ? other_T::sign_mask : 0) | other_T::exponent_mask |
  634. (shifted_significand == 0 ? 0x1 : shifted_significand))));
  635. return;
  636. }
  637. bool round_underflow_up =
  638. isNegative() ? round_dir == kRoundToNegativeInfinity
  639. : round_dir == kRoundToPositiveInfinity;
  640. typedef typename other_T::int_type other_int_type;
  641. // setFromSignUnbiasedExponentAndNormalizedSignificand will
  642. // zero out any underflowing value (but retain the sign).
  643. other.setFromSignUnbiasedExponentAndNormalizedSignificand(
  644. negate, static_cast<other_int_type>(exponent), rounded_significand,
  645. round_underflow_up);
  646. return;
  647. }
  648. private:
  649. T value_;
  650. static_assert(num_used_bits ==
  651. Traits::num_exponent_bits + Traits::num_fraction_bits + 1,
  652. "The number of bits do not fit");
  653. static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match");
  654. };
  655. // Returns 4 bits represented by the hex character.
  656. inline uint8_t get_nibble_from_character(int character) {
  657. const char* dec = "0123456789";
  658. const char* lower = "abcdef";
  659. const char* upper = "ABCDEF";
  660. const char* p = nullptr;
  661. if ((p = strchr(dec, character))) {
  662. return static_cast<uint8_t>(p - dec);
  663. } else if ((p = strchr(lower, character))) {
  664. return static_cast<uint8_t>(p - lower + 0xa);
  665. } else if ((p = strchr(upper, character))) {
  666. return static_cast<uint8_t>(p - upper + 0xa);
  667. }
  668. assert(false && "This was called with a non-hex character");
  669. return 0;
  670. }
  671. // Outputs the given HexFloat to the stream.
  672. template <typename T, typename Traits>
  673. std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) {
  674. typedef HexFloat<T, Traits> HF;
  675. typedef typename HF::uint_type uint_type;
  676. typedef typename HF::int_type int_type;
  677. static_assert(HF::num_used_bits != 0,
  678. "num_used_bits must be non-zero for a valid float");
  679. static_assert(HF::num_exponent_bits != 0,
  680. "num_exponent_bits must be non-zero for a valid float");
  681. static_assert(HF::num_fraction_bits != 0,
  682. "num_fractin_bits must be non-zero for a valid float");
  683. const uint_type bits = spvutils::BitwiseCast<uint_type>(value.value());
  684. const char* const sign = (bits & HF::sign_mask) ? "-" : "";
  685. const uint_type exponent = static_cast<uint_type>(
  686. (bits & HF::exponent_mask) >> HF::num_fraction_bits);
  687. uint_type fraction = static_cast<uint_type>((bits & HF::fraction_encode_mask)
  688. << HF::num_overflow_bits);
  689. const bool is_zero = exponent == 0 && fraction == 0;
  690. const bool is_denorm = exponent == 0 && !is_zero;
  691. // exponent contains the biased exponent we have to convert it back into
  692. // the normal range.
  693. int_type int_exponent = static_cast<int_type>(exponent - HF::exponent_bias);
  694. // If the number is all zeros, then we actually have to NOT shift the
  695. // exponent.
  696. int_exponent = is_zero ? 0 : int_exponent;
  697. // If we are denorm, then start shifting, and decreasing the exponent until
  698. // our leading bit is 1.
  699. if (is_denorm) {
  700. while ((fraction & HF::fraction_top_bit) == 0) {
  701. fraction = static_cast<uint_type>(fraction << 1);
  702. int_exponent = static_cast<int_type>(int_exponent - 1);
  703. }
  704. // Since this is denormalized, we have to consume the leading 1 since it
  705. // will end up being implicit.
  706. fraction = static_cast<uint_type>(fraction << 1); // eat the leading 1
  707. fraction &= HF::fraction_represent_mask;
  708. }
  709. uint_type fraction_nibbles = HF::fraction_nibbles;
  710. // We do not have to display any trailing 0s, since this represents the
  711. // fractional part.
  712. while (fraction_nibbles > 0 && (fraction & 0xF) == 0) {
  713. // Shift off any trailing values;
  714. fraction = static_cast<uint_type>(fraction >> 4);
  715. --fraction_nibbles;
  716. }
  717. const auto saved_flags = os.flags();
  718. const auto saved_fill = os.fill();
  719. os << sign << "0x" << (is_zero ? '0' : '1');
  720. if (fraction_nibbles) {
  721. // Make sure to keep the leading 0s in place, since this is the fractional
  722. // part.
  723. os << "." << std::setw(static_cast<int>(fraction_nibbles))
  724. << std::setfill('0') << std::hex << fraction;
  725. }
  726. os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent;
  727. os.flags(saved_flags);
  728. os.fill(saved_fill);
  729. return os;
  730. }
  731. // Returns true if negate_value is true and the next character on the
  732. // input stream is a plus or minus sign. In that case we also set the fail bit
  733. // on the stream and set the value to the zero value for its type.
  734. template <typename T, typename Traits>
  735. inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value,
  736. HexFloat<T, Traits>& value) {
  737. if (negate_value) {
  738. auto next_char = is.peek();
  739. if (next_char == '-' || next_char == '+') {
  740. // Fail the parse. Emulate standard behaviour by setting the value to
  741. // the zero value, and set the fail bit on the stream.
  742. value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
  743. is.setstate(std::ios_base::failbit);
  744. return true;
  745. }
  746. }
  747. return false;
  748. }
  749. // Parses a floating point number from the given stream and stores it into the
  750. // value parameter.
  751. // If negate_value is true then the number may not have a leading minus or
  752. // plus, and if it successfully parses, then the number is negated before
  753. // being stored into the value parameter.
  754. // If the value cannot be correctly parsed or overflows the target floating
  755. // point type, then set the fail bit on the stream.
  756. // TODO(dneto): Promise C++11 standard behavior in how the value is set in
  757. // the error case, but only after all target platforms implement it correctly.
  758. // In particular, the Microsoft C++ runtime appears to be out of spec.
  759. template <typename T, typename Traits>
  760. inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value,
  761. HexFloat<T, Traits>& value) {
  762. if (RejectParseDueToLeadingSign(is, negate_value, value)) {
  763. return is;
  764. }
  765. T val;
  766. is >> val;
  767. if (negate_value) {
  768. val = -val;
  769. }
  770. value.set_value(val);
  771. // In the failure case, map -0.0 to 0.0.
  772. if (is.fail() && value.getUnsignedBits() == 0u) {
  773. value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
  774. }
  775. if (val.isInfinity()) {
  776. // Fail the parse. Emulate standard behaviour by setting the value to
  777. // the closest normal value, and set the fail bit on the stream.
  778. value.set_value((value.isNegative() || negate_value) ? T::lowest()
  779. : T::max());
  780. is.setstate(std::ios_base::failbit);
  781. }
  782. return is;
  783. }
  784. // Specialization of ParseNormalFloat for FloatProxy<Float16> values.
  785. // This will parse the float as it were a 32-bit floating point number,
  786. // and then round it down to fit into a Float16 value.
  787. // The number is rounded towards zero.
  788. // If negate_value is true then the number may not have a leading minus or
  789. // plus, and if it successfully parses, then the number is negated before
  790. // being stored into the value parameter.
  791. // If the value cannot be correctly parsed or overflows the target floating
  792. // point type, then set the fail bit on the stream.
  793. // TODO(dneto): Promise C++11 standard behavior in how the value is set in
  794. // the error case, but only after all target platforms implement it correctly.
  795. // In particular, the Microsoft C++ runtime appears to be out of spec.
  796. template <>
  797. inline std::istream&
  798. ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>(
  799. std::istream& is, bool negate_value,
  800. HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) {
  801. // First parse as a 32-bit float.
  802. HexFloat<FloatProxy<float>> float_val(0.0f);
  803. ParseNormalFloat(is, negate_value, float_val);
  804. // Then convert to 16-bit float, saturating at infinities, and
  805. // rounding toward zero.
  806. float_val.castTo(value, kRoundToZero);
  807. // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the
  808. // fail bit and set the lowest or highest value.
  809. if (Float16::isInfinity(value.value().getAsFloat())) {
  810. value.set_value(value.isNegative() ? Float16::lowest() : Float16::max());
  811. is.setstate(std::ios_base::failbit);
  812. }
  813. return is;
  814. }
  815. // Reads a HexFloat from the given stream.
  816. // If the float is not encoded as a hex-float then it will be parsed
  817. // as a regular float.
  818. // This may fail if your stream does not support at least one unget.
  819. // Nan values can be encoded with "0x1.<not zero>p+exponent_bias".
  820. // This would normally overflow a float and round to
  821. // infinity but this special pattern is the exact representation for a NaN,
  822. // and therefore is actually encoded as the correct NaN. To encode inf,
  823. // either 0x0p+exponent_bias can be specified or any exponent greater than
  824. // exponent_bias.
  825. // Examples using IEEE 32-bit float encoding.
  826. // 0x1.0p+128 (+inf)
  827. // -0x1.0p-128 (-inf)
  828. //
  829. // 0x1.1p+128 (+Nan)
  830. // -0x1.1p+128 (-Nan)
  831. //
  832. // 0x1p+129 (+inf)
  833. // -0x1p+129 (-inf)
  834. template <typename T, typename Traits>
  835. std::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) {
  836. using HF = HexFloat<T, Traits>;
  837. using uint_type = typename HF::uint_type;
  838. using int_type = typename HF::int_type;
  839. value.set_value(static_cast<typename HF::native_type>(0.f));
  840. if (is.flags() & std::ios::skipws) {
  841. // If the user wants to skip whitespace , then we should obey that.
  842. while (std::isspace(is.peek())) {
  843. is.get();
  844. }
  845. }
  846. auto next_char = is.peek();
  847. bool negate_value = false;
  848. if (next_char != '-' && next_char != '0') {
  849. return ParseNormalFloat(is, negate_value, value);
  850. }
  851. if (next_char == '-') {
  852. negate_value = true;
  853. is.get();
  854. next_char = is.peek();
  855. }
  856. if (next_char == '0') {
  857. is.get(); // We may have to unget this.
  858. auto maybe_hex_start = is.peek();
  859. if (maybe_hex_start != 'x' && maybe_hex_start != 'X') {
  860. is.unget();
  861. return ParseNormalFloat(is, negate_value, value);
  862. } else {
  863. is.get(); // Throw away the 'x';
  864. }
  865. } else {
  866. return ParseNormalFloat(is, negate_value, value);
  867. }
  868. // This "looks" like a hex-float so treat it as one.
  869. bool seen_p = false;
  870. bool seen_dot = false;
  871. uint_type fraction_index = 0;
  872. uint_type fraction = 0;
  873. int_type exponent = HF::exponent_bias;
  874. // Strip off leading zeros so we don't have to special-case them later.
  875. while ((next_char = is.peek()) == '0') {
  876. is.get();
  877. }
  878. bool is_denorm =
  879. true; // Assume denorm "representation" until we hear otherwise.
  880. // NB: This does not mean the value is actually denorm,
  881. // it just means that it was written 0.
  882. bool bits_written = false; // Stays false until we write a bit.
  883. while (!seen_p && !seen_dot) {
  884. // Handle characters that are left of the fractional part.
  885. if (next_char == '.') {
  886. seen_dot = true;
  887. } else if (next_char == 'p') {
  888. seen_p = true;
  889. } else if (::isxdigit(next_char)) {
  890. // We know this is not denormalized since we have stripped all leading
  891. // zeroes and we are not a ".".
  892. is_denorm = false;
  893. int number = get_nibble_from_character(next_char);
  894. for (int i = 0; i < 4; ++i, number <<= 1) {
  895. uint_type write_bit = (number & 0x8) ? 0x1 : 0x0;
  896. if (bits_written) {
  897. // If we are here the bits represented belong in the fractional
  898. // part of the float, and we have to adjust the exponent accordingly.
  899. fraction = static_cast<uint_type>(
  900. fraction |
  901. static_cast<uint_type>(
  902. write_bit << (HF::top_bit_left_shift - fraction_index++)));
  903. exponent = static_cast<int_type>(exponent + 1);
  904. }
  905. bits_written |= write_bit != 0;
  906. }
  907. } else {
  908. // We have not found our exponent yet, so we have to fail.
  909. is.setstate(std::ios::failbit);
  910. return is;
  911. }
  912. is.get();
  913. next_char = is.peek();
  914. }
  915. bits_written = false;
  916. while (seen_dot && !seen_p) {
  917. // Handle only fractional parts now.
  918. if (next_char == 'p') {
  919. seen_p = true;
  920. } else if (::isxdigit(next_char)) {
  921. int number = get_nibble_from_character(next_char);
  922. for (int i = 0; i < 4; ++i, number <<= 1) {
  923. uint_type write_bit = (number & 0x8) ? 0x01 : 0x00;
  924. bits_written |= write_bit != 0;
  925. if (is_denorm && !bits_written) {
  926. // Handle modifying the exponent here this way we can handle
  927. // an arbitrary number of hex values without overflowing our
  928. // integer.
  929. exponent = static_cast<int_type>(exponent - 1);
  930. } else {
  931. fraction = static_cast<uint_type>(
  932. fraction |
  933. static_cast<uint_type>(
  934. write_bit << (HF::top_bit_left_shift - fraction_index++)));
  935. }
  936. }
  937. } else {
  938. // We still have not found our 'p' exponent yet, so this is not a valid
  939. // hex-float.
  940. is.setstate(std::ios::failbit);
  941. return is;
  942. }
  943. is.get();
  944. next_char = is.peek();
  945. }
  946. bool seen_sign = false;
  947. int8_t exponent_sign = 1;
  948. int_type written_exponent = 0;
  949. while (true) {
  950. if ((next_char == '-' || next_char == '+')) {
  951. if (seen_sign) {
  952. is.setstate(std::ios::failbit);
  953. return is;
  954. }
  955. seen_sign = true;
  956. exponent_sign = (next_char == '-') ? -1 : 1;
  957. } else if (::isdigit(next_char)) {
  958. // Hex-floats express their exponent as decimal.
  959. written_exponent = static_cast<int_type>(written_exponent * 10);
  960. written_exponent =
  961. static_cast<int_type>(written_exponent + (next_char - '0'));
  962. } else {
  963. break;
  964. }
  965. is.get();
  966. next_char = is.peek();
  967. }
  968. written_exponent = static_cast<int_type>(written_exponent * exponent_sign);
  969. exponent = static_cast<int_type>(exponent + written_exponent);
  970. bool is_zero = is_denorm && (fraction == 0);
  971. if (is_denorm && !is_zero) {
  972. fraction = static_cast<uint_type>(fraction << 1);
  973. exponent = static_cast<int_type>(exponent - 1);
  974. } else if (is_zero) {
  975. exponent = 0;
  976. }
  977. if (exponent <= 0 && !is_zero) {
  978. fraction = static_cast<uint_type>(fraction >> 1);
  979. fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift;
  980. }
  981. fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask;
  982. const int_type max_exponent =
  983. SetBits<uint_type, 0, HF::num_exponent_bits>::get;
  984. // Handle actual denorm numbers
  985. while (exponent < 0 && !is_zero) {
  986. fraction = static_cast<uint_type>(fraction >> 1);
  987. exponent = static_cast<int_type>(exponent + 1);
  988. fraction &= HF::fraction_encode_mask;
  989. if (fraction == 0) {
  990. // We have underflowed our fraction. We should clamp to zero.
  991. is_zero = true;
  992. exponent = 0;
  993. }
  994. }
  995. // We have overflowed so we should be inf/-inf.
  996. if (exponent > max_exponent) {
  997. exponent = max_exponent;
  998. fraction = 0;
  999. }
  1000. uint_type output_bits = static_cast<uint_type>(
  1001. static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift);
  1002. output_bits |= fraction;
  1003. uint_type shifted_exponent = static_cast<uint_type>(
  1004. static_cast<uint_type>(exponent << HF::exponent_left_shift) &
  1005. HF::exponent_mask);
  1006. output_bits |= shifted_exponent;
  1007. T output_float = spvutils::BitwiseCast<T>(output_bits);
  1008. value.set_value(output_float);
  1009. return is;
  1010. }
  1011. // Writes a FloatProxy value to a stream.
  1012. // Zero and normal numbers are printed in the usual notation, but with
  1013. // enough digits to fully reproduce the value. Other values (subnormal,
  1014. // NaN, and infinity) are printed as a hex float.
  1015. template <typename T>
  1016. std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) {
  1017. auto float_val = value.getAsFloat();
  1018. switch (std::fpclassify(float_val)) {
  1019. case FP_ZERO:
  1020. case FP_NORMAL: {
  1021. auto saved_precision = os.precision();
  1022. os.precision(std::numeric_limits<T>::digits10);
  1023. os << float_val;
  1024. os.precision(saved_precision);
  1025. } break;
  1026. default:
  1027. os << HexFloat<FloatProxy<T>>(value);
  1028. break;
  1029. }
  1030. return os;
  1031. }
  1032. template <>
  1033. inline std::ostream& operator<<<Float16>(std::ostream& os,
  1034. const FloatProxy<Float16>& value) {
  1035. os << HexFloat<FloatProxy<Float16>>(value);
  1036. return os;
  1037. }
  1038. }
  1039. #endif // LIBSPIRV_UTIL_HEX_FLOAT_H_