Number.NumberToFloatingPointBits.cs 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. namespace System
  6. {
  7. internal unsafe partial class Number
  8. {
  /// <summary>
  /// Immutable description of a binary floating-point layout: field widths, exponent
  /// range, and the masks and bit patterns derived from them.
  /// </summary>
  public readonly struct FloatingPointInfo
  {
      /// <summary>Layout with a 52-bit stored mantissa and an 11-bit exponent field.</summary>
      public static readonly FloatingPointInfo Double = new FloatingPointInfo(
          denormalMantissaBits: 52, exponentBits: 11,
          maxBinaryExponent: 1023, exponentBias: 1023,
          infinityBits: 0x7FF00000_00000000
      );

      /// <summary>Layout with a 23-bit stored mantissa and an 8-bit exponent field.</summary>
      public static readonly FloatingPointInfo Single = new FloatingPointInfo(
          denormalMantissaBits: 23, exponentBits: 8,
          maxBinaryExponent: 127, exponentBias: 127,
          infinityBits: 0x7F800000
      );

      /// <summary>Bit pattern returned for zero; always 0.</summary>
      public ulong ZeroBits { get; }

      /// <summary>Bit pattern returned for infinity, as supplied to the constructor.</summary>
      public ulong InfinityBits { get; }

      /// <summary>Mask with the low <see cref="NormalMantissaBits"/> bits set.</summary>
      public ulong NormalMantissaMask { get; }

      /// <summary>Mask with the low <see cref="DenormalMantissaBits"/> bits set.</summary>
      public ulong DenormalMantissaMask { get; }

      /// <summary>Smallest normal binary exponent, computed as <c>1 - maxBinaryExponent</c>.</summary>
      public int MinBinaryExponent { get; }

      /// <summary>Largest binary exponent, as supplied to the constructor.</summary>
      public int MaxBinaryExponent { get; }

      /// <summary>Bias added to a binary exponent before it is stored.</summary>
      public int ExponentBias { get; }

      /// <summary>
      /// Decimal-exponent threshold computed as
      /// <c>(maxBinaryExponent + 2 * NormalMantissaBits) / 3</c>.
      /// </summary>
      public int OverflowDecimalExponent { get; }

      /// <summary>Mantissa width including the hidden bit (<see cref="DenormalMantissaBits"/> + 1).</summary>
      public ushort NormalMantissaBits { get; }

      /// <summary>Number of mantissa bits physically stored.</summary>
      public ushort DenormalMantissaBits { get; }

      /// <summary>Width of the exponent field in bits.</summary>
      public ushort ExponentBits { get; }

      /// <summary>
      /// Stores the supplied layout parameters and derives the remaining values from them.
      /// </summary>
      /// <param name="denormalMantissaBits">Number of stored mantissa bits.</param>
      /// <param name="exponentBits">Width of the exponent field.</param>
      /// <param name="maxBinaryExponent">Largest binary exponent of the format.</param>
      /// <param name="exponentBias">Bias applied to stored exponents.</param>
      /// <param name="infinityBits">Raw bit pattern that encodes infinity.</param>
      public FloatingPointInfo(ushort denormalMantissaBits, ushort exponentBits, int maxBinaryExponent, int exponentBias, ulong infinityBits)
      {
          // Normal values gain one implicit (hidden) leading mantissa bit.
          ushort mantissaBitsWithHiddenBit = (ushort)(denormalMantissaBits + 1);

          // Values taken directly from the arguments.
          InfinityBits = infinityBits;
          MaxBinaryExponent = maxBinaryExponent;
          ExponentBias = exponentBias;
          DenormalMantissaBits = denormalMantissaBits;
          ExponentBits = exponentBits;

          // Values derived from the arguments.
          ZeroBits = 0;
          NormalMantissaBits = mantissaBitsWithHiddenBit;
          NormalMantissaMask = (1UL << mantissaBitsWithHiddenBit) - 1;
          DenormalMantissaMask = (1UL << denormalMantissaBits) - 1;
          MinBinaryExponent = 1 - maxBinaryExponent;
          OverflowDecimalExponent = (maxBinaryExponent + 2 * mantissaBitsWithHiddenBit) / 3;
      }
  }
  // Powers of ten from 10^0 through 10^10, indexed by exponent. The single-precision
  // fast path in NumberToFloatingPointBits only indexes this table with values in
  // [0, 10]. The field is readonly because the table is a fixed lookup and is never
  // reassigned.
  private static readonly float[] s_Pow10SingleTable = new float[]
  {
      1e0f,   // 10^0
      1e1f,   // 10^1
      1e2f,   // 10^2
      1e3f,   // 10^3
      1e4f,   // 10^4
      1e5f,   // 10^5
      1e6f,   // 10^6
      1e7f,   // 10^7
      1e8f,   // 10^8
      1e9f,   // 10^9
      1e10f,  // 10^10
  };
  // Powers of ten from 10^0 through 10^22, indexed by exponent. The double-precision
  // fast path in NumberToFloatingPointBits only indexes this table with values in
  // [0, 22]. The field is readonly because the table is a fixed lookup and is never
  // reassigned.
  private static readonly double[] s_Pow10DoubleTable = new double[]
  {
      1e0,    // 10^0
      1e1,    // 10^1
      1e2,    // 10^2
      1e3,    // 10^3
      1e4,    // 10^4
      1e5,    // 10^5
      1e6,    // 10^6
      1e7,    // 10^7
      1e8,    // 10^8
      1e9,    // 10^9
      1e10,   // 10^10
      1e11,   // 10^11
      1e12,   // 10^12
      1e13,   // 10^13
      1e14,   // 10^14
      1e15,   // 10^15
      1e16,   // 10^16
      1e17,   // 10^17
      1e18,   // 10^18
      1e19,   // 10^19
      1e20,   // 10^20
      1e21,   // 10^21
      1e22,   // 10^22
  };
  /// <summary>
  /// Reads the digits at positions <c>[firstIndex, lastIndex)</c> of the number's digit
  /// buffer and accumulates them into <paramref name="result"/>.
  /// </summary>
  /// <param name="number">Buffer whose digit pointer supplies the ASCII digits.</param>
  /// <param name="firstIndex">Index of the first digit to consume.</param>
  /// <param name="lastIndex">Index one past the last digit to consume.</param>
  /// <param name="result">Receives the accumulated value; starts from zero.</param>
  private static void AccumulateDecimalDigitsIntoBigInteger(ref NumberBuffer number, uint firstIndex, uint lastIndex, out BigInteger result)
  {
      result = new BigInteger(0);

      byte* cursor = number.GetDigitsPointer() + firstIndex;

      // Consume the digits in chunks of at most nine, the largest chunk that
      // DigitsToUInt32 accepts.
      for (uint digitsLeft = lastIndex - firstIndex; digitsLeft != 0; )
      {
          uint chunkLength = (digitsLeft < 9u) ? digitsLeft : 9u;
          uint chunkValue = DigitsToUInt32(cursor, (int)(chunkLength));

          // Make room for the new chunk, then append it.
          result.MultiplyPow10(chunkLength);
          result.Add(chunkValue);

          cursor += chunkLength;
          digitsLeft -= chunkLength;
      }
  }
  /// <summary>
  /// Packs a mantissa and binary exponent into the raw bit pattern of the format
  /// described by <paramref name="info"/>, rounding the mantissa when it has to be
  /// shortened.
  /// </summary>
  /// <param name="info">Layout of the target floating-point format.</param>
  /// <param name="initialMantissa">Mantissa bits, not yet aligned to the target width.</param>
  /// <param name="initialExponent">
  /// Binary exponent the value has when <paramref name="initialMantissa"/> occupies exactly
  /// <c>info.NormalMantissaBits</c> bits; it is corrected here for the actual bit count.
  /// </param>
  /// <param name="hasZeroTail">Forwarded to <c>RightShiftWithRounding</c> when bits are dropped.</param>
  /// <returns>
  /// The biased exponent shifted above the stored mantissa bits and OR'ed with them;
  /// <c>info.InfinityBits</c> on overflow; <c>info.ZeroBits</c> when a denormal rounds to zero.
  /// </returns>
  private static ulong AssembleFloatingPointBits(in FloatingPointInfo info, ulong initialMantissa, int initialExponent, bool hasZeroTail)
  {
      // Work out how far the mantissa must move to occupy exactly NormalMantissaBits
      // bits (positive = left, negative = right) and the exponent that results.
      uint significantBits = BigInteger.CountSignificantBits(initialMantissa);
      int shiftToNormal = info.NormalMantissaBits - (int)(significantBits);
      int normalizedExponent = initialExponent - shiftToNormal;

      if (normalizedExponent > info.MaxBinaryExponent)
      {
          // Too large for the format: report overflow.
          return info.InfinityBits;
      }

      ulong mantissa = initialMantissa;
      int exponent;

      if (normalizedExponent < info.MinBinaryExponent)
      {
          // Too small for a normal value, but possibly representable as a denormal.
          // The extra "- 1" accounts for the hidden bit, which denormals do not have.
          int shiftToDenormal = shiftToNormal + normalizedExponent + info.ExponentBias - 1;

          // Denormals store an exponent field of zero, so the unbiased exponent is
          // the negated bias.
          exponent = -info.ExponentBias;

          if (shiftToDenormal >= 0)
          {
              mantissa <<= shiftToDenormal;
          }
          else
          {
              // Drop the low bits, rounding on the bits that fall off.
              mantissa = RightShiftWithRounding(mantissa, -shiftToDenormal, hasZeroTail);

              if (mantissa == 0)
              {
                  // Nothing survived the shift: underflow.
                  return info.ZeroBits;
              }

              // Rounding can carry into the hidden-bit position. In single precision,
              // for example, 0x01ffffff shifted right by two rounds up to 0x00800000,
              // which needs 24 bits rather than the 23 a denormal can store. The value
              // has then become a normal number, so give it a normal exponent; the
              // "+ 1" restores the hidden bit subtracted from the shift above.
              if (mantissa > info.DenormalMantissaMask)
              {
                  exponent = initialExponent - (shiftToDenormal + 1) - shiftToNormal;
              }
          }
      }
      else
      {
          exponent = normalizedExponent;

          if (shiftToNormal > 0)
          {
              mantissa <<= shiftToNormal;
          }
          else if (shiftToNormal < 0)
          {
              // Drop the low bits, rounding on the bits that fall off.
              mantissa = RightShiftWithRounding(mantissa, -shiftToNormal, hasZeroTail);

              // Rounding may carry out of the mantissa. Halving the mantissa and
              // bumping the exponent keeps the value unchanged.
              if (mantissa > info.NormalMantissaMask)
              {
                  mantissa >>= 1;
                  exponent++;

                  // The bumped exponent may itself be out of range.
                  if (exponent > info.MaxBinaryExponent)
                  {
                      return info.InfinityBits;
                  }
              }
          }
      }

      // Strip the hidden bit and combine the exponent and mantissa fields.
      mantissa &= info.DenormalMantissaMask;
      Debug.Assert((info.DenormalMantissaMask & (1UL << info.DenormalMantissaBits)) == 0);

      ulong exponentField = ((ulong)(exponent + info.ExponentBias)) << info.DenormalMantissaBits;
      Debug.Assert((exponentField & info.DenormalMantissaMask) == 0);
      Debug.Assert((mantissa & ~info.DenormalMantissaMask) == 0);
      Debug.Assert((exponentField & ~(((1UL << info.ExponentBits) - 1) << info.DenormalMantissaBits)) == 0); // exponent fits in its place

      return (exponentField | mantissa);
  }
  /// <summary>
  /// Converts a big integer to floating-point bits by taking its top 64 bits as the
  /// mantissa and recording whether any discarded lower bits were non-zero.
  /// </summary>
  /// <param name="value">The integer to convert.</param>
  /// <param name="info">Layout of the target floating-point format.</param>
  /// <param name="integerBitsOfPrecision">Number of significant bits in <paramref name="value"/>.</param>
  /// <param name="hasNonZeroFractionalPart">
  /// True when the caller dropped a non-zero fraction, which must count towards rounding.
  /// </param>
  /// <returns>The bit pattern produced by <c>AssembleFloatingPointBits</c>.</returns>
  private static ulong ConvertBigIntegerToFloatingPointBits(ref BigInteger value, in FloatingPointInfo info, uint integerBitsOfPrecision, bool hasNonZeroFractionalPart)
  {
      int lowBitExponent = info.DenormalMantissaBits;
      bool tailIsZero = !hasNonZeroFractionalPart;

      // With at most 64 significant bits the whole value is the mantissa.
      if (integerBitsOfPrecision <= 64)
      {
          return AssembleFloatingPointBits(in info, value.ToUInt64(), lowBitExponent, tailIsZero);
      }

      // Locate the three 32-bit blocks that can contribute to the top 64 bits.
      uint highIndex = Math.DivRem(integerBitsOfPrecision, 32, out uint bitsInHighBlock);
      uint midIndex = highIndex - 1;
      uint lowIndex = highIndex - 2;

      // Every bit below the extracted window raises the exponent by one.
      int exponent = lowBitExponent + ((int)(lowIndex) * 32) + (int)(bitsInHighBlock);

      ulong mantissa;

      if (bitsInHighBlock == 0)
      {
          // The top 64 bits are exactly the middle and low blocks.
          ulong upperHalf = value.GetBlock(midIndex);
          mantissa = (upperHalf << 32) + value.GetBlock(lowIndex);
      }
      else
      {
          // The window straddles three blocks; stitch the pieces together.
          int lowDiscard = (int)(bitsInHighBlock);
          int highShift = 64 - lowDiscard;
          int midShift = 32 - lowDiscard;

          uint lowBlock = value.GetBlock(lowIndex);

          ulong highPart = (ulong)(value.GetBlock(highIndex)) << highShift;
          ulong midPart = (ulong)(value.GetBlock(midIndex)) << midShift;
          uint lowPart = lowBlock >> lowDiscard;

          mantissa = highPart + midPart + lowPart;

          // Bits of the low block that fell below the window count as tail.
          uint discardedMask = (1u << lowDiscard) - 1;
          tailIsZero &= (lowBlock & discardedMask) == 0;
      }

      // Any non-zero block entirely below the window also counts as tail.
      for (uint blockIndex = 0; blockIndex != lowIndex; blockIndex++)
      {
          if (value.GetBlock(blockIndex) != 0)
          {
              tailIsZero = false;
          }
      }

      return AssembleFloatingPointBits(in info, mantissa, exponent, tailIsZero);
  }
  /// <summary>
  /// Converts between one and nine ASCII decimal digits to a 32-bit unsigned integer.
  /// </summary>
  /// <param name="p">Pointer to the first digit.</param>
  /// <param name="count">Number of digits to read; asserted to be in [1, 9].</param>
  /// <returns>The value of the digits read as a base-10 number.</returns>
  private static uint DigitsToUInt32(byte* p, int count)
  {
      Debug.Assert((1 <= count) && (count <= 9));

      // The first digit seeds the total; each later digit shifts it one decimal place.
      uint total = (uint)(p[0] - '0');

      for (int i = 1; i < count; i++)
      {
          total = (10 * total) + p[i] - '0';
      }

      return total;
  }
  /// <summary>
  /// Converts between one and nineteen ASCII decimal digits to a 64-bit unsigned integer.
  /// </summary>
  /// <param name="p">Pointer to the first digit.</param>
  /// <param name="count">Number of digits to read; asserted to be in [1, 19].</param>
  /// <returns>The value of the digits read as a base-10 number.</returns>
  private static ulong DigitsToUInt64(byte* p, int count)
  {
      Debug.Assert((1 <= count) && (count <= 19));

      // The first digit seeds the total; each later digit shifts it one decimal place.
      ulong total = (ulong)(p[0] - '0');

      for (int i = 1; i < count; i++)
      {
          total = (10 * total) + p[i] - '0';
      }

      return total;
  }
  /// <summary>
  /// Converts the decimal digits and scale held in <paramref name="number"/> to the raw
  /// bit pattern of the floating-point format described by <paramref name="info"/>.
  /// </summary>
  /// <param name="number">Parsed digits; the first digit is asserted to be non-zero.</param>
  /// <param name="info">Layout of the target floating-point format.</param>
  /// <returns>
  /// The bits produced by one of the two hardware-arithmetic fast paths, or by
  /// <c>NumberToFloatingPointBitsSlow</c> when neither applies.
  /// </returns>
  private static ulong NumberToFloatingPointBits(ref NumberBuffer number, in FloatingPointInfo info)
  {
      Debug.Assert(number.GetDigitsPointer()[0] != '0');

      Debug.Assert(number.Scale <= FloatingPointMaxExponent);
      Debug.Assert(number.Scale >= FloatingPointMinExponent);

      Debug.Assert(number.DigitsCount != 0);

      // The input reads as 0.Digits x 10^Scale. Split the digits into an integer part
      // (the first Scale digits when Scale is positive, capped at the digits present)
      // and a fractional part (whatever remains).
      uint digitCount = (uint)(number.DigitsCount);
      uint nonNegativeScale = (uint)(Math.Max(0, number.Scale));
      uint wholeDigits = Math.Min(nonNegativeScale, digitCount);
      uint fractionDigits = digitCount - wholeDigits;

      // Distance between the scale and the digit count: the power of ten by which the
      // digits, read as one integer, must be multiplied or divided.
      uint pow10Index = (uint)(Math.Abs(number.Scale - wholeDigits - fractionDigits));
      bool divideByPow10 = fractionDigits != 0;

      byte* digits = number.GetDigitsPointer();

      // With few enough digits and a small enough power of ten, both operands are
      // exactly representable, and IEEE arithmetic rounds the exact result once.
      // That is all a correct conversion needs, so hardware arithmetic can be used.
      if ((info.DenormalMantissaBits == 23) && (digitCount <= 7) && (pow10Index <= 10))
      {
          // This path is valid only for single precision: done in float, it can lose
          // mantissa bits and would give the wrong value if widened to double.
          float significand = DigitsToUInt32(digits, (int)(digitCount));
          float powerOfTen = s_Pow10SingleTable[pow10Index];
          float scaled = divideByPow10 ? (significand / powerOfTen) : (significand * powerOfTen);

          return (uint)(BitConverter.SingleToInt32Bits(scaled));
      }

      if ((digitCount > 15) || (pow10Index > 22))
      {
          // Too many digits or too large a power of ten for the double fast path.
          return NumberToFloatingPointBitsSlow(ref number, in info, nonNegativeScale, wholeDigits, fractionDigits);
      }

      double wideSignificand = DigitsToUInt64(digits, (int)(digitCount));
      double widePowerOfTen = s_Pow10DoubleTable[pow10Index];
      double wideScaled = divideByPow10 ? (wideSignificand / widePowerOfTen) : (wideSignificand * widePowerOfTen);

      if (info.DenormalMantissaBits == 52)
      {
          return (ulong)(BitConverter.DoubleToInt64Bits(wideScaled));
      }

      // Single precision that missed the float fast path: narrow the double result.
      Debug.Assert(info.DenormalMantissaBits == 23);
      return (uint)(BitConverter.SingleToInt32Bits((float)(wideScaled)));
  }
  /// <summary>
  /// Arbitrary-precision conversion used when the fast paths of
  /// <c>NumberToFloatingPointBits</c> do not apply. The integer digits are accumulated
  /// into a big integer; if they do not supply enough bits, the rest come from dividing
  /// the fractional digits by the matching power of ten.
  /// </summary>
  /// <param name="number">Parsed digits, scale and non-zero-tail flag.</param>
  /// <param name="info">Layout of the target floating-point format.</param>
  /// <param name="positiveExponent">The number's scale, clamped to zero from below by the caller.</param>
  /// <param name="integerDigitsPresent">How many leading digits belong to the integer part.</param>
  /// <param name="fractionalDigitsPresent">How many trailing digits belong to the fractional part.</param>
  /// <returns>
  /// The assembled bit pattern; <c>info.InfinityBits</c> on overflow; <c>info.ZeroBits</c>
  /// on underflow.
  /// </returns>
  private static ulong NumberToFloatingPointBitsSlow(ref NumberBuffer number, in FloatingPointInfo info, uint positiveExponent, uint integerDigitsPresent, uint fractionalDigitsPresent)
  {
      // An N-bit mantissa needs N + 1 bits of precision; the extra bit drives rounding.
      // Fewer bits than that is fine: the value is then exact and needs no rounding.
      uint bitsNeeded = (uint)(info.NormalMantissaBits + 1);

      uint digitCount = (uint)(number.DigitsCount);
      bool hasFractionDigits = fractionalDigitsPresent != 0;

      // Step 1: the integer part. Digits [0, integerDigitsPresent) are present in the
      // buffer; integer digits the scale implies beyond that are zeros.
      AccumulateDecimalDigitsIntoBigInteger(ref number, 0, integerDigitsPresent, out BigInteger wholePart);

      uint impliedZeroDigits = positiveExponent - integerDigitsPresent;

      if (impliedZeroDigits > 0)
      {
          if (impliedZeroDigits > info.OverflowDecimalExponent)
          {
              return info.InfinityBits;
          }

          wholePart.MultiplyPow10(impliedZeroDigits);
      }

      // If the integer part alone supplies enough precision, or there is no fractional
      // part at all, the result can be assembled immediately.
      uint wholeBits = BigInteger.CountSignificantBits(ref wholePart);

      if (!hasFractionDigits || (wholeBits >= bitsNeeded))
      {
          return ConvertBigIntegerToFloatingPointBits(ref wholePart, in info, wholeBits, hasFractionDigits);
      }

      // Step 2: the fractional part, expressed as numerator / 10^denominatorPow10.
      // A negative scale means leading zeros after the decimal point, which enlarge
      // the denominator.
      uint denominatorPow10 = fractionalDigitsPresent;

      if (number.Scale < 0)
      {
          denominatorPow10 += (uint)(-number.Scale);
      }

      // With digits in the integer part the exponent cannot be small enough to
      // underflow, so the check applies only when the integer part is empty.
      // Exceeding the threshold then is a true underflow.
      if ((wholeBits == 0) && ((denominatorPow10 - (int)(digitCount)) > info.OverflowDecimalExponent))
      {
          return info.ZeroBits;
      }

      AccumulateDecimalDigitsIntoBigInteger(ref number, integerDigitsPresent, digitCount, out BigInteger numerator);
      Debug.Assert(!numerator.IsZero());

      BigInteger.Pow10(denominatorPow10, out BigInteger denominator);

      // The numerator is smaller than the denominator. Line up their most significant
      // bits so that a later left shift by N yields N bits of quotient.
      uint numeratorBits = BigInteger.CountSignificantBits(ref numerator);
      uint denominatorBits = BigInteger.CountSignificantBits(ref denominator);
      uint alignShift = (denominatorBits > numeratorBits) ? (denominatorBits - numeratorBits) : 0u;

      if (alignShift != 0)
      {
          numerator.ShiftLeft(alignShift);
      }

      uint fractionBitsNeeded = bitsNeeded - wholeBits;
      uint extraShift = fractionBitsNeeded;

      if (wholeBits != 0)
      {
          // A fraction that starts too far to the right, as in 5.0000000000000000000001,
          // contributes no mantissa bits and cannot affect rounding, so the integer
          // bits are used as they are. When the alignment shift exactly equals the bits
          // needed, no fractional bit lands in the mantissa but the fraction can still
          // decide rounding, so the division must still run.
          if (alignShift > fractionBitsNeeded)
          {
              return ConvertBigIntegerToFloatingPointBits(ref wholePart, in info, wholeBits, hasFractionDigits);
          }

          extraShift = fractionBitsNeeded - alignShift;
      }

      // With no integer part the exponent comes from the fraction: it is the power of
      // two that moves the fraction into [1.0, 2.0). That is the alignment shift, or
      // one more when the aligned numerator is still below the denominator.
      uint fractionExponent = alignShift;

      if (BigInteger.Compare(ref numerator, ref denominator) < 0)
      {
          fractionExponent++;
      }

      numerator.ShiftLeft(extraShift);

      BigInteger.DivRem(ref numerator, ref denominator, out BigInteger quotient, out BigInteger remainder);
      ulong fractionMantissa = quotient.ToUInt64();
      bool hasZeroTail = !number.HasNonZeroTail && remainder.IsZero();

      // The division may have produced more bits than required; trim the surplus and
      // remember whether anything non-zero was cut off.
      uint quotientBits = BigInteger.CountSignificantBits(fractionMantissa);

      if (quotientBits > fractionBitsNeeded)
      {
          int surplus = (int)(quotientBits - fractionBitsNeeded);
          ulong surplusMask = (1UL << surplus) - 1;

          hasZeroTail &= (fractionMantissa & surplusMask) == 0;
          fractionMantissa >>= surplus;
      }

      // Step 3: join the integer bits and the fraction bits into one mantissa.
      ulong completeMantissa = (wholePart.ToUInt64() << (int)(fractionBitsNeeded)) + fractionMantissa;

      // Step 4: the exponent.
      //  * With an integer part it is one less than the integer bit count, because
      //    the target form is 1.xxxxx with a single 1 before the binary point.
      //  * Without one it is the negated fractional exponent.
      // In both cases one more is subtracted for the extra rounding bit.
      int finalExponent;

      if (wholeBits != 0)
      {
          finalExponent = (int)(wholeBits) - 2;
      }
      else
      {
          finalExponent = -(int)(fractionExponent) - 1;
      }

      return AssembleFloatingPointBits(in info, completeMantissa, finalExponent, hasZeroTail);
  }
  /// <summary>
  /// Shifts <paramref name="value"/> right by <paramref name="shift"/> bits and adds one
  /// when <c>ShouldRoundUp</c> says the discarded bits call for it.
  /// </summary>
  /// <param name="value">The value to shift.</param>
  /// <param name="shift">
  /// Number of bits to drop. Callers in this file only pass positive values; 64 or more
  /// yields zero.
  /// </param>
  /// <param name="hasZeroTail">
  /// False when non-zero bits had already been discarded below <paramref name="value"/>.
  /// </param>
  /// <returns>The shifted and rounded value.</returns>
  private static ulong RightShiftWithRounding(ulong value, int shift, bool hasZeroTail)
  {
      // Shifting out every bit always leaves zero.
      if (shift >= 64)
      {
          return 0;
      }

      // The round bit is the highest bit that gets dropped; everything below it is tail.
      ulong roundBitMask = 1UL << (shift - 1);
      ulong belowRoundMask = roundBitMask - 1;

      ulong shifted = value >> shift;

      bool lsbBit = (shifted & 1UL) != 0;
      bool roundBit = (value & roundBitMask) != 0;
      bool hasTailBits = ((value & belowRoundMask) != 0) || !hasZeroTail;

      if (ShouldRoundUp(lsbBit, roundBit, hasTailBits))
      {
          shifted++;
      }

      return shifted;
  }
  /// <summary>
  /// Decides whether a truncated value must be rounded up under round-to-nearest,
  /// ties-to-even.
  /// </summary>
  /// <param name="lsbBit">Lowest bit that is kept.</param>
  /// <param name="roundBit">Highest bit that is discarded.</param>
  /// <param name="hasTailBits">True when any discarded bit below the round bit is set.</param>
  /// <returns>True when one must be added to the truncated value.</returns>
  private static bool ShouldRoundUp(bool lsbBit, bool roundBit, bool hasTailBits)
  {
      // A clear round bit puts the value below the midpoint: keep the truncation.
      if (!roundBit)
      {
          return false;
      }

      // From here the value is at or above the midpoint. Round up when it is strictly
      // above (tail bits set), or exactly on the midpoint with an odd kept value, so
      // that a tie lands on the even neighbour.
      return hasTailBits || lsbBit;
  }
  512. }
  513. }