Number.NumberToFloatingPointBits.cs 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. namespace System
  6. {
  7. internal unsafe partial class Number
  8. {
  /// <summary>
  /// Immutable description of a binary floating-point format: field widths, bit masks,
  /// exponent limits and the special bit patterns used when assembling a value.
  /// </summary>
  public readonly struct FloatingPointInfo
  {
      /// <summary>Format with 52 stored mantissa bits, 11 exponent bits and a bias of 1023.</summary>
      public static readonly FloatingPointInfo Double = new FloatingPointInfo(
          denormalMantissaBits: 52,
          exponentBits: 11,
          maxBinaryExponent: 1023,
          exponentBias: 1023,
          infinityBits: 0x7FF00000_00000000
      );

      /// <summary>Format with 23 stored mantissa bits, 8 exponent bits and a bias of 127.</summary>
      public static readonly FloatingPointInfo Single = new FloatingPointInfo(
          denormalMantissaBits: 23,
          exponentBits: 8,
          maxBinaryExponent: 127,
          exponentBias: 127,
          infinityBits: 0x7F800000
      );

      /// <summary>Bit pattern returned for zero; always 0.</summary>
      public ulong ZeroBits { get; }

      /// <summary>Bit pattern returned for infinity, as passed to the constructor.</summary>
      public ulong InfinityBits { get; }

      /// <summary>Mask with the low <see cref="NormalMantissaBits"/> bits set.</summary>
      public ulong NormalMantissaMask { get; }

      /// <summary>Mask with the low <see cref="DenormalMantissaBits"/> bits set.</summary>
      public ulong DenormalMantissaMask { get; }

      /// <summary>Smallest exponent of a normal value; computed as 1 - <see cref="MaxBinaryExponent"/>.</summary>
      public int MinBinaryExponent { get; }

      /// <summary>Largest binary exponent, as passed to the constructor.</summary>
      public int MaxBinaryExponent { get; }

      /// <summary>Exponent bias, as passed to the constructor.</summary>
      public int ExponentBias { get; }

      /// <summary>
      /// Decimal exponent threshold, computed as
      /// (<see cref="MaxBinaryExponent"/> + 2 * <see cref="NormalMantissaBits"/>) / 3 using integer division.
      /// </summary>
      public int OverflowDecimalExponent { get; }

      /// <summary>Mantissa width including the hidden bit (<see cref="DenormalMantissaBits"/> + 1).</summary>
      public ushort NormalMantissaBits { get; }

      /// <summary>Number of explicitly stored mantissa bits.</summary>
      public ushort DenormalMantissaBits { get; }

      /// <summary>Width of the exponent field in bits.</summary>
      public ushort ExponentBits { get; }

      /// <summary>
      /// Builds a format description; every derived value (masks, minimum exponent,
      /// overflow threshold) is computed from the supplied arguments.
      /// </summary>
      /// <param name="denormalMantissaBits">Number of explicitly stored mantissa bits.</param>
      /// <param name="exponentBits">Width of the exponent field.</param>
      /// <param name="maxBinaryExponent">Largest binary exponent of the format.</param>
      /// <param name="exponentBias">Bias added to the exponent before it is stored.</param>
      /// <param name="infinityBits">Bit pattern that represents infinity.</param>
      public FloatingPointInfo(ushort denormalMantissaBits, ushort exponentBits, int maxBinaryExponent, int exponentBias, ulong infinityBits)
      {
          // Normal values gain one extra (hidden) mantissa bit over denormal values.
          ushort normalMantissaBits = (ushort)(denormalMantissaBits + 1);

          ZeroBits = 0;
          InfinityBits = infinityBits;

          NormalMantissaMask = (1UL << normalMantissaBits) - 1;
          DenormalMantissaMask = (1UL << denormalMantissaBits) - 1;

          MinBinaryExponent = 1 - maxBinaryExponent;
          MaxBinaryExponent = maxBinaryExponent;
          ExponentBias = exponentBias;
          OverflowDecimalExponent = (maxBinaryExponent + (2 * normalMantissaBits)) / 3;

          NormalMantissaBits = normalMantissaBits;
          DenormalMantissaBits = denormalMantissaBits;
          ExponentBits = exponentBits;
      }
  }
  // Powers of ten from 10^0 through 10^10, indexed by the decimal exponent.
  // 10^10 is the largest power of ten that a single-precision value holds
  // exactly (5^10 still fits in 24 bits). The field is readonly so that the
  // shared lookup table cannot be replaced after type initialization.
  private static readonly float[] s_Pow10SingleTable = new float[]
  {
      1e0f,   // 10^0
      1e1f,   // 10^1
      1e2f,   // 10^2
      1e3f,   // 10^3
      1e4f,   // 10^4
      1e5f,   // 10^5
      1e6f,   // 10^6
      1e7f,   // 10^7
      1e8f,   // 10^8
      1e9f,   // 10^9
      1e10f,  // 10^10
  };
  // Powers of ten from 10^0 through 10^22, indexed by the decimal exponent.
  // 10^22 is the largest power of ten that a double-precision value holds
  // exactly (5^22 still fits in 53 bits). The field is readonly so that the
  // shared lookup table cannot be replaced after type initialization.
  private static readonly double[] s_Pow10DoubleTable = new double[]
  {
      1e0,   // 10^0
      1e1,   // 10^1
      1e2,   // 10^2
      1e3,   // 10^3
      1e4,   // 10^4
      1e5,   // 10^5
      1e6,   // 10^6
      1e7,   // 10^7
      1e8,   // 10^8
      1e9,   // 10^9
      1e10,  // 10^10
      1e11,  // 10^11
      1e12,  // 10^12
      1e13,  // 10^13
      1e14,  // 10^14
      1e15,  // 10^15
      1e16,  // 10^16
      1e17,  // 10^17
      1e18,  // 10^18
      1e19,  // 10^19
      1e20,  // 10^20
      1e21,  // 10^21
      1e22,  // 10^22
  };
  /// <summary>
  /// Builds a big integer from the decimal digits of <paramref name="number"/> in the
  /// half-open index range [<paramref name="firstIndex"/>, <paramref name="lastIndex"/>).
  /// </summary>
  /// <param name="number">Buffer whose digit bytes are read.</param>
  /// <param name="firstIndex">Index of the first digit to consume.</param>
  /// <param name="lastIndex">Index one past the last digit to consume.</param>
  /// <param name="result">Receives the accumulated value; zero when the range is empty.</param>
  private static void AccumulateDecimalDigitsIntoBigInteger(ref NumberBuffer number, uint firstIndex, uint lastIndex, out BigInteger result)
  {
      result = new BigInteger(0);

      byte* cursor = number.GetDigitsPointer() + firstIndex;

      // Consume the digits in chunks of at most nine, the most DigitsToUInt32 accepts:
      // scale the running total by 10^chunk and add the chunk's value.
      for (uint digitsLeft = lastIndex - firstIndex; digitsLeft != 0; )
      {
          uint chunkLength = Math.Min(digitsLeft, 9);
          uint chunkValue = DigitsToUInt32(cursor, (int)(chunkLength));

          result.MultiplyPow10(chunkLength);
          result.Add(chunkValue);

          cursor += chunkLength;
          digitsLeft -= chunkLength;
      }
  }
  /// <summary>
  /// Normalizes a mantissa/exponent pair for the format described by <paramref name="info"/>,
  /// rounding where bits have to be dropped, and packs the result into raw bits.
  /// </summary>
  /// <param name="info">Format description (widths, masks, exponent limits).</param>
  /// <param name="initialMantissa">Unnormalized mantissa bits.</param>
  /// <param name="initialExponent">Binary exponent that goes with <paramref name="initialMantissa"/>.</param>
  /// <param name="hasZeroTail">True when no non-zero bits exist beyond those in <paramref name="initialMantissa"/>.</param>
  /// <returns>
  /// The assembled bit pattern; <c>info.InfinityBits</c> on overflow and <c>info.ZeroBits</c>
  /// when a denormal mantissa rounds to zero.
  /// </returns>
  private static ulong AssembleFloatingPointBits(in FloatingPointInfo info, ulong initialMantissa, int initialExponent, bool hasZeroTail)
  {
      // How far the mantissa must move (left when positive, right when negative) so that it
      // occupies exactly NormalMantissaBits bits, and the exponent that results from the move.
      int significantBits = (int)(BigInteger.CountSignificantBits(initialMantissa));
      int shiftToNormal = info.NormalMantissaBits - significantBits;
      int normalizedExponent = initialExponent - shiftToNormal;

      if (normalizedExponent > info.MaxBinaryExponent)
      {
          // Too large for the format: overflow.
          return info.InfinityBits;
      }

      ulong mantissa = initialMantissa;
      int exponent = normalizedExponent;

      if (normalizedExponent >= info.MinBinaryExponent)
      {
          // Normal value.
          if (shiftToNormal > 0)
          {
              mantissa <<= shiftToNormal;
          }
          else if (shiftToNormal < 0)
          {
              mantissa = RightShiftWithRounding(mantissa, -shiftToNormal, hasZeroTail);

              // Rounding can carry into one bit too many. Halving the mantissa and bumping
              // the exponent keeps the value the same.
              if (mantissa > info.NormalMantissaMask)
              {
                  mantissa >>= 1;
                  exponent++;

                  // The bumped exponent may itself be out of range.
                  if (exponent > info.MaxBinaryExponent)
                  {
                      return info.InfinityBits;
                  }
              }
          }
      }
      else
      {
          // Below the normal range; the value may still be expressible as a denormal.
          // The extra "- 1" accounts for the hidden bit, which denormals do not have.
          int shiftToDenormal = shiftToNormal + normalizedExponent + info.ExponentBias - 1;

          // Denormals store an exponent field of zero, i.e. a debiased exponent of -bias.
          exponent = -info.ExponentBias;

          if (shiftToDenormal >= 0)
          {
              mantissa <<= shiftToDenormal;
          }
          else
          {
              mantissa = RightShiftWithRounding(mantissa, -shiftToDenormal, hasZeroTail);

              if (mantissa == 0)
              {
                  // Everything was shifted out: underflow.
                  return info.ZeroBits;
              }

              // Rounding may have carried past the denormal mantissa width, which turns the
              // value into a normal number (e.g. for single precision a mantissa that rounds
              // up to 0x00800000 needs 24 bits, one more than the 23 stored bits). Recompute
              // the exponent for that case; the "+ 1" restores the hidden bit subtracted above.
              if (mantissa > info.DenormalMantissaMask)
              {
                  exponent = initialExponent - (shiftToDenormal + 1) - shiftToNormal;
              }
          }
      }

      // Drop the hidden bit and combine the biased exponent with the stored mantissa bits.
      mantissa &= info.DenormalMantissaMask;
      Debug.Assert((info.DenormalMantissaMask & (1UL << info.DenormalMantissaBits)) == 0);

      ulong shiftedExponent = ((ulong)(exponent + info.ExponentBias)) << info.DenormalMantissaBits;
      Debug.Assert((shiftedExponent & info.DenormalMantissaMask) == 0);
      Debug.Assert((mantissa & ~info.DenormalMantissaMask) == 0);
      Debug.Assert((shiftedExponent & ~(((1UL << info.ExponentBits) - 1) << info.DenormalMantissaBits)) == 0); // exponent fits in its place

      return (shiftedExponent | mantissa);
  }
  /// <summary>
  /// Converts a non-negative big integer to floating-point bits by extracting its top
  /// (up to) 64 bits as a mantissa and delegating to <c>AssembleFloatingPointBits</c>.
  /// </summary>
  /// <param name="value">Integer to convert.</param>
  /// <param name="info">Target format description.</param>
  /// <param name="integerBitsOfPrecision">Number of significant bits in <paramref name="value"/>.</param>
  /// <param name="hasNonZeroFractionalPart">True when discarded fractional digits exist and must influence rounding.</param>
  /// <returns>The assembled bit pattern.</returns>
  private static ulong ConvertBigIntegerToFloatingPointBits(ref BigInteger value, in FloatingPointInfo info, uint integerBitsOfPrecision, bool hasNonZeroFractionalPart)
  {
      bool hasZeroTail = !hasNonZeroFractionalPart;
      int baseExponent = info.DenormalMantissaBits;

      // Values of at most 64 bits can be handed over directly.
      if (integerBitsOfPrecision <= 64)
      {
          return AssembleFloatingPointBits(in info, value.ToUInt64(), baseExponent, hasZeroTail);
      }

      // Locate the 32-bit blocks that hold the most significant 64 bits.
      uint topBlockIndex = integerBitsOfPrecision / 32;
      uint topBlockBits = integerBitsOfPrecision % 32;
      uint middleBlockIndex = topBlockIndex - 1;
      uint bottomBlockIndex = middleBlockIndex - 1;

      int exponent = baseExponent + ((int)(bottomBlockIndex) * 32);
      ulong mantissa;

      if (topBlockBits != 0)
      {
          // The top 64 bits straddle three blocks: take all of the top and middle blocks
          // and the upper part of the bottom block.
          int bottomBlockShift = (int)(topBlockBits);
          int topBlockShift = 64 - bottomBlockShift;
          int middleBlockShift = topBlockShift - 32;

          exponent += (int)(topBlockBits);

          uint bottomBlock = value.GetBlock(bottomBlockIndex);
          uint bottomBits = bottomBlock >> bottomBlockShift;
          ulong middleBits = (ulong)(value.GetBlock(middleBlockIndex)) << middleBlockShift;
          ulong topBits = (ulong)(value.GetBlock(topBlockIndex)) << topBlockShift;

          mantissa = topBits + middleBits + bottomBits;

          // The low bits of the bottom block did not make it into the mantissa; they count as tail.
          uint droppedBitsMask = (1u << (int)(topBlockBits)) - 1;
          if ((bottomBlock & droppedBitsMask) != 0)
          {
              hasZeroTail = false;
          }
      }
      else
      {
          // The top 64 bits line up with exactly two blocks.
          mantissa = ((ulong)(value.GetBlock(middleBlockIndex)) << 32) + value.GetBlock(bottomBlockIndex);
      }

      // Every block below the bottom block is part of the tail as well.
      for (uint blockIndex = 0; blockIndex != bottomBlockIndex; blockIndex++)
      {
          if (value.GetBlock(blockIndex) != 0)
          {
              hasZeroTail = false;
          }
      }

      return AssembleFloatingPointBits(in info, mantissa, exponent, hasZeroTail);
  }
  // Reads 'count' ASCII decimal digits (between 1 and 9) starting at 'p' and
  // returns their value as a 32-bit unsigned integer.
  private static uint DigitsToUInt32(byte* p, int count)
  {
      Debug.Assert((1 <= count) && (count <= 9));

      // The first digit seeds the accumulator; each further digit shifts the
      // running value one decimal place to the left.
      uint value = (uint)(p[0] - '0');

      for (int i = 1; i < count; i++)
      {
          value = (10 * value) + p[i] - '0';
      }

      return value;
  }
  // Reads 'count' ASCII decimal digits (between 1 and 19) starting at 'p' and
  // returns their value as a 64-bit unsigned integer.
  private static ulong DigitsToUInt64(byte* p, int count)
  {
      Debug.Assert((1 <= count) && (count <= 19));

      // The first digit seeds the accumulator; each further digit shifts the
      // running value one decimal place to the left.
      ulong value = (ulong)(p[0] - '0');

      for (int i = 1; i < count; i++)
      {
          value = (10 * value) + p[i] - '0';
      }

      return value;
  }
  /// <summary>
  /// Converts the decimal digits and scale held in <paramref name="number"/> to the raw bits
  /// of the floating-point format described by <paramref name="info"/>.
  /// </summary>
  /// <param name="number">Parsed number; its digits, digit count and scale are read.</param>
  /// <param name="info">Target format description.</param>
  /// <returns>
  /// The bit pattern of the converted value; <c>info.ZeroBits</c> when the buffer holds no digits.
  /// </returns>
  private static ulong NumberToFloatingPointBits(ref NumberBuffer number, in FloatingPointInfo info)
  {
      Debug.Assert(number.GetDigitsPointer()[0] != '0');

      uint totalDigits = (uint)(number.DigitsCount);

      if (totalDigits == 0)
      {
          return info.ZeroBits;
      }

      // The input is of the form 0.Mantissa x 10^Exponent, where 'Mantissa' are
      // the decimal digits of the mantissa and 'Exponent' is the decimal exponent.
      // We decompose the mantissa into two parts: an integer part and a fractional
      // part. If the exponent is positive, then the integer part consists of the
      // first 'exponent' digits, or all present digits if there are fewer digits.
      // If the exponent is zero or negative, then the integer part is empty. In
      // either case, the remaining digits form the fractional part of the mantissa.
      uint positiveExponent = (uint)(Math.Max(0, number.Scale));
      uint integerDigitsPresent = Math.Min(positiveExponent, totalDigits);
      uint fractionalDigitsPresent = totalDigits - integerDigitsPresent;

      // Magnitude of the power of ten by which the digits, read as one whole
      // integer, must be multiplied (no fractional digits) or divided (otherwise).
      uint fastExponent = (uint)(Math.Abs(number.Scale - integerDigitsPresent - fractionalDigitsPresent));

      // When both the digits (read as an integer) and the power of ten are exactly
      // representable in the target format, a single IEEE multiplication or division
      // yields the correctly rounded result, because IEEE operations round the
      // infinitely precise result exactly once.
      byte* src = number.GetDigitsPointer();

      if ((info.DenormalMantissaBits == 23) && (totalDigits <= 7) && (fastExponent <= 10))
      {
          // It is only valid to do this optimization for single-precision floating-point
          // values since we can lose some of the mantissa bits and would return the
          // wrong value when upcasting to double.
          float result = DigitsToUInt32(src, (int)(totalDigits));
          float scale = s_Pow10SingleTable[fastExponent];

          if (fractionalDigitsPresent != 0)
          {
              result /= scale;
          }
          else
          {
              result *= scale;
          }

          return (uint)(BitConverter.SingleToInt32Bits(result));
      }

      // The double-precision shortcut is taken only when the target format is double.
      // Computing in double and then narrowing to single rounds twice, which can be
      // off by one unit in the last place. For example "1.00004643201828" lies just
      // below the midpoint of the singles 0x3F800185 and 0x3F800186; the nearest double
      // is exactly that midpoint, so narrowing it breaks the tie towards 0x3F800186
      // although 0x3F800185 is the correctly rounded result. Single-precision inputs
      // that do not qualify for the shortcut above therefore use the exact slow path.
      if ((info.DenormalMantissaBits == 52) && (totalDigits <= 15) && (fastExponent <= 22))
      {
          double result = DigitsToUInt64(src, (int)(totalDigits));
          double scale = s_Pow10DoubleTable[fastExponent];

          if (fractionalDigitsPresent != 0)
          {
              result /= scale;
          }
          else
          {
              result *= scale;
          }

          return (ulong)(BitConverter.DoubleToInt64Bits(result));
      }

      return NumberToFloatingPointBitsSlow(ref number, in info, positiveExponent, integerDigitsPresent, fractionalDigitsPresent);
  }
  /// <summary>
  /// General conversion path: evaluates the integer and fractional parts of the decimal
  /// mantissa with big-integer arithmetic and assembles the rounded floating-point bits.
  /// </summary>
  /// <param name="number">Parsed number; its digits, digit count, scale and tail flag are read.</param>
  /// <param name="info">Target format description.</param>
  /// <param name="positiveExponent">The scale clamped to be non-negative.</param>
  /// <param name="integerDigitsPresent">Number of leading digits that belong to the integer part.</param>
  /// <param name="fractionalDigitsPresent">Number of remaining digits, which form the fractional part.</param>
  /// <returns>
  /// The assembled bit pattern; <c>info.InfinityBits</c> or <c>info.ZeroBits</c> when the decimal
  /// exponent is beyond <c>info.OverflowDecimalExponent</c> in the respective direction.
  /// </returns>
  private static ulong NumberToFloatingPointBitsSlow(ref NumberBuffer number, in FloatingPointInfo info, uint positiveExponent, uint integerDigitsPresent, uint fractionalDigitsPresent)
  {
      uint digitCount = (uint)(number.DigitsCount);
      bool hasFraction = fractionalDigitsPresent != 0;

      // An N bit mantissa needs N + 1 bits of precision; the extra bit drives rounding.
      // Having fewer bits available is fine: then nothing has to be rounded.
      uint bitsNeeded = (uint)(info.NormalMantissaBits + 1);

      // Step 1: the integer part, i.e. digits [0, integerDigitsPresent), followed by any
      // implied trailing zeros when the scale reaches past the digits that are present.
      AccumulateDecimalDigitsIntoBigInteger(ref number, 0, integerDigitsPresent, out BigInteger integerValue);

      uint missingIntegerDigits = positiveExponent - integerDigitsPresent;

      if (missingIntegerDigits > 0)
      {
          if (missingIntegerDigits > info.OverflowDecimalExponent)
          {
              return info.InfinityBits;
          }

          integerValue.MultiplyPow10(missingIntegerDigits);
      }

      // If the integer part alone supplies enough bits, or there is no fractional part
      // at all, the result can be produced right away.
      uint integerBits = BigInteger.CountSignificantBits(ref integerValue);

      if (!hasFraction || (integerBits >= bitsNeeded))
      {
          return ConvertBigIntegerToFloatingPointBits(
              ref integerValue,
              in info,
              integerBits,
              hasFraction
          );
      }

      // Step 2: more bits are needed from the fractional part. Express it as a fraction
      // N / 10^M, where N comes from the fractional digits and M counts those digits plus
      // the leading zeros implied by a negative scale.
      uint denominatorExponent = fractionalDigitsPresent;

      if (number.Scale < 0)
      {
          denominatorExponent += (uint)(-number.Scale);
      }

      if ((integerBits == 0) && (denominatorExponent - (int)(digitCount)) > info.OverflowDecimalExponent)
      {
          // With digits in the integer part an underflow is impossible, so this can only
          // trigger for purely fractional values, for which it is a true underflow.
          return info.ZeroBits;
      }

      AccumulateDecimalDigitsIntoBigInteger(ref number, integerDigitsPresent, digitCount, out BigInteger numerator);
      Debug.Assert(!numerator.IsZero());

      BigInteger.Pow10(denominatorExponent, out BigInteger denominator);

      // The numerator is smaller than the denominator. Align the most significant bits of
      // both so that every further left shift of the numerator by one bit produces one
      // more bit of quotient.
      uint numeratorBits = BigInteger.CountSignificantBits(ref numerator);
      uint denominatorBits = BigInteger.CountSignificantBits(ref denominator);
      uint alignShift = (denominatorBits > numeratorBits) ? (denominatorBits - numeratorBits) : 0;

      if (alignShift > 0)
      {
          numerator.ShiftLeft(alignShift);
      }

      uint fractionBitsNeeded = bitsNeeded - integerBits;
      uint extraShift = fractionBitsNeeded;

      if (integerBits > 0)
      {
          // When the fraction starts so far to the right that it neither contributes bits
          // nor can influence rounding (as in 5.0000000000000000000001), the integer bits
          // alone determine the result. If the alignment shift merely equals the number of
          // bits still needed, the fraction contributes no mantissa bits but may decide
          // the rounding, so the division below is still required.
          if (alignShift > extraShift)
          {
              return ConvertBigIntegerToFloatingPointBits(
                  ref integerValue,
                  in info,
                  integerBits,
                  hasFraction
              );
          }

          extraShift -= alignShift;
      }

      // Without an integer part the exponent comes from the fraction: it is the power of
      // two that moves the fraction into [1.0, 2.0), which is the alignment shift or one more.
      uint fractionalExponent = alignShift;

      if (BigInteger.Compare(ref numerator, ref denominator) < 0)
      {
          fractionalExponent++;
      }

      numerator.ShiftLeft(extraShift);

      BigInteger.DivRem(ref numerator, ref denominator, out BigInteger quotient, out BigInteger remainder);
      ulong fractionalMantissa = quotient.ToUInt64();
      bool hasZeroTail = !number.HasNonZeroTail && remainder.IsZero();

      // The division may deliver more bits than requested; fold the surplus into the tail.
      uint quotientBits = BigInteger.CountSignificantBits(fractionalMantissa);

      if (quotientBits > fractionBitsNeeded)
      {
          int surplus = (int)(quotientBits - fractionBitsNeeded);
          hasZeroTail = hasZeroTail && (fractionalMantissa & ((1UL << surplus) - 1)) == 0;
          fractionalMantissa >>= surplus;
      }

      // Step 3: join the integer bits (high) with the fractional bits (low).
      ulong integerMantissa = integerValue.ToUInt64();
      ulong completeMantissa = (integerMantissa << (int)(fractionBitsNeeded)) + fractionalMantissa;

      // Final exponent:
      //  * with an integer part, one less than its bit count (the form is 1.xxxx);
      //  * without an integer part, the fractional exponent computed above.
      // In both cases one more is subtracted for the extra rounding bit that was generated.
      int finalExponent;

      if (integerBits > 0)
      {
          finalExponent = (int)(integerBits) - 2;
      }
      else
      {
          finalExponent = -(int)(fractionalExponent) - 1;
      }

      return AssembleFloatingPointBits(in info, completeMantissa, finalExponent, hasZeroTail);
  }
  /// <summary>
  /// Shifts <paramref name="value"/> right by <paramref name="shift"/> bits and rounds the
  /// result according to <c>ShouldRoundUp</c>, based on the bits that were shifted out.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="shift">Number of bits to drop; the callers in this file pass positive counts only.</param>
  /// <param name="hasZeroTail">True when no non-zero bits exist below <paramref name="value"/>.</param>
  /// <returns>The shifted and rounded value; 0 when <paramref name="shift"/> is 64 or more.</returns>
  private static ulong RightShiftWithRounding(ulong value, int shift, bool hasZeroTail)
  {
      // Shifting out every bit always yields zero.
      if (shift >= 64)
      {
          return 0;
      }

      // The round bit is the highest bit being dropped; the bits below it, together with
      // any tail the caller knows about, decide whether the value sits exactly on a
      // midpoint. The lsb is the lowest bit that survives the shift.
      ulong roundBitMask = 1UL << (shift - 1);
      ulong belowRoundMask = roundBitMask - 1;
      ulong lsbMask = 1UL << shift;

      bool roundBit = (value & roundBitMask) != 0;
      bool lsbBit = (value & lsbMask) != 0;
      bool hasTailBits = !hasZeroTail || ((value & belowRoundMask) != 0);

      ulong shifted = value >> shift;

      if (ShouldRoundUp(lsbBit, roundBit, hasTailBits))
      {
          shifted++;
      }

      return shifted;
  }
  /// <summary>
  /// Decides whether a truncated value must be incremented under round-to-nearest,
  /// ties-to-even rounding.
  /// </summary>
  /// <param name="lsbBit">Lowest bit that is kept.</param>
  /// <param name="roundBit">Highest bit that is dropped.</param>
  /// <param name="hasTailBits">True when any bit below the round bit is set.</param>
  /// <returns>True when the kept value has to be rounded up.</returns>
  private static bool ShouldRoundUp(bool lsbBit, bool roundBit, bool hasTailBits)
  {
      // A clear round bit means the dropped part is below the midpoint: never round up.
      if (!roundBit)
      {
          return false;
      }

      // Above the midpoint (some tail bit set) always rounds up. Exactly on the midpoint
      // rounds up only when the kept value is odd, so that the result becomes even.
      return hasTailBits || lsbBit;
  }
  513. }
  514. }