diyfp.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. package fast
  2. import "math"
  3. const (
  4. diyFpKSignificandSize = 64
  5. kSignificandSize = 53
  6. kUint64MSB uint64 = 1 << 63
  7. kSignificandMask = 0x000FFFFFFFFFFFFF
  8. kHiddenBit = 0x0010000000000000
  9. kExponentMask = 0x7FF0000000000000
  10. kPhysicalSignificandSize = 52 // Excludes the hidden bit.
  11. kExponentBias = 0x3FF + kPhysicalSignificandSize
  12. kDenormalExponent = -kExponentBias + 1
  13. )
  14. type double float64
  15. type diyfp struct {
  16. f uint64
  17. e int
  18. }
  19. // f =- o.
  20. // The exponents of both numbers must be the same and the significand of this
  21. // must be bigger than the significand of other.
  22. // The result will not be normalized.
  23. func (f *diyfp) subtract(o diyfp) {
  24. _DCHECK(f.e == o.e)
  25. _DCHECK(f.f >= o.f)
  26. f.f -= o.f
  27. }
  28. // Returns f - o
  29. // The exponents of both numbers must be the same and this must be bigger
  30. // than other. The result will not be normalized.
  31. func (f diyfp) minus(o diyfp) diyfp {
  32. res := f
  33. res.subtract(o)
  34. return res
  35. }
  36. // f *= o
  37. func (f *diyfp) mul(o diyfp) {
  38. // Simply "emulates" a 128 bit multiplication.
  39. // However: the resulting number only contains 64 bits. The least
  40. // significant 64 bits are only used for rounding the most significant 64
  41. // bits.
  42. const kM32 uint64 = 0xFFFFFFFF
  43. a := f.f >> 32
  44. b := f.f & kM32
  45. c := o.f >> 32
  46. d := o.f & kM32
  47. ac := a * c
  48. bc := b * c
  49. ad := a * d
  50. bd := b * d
  51. tmp := (bd >> 32) + (ad & kM32) + (bc & kM32)
  52. // By adding 1U << 31 to tmp we round the final result.
  53. // Halfway cases will be round up.
  54. tmp += 1 << 31
  55. result_f := ac + (ad >> 32) + (bc >> 32) + (tmp >> 32)
  56. f.e += o.e + 64
  57. f.f = result_f
  58. }
  59. // Returns f * o
  60. func (f diyfp) times(o diyfp) diyfp {
  61. res := f
  62. res.mul(o)
  63. return res
  64. }
  65. func (f *diyfp) _normalize() {
  66. f_, e := f.f, f.e
  67. // This method is mainly called for normalizing boundaries. In general
  68. // boundaries need to be shifted by 10 bits. We thus optimize for this case.
  69. const k10MSBits uint64 = 0x3FF << 54
  70. for f_&k10MSBits == 0 {
  71. f_ <<= 10
  72. e -= 10
  73. }
  74. for f_&kUint64MSB == 0 {
  75. f_ <<= 1
  76. e--
  77. }
  78. f.f, f.e = f_, e
  79. }
  80. func normalizeDiyfp(f diyfp) diyfp {
  81. res := f
  82. res._normalize()
  83. return res
  84. }
  85. // f must be strictly greater than 0.
  86. func (d double) toNormalizedDiyfp() diyfp {
  87. f, e := d.sigExp()
  88. // The current float could be a denormal.
  89. for (f & kHiddenBit) == 0 {
  90. f <<= 1
  91. e--
  92. }
  93. // Do the final shifts in one go.
  94. f <<= diyFpKSignificandSize - kSignificandSize
  95. e -= diyFpKSignificandSize - kSignificandSize
  96. return diyfp{f, e}
  97. }
  98. // Returns the two boundaries of this.
  99. // The bigger boundary (m_plus) is normalized. The lower boundary has the same
  100. // exponent as m_plus.
  101. // Precondition: the value encoded by this Double must be greater than 0.
  102. func (d double) normalizedBoundaries() (m_minus, m_plus diyfp) {
  103. v := d.toDiyFp()
  104. significand_is_zero := v.f == kHiddenBit
  105. m_plus = normalizeDiyfp(diyfp{f: (v.f << 1) + 1, e: v.e - 1})
  106. if significand_is_zero && v.e != kDenormalExponent {
  107. // The boundary is closer. Think of v = 1000e10 and v- = 9999e9.
  108. // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
  109. // at a distance of 1e8.
  110. // The only exception is for the smallest normal: the largest denormal is
  111. // at the same distance as its successor.
  112. // Note: denormals have the same exponent as the smallest normals.
  113. m_minus = diyfp{f: (v.f << 2) - 1, e: v.e - 2}
  114. } else {
  115. m_minus = diyfp{f: (v.f << 1) - 1, e: v.e - 1}
  116. }
  117. m_minus.f <<= m_minus.e - m_plus.e
  118. m_minus.e = m_plus.e
  119. return
  120. }
  121. func (d double) toDiyFp() diyfp {
  122. f, e := d.sigExp()
  123. return diyfp{f: f, e: e}
  124. }
  125. func (d double) sigExp() (significand uint64, exponent int) {
  126. d64 := math.Float64bits(float64(d))
  127. significand = d64 & kSignificandMask
  128. if d64&kExponentMask != 0 { // not denormal
  129. significand += kHiddenBit
  130. exponent = int((d64&kExponentMask)>>kPhysicalSignificandSize) - kExponentBias
  131. } else {
  132. exponent = kDenormalExponent
  133. }
  134. return
  135. }