math_log1p.odin 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. package math
  2. // The original C code, the long comment, and the constants
  3. // below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
  4. // and came with this notice. The go code is a simplified
  5. // version of the original C.
  6. //
  7. // ====================================================
  8. // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  9. //
  10. // Developed at SunPro, a Sun Microsystems, Inc. business.
  11. // Permission to use, copy, modify, and distribute this
  12. // software is freely granted, provided that this notice
  13. // is preserved.
  14. // ====================================================
  15. //
  16. //
  17. // double log1p(double x)
  18. //
  19. // Method :
  20. // 1. Argument Reduction: find k and f such that
  21. // 1+x = 2**k * (1+f),
  22. // where sqrt(2)/2 < 1+f < sqrt(2) .
  23. //
  24. // Note. If k=0, then f=x is exact. However, if k!=0, then f
  25. // may not be representable exactly. In that case, a correction
  26. // term is need. Let u=1+x rounded. Let c = (1+x)-u, then
  27. // log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
  28. // and add back the correction term c/u.
  29. // (Note: when x > 2**53, one can simply return log(x))
  30. //
  31. // 2. Approximation of log1p(f).
  32. // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
  33. // = 2s + 2/3 s**3 + 2/5 s**5 + .....,
  34. // = 2s + s*R
  35. // We use a special Reme algorithm on [0,0.1716] to generate
  36. // a polynomial of degree 14 to approximate R The maximum error
  37. // of this polynomial approximation is bounded by 2**-58.45. In
  38. // other words,
  39. // 2 4 6 8 10 12 14
  40. // R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
  41. // (the values of Lp1 to Lp7 are listed in the program)
  42. // and
  43. // | 2 14 | -58.45
  44. // | Lp1*s +...+Lp7*s - R(z) | <= 2
  45. // | |
  46. // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
  47. // In order to guarantee error in log below 1ulp, we compute log
  48. // by
  49. // log1p(f) = f - (hfsq - s*(hfsq+R)).
  50. //
  51. // 3. Finally, log1p(x) = k*ln2 + log1p(f).
  52. // = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
  53. // Here ln2 is split into two floating point number:
  54. // ln2_hi + ln2_lo,
  55. // where n*ln2_hi is always exact for |n| < 2000.
  56. //
  57. // Special cases:
  58. // log1p(x) is NaN with signal if x < -1 (including -INF) ;
  59. // log1p(+INF) is +INF; log1p(-1) is -INF with signal;
  60. // log1p(NaN) is that NaN with no signal.
  61. //
  62. // Accuracy:
  63. // according to an error analysis, the error is always less than
  64. // 1 ulp (unit in the last place).
  65. //
  66. // Constants:
  67. // The hexadecimal values are the intended ones for the following
  68. // constants. The decimal values may be used, provided that the
  69. // compiler will convert from decimal to binary accurately enough
  70. // to produce the hexadecimal values shown.
  71. //
  72. // Note: Assuming log() return accurate answer, the following
  73. // algorithm can be used to compute log1p(x) to within a few ULP:
  74. //
  75. // u = 1+x;
  76. // if(u==1.0) return x ; else
  77. // return log(u)*(x/(u-1.0));
  78. //
  79. // See HP-15C Advanced Functions Handbook, p.193.
  80. log1p :: proc {
  81. log1p_f16,
  82. log1p_f32,
  83. log1p_f64,
  84. log1p_f16le,
  85. log1p_f16be,
  86. log1p_f32le,
  87. log1p_f32be,
  88. log1p_f64le,
  89. log1p_f64be,
  90. }
  91. @(require_results) log1p_f16 :: proc "contextless" (x: f16) -> f16 { return f16(log1p_f64(f64(x))) }
  92. @(require_results) log1p_f32 :: proc "contextless" (x: f32) -> f32 { return f32(log1p_f64(f64(x))) }
  93. @(require_results) log1p_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log1p_f64(f64(x))) }
  94. @(require_results) log1p_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log1p_f64(f64(x))) }
  95. @(require_results) log1p_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log1p_f64(f64(x))) }
  96. @(require_results) log1p_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log1p_f64(f64(x))) }
  97. @(require_results) log1p_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log1p_f64(f64(x))) }
  98. @(require_results) log1p_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log1p_f64(f64(x))) }
  99. @(require_results)
  100. log1p_f64 :: proc "contextless" (x: f64) -> f64 {
  101. SQRT2_M1 :: 0h3fda827999fcef34 // sqrt(2)-1
  102. SQRT2_HALF_M1 :: 0hbfd2bec333018866 // sqrt(2)/2-1
  103. SMALL :: 0h3e20000000000000 // 2**-29
  104. TINY :: 0h3c90000000000000 // 2**-54
  105. TWO53 :: 0h4340000000000000 // 2**53
  106. LN2HI :: 0h3fe62e42fee00000
  107. LN2LO :: 0h3dea39ef35793c76
  108. LP1 :: 0h3FE5555555555593
  109. LP2 :: 0h3FD999999997FA04
  110. LP3 :: 0h3FD2492494229359
  111. LP4 :: 0h3FCC71C51D8E78AF
  112. LP5 :: 0h3FC7466496CB03DE
  113. LP6 :: 0h3FC39A09D078C69F
  114. LP7 :: 0h3FC2F112DF3E5244
  115. switch {
  116. case x < -1 || is_nan(x):
  117. return nan_f64()
  118. case x == -1:
  119. return inf_f64(-1)
  120. case is_inf(x, 1):
  121. return inf_f64(+1)
  122. }
  123. absx := abs(x)
  124. f: f64
  125. iu: u64
  126. k := 1
  127. if absx < SQRT2_M1 { // |x| < sqrt(2)-1
  128. if absx < SMALL { // |x| < 2**-29
  129. if absx < TINY { // |x| < 2**-54
  130. return x
  131. }
  132. return x - x*x*0.5
  133. }
  134. if x > SQRT2_HALF_M1 { // sqrt(2)/2-1 < x
  135. // (sqrt(2)/2-1) < x < (sqrt(2)-1)
  136. k = 0
  137. f = x
  138. iu = 1
  139. }
  140. }
  141. c: f64
  142. if k != 0 {
  143. u: f64
  144. if absx < TWO53 { // 1<<53
  145. u = 1.0 + x
  146. iu = transmute(u64)u
  147. k = int((iu >> 52) - 1023)
  148. // correction term
  149. if k > 0 {
  150. c = 1.0 - (u - x)
  151. } else {
  152. c = x - (u - 1.0)
  153. }
  154. c /= u
  155. } else {
  156. u = x
  157. iu = transmute(u64)u
  158. k = int((iu >> 52) - 1023)
  159. c = 0
  160. }
  161. iu &= 0x000fffffffffffff
  162. if iu < 0x0006a09e667f3bcd { // mantissa of sqrt(2)
  163. u = transmute(f64)(iu | 0x3ff0000000000000) // normalize u
  164. } else {
  165. k += 1
  166. u = transmute(f64)(iu | 0x3fe0000000000000) // normalize u/2
  167. iu = (0x0010000000000000 - iu) >> 2
  168. }
  169. f = u - 1.0 // sqrt(2)/2 < u < sqrt(2)
  170. }
  171. hfsq := 0.5 * f * f
  172. s, R, z: f64
  173. if iu == 0 { // |f| < 2**-20
  174. if f == 0 {
  175. if k == 0 {
  176. return 0
  177. }
  178. c += f64(k) * LN2LO
  179. return f64(k)*LN2HI + c
  180. }
  181. R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
  182. if k == 0 {
  183. return f - R
  184. }
  185. return f64(k)*LN2HI - ((R - (f64(k)*LN2LO + c)) - f)
  186. }
  187. s = f / (2.0 + f)
  188. z = s * s
  189. R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
  190. if k == 0 {
  191. return f - (hfsq - s*(hfsq+R))
  192. }
  193. return f64(k)*LN2HI - ((hfsq - (s*(hfsq+R) + (f64(k)*LN2LO + c))) - f)
  194. }