Browse Source

Add `log1p`, `erf`, `erfc`, `ilogb`, `logb` (implementations based on FreeBSD's)

gingerBill 3 years ago
parent
commit
eb8b0d7a03
3 changed files with 780 additions and 22 deletions
  1. 172 22
      core/math/math.odin
  2. 410 0
      core/math/math_erf.odin
  3. 198 0
      core/math/math_log1p.odin

+ 172 - 22
core/math/math.odin

@@ -197,22 +197,16 @@ log       :: proc{
 	log_f64, log_f64le, log_f64be,
 }
 
-log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
-log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
-log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }
-
-log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
-log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
-log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }
-
-log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
-log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
-log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }
-log2       :: proc{
-	log2_f16, log2_f16le, log2_f16be,
-	log2_f32, log2_f32le, log2_f32be,
-	log2_f64, log2_f64le, log2_f64be,
-}
+// log2 returns the base-2 logarithm of x, computed as ln(x)/LN2.
+//
+// NOTE: log2 must not be aliased to `logb`. `logb` only returns the integral
+// binary exponent of its argument (e.g. logb(3) == 1), whereas log2(3) is
+// approximately 1.585.
+log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
+log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
+log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }
+
+log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
+log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
+log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }
+
+log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
+log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
+log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }
+log2       :: proc{
+	log2_f16, log2_f16le, log2_f16be,
+	log2_f32, log2_f32le, log2_f32be,
+	log2_f64, log2_f64le, log2_f64be,
+}
 
 log10_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN10 }
 log10_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log10_f16(f16(x))) }
@@ -1394,18 +1388,174 @@ tanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
 	return (t - 1) / (t + 1)
 }
 
-asinh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
-	return ln(x + sqrt(x*x + 1))
-}
-
-acosh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
-	return ln(x + sqrt(x*x - 1))
+asinh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
+	// Inverse hyperbolic sine of y. The computation is carried out in f64
+	// and the result is converted back to T. NaN and infinite inputs are
+	// returned unchanged.
+	//
+	// The original C code and the constants below are from FreeBSD's
+	// /usr/src/lib/msun/src/s_asinh.c and came with this notice.
+	//
+	// ====================================================
+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+	//
+	// Developed at SunPro, a Sun Microsystems, Inc. business.
+	// Permission to use, copy, modify, and distribute this
+	// software is freely granted, provided that this notice
+	// is preserved.
+	// ====================================================
+
+	LN2       :: 0h3FE62E42FEFA39EF
+	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28
+	LARGE     :: 1 << 28         // 2**28
+
+	v := f64(y)
+	if is_nan(v) || is_inf(v) {
+		return T(v)
+	}
+
+	// Evaluate on the magnitude and re-apply the sign at the end.
+	negative := v < 0
+	mag := v
+	if negative {
+		mag = -v
+	}
+
+	result: f64
+	if mag > LARGE {
+		result = ln(mag) + LN2
+	} else if mag > 2 {
+		result = ln(2*mag + 1/(sqrt(mag*mag + 1) + mag))
+	} else if mag < NEAR_ZERO {
+		result = mag
+	} else {
+		result = log1p(mag + mag*mag/(1 + sqrt(1 + mag*mag)))
+	}
+
+	if negative {
+		return T(-result)
+	}
+	return T(result)
+}
+
+acosh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
+	// Inverse hyperbolic cosine of y. The computation is carried out in f64
+	// and the result is converted back to T. Inputs below 1 and NaN yield
+	// NaN; an input of exactly 1 yields 0.
+	//
+	// The original C code and the constants below are from FreeBSD's
+	// /usr/src/lib/msun/src/e_acosh.c and came with this notice.
+	//
+	// ====================================================
+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+	//
+	// Developed at SunPro, a Sun Microsystems, Inc. business.
+	// Permission to use, copy, modify, and distribute this
+	// software is freely granted, provided that this notice
+	// is preserved.
+	// ====================================================
+
+	LARGE :: 1<<28 // 2**28
+	LN2   :: 0h3FE62E42FEFA39EF
+
+	v := f64(y)
+	if v < 1 || is_nan(v) {
+		return T(nan_f64())
+	}
+	if v == 1 {
+		return 0
+	}
+	if v >= LARGE {
+		return T(ln(v) + LN2)
+	}
+	if v > 2 {
+		return T(ln(2*v - 1/(v+sqrt(v*v-1))))
+	}
+
+	// Remaining range: 1 < v <= 2, expressed through d = v - 1.
+	d := v-1
+	return T(log1p(d + sqrt(2*d + d*d)))
 }
 
 atanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
 	// Inverse hyperbolic tangent via the identity
 	// atanh(x) = ln((1+x)/(1-x)) / 2.
 	ratio := (1+x)/(1-x)
 	return 0.5*ln(ratio)
 }
 
+ilogb_f16 :: proc "contextless" (val: f16) -> int {
+	// Returns the unbiased binary exponent of val as an integer.
+	// Zero yields min(i32); NaN and infinities yield max(i32).
+	if val == 0 {
+		return int(min(i32))
+	}
+	if is_nan(val) || is_inf(val) {
+		return int(max(i32))
+	}
+	// normalize_f16 returns an adjusted value plus an exponent correction
+	// (presumably to handle subnormal inputs - see its definition).
+	norm, adjust := normalize_f16(val)
+	bits := transmute(u16)norm
+	biased := int((bits>>F16_SHIFT)&F16_MASK)
+	return biased - F16_BIAS + adjust
+}
+ilogb_f32 :: proc "contextless" (val: f32) -> int {
+	// Returns the unbiased binary exponent of val as an integer.
+	// Zero yields min(i32); NaN and infinities yield max(i32).
+	if val == 0 {
+		return int(min(i32))
+	}
+	if is_nan(val) || is_inf(val) {
+		return int(max(i32))
+	}
+	// normalize_f32 returns an adjusted value plus an exponent correction
+	// (presumably to handle subnormal inputs - see its definition).
+	norm, adjust := normalize_f32(val)
+	bits := transmute(u32)norm
+	biased := int((bits>>F32_SHIFT)&F32_MASK)
+	return biased - F32_BIAS + adjust
+}
+ilogb_f64 :: proc "contextless" (val: f64) -> int {
+	// Returns the unbiased binary exponent of val as an integer.
+	// Zero yields min(i32); NaN and infinities yield max(i32).
+	if val == 0 {
+		return int(min(i32))
+	}
+	if is_nan(val) || is_inf(val) {
+		return int(max(i32))
+	}
+	// normalize_f64 returns an adjusted value plus an exponent correction
+	// (presumably to handle subnormal inputs - see its definition).
+	norm, adjust := normalize_f64(val)
+	bits := transmute(u64)norm
+	biased := int((bits>>F64_SHIFT)&F64_MASK)
+	return biased - F64_BIAS + adjust
+}
+// Endian-specific ilogb variants: each converts its argument to the plain
+// float type of the same width and delegates to that implementation.
+ilogb_f16le :: proc "contextless" (value: f16le) -> int {
+	plain := f16(value)
+	return ilogb_f16(plain)
+}
+ilogb_f16be :: proc "contextless" (value: f16be) -> int {
+	plain := f16(value)
+	return ilogb_f16(plain)
+}
+ilogb_f32le :: proc "contextless" (value: f32le) -> int {
+	plain := f32(value)
+	return ilogb_f32(plain)
+}
+ilogb_f32be :: proc "contextless" (value: f32be) -> int {
+	plain := f32(value)
+	return ilogb_f32(plain)
+}
+ilogb_f64le :: proc "contextless" (value: f64le) -> int {
+	plain := f64(value)
+	return ilogb_f64(plain)
+}
+ilogb_f64be :: proc "contextless" (value: f64be) -> int {
+	plain := f64(value)
+	return ilogb_f64(plain)
+}
+
+// ilogb is the overload set over every float width and endianness.
+ilogb :: proc {
+	ilogb_f16,   ilogb_f32,   ilogb_f64,
+	ilogb_f16le, ilogb_f16be,
+	ilogb_f32le, ilogb_f32be,
+	ilogb_f64le, ilogb_f64be,
+}
+
+logb_f16 :: proc "contextless" (val: f16) -> f16 {
+	// Returns the binary exponent of val (as computed by ilogb) as an f16.
+	// Zero yields -inf, either infinity yields +inf, and NaN is returned as is.
+	if val == 0 {
+		return inf_f16(-1)
+	}
+	if is_inf(val) {
+		return inf_f16(+1)
+	}
+	if is_nan(val) {
+		return val
+	}
+	exponent := ilogb(val)
+	return f16(exponent)
+}
+logb_f32 :: proc "contextless" (val: f32) -> f32 {
+	// Returns the binary exponent of val (as computed by ilogb) as an f32.
+	// Zero yields -inf, either infinity yields +inf, and NaN is returned as is.
+	if val == 0 {
+		return inf_f32(-1)
+	}
+	if is_inf(val) {
+		return inf_f32(+1)
+	}
+	if is_nan(val) {
+		return val
+	}
+	exponent := ilogb(val)
+	return f32(exponent)
+}
+logb_f64 :: proc "contextless" (val: f64) -> f64 {
+	// Returns the binary exponent of val (as computed by ilogb) as an f64.
+	// Zero yields -inf, either infinity yields +inf, and NaN is returned as is.
+	if val == 0 {
+		return inf_f64(-1)
+	}
+	if is_inf(val) {
+		return inf_f64(+1)
+	}
+	if is_nan(val) {
+		return val
+	}
+	exponent := ilogb(val)
+	return f64(exponent)
+}
+// Endian-specific logb variants: each converts its argument to the plain
+// float type of the same width, delegates to that implementation, and
+// converts the result back to the endian-specific type.
+logb_f16le :: proc "contextless" (value: f16le) -> f16le {
+	result := logb_f16(f16(value))
+	return f16le(result)
+}
+logb_f16be :: proc "contextless" (value: f16be) -> f16be {
+	result := logb_f16(f16(value))
+	return f16be(result)
+}
+logb_f32le :: proc "contextless" (value: f32le) -> f32le {
+	result := logb_f32(f32(value))
+	return f32le(result)
+}
+logb_f32be :: proc "contextless" (value: f32be) -> f32be {
+	result := logb_f32(f32(value))
+	return f32be(result)
+}
+logb_f64le :: proc "contextless" (value: f64le) -> f64le {
+	result := logb_f64(f64(value))
+	return f64le(result)
+}
+logb_f64be :: proc "contextless" (value: f64be) -> f64be {
+	result := logb_f64(f64(value))
+	return f64be(result)
+}
+
+// logb is the overload set over every float width and endianness.
+logb :: proc {
+	logb_f16,   logb_f32,   logb_f64,
+	logb_f16le, logb_f16be,
+	logb_f32le, logb_f32be,
+	logb_f64le, logb_f64be,
+}
+
 F16_DIG        :: 3
 F16_EPSILON    :: 0.00097656
 F16_GUARD      :: 0

+ 410 - 0
core/math/math_erf.odin

@@ -0,0 +1,410 @@
+package math
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
+// came with this notice. 
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double erf(double x)
+// double erfc(double x)
+//                           x
+//                    2      |\
+//     erf(x)  =  ---------  | exp(-t*t)dt
+//                 sqrt(pi) \|
+//                           0
+//
+//     erfc(x) =  1-erf(x)
+//  Note that
+//              erf(-x) = -erf(x)
+//              erfc(-x) = 2 - erfc(x)
+//
+// Method:
+//      1. For |x| in [0, 0.84375]
+//          erf(x)  = x + x*R(x**2)
+//          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
+//                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
+//         where R = P/Q where P is an even poly of degree 8 and
+//         Q is an even poly of degree 10.
+//                                               -57.90
+//                      | R - (erf(x)-x)/x | <= 2
+//
+//
+//         Remark. The formula is derived by noting
+//          erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
+//         and that
+//          2/sqrt(pi) = 1.128379167095512573896158903121545171688
+//         is close to one. The interval is chosen because the fix
+//         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
+//         near 0.6174), and by some experiment, 0.84375 is chosen to
+//         guarantee the error is less than one ulp for erf.
+//
+//      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
+//         c = 0.84506291151 rounded to single (24 bits)
+//              erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
+//              erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
+//                        1+(c+P1(s)/Q1(s))    if x < 0
+//              |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
+//         Remark: here we use the taylor series expansion at x=1.
+//              erf(1+s) = erf(1) + s*Poly(s)
+//                       = 0.845.. + P1(s)/Q1(s)
+//         That is, we use rational approximation to approximate
+//                      erf(1+s) - (c = (single)0.84506291151)
+//         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+//         where
+//              P1(s) = degree 6 poly in s
+//              Q1(s) = degree 6 poly in s
+//
+//      3. For x in [1.25,1/0.35(~2.857143)],
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+//              erf(x)  = 1 - erfc(x)
+//         where
+//              R1(z) = degree 7 poly in z, (z=1/x**2)
+//              S1(z) = degree 8 poly in z
+//
+//      4. For x in [1/0.35,28]
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+//                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
+//                      = 2.0 - tiny            (if x <= -6)
+//              erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
+//              erf(x)  = sign(x)*(1.0 - tiny)
+//         where
+//              R2(z) = degree 6 poly in z, (z=1/x**2)
+//              S2(z) = degree 7 poly in z
+//
+//      Note1:
+//         To compute exp(-x*x-0.5625+R/S), let s be a single
+//         precision number and s := x; then
+//              -x*x = -s*s + (s-x)*(s+x)
+//              exp(-x*x-0.5625+R/S) =
+//                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+//      Note2:
+//         Here 3 and 4 make use of the asymptotic series
+//                        exp(-x*x)
+//              erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
+//                        x*sqrt(pi)
+//         We use rational approximation to approximate
+//              g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
+//         Here is the error bound for R1/S1 and R2/S2
+//              |R1/S1 - f(x)|  < 2**(-62.57)
+//              |R2/S2 - f(x)|  < 2**(-61.52)
+//
+//      5. For inf > x >= 28
+//              erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
+//              erfc(x) = tiny*tiny (raise underflow) if x > 0
+//                      = 2 - tiny if x<0
+//
+//      6. Special cases:
+//              erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
+//              erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+//              erfc/erf(NaN) is NaN
+
+// erf is the overload set for the error function over every float width
+// and endianness.
+erf :: proc{
+	erf_f16,
+	erf_f16le,
+	erf_f16be,
+	erf_f32,
+	erf_f32le,
+	erf_f32be,
+	erf_f64,
+	erf_f64le,
+	erf_f64be,
+}
+
+// Every variant other than erf_f64 widens its argument to f64, evaluates
+// erf_f64, and converts the result back to the argument's type.
+erf_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erf_f64(f64(x))) }
+erf_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erf_f64(f64(x))) }
+erf_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erf_f64(f64(x))) }
+erf_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erf_f64(f64(x))) }
+erf_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erf_f64(f64(x))) }
+erf_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erf_f64(f64(x))) }
+erf_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erf_f64(f64(x))) }
+erf_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erf_f64(f64(x))) }
+
+erf_f64 :: proc "contextless" (x: f64) -> f64 { // Error function erf(x) for f64; evaluated on |x| with the sign restored on return.
+	erx :: 0h3FEB0AC160000000 // constant term added to P/Q in the 0.84375 <= |x| < 1.25 range
+	// Coefficients for approximation to  erf in [0, 0.84375]
+	efx  :: 0h3FC06EBA8214DB69 // used as x + efx*x when |x| < 2**-28
+	efx8 :: 0h3FF06EBA8214DB69 // used in the scaled form 0.125*(8.0*x + efx8*x) when |x| < VERY_TINY
+	pp0  :: 0h3FC06EBA8214DB68
+	pp1  :: 0hBFD4CD7D691CB913
+	pp2  :: 0hBF9D2A51DBD7194F
+	pp3  :: 0hBF77A291236668E4
+	pp4  :: 0hBEF8EAD6120016AC
+	qq1  :: 0h3FD97779CDDADC09
+	qq2  :: 0h3FB0A54C5536CEBA
+	qq3  :: 0h3F74D022C4D36B0F
+	qq4  :: 0h3F215DC9221C1A10
+	qq5  :: 0hBED09C4342A26120
+	// Coefficients for approximation to  erf  in [0.84375, 1.25]
+	pa0 :: 0hBF6359B8BEF77538
+	pa1 :: 0h3FDA8D00AD92B34D
+	pa2 :: 0hBFD7D240FBB8C3F1
+	pa3 :: 0h3FD45FCA805120E4
+	pa4 :: 0hBFBC63983D3E28EC
+	pa5 :: 0h3FA22A36599795EB
+	pa6 :: 0hBF61BF380A96073F
+	qa1 :: 0h3FBB3E6618EEE323
+	qa2 :: 0h3FE14AF092EB6F33
+	qa3 :: 0h3FB2635CD99FE9A7
+	qa4 :: 0h3FC02660E763351F
+	qa5 :: 0h3F8BEDC26B51DD1C
+	qa6 :: 0h3F888B545735151D
+	// Coefficients for approximation to  erfc in [1.25, 1/0.35]
+	ra0 :: 0hBF843412600D6435
+	ra1 :: 0hBFE63416E4BA7360
+	ra2 :: 0hC0251E0441B0E726
+	ra3 :: 0hC04F300AE4CBA38D
+	ra4 :: 0hC0644CB184282266
+	ra5 :: 0hC067135CEBCCABB2
+	ra6 :: 0hC054526557E4D2F2
+	ra7 :: 0hC023A0EFC69AC25C
+	sa1 :: 0h4033A6B9BD707687
+	sa2 :: 0h4061350C526AE721
+	sa3 :: 0h407B290DD58A1A71
+	sa4 :: 0h40842B1921EC2868
+	sa5 :: 0h407AD02157700314
+	sa6 :: 0h405B28A3EE48AE2C
+	sa7 :: 0h401A47EF8E484A93
+	sa8 :: 0hBFAEEFF2EE749A62
+	// Coefficients for approximation to  erfc in [1/.35, 28]
+	rb0 :: 0hBF84341239E86F4A
+	rb1 :: 0hBFE993BA70C285DE
+	rb2 :: 0hC031C209555F995A
+	rb3 :: 0hC064145D43C5ED98
+	rb4 :: 0hC083EC881375F228
+	rb5 :: 0hC09004616A2E5992
+	rb6 :: 0hC07E384E9BDC383F
+	sb1 :: 0h403E568B261D5190
+	sb2 :: 0h40745CAE221B9F0A
+	sb3 :: 0h409802EB189D5118
+	sb4 :: 0h40A8FFB7688C246A
+	sb5 :: 0h40A3F219CEDF3BE6
+	sb6 :: 0h407DA874E79FE763
+	sb7 :: 0hC03670E242712D62
+	
+	
+	VERY_TINY :: 0h0080000000000000 // threshold below which the scaled tiny-argument formula is used
+	SMALL     :: 1.0 / (1 << 28)        // 2**-28
+
+	// special cases: NaN -> NaN, +inf -> 1, -inf -> -1
+	switch {
+	case is_nan(x):
+		return nan_f64()
+	case is_inf(x, 1):
+		return 1
+	case is_inf(x, -1):
+		return -1
+	}
+	x := x // mutable local copy of the parameter; replaced by |x| below
+	sign := false // true when the original argument was negative
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		temp: f64
+		if x < SMALL { // |x| < 2**-28
+			if x < VERY_TINY {
+				temp = 0.125 * (8.0*x + efx8*x) // avoid underflow
+			} else {
+				temp = x + efx*x
+			}
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) // numerator polynomial in z = x*x
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) // denominator polynomial in z
+			y := r / s
+			temp = x + x*y
+		}
+		if sign {
+			return -temp
+		}
+		return temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return -erx - P/Q
+		}
+		return erx + P/Q
+	}
+	if x >= 6 { // inf > |x| >= 6
+		if sign {
+			return -1
+		}
+		return 1
+	}
+	s := 1 / (x * x) // the rational approximations below are polynomials in 1/x**2
+	R, S: f64
+	if x < 1/0.35 { // |x| < 1 / 0.35  ~ 2.857143
+		R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+		S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+	} else { // |x| >= 1 / 0.35  ~ 2.857143
+		R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+		S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+	}
+	z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
+	r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S) // exponent split as described in Note1 of the header comment
+	if sign {
+		return r/x - 1
+	}
+	return 1 - r/x // r/x is the erfc(|x|) estimate from the header comment's formula
+}
+
+
+// erfc is the overload set for the complementary error function over every
+// float width and endianness.
+erfc :: proc{
+	erfc_f16,
+	erfc_f16le,
+	erfc_f16be,
+	erfc_f32,
+	erfc_f32le,
+	erfc_f32be,
+	erfc_f64,
+	erfc_f64le,
+	erfc_f64be,
+}
+
+// Every variant other than erfc_f64 widens its argument to f64, evaluates
+// erfc_f64, and converts the result back to the argument's type.
+erfc_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erfc_f64(f64(x))) }
+erfc_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erfc_f64(f64(x))) }
+erfc_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erfc_f64(f64(x))) }
+erfc_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erfc_f64(f64(x))) }
+erfc_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erfc_f64(f64(x))) }
+erfc_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erfc_f64(f64(x))) }
+erfc_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erfc_f64(f64(x))) }
+erfc_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erfc_f64(f64(x))) }
+
+erfc_f64 :: proc "contextless" (x: f64) -> f64 { // Complementary error function erfc(x) for f64; evaluated on |x| with the sign handled per branch.
+	erx :: 0h3FEB0AC160000000 // constant term combined with P/Q in the 0.84375 <= |x| < 1.25 range
+	// Coefficients for approximation to  erf in [0, 0.84375]
+	efx  :: 0h3FC06EBA8214DB69 // NOTE(review): not referenced anywhere in erfc_f64
+	efx8 :: 0h3FF06EBA8214DB69 // NOTE(review): not referenced anywhere in erfc_f64
+	pp0  :: 0h3FC06EBA8214DB68
+	pp1  :: 0hBFD4CD7D691CB913
+	pp2  :: 0hBF9D2A51DBD7194F
+	pp3  :: 0hBF77A291236668E4
+	pp4  :: 0hBEF8EAD6120016AC
+	qq1  :: 0h3FD97779CDDADC09
+	qq2  :: 0h3FB0A54C5536CEBA
+	qq3  :: 0h3F74D022C4D36B0F
+	qq4  :: 0h3F215DC9221C1A10
+	qq5  :: 0hBED09C4342A26120
+	// Coefficients for approximation to  erf  in [0.84375, 1.25]
+	pa0 :: 0hBF6359B8BEF77538
+	pa1 :: 0h3FDA8D00AD92B34D
+	pa2 :: 0hBFD7D240FBB8C3F1
+	pa3 :: 0h3FD45FCA805120E4
+	pa4 :: 0hBFBC63983D3E28EC
+	pa5 :: 0h3FA22A36599795EB
+	pa6 :: 0hBF61BF380A96073F
+	qa1 :: 0h3FBB3E6618EEE323
+	qa2 :: 0h3FE14AF092EB6F33
+	qa3 :: 0h3FB2635CD99FE9A7
+	qa4 :: 0h3FC02660E763351F
+	qa5 :: 0h3F8BEDC26B51DD1C
+	qa6 :: 0h3F888B545735151D
+	// Coefficients for approximation to  erfc in [1.25, 1/0.35]
+	ra0 :: 0hBF843412600D6435
+	ra1 :: 0hBFE63416E4BA7360
+	ra2 :: 0hC0251E0441B0E726
+	ra3 :: 0hC04F300AE4CBA38D
+	ra4 :: 0hC0644CB184282266
+	ra5 :: 0hC067135CEBCCABB2
+	ra6 :: 0hC054526557E4D2F2
+	ra7 :: 0hC023A0EFC69AC25C
+	sa1 :: 0h4033A6B9BD707687
+	sa2 :: 0h4061350C526AE721
+	sa3 :: 0h407B290DD58A1A71
+	sa4 :: 0h40842B1921EC2868
+	sa5 :: 0h407AD02157700314
+	sa6 :: 0h405B28A3EE48AE2C
+	sa7 :: 0h401A47EF8E484A93
+	sa8 :: 0hBFAEEFF2EE749A62
+	// Coefficients for approximation to  erfc in [1/.35, 28]
+	rb0 :: 0hBF84341239E86F4A
+	rb1 :: 0hBFE993BA70C285DE
+	rb2 :: 0hC031C209555F995A
+	rb3 :: 0hC064145D43C5ED98
+	rb4 :: 0hC083EC881375F228
+	rb5 :: 0hC09004616A2E5992
+	rb6 :: 0hC07E384E9BDC383F
+	sb1 :: 0h403E568B261D5190
+	sb2 :: 0h40745CAE221B9F0A
+	sb3 :: 0h409802EB189D5118
+	sb4 :: 0h40A8FFB7688C246A
+	sb5 :: 0h40A3F219CEDF3BE6
+	sb6 :: 0h407DA874E79FE763
+	sb7 :: 0hC03670E242712D62
+	
+	TINY :: 1.0 / (1 << 56) // 2**-56
+	// special cases: NaN -> NaN, +inf -> 0, -inf -> 2
+	switch {
+	case is_nan(x):
+		return nan_f64()
+	case is_inf(x, 1):
+		return 0
+	case is_inf(x, -1):
+		return 2
+	}
+	x := x // mutable local copy of the parameter; replaced by |x| below
+	sign := false // true when the original argument was negative
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		temp: f64
+		if x < TINY { // |x| < 2**-56
+			temp = x // result below becomes 1 + |x| or 1 - |x|
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) // numerator polynomial in z = x*x
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) // denominator polynomial in z
+			y := r / s
+			if x < 0.25 { // |x| < 1/4
+				temp = x + x*y
+			} else {
+				temp = 0.5 + (x*y + (x - 0.5)) // algebraically x + x*y, grouped around 0.5
+			}
+		}
+		if sign {
+			return 1 + temp
+		}
+		return 1 - temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return 1 + erx + P/Q
+		}
+		return 1 - erx - P/Q
+
+	}
+	if x < 28 { // |x| < 28
+		s := 1 / (x * x) // the rational approximations below are polynomials in 1/x**2
+		R, S: f64
+		if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
+			R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+			S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+		} else { // |x| >= 1 / 0.35 ~ 2.857143
+			if sign && x > 6 {
+				return 2 // x < -6
+			}
+			R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+			S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+		}
+		z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
+		r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S) // exponent split as described in Note1 of the header comment
+		if sign {
+			return 2 - r/x
+		}
+		return r / x
+	}
+	if sign { // reached only when |x| >= 28
+		return 2
+	}
+	return 0
+}

+ 198 - 0
core/math/math_log1p.odin

@@ -0,0 +1,198 @@
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
+// and came with this notice. The Odin code below, like the Go code
+// it follows, is a simplified version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double log1p(double x)
+//
+// Method :
+//   1. Argument Reduction: find k and f such that
+//                      1+x = 2**k * (1+f),
+//         where  sqrt(2)/2 < 1+f < sqrt(2) .
+//
+//      Note. If k=0, then f=x is exact. However, if k!=0, then f
+//      may not be representable exactly. In that case, a correction
+//      term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
+//      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+//      and add back the correction term c/u.
+//      (Note: when x > 2**53, one can simply return log(x))
+//
+//   2. Approximation of log1p(f).
+//      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+//               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+//               = 2s + s*R
+//      We use a special Remez algorithm on [0,0.1716] to generate
+//      a polynomial of degree 14 to approximate R. The maximum error
+//      of this polynomial approximation is bounded by 2**-58.45. In
+//      other words,
+//                      2      4      6      8      10      12      14
+//          R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
+//      (the values of Lp1 to Lp7 are listed in the program)
+//      and
+//          |      2          14          |     -58.45
+//          | Lp1*s +...+Lp7*s    -  R(z) | <= 2
+//          |                             |
+//      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+//      In order to guarantee error in log below 1ulp, we compute log
+//      by
+//              log1p(f) = f - (hfsq - s*(hfsq+R)).
+//
+//   3. Finally, log1p(x) = k*ln2 + log1p(f).
+//                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
+//      Here ln2 is split into two floating point number:
+//                   ln2_hi + ln2_lo,
+//      where n*ln2_hi is always exact for |n| < 2000.
+//
+// Special cases:
+//      log1p(x) is NaN with signal if x < -1 (including -INF) ;
+//      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+//      log1p(NaN) is that NaN with no signal.
+//
+// Accuracy:
+//      according to an error analysis, the error is always less than
+//      1 ulp (unit in the last place).
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+//
+// Note: Assuming log() return accurate answer, the following
+//       algorithm can be used to compute log1p(x) to within a few ULP:
+//
+//              u = 1+x;
+//              if(u==1.0) return x ; else
+//                         return log(u)*(x/(u-1.0));
+//
+//       See HP-15C Advanced Functions Handbook, p.193.
+
+// log1p is the overload set for ln(1+x) over every float width and
+// endianness.
+log1p :: proc {
+	log1p_f16,   log1p_f32,   log1p_f64,
+	log1p_f16le, log1p_f16be,
+	log1p_f32le, log1p_f32be,
+	log1p_f64le, log1p_f64be,
+}
+
+// Every variant other than log1p_f64 widens its argument to f64, evaluates
+// log1p_f64, and converts the result back to the argument's type.
+log1p_f16 :: proc "contextless" (x: f16) -> f16 {
+	wide := log1p_f64(f64(x))
+	return f16(wide)
+}
+log1p_f16le :: proc "contextless" (x: f16le) -> f16le {
+	wide := log1p_f64(f64(x))
+	return f16le(wide)
+}
+log1p_f16be :: proc "contextless" (x: f16be) -> f16be {
+	wide := log1p_f64(f64(x))
+	return f16be(wide)
+}
+log1p_f32 :: proc "contextless" (x: f32) -> f32 {
+	wide := log1p_f64(f64(x))
+	return f32(wide)
+}
+log1p_f32le :: proc "contextless" (x: f32le) -> f32le {
+	wide := log1p_f64(f64(x))
+	return f32le(wide)
+}
+log1p_f32be :: proc "contextless" (x: f32be) -> f32be {
+	wide := log1p_f64(f64(x))
+	return f32be(wide)
+}
+log1p_f64le :: proc "contextless" (x: f64le) -> f64le {
+	wide := log1p_f64(f64(x))
+	return f64le(wide)
+}
+log1p_f64be :: proc "contextless" (x: f64be) -> f64be {
+	wide := log1p_f64(f64(x))
+	return f64be(wide)
+}
+
+log1p_f64 :: proc "contextless" (x: f64) -> f64 { // Computes ln(1+x) for f64 following the method described in the comment above.
+	SQRT2_M1      :: 0h3fda827999fcef34 // Sqrt(2)-1 
+	SQRT2_HALF_M1 :: 0hbfd2bec333018866 // Sqrt(2)/2-1
+	SMALL         :: 0h3e20000000000000 // 2**-29
+	TINY          :: 1.0 / (1 << 54)    // 2**-54
+	TWO53         :: 1 << 53            // 2**53
+	LN2HI         :: 0h3fe62e42fee00000 // high part of the ln2 split (multiplied by k below)
+	LN2LO         :: 0h3dea39ef35793c76 // low part of the ln2 split (multiplied by k below)
+	LP1           :: 0h3FE5555555555593 // LP1..LP7: coefficients of the polynomial R evaluated in z = s*s
+	LP2           :: 0h3FD999999997FA04
+	LP3           :: 0h3FD2492494229359
+	LP4           :: 0h3FCC71C51D8E78AF
+	LP5           :: 0h3FC7466496CB03DE
+	LP6           :: 0h3FC39A09D078C69F
+	LP7           :: 0h3FC2F112DF3E5244
+	
+	switch { // special cases
+	case x < -1 || is_nan(x): // also covers -inf
+		return nan_f64()
+	case x == -1:
+		return inf_f64(-1)
+	case is_inf(x, 1):
+		return inf_f64(+1)
+	}
+	absx := abs(x)
+	
+	f: f64
+	iu: u64
+	k := 1 // nonzero default: the argument reduction below runs unless k is reset to 0
+	if absx < SQRT2_M1 { //  |x| < Sqrt(2)-1
+		if absx < SMALL { // |x| < 2**-29
+			if absx < TINY { // |x| < 2**-54
+				return x
+			}
+			return x - x*x*0.5
+		}
+		if x > SQRT2_HALF_M1 { // Sqrt(2)/2-1 < x
+			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
+			k = 0
+			f = x
+			iu = 1 // nonzero, so the iu == 0 branch below is not taken
+		}
+	}
+	c: f64 // correction term c/u described in step 1 of the comment above
+	if k != 0 {
+		u: f64
+		if absx < TWO53 { // 1<<53
+			u = 1.0 + x
+			iu = transmute(u64)u
+			k = int((iu >> 52) - 1023) // exponent field of u minus the f64 bias
+			// correction term
+			if k > 0 {
+				c = 1.0 - (u - x)
+			} else {
+				c = x - (u - 1.0)
+			}
+			c /= u
+		} else {
+			u = x
+			iu = transmute(u64)u
+			k = int((iu >> 52) - 1023) // exponent field of u minus the f64 bias
+			c = 0
+		}
+		iu &= 0x000fffffffffffff // keep only the low 52 (mantissa) bits
+		if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
+			u = transmute(f64)(iu | 0x3ff0000000000000) // normalize u
+		} else {
+			k += 1
+			u = transmute(f64)(iu | 0x3fe0000000000000) // normalize u/2
+			iu = (0x0010000000000000 - iu) >> 2
+		}
+		f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2)
+	}
+	hfsq := 0.5 * f * f
+	s, R, z: f64
+	if iu == 0 { // |f| < 2**-20
+		if f == 0 {
+			if k == 0 {
+				return 0
+			}
+			c += f64(k) * LN2LO
+			return f64(k)*LN2HI + c
+		}
+		R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
+		if k == 0 {
+			return f - R
+		}
+		return f64(k)*LN2HI - ((R - (f64(k)*LN2LO + c)) - f)
+	}
+	s = f / (2.0 + f)
+	z = s * s
+	R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
+	if k == 0 {
+		return f - (hfsq - s*(hfsq+R))
+	}
+	return f64(k)*LN2HI - ((hfsq - (s*(hfsq+R) + (f64(k)*LN2LO + c))) - f)
+}