2 tahun lalu · 0180a4fcd4
--- a/core/math/math_sincos.odin
+++ b/core/math/math_sincos.odin
@@ -0,0 +1,308 @@
 
															+package math
														
 
															+
														
 
															+import "core:math/bits"
														
 
															+
														
 
															+// The original C code, the long comment, and the constants
														
 
															+// below were from http://netlib.sandia.gov/cephes/cmath/sin.c,
														
 
															+// available from http://www.netlib.org/cephes/cmath.tgz.
														
 
															+// The go code is a simplified version of the original C.
														
 
															+//
														
 
															+//      sin.c
														
 
															+//
														
 
															+//      Circular sine
														
 
															+//
														
 
															+// SYNOPSIS:
														
 
															+//
														
 
															+// double x, y, sin();
														
 
															+// y = sin( x );
														
 
															+//
														
 
															+// DESCRIPTION:
														
 
															+//
														
 
															+// Range reduction is into intervals of pi/4.  The reduction error is nearly
														
 
															+// eliminated by contriving an extended precision modular arithmetic.
														
 
															+//
														
 
															+// Two polynomial approximating functions are employed.
														
 
															+// Between 0 and pi/4 the sine is approximated by
														
 
															+//      x  +  x**3 P(x**2).
														
 
															+// Between pi/4 and pi/2 the cosine is represented as
														
 
															+//      1  -  x**2 Q(x**2).
														
 
															+//
														
 
															+// ACCURACY:
														
 
															+//
														
 
															+//                      Relative error:
														
 
															+// arithmetic   domain      # trials      peak         rms
														
 
															+//    DEC       0, 10       150000       3.0e-17     7.8e-18
														
 
															+//    IEEE -1.07e9,+1.07e9  130000       2.1e-16     5.4e-17
														
 
															+//
														
 
															+// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9.  The loss
														
 
															+// is not gradual, but jumps suddenly to about 1 part in 10e7.  Results may
														
 
															+// be meaningless for x > 2**49 = 5.6e14.
														
 
															+//
														
 
															+//      cos.c
														
 
															+//
														
 
															+//      Circular cosine
														
 
															+//
														
 
															+// SYNOPSIS:
														
 
															+//
														
 
															+// double x, y, cos();
														
 
															+// y = cos( x );
														
 
															+//
														
 
															+// DESCRIPTION:
														
 
															+//
														
 
															+// Range reduction is into intervals of pi/4.  The reduction error is nearly
														
 
															+// eliminated by contriving an extended precision modular arithmetic.
														
 
															+//
														
 
															+// Two polynomial approximating functions are employed.
														
 
															+// Between 0 and pi/4 the cosine is approximated by
														
 
															+//      1  -  x**2 Q(x**2).
														
 
															+// Between pi/4 and pi/2 the sine is represented as
														
 
															+//      x  +  x**3 P(x**2).
														
 
															+//
														
 
															+// ACCURACY:
														
 
															+//
														
 
															+//                      Relative error:
														
 
															+// arithmetic   domain      # trials      peak         rms
														
 
															+//    IEEE -1.07e9,+1.07e9  130000       2.1e-16     5.4e-17
														
 
															+//    DEC        0,+1.07e9   17000       3.0e-17     7.2e-18
														
 
															+//
														
 
															+// Cephes Math Library Release 2.8:  June, 2000
														
 
															+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
														
 
															+//
														
 
															+// The readme file at http://netlib.sandia.gov/cephes/ says:
														
 
															+//    Some software in this archive may be from the book _Methods and
														
 
															+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
														
 
															+// International, 1989) or from the Cephes Mathematical Library, a
														
 
															+// commercial product. In either event, it is copyrighted by the author.
														
 
															+// What you see here may be used freely but it comes with no support or
														
 
															+// guarantee.
														
 
															+//
														
 
															+//   The two known misprints in the book are repaired here in the
														
 
															+// source listings for the gamma function and the incomplete beta
														
 
															+// integral.
														
 
															+//
														
 
															+//   Stephen L. Moshier
														
 
															+//   moshier@na-net.ornl.gov
														
 
															+
														
 
															+// sincos is an explicit procedure group: a call is dispatched to the
														
 
															+// sincos_* variant whose parameter type matches the argument (f16, f32,
														
 
															+// f64 and their little-/big-endian forms). Every variant returns the pair
														
 
															+// (sin, cos) in the same float type as its argument.
														
 
															+sincos :: proc{
														
 
															+	sincos_f16, sincos_f16le, sincos_f16be,
														
 
															+	sincos_f32, sincos_f32le, sincos_f32be,
														
 
															+	sincos_f64, sincos_f64le, sincos_f64be,
														
 
															+}
														
 
															+
														
 
															+// sincos_f16 returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f16.
														
 
															+sincos_f16 :: proc "contextless" (x: f16) -> (sin, cos: f16) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f16(wide_sin)
														
 
															+	cos = f16(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+// sincos_f16le returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f16le.
														
 
															+sincos_f16le :: proc "contextless" (x: f16le) -> (sin, cos: f16le) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f16le(wide_sin)
														
 
															+	cos = f16le(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+// sincos_f16be returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f16be.
														
 
															+sincos_f16be :: proc "contextless" (x: f16be) -> (sin, cos: f16be) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f16be(wide_sin)
														
 
															+	cos = f16be(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+
														
 
															+// sincos_f32 returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f32.
														
 
															+sincos_f32 :: proc "contextless" (x: f32) -> (sin, cos: f32) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f32(wide_sin)
														
 
															+	cos = f32(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+// sincos_f32le returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f32le.
														
 
															+sincos_f32le :: proc "contextless" (x: f32le) -> (sin, cos: f32le) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f32le(wide_sin)
														
 
															+	cos = f32le(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+// sincos_f32be returns the sine and cosine of x. The argument is widened to
														
 
															+// f64, evaluated by sincos_f64, and both results are converted back to f32be.
														
 
															+sincos_f32be :: proc "contextless" (x: f32be) -> (sin, cos: f32be) #no_bounds_check {
														
 
															+	wide_sin, wide_cos := sincos_f64(f64(x))
														
 
															+	sin = f32be(wide_sin)
														
 
															+	cos = f32be(wide_cos)
														
 
															+	return
														
 
															+}
														
 
															+
														
 
															+// sincos_f64le returns the sine and cosine of x. The argument is converted
														
 
															+// to f64, evaluated by sincos_f64, and both results are converted back to
														
 
															+// f64le.
														
 
															+sincos_f64le :: proc "contextless" (x: f64le) -> (sin, cos: f64le) #no_bounds_check {
														
 
															+	native_sin, native_cos := sincos_f64(f64(x))
														
 
															+	sin = f64le(native_sin)
														
 
															+	cos = f64le(native_cos)
														
 
															+	return
														
 
															+}
														
 
															+// sincos_f64be returns the sine and cosine of x. The argument is converted
														
 
															+// to f64, evaluated by sincos_f64, and both results are converted back to
														
 
															+// f64be.
														
 
															+sincos_f64be :: proc "contextless" (x: f64be) -> (sin, cos: f64be) #no_bounds_check {
														
 
															+	native_sin, native_cos := sincos_f64(f64(x))
														
 
															+	sin = f64be(native_sin)
														
 
															+	cos = f64be(native_cos)
														
 
															+	return
														
 
															+}
														
 
															+
														
 
															+// sincos_f64 returns the sine and cosine of x.
														
 
															+//
														
 
															+// Special cases:
														
 
															+//	x == ±0         -> (x, 1)
														
 
															+//	x is NaN or ±Inf -> (NaN, NaN)
														
 
															+//
														
 
															+// Otherwise |x| is reduced to an even octant index and a remainder z, both
														
 
															+// polynomial approximations are evaluated at z, and the two results are
														
 
															+// swapped and/or negated according to the octant and the sign of x.
														
 
															+sincos_f64 :: proc "contextless" (x: f64) -> (sin, cos: f64) #no_bounds_check {
														
 
															+	// PI/4 split into three parts, used by the extended precision reduction below.
														
 
															+	PI4A :: 0h3fe921fb40000000 // 7.85398125648498535156e-1
														
 
															+	PI4B :: 0h3e64442d00000000 // 3.77489470793079817668e-8
														
 
															+	PI4C :: 0h3ce8469898cc5170 // 2.69515142907905952645e-15
														
 
															+
														
 
															+	// Special cases.
														
 
															+	if x == 0 {
														
 
															+		return x, 1 // ±0.0, 1.0
														
 
															+	}
														
 
															+	if is_nan(x) || is_inf(x, 0) {
														
 
															+		return nan_f64(), nan_f64()
														
 
															+	}
														
 
															+
														
 
															+	// Work on the magnitude of x; a negative argument flips the sign of the sine.
														
 
															+	negate_sin := x < 0
														
 
															+	negate_cos := false
														
 
															+	ax := x
														
 
															+	if negate_sin {
														
 
															+		ax = -x
														
 
															+	}
														
 
															+
														
 
															+	octant: u64
														
 
															+	z: f64
														
 
															+	if ax >= REDUCE_THRESHOLD {
														
 
															+		octant, z = _trig_reduce_f64(ax)
														
 
															+	} else {
														
 
															+		octant = u64(ax * (4 / PI)) // integer part of ax/(PI/4), as integer for tests on the phase angle
														
 
															+		y := f64(octant)            // integer part of ax/(PI/4), as float
														
 
															+
														
 
															+		if octant&1 == 1 { // map zeros to origin
														
 
															+			octant += 1
														
 
															+			y += 1
														
 
															+		}
														
 
															+		octant &= 7                            // octant modulo TAU radians (360 degrees)
														
 
															+		z = ((ax - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
														
 
															+	}
														
 
															+
														
 
															+	if octant > 3 { // reflect in x axis
														
 
															+		octant -= 4
														
 
															+		negate_sin = !negate_sin
														
 
															+		negate_cos = !negate_cos
														
 
															+	}
														
 
															+	if octant > 1 {
														
 
															+		negate_cos = !negate_cos
														
 
															+	}
														
 
															+
														
 
															+	// Evaluate both polynomials in zz = z*z using Horner's scheme.
														
 
															+	zz := z * z
														
 
															+	cos_poly := (((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]
														
 
															+	sin_poly := (((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]
														
 
															+
														
 
															+	cos = 1.0 - 0.5*zz + zz*zz*cos_poly
														
 
															+	sin = z + z*zz*sin_poly
														
 
															+
														
 
															+	if octant == 1 || octant == 2 {
														
 
															+		sin, cos = cos, sin
														
 
															+	}
														
 
															+	if negate_cos {
														
 
															+		cos = -cos
														
 
															+	}
														
 
															+	if negate_sin {
														
 
															+		sin = -sin
														
 
															+	}
														
 
															+	return
														
 
															+}
														
 
															+
														
 
															+// sin coefficients
														
 
															+//
														
 
															+// Ordered from the highest-degree term to the lowest: sincos_f64 evaluates
														
 
															+// them in Horner form in zz = z*z, starting with _sin[0] and ending with
														
 
															+// _sin[5]. Each entry is an exact f64 bit pattern; the decimal value is
														
 
															+// given alongside.
														
 
															+@(private="file")
														
 
															+_sin := [?]f64{
														
 
															+	 0h3de5d8fd1fd19ccd, //  1.58962301576546568060e-10
														
 
															+	 0hbe5ae5e5a9291f5d, // -2.50507477628578072866e-8
														
 
															+	 0h3ec71de3567d48a1, //  2.75573136213857245213e-6
														
 
															+	 0hbf2a01a019bfdf03, // -1.98412698295895385996e-4
														
 
															+	 0h3f8111111110f7d0, //  8.33333333332211858878e-3
														
 
															+	 0hbfc5555555555548, // -1.66666666666666307295e-1
														
 
															+}
														
 
															+
														
 
															+// cos coefficients
														
 
															+//
														
 
															+// Ordered from the highest-degree term to the lowest: sincos_f64 evaluates
														
 
															+// them in Horner form in zz = z*z, starting with _cos[0] and ending with
														
 
															+// _cos[5]. Each entry is an exact f64 bit pattern; the decimal value is
														
 
															+// given alongside.
														
 
															+@(private="file")
														
 
															+_cos := [?]f64{
														
 
															+	0hbda8fa49a0861a9b, // -1.13585365213876817300e-11
														
 
															+	0h3e21ee9d7b4e3f05, //  2.08757008419747316778e-9
														
 
															+	0hbe927e4f7eac4bc6, // -2.75573141792967388112e-7
														
 
															+	0h3efa01a019c844f5, //  2.48015872888517045348e-5
														
 
															+	0hbf56c16c16c14f91, // -1.38888888888730564116e-3
														
 
															+	0h3fa555555555554b, //  4.16666666666665929218e-2
														
 
															+}
														
 
															+
														
 
															+// REDUCE_THRESHOLD is the maximum value of x where the reduction using Pi/4
														
 
															+// in 3 f64 parts still gives accurate results. This threshold
														
 
															+// is set by y*C being representable as a f64 without error
														
 
															+// where y is given by y = floor(x * (4 / Pi)) and C is the leading partial
														
 
															+// terms of Pi/4. Since the leading terms (PI4A and PI4B in sincos_f64) have 30
														
 
															+// and 32 trailing zero bits, y should have less than 30 significant bits.
														
 
															+//
														
 
															+//	y < 1<<30  -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4
														
 
															+//
														
 
															+// So, conservatively we can take x < 1<<29.
														
 
															+// At or above this threshold Payne-Hanek range reduction (_trig_reduce_f64)
														
 
															+// is used instead.
														
 
															+@(private="file")
														
 
															+REDUCE_THRESHOLD :: 1 << 29
														
 
															+
														
 
															+// _trig_reduce_f64 implements Payne-Hanek range reduction by Pi/4
														
 
															+// for x > 0.
														
 
															+//
														
 
															+// Returns:
														
 
															+//	j - the integer part of x / (Pi/4) mod 8; an odd value is bumped to the
														
 
															+//	    next even value (mod 8), so j is one of 0, 2, 4, 6
														
 
															+//	z - the fractional part of x / (Pi/4), minus 1 when j was bumped,
														
 
															+//	    multiplied by Pi/4
														
 
															+//
														
 
															+// For x < Pi/4 no reduction is performed and (0, x) is returned.
														
 
															+//
														
 
															+// The implementation is based on:
														
 
															+// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
														
 
															+// K. C. Ng et al, March 24, 1992
														
 
															+// The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic.
														
 
															+_trig_reduce_f64 :: proc "contextless" (x: f64) -> (j: u64, z: f64) #no_bounds_check {
														
 
															+	// bd_pi4 is the binary digits of 4/pi as a u64 array,
														
 
															+	// that is, 4/pi = Sum bd_pi4[i]*2^(-64*i)
														
 
															+	// 19 64-bit digits and the leading one bit give 1217 bits
														
 
															+	// of precision to handle the largest possible f64 exponent.
														
 
															+	@static bd_pi4 := [?]u64{
														
 
															+		0x0000000000000001,
														
 
															+		0x45f306dc9c882a53,
														
 
															+		0xf84eafa3ea69bb81,
														
 
															+		0xb6c52b3278872083,
														
 
															+		0xfca2c757bd778ac3,
														
 
															+		0x6e48dc74849ba5c0,
														
 
															+		0x0c925dd413a32439,
														
 
															+		0xfc3bd63962534e7d,
														
 
															+		0xd1046bea5d768909,
														
 
															+		0xd338e04d68befc82,
														
 
															+		0x7323ac7306a673e9,
														
 
															+		0x3908bf177bf25076,
														
 
															+		0x3ff12fffbc0b301f,
														
 
															+		0xde5e2316b414da3e,
														
 
															+		0xda6cfd9e4f96136e,
														
 
															+		0x9e8c7ecd3cbfd45a,
														
 
															+		0xea4f758fd7cbe2f6,
														
 
															+		0x7a0e73ef14a525d4,
														
 
															+		0xd7f6bf623f1aba10,
														
 
															+		0xac06608df8f6d757,
														
 
															+	}
														
 
															+
														
 
															+	// Arguments already below Pi/4 need no reduction.
														
 
															+	PI4 :: PI / 4
														
 
															+	if x < PI4 {
														
 
															+		return 0, x
														
 
															+	}
														
 
															+
														
 
															+	// f64 bit layout: an 11-bit exponent field (MASK) sitting above a
														
 
															+	// 52-bit mantissa (SHIFT), with an exponent bias of 1023 (BIAS).
														
 
															+	MASK  :: 0x7FF
														
 
															+	SHIFT :: 64 - 11 - 1
														
 
															+	BIAS  :: 1023
														
 
															+
														
 
															+	// Extract out the integer and exponent such that,
														
 
															+	// x = ix * 2 ** exp.
														
 
															+	ix := transmute(u64)x
														
 
															+	exp := int(ix>>SHIFT&MASK) - BIAS - SHIFT
														
 
															+	// Clear the exponent field, then set the implicit leading mantissa bit.
														
 
															+	ix &~= MASK << SHIFT
														
 
															+	ix |= 1 << SHIFT
														
 
															+	// Use the exponent to extract the 3 appropriate u64 digits from bd_pi4,
														
 
															+	// B ~ (z0, z1, z2), such that the product leading digit has the exponent -61.
														
 
															+	// Note, exp >= -53 since x >= PI4 and exp < 971 for maximum f64.
														
 
															+	digit, bitshift := uint(exp+61)/64, uint(exp+61)%64
														
 
															+	z0 := (bd_pi4[digit] << bitshift) | (bd_pi4[digit+1] >> (64 - bitshift))
														
 
															+	z1 := (bd_pi4[digit+1] << bitshift) | (bd_pi4[digit+2] >> (64 - bitshift))
														
 
															+	z2 := (bd_pi4[digit+2] << bitshift) | (bd_pi4[digit+3] >> (64 - bitshift))
														
 
															+	// Multiply mantissa by the digits and extract the upper two digits (hi, lo).
														
 
															+	z2hi, _ := bits.mul(z2, ix)
														
 
															+	z1hi, z1lo := bits.mul(z1, ix)
														
 
															+	z0lo := z0 * ix
														
 
															+	lo, c := bits.add(z1lo, z2hi, 0)
														
 
															+	hi, _ := bits.add(z0lo, z1hi, c)
														
 
															+	// The top 3 bits are j.
														
 
															+	j = hi >> 61
														
 
															+	// Extract the fraction and find its magnitude.
														
 
															+	hi = hi<<3 | lo>>61
														
 
															+	lz := uint(bits.leading_zeros(hi))
														
 
															+	e := u64(BIAS - (lz + 1))
														
 
															+	// Clear implicit mantissa bit and shift into place.
														
 
															+	hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
														
 
															+	hi >>= 64 - SHIFT
														
 
															+	// Include the exponent and convert to a float.
														
 
															+	hi |= e << SHIFT
														
 
															+	z = transmute(f64)hi
														
 
															+	// Map zeros to origin.
														
 
															+	if j&1 == 1 {
														
 
															+		j += 1
														
 
															+		j &= 7
														
 
															+		z -= 1
														
 
															+	}
														
 
															+	// Multiply the fractional part by pi/4.
														
 
															+	return j, z * PI4
														
 
															+}