math_sincos.odin 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. package math
  2. import "core:math/bits"
  3. // The original C code, the long comment, and the constants
  4. // below were from http://netlib.sandia.gov/cephes/cmath/sin.c,
  5. // available from http://www.netlib.org/cephes/cmath.tgz.
  6. // The go code is a simplified version of the original C.
  7. //
  8. // sin.c
  9. //
  10. // Circular sine
  11. //
  12. // SYNOPSIS:
  13. //
  14. // double x, y, sin();
  15. // y = sin( x );
  16. //
  17. // DESCRIPTION:
  18. //
  19. // Range reduction is into intervals of pi/4. The reduction error is nearly
  20. // eliminated by contriving an extended precision modular arithmetic.
  21. //
  22. // Two polynomial approximating functions are employed.
  23. // Between 0 and pi/4 the sine is approximated by
  24. // x + x**3 P(x**2).
  25. // Between pi/4 and pi/2 the cosine is represented as
  26. // 1 - x**2 Q(x**2).
  27. //
  28. // ACCURACY:
  29. //
  30. // Relative error:
  31. // arithmetic domain # trials peak rms
  32. // DEC 0, 10 150000 3.0e-17 7.8e-18
  33. // IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17
  34. //
  35. // Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss
  36. // is not gradual, but jumps suddenly to about 1 part in 10e7. Results may
  37. // be meaningless for x > 2**49 = 5.6e14.
  38. //
  39. // cos.c
  40. //
  41. // Circular cosine
  42. //
  43. // SYNOPSIS:
  44. //
  45. // double x, y, cos();
  46. // y = cos( x );
  47. //
  48. // DESCRIPTION:
  49. //
  50. // Range reduction is into intervals of pi/4. The reduction error is nearly
  51. // eliminated by contriving an extended precision modular arithmetic.
  52. //
  53. // Two polynomial approximating functions are employed.
  54. // Between 0 and pi/4 the cosine is approximated by
  55. // 1 - x**2 Q(x**2).
  56. // Between pi/4 and pi/2 the sine is represented as
  57. // x + x**3 P(x**2).
  58. //
  59. // ACCURACY:
  60. //
  61. // Relative error:
  62. // arithmetic domain # trials peak rms
  63. // IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17
  64. // DEC 0,+1.07e9 17000 3.0e-17 7.2e-18
  65. //
  66. // Cephes Math Library Release 2.8: June, 2000
  67. // Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
  68. //
  69. // The readme file at http://netlib.sandia.gov/cephes/ says:
  70. // Some software in this archive may be from the book _Methods and
  71. // Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
  72. // International, 1989) or from the Cephes Mathematical Library, a
  73. // commercial product. In either event, it is copyrighted by the author.
  74. // What you see here may be used freely but it comes with no support or
  75. // guarantee.
  76. //
  77. // The two known misprints in the book are repaired here in the
  78. // source listings for the gamma function and the incomplete beta
  79. // integral.
  80. //
  81. // Stephen L. Moshier
  82. // [email protected]
  83. sincos :: proc{
  84. sincos_f16, sincos_f16le, sincos_f16be,
  85. sincos_f32, sincos_f32le, sincos_f32be,
  86. sincos_f64, sincos_f64le, sincos_f64be,
  87. }
  88. sincos_f16 :: proc "contextless" (x: f16) -> (sin, cos: f16) #no_bounds_check {
  89. s, c := sincos_f64(f64(x))
  90. return f16(s), f16(c)
  91. }
  92. sincos_f16le :: proc "contextless" (x: f16le) -> (sin, cos: f16le) #no_bounds_check {
  93. s, c := sincos_f64(f64(x))
  94. return f16le(s), f16le(c)
  95. }
  96. sincos_f16be :: proc "contextless" (x: f16be) -> (sin, cos: f16be) #no_bounds_check {
  97. s, c := sincos_f64(f64(x))
  98. return f16be(s), f16be(c)
  99. }
  100. sincos_f32 :: proc "contextless" (x: f32) -> (sin, cos: f32) #no_bounds_check {
  101. s, c := sincos_f64(f64(x))
  102. return f32(s), f32(c)
  103. }
  104. sincos_f32le :: proc "contextless" (x: f32le) -> (sin, cos: f32le) #no_bounds_check {
  105. s, c := sincos_f64(f64(x))
  106. return f32le(s), f32le(c)
  107. }
  108. sincos_f32be :: proc "contextless" (x: f32be) -> (sin, cos: f32be) #no_bounds_check {
  109. s, c := sincos_f64(f64(x))
  110. return f32be(s), f32be(c)
  111. }
  112. sincos_f64le :: proc "contextless" (x: f64le) -> (sin, cos: f64le) #no_bounds_check {
  113. s, c := sincos_f64(f64(x))
  114. return f64le(s), f64le(c)
  115. }
  116. sincos_f64be :: proc "contextless" (x: f64be) -> (sin, cos: f64be) #no_bounds_check {
  117. s, c := sincos_f64(f64(x))
  118. return f64be(s), f64be(c)
  119. }
  120. sincos_f64 :: proc "contextless" (x: f64) -> (sin, cos: f64) #no_bounds_check {
  121. x := x
  122. PI4A :: 0h3fe921fb40000000 // 7.85398125648498535156e-1 PI/4 split into three parts
  123. PI4B :: 0h3e64442d00000000 // 3.77489470793079817668e-8
  124. PI4C :: 0h3ce8469898cc5170 // 2.69515142907905952645e-15
  125. // special cases
  126. switch {
  127. case x == 0:
  128. return x, 1 // return ±0.0, 1.0
  129. case is_nan(x) || is_inf(x, 0):
  130. return nan_f64(), nan_f64()
  131. }
  132. // make argument positive
  133. sin_sign, cos_sign := false, false
  134. if x < 0 {
  135. x = -x
  136. sin_sign = true
  137. }
  138. j: u64
  139. y, z: f64
  140. if x >= REDUCE_THRESHOLD {
  141. j, z = _trig_reduce_f64(x)
  142. } else {
  143. j = u64(x * (4 / PI)) // integer part of x/(PI/4), as integer for tests on the phase angle
  144. y = f64(j) // integer part of x/(PI/4), as float
  145. if j&1 == 1 { // map zeros to origin
  146. j += 1
  147. y += 1
  148. }
  149. j &= 7 // octant modulo TAU radians (360 degrees)
  150. z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
  151. }
  152. if j > 3 { // reflect in x axis
  153. j -= 4
  154. sin_sign, cos_sign = !sin_sign, !cos_sign
  155. }
  156. if j > 1 {
  157. cos_sign = !cos_sign
  158. }
  159. zz := z * z
  160. cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
  161. sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
  162. if j == 1 || j == 2 {
  163. sin, cos = cos, sin
  164. }
  165. if cos_sign {
  166. cos = -cos
  167. }
  168. if sin_sign {
  169. sin = -sin
  170. }
  171. return
  172. }
  173. // sin coefficients
  174. @(private="file")
  175. _sin := [?]f64{
  176. 0h3de5d8fd1fd19ccd, // 1.58962301576546568060e-10
  177. 0hbe5ae5e5a9291f5d, // -2.50507477628578072866e-8
  178. 0h3ec71de3567d48a1, // 2.75573136213857245213e-6
  179. 0hbf2a01a019bfdf03, // -1.98412698295895385996e-4
  180. 0h3f8111111110f7d0, // 8.33333333332211858878e-3
  181. 0hbfc5555555555548, // -1.66666666666666307295e-1
  182. }
  183. // cos coefficients
  184. @(private="file")
  185. _cos := [?]f64{
  186. 0hbda8fa49a0861a9b, // -1.13585365213876817300e-11,
  187. 0h3e21ee9d7b4e3f05, // 2.08757008419747316778e-9,
  188. 0hbe927e4f7eac4bc6, // -2.75573141792967388112e-7,
  189. 0h3efa01a019c844f5, // 2.48015872888517045348e-5,
  190. 0hbf56c16c16c14f91, // -1.38888888888730564116e-3,
  191. 0h3fa555555555554b, // 4.16666666666665929218e-2,
  192. }
  193. // REDUCE_THRESHOLD is the maximum value of x where the reduction using Pi/4
  194. // in 3 f64 parts still gives accurate results. This threshold
  195. // is set by y*C being representable as a f64 without error
  196. // where y is given by y = floor(x * (4 / Pi)) and C is the leading partial
  197. // terms of 4/Pi. Since the leading terms (PI4A and PI4B in sin.go) have 30
  198. // and 32 trailing zero bits, y should have less than 30 significant bits.
  199. //
  200. // y < 1<<30 -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4
  201. //
  202. // So, conservatively we can take x < 1<<29.
  203. // Above this threshold Payne-Hanek range reduction must be used.
  204. @(private="file")
  205. REDUCE_THRESHOLD :: 1 << 29
  206. // _trig_reduce_f64 implements Payne-Hanek range reduction by Pi/4
  207. // for x > 0. It returns the integer part mod 8 (j) and
  208. // the fractional part (z) of x / (Pi/4).
  209. // The implementation is based on:
  210. // "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
  211. // K. C. Ng et al, March 24, 1992
  212. // The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic.
  213. _trig_reduce_f64 :: proc "contextless" (x: f64) -> (j: u64, z: f64) #no_bounds_check {
  214. // bd_pi4 is the binary digits of 4/pi as a u64 array,
  215. // that is, 4/pi = Sum bd_pi4[i]*2^(-64*i)
  216. // 19 64-bit digits and the leading one bit give 1217 bits
  217. // of precision to handle the largest possible f64 exponent.
  218. @static bd_pi4 := [?]u64{
  219. 0x0000000000000001,
  220. 0x45f306dc9c882a53,
  221. 0xf84eafa3ea69bb81,
  222. 0xb6c52b3278872083,
  223. 0xfca2c757bd778ac3,
  224. 0x6e48dc74849ba5c0,
  225. 0x0c925dd413a32439,
  226. 0xfc3bd63962534e7d,
  227. 0xd1046bea5d768909,
  228. 0xd338e04d68befc82,
  229. 0x7323ac7306a673e9,
  230. 0x3908bf177bf25076,
  231. 0x3ff12fffbc0b301f,
  232. 0xde5e2316b414da3e,
  233. 0xda6cfd9e4f96136e,
  234. 0x9e8c7ecd3cbfd45a,
  235. 0xea4f758fd7cbe2f6,
  236. 0x7a0e73ef14a525d4,
  237. 0xd7f6bf623f1aba10,
  238. 0xac06608df8f6d757,
  239. }
  240. PI4 :: PI / 4
  241. if x < PI4 {
  242. return 0, x
  243. }
  244. MASK :: 0x7FF
  245. SHIFT :: 64 - 11 - 1
  246. BIAS :: 1023
  247. // Extract out the integer and exponent such that,
  248. // x = ix * 2 ** exp.
  249. ix := transmute(u64)x
  250. exp := int(ix>>SHIFT&MASK) - BIAS - SHIFT
  251. ix &~= MASK << SHIFT
  252. ix |= 1 << SHIFT
  253. // Use the exponent to extract the 3 appropriate u64 digits from bd_pi4,
  254. // B ~ (z0, z1, z2), such that the product leading digit has the exponent -61.
  255. // Note, exp >= -53 since x >= PI4 and exp < 971 for maximum f64.
  256. digit, bitshift := uint(exp+61)/64, uint(exp+61)%64
  257. z0 := (bd_pi4[digit] << bitshift) | (bd_pi4[digit+1] >> (64 - bitshift))
  258. z1 := (bd_pi4[digit+1] << bitshift) | (bd_pi4[digit+2] >> (64 - bitshift))
  259. z2 := (bd_pi4[digit+2] << bitshift) | (bd_pi4[digit+3] >> (64 - bitshift))
  260. // Multiply mantissa by the digits and extract the upper two digits (hi, lo).
  261. z2hi, _ := bits.mul(z2, ix)
  262. z1hi, z1lo := bits.mul(z1, ix)
  263. z0lo := z0 * ix
  264. lo, c := bits.add(z1lo, z2hi, 0)
  265. hi, _ := bits.add(z0lo, z1hi, c)
  266. // The top 3 bits are j.
  267. j = hi >> 61
  268. // Extract the fraction and find its magnitude.
  269. hi = hi<<3 | lo>>61
  270. lz := uint(bits.leading_zeros(hi))
  271. e := u64(BIAS - (lz + 1))
  272. // Clear implicit mantissa bit and shift into place.
  273. hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
  274. hi >>= 64 - SHIFT
  275. // Include the exponent and convert to a float.
  276. hi |= e << SHIFT
  277. z = transmute(f64)hi
  278. // Map zeros to origin.
  279. if j&1 == 1 {
  280. j += 1
  281. j &= 7
  282. z -= 1
  283. }
  284. // Multiply the fractional part by pi/4.
  285. return j, z * PI4
  286. }