math_basic.odin 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. //+build !js
  2. package math
  3. import "base:intrinsics"
  // File-private bindings to LLVM floating-point intrinsics.
  //
  // Each `_<op>_f<bits>` procedure is bound through `link_name` to the
  // intrinsic `llvm.<op>.f<bits>`. The bindings are marked `private="file"`;
  // the public `sin_*`, `cos_*`, `pow_*`, `fmuladd_*` and `exp_*` procedures
  // in this file are thin "contextless" wrappers that forward to them.
  @(default_calling_convention="none", private="file")
  foreign _ {
      // Sine (llvm.sin.*)
      @(link_name="llvm.sin.f16", require_results)
      _sin_f16 :: proc(θ: f16) -> f16 ---
      @(link_name="llvm.sin.f32", require_results)
      _sin_f32 :: proc(θ: f32) -> f32 ---
      @(link_name="llvm.sin.f64", require_results)
      _sin_f64 :: proc(θ: f64) -> f64 ---
      // Cosine (llvm.cos.*)
      @(link_name="llvm.cos.f16", require_results)
      _cos_f16 :: proc(θ: f16) -> f16 ---
      @(link_name="llvm.cos.f32", require_results)
      _cos_f32 :: proc(θ: f32) -> f32 ---
      @(link_name="llvm.cos.f64", require_results)
      _cos_f64 :: proc(θ: f64) -> f64 ---
      // Power (llvm.pow.*): first argument is the base, second the exponent.
      @(link_name="llvm.pow.f16", require_results)
      _pow_f16 :: proc(x, power: f16) -> f16 ---
      @(link_name="llvm.pow.f32", require_results)
      _pow_f32 :: proc(x, power: f32) -> f32 ---
      @(link_name="llvm.pow.f64", require_results)
      _pow_f64 :: proc(x, power: f64) -> f64 ---
      // Multiply-add (llvm.fmuladd.*) on the three operands a, b, c.
      @(link_name="llvm.fmuladd.f16", require_results)
      _fmuladd_f16 :: proc(a, b, c: f16) -> f16 ---
      @(link_name="llvm.fmuladd.f32", require_results)
      _fmuladd_f32 :: proc(a, b, c: f32) -> f32 ---
      @(link_name="llvm.fmuladd.f64", require_results)
      _fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---
      // Exponential (llvm.exp.*)
      @(link_name="llvm.exp.f16", require_results)
      _exp_f16 :: proc(x: f16) -> f16 ---
      @(link_name="llvm.exp.f32", require_results)
      _exp_f32 :: proc(x: f32) -> f32 ---
      @(link_name="llvm.exp.f64", require_results)
      _exp_f64 :: proc(x: f64) -> f64 ---
  }
  // sin_f16 returns the sine of θ as an f16 by forwarding to the file-private
  // `_sin_f16` binding (the `llvm.sin.f16` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  sin_f16 :: proc "contextless" (θ: f16) -> f16 {
      sine := _sin_f16(θ)
      return sine
  }
  // sin_f32 returns the sine of θ as an f32 by forwarding to the file-private
  // `_sin_f32` binding (the `llvm.sin.f32` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  sin_f32 :: proc "contextless" (θ: f32) -> f32 {
      sine := _sin_f32(θ)
      return sine
  }
  // sin_f64 returns the sine of θ as an f64 by forwarding to the file-private
  // `_sin_f64` binding (the `llvm.sin.f64` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  sin_f64 :: proc "contextless" (θ: f64) -> f64 {
      sine := _sin_f64(θ)
      return sine
  }
  // cos_f16 returns the cosine of θ as an f16 by forwarding to the file-private
  // `_cos_f16` binding (the `llvm.cos.f16` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  cos_f16 :: proc "contextless" (θ: f16) -> f16 {
      cosine := _cos_f16(θ)
      return cosine
  }
  // cos_f32 returns the cosine of θ as an f32 by forwarding to the file-private
  // `_cos_f32` binding (the `llvm.cos.f32` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  cos_f32 :: proc "contextless" (θ: f32) -> f32 {
      cosine := _cos_f32(θ)
      return cosine
  }
  // cos_f64 returns the cosine of θ as an f64 by forwarding to the file-private
  // `_cos_f64` binding (the `llvm.cos.f64` intrinsic).
  // NOTE(review): θ is presumably in radians - confirm against the LLVM reference.
  @(require_results)
  cos_f64 :: proc "contextless" (θ: f64) -> f64 {
      cosine := _cos_f64(θ)
      return cosine
  }
  // pow_f16 raises `x` to the exponent `power` in f16 precision by forwarding
  // both arguments, in order, to the file-private `_pow_f16` binding
  // (the `llvm.pow.f16` intrinsic).
  @(require_results)
  pow_f16 :: proc "contextless" (x, power: f16) -> f16 {
      raised := _pow_f16(x, power)
      return raised
  }
  // pow_f32 raises `x` to the exponent `power` in f32 precision by forwarding
  // both arguments, in order, to the file-private `_pow_f32` binding
  // (the `llvm.pow.f32` intrinsic).
  @(require_results)
  pow_f32 :: proc "contextless" (x, power: f32) -> f32 {
      raised := _pow_f32(x, power)
      return raised
  }
  // pow_f64 raises `x` to the exponent `power` in f64 precision by forwarding
  // both arguments, in order, to the file-private `_pow_f64` binding
  // (the `llvm.pow.f64` intrinsic).
  @(require_results)
  pow_f64 :: proc "contextless" (x, power: f64) -> f64 {
      raised := _pow_f64(x, power)
      return raised
  }
  // fmuladd_f16 forwards (a, b, c) to the file-private `_fmuladd_f16` binding
  // (the `llvm.fmuladd.f16` intrinsic) and returns its f16 result.
  // NOTE(review): per the LLVM reference this is a*b + c, and whether the
  // multiply-add is fused is left to the backend - confirm if rounding matters.
  @(require_results)
  fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 {
      combined := _fmuladd_f16(a, b, c)
      return combined
  }
  // fmuladd_f32 forwards (a, b, c) to the file-private `_fmuladd_f32` binding
  // (the `llvm.fmuladd.f32` intrinsic) and returns its f32 result.
  // NOTE(review): per the LLVM reference this is a*b + c, and whether the
  // multiply-add is fused is left to the backend - confirm if rounding matters.
  @(require_results)
  fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 {
      combined := _fmuladd_f32(a, b, c)
      return combined
  }
  // fmuladd_f64 forwards (a, b, c) to the file-private `_fmuladd_f64` binding
  // (the `llvm.fmuladd.f64` intrinsic) and returns its f64 result.
  // NOTE(review): per the LLVM reference this is a*b + c, and whether the
  // multiply-add is fused is left to the backend - confirm if rounding matters.
  @(require_results)
  fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 {
      combined := _fmuladd_f64(a, b, c)
      return combined
  }
  // exp_f16 returns the exponential of x as an f16 by forwarding to the
  // file-private `_exp_f16` binding (the `llvm.exp.f16` intrinsic).
  @(require_results)
  exp_f16 :: proc "contextless" (x: f16) -> f16 {
      exponential := _exp_f16(x)
      return exponential
  }
  // exp_f32 returns the exponential of x as an f32 by forwarding to the
  // file-private `_exp_f32` binding (the `llvm.exp.f32` intrinsic).
  @(require_results)
  exp_f32 :: proc "contextless" (x: f32) -> f32 {
      exponential := _exp_f32(x)
      return exponential
  }
  // exp_f64 returns the exponential of x as an f64 by forwarding to the
  // file-private `_exp_f64` binding (the `llvm.exp.f64` intrinsic).
  @(require_results)
  exp_f64 :: proc "contextless" (x: f64) -> f64 {
      exponential := _exp_f64(x)
      return exponential
  }
  // sqrt_f16 returns the square root of x in f16 precision, computed by the
  // compiler intrinsic `intrinsics.sqrt`.
  @(require_results)
  sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
      root := intrinsics.sqrt(x)
      return root
  }
  // sqrt_f32 returns the square root of x in f32 precision, computed by the
  // compiler intrinsic `intrinsics.sqrt`.
  @(require_results)
  sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
      root := intrinsics.sqrt(x)
      return root
  }
  // sqrt_f64 returns the square root of x in f64 precision, computed by the
  // compiler intrinsic `intrinsics.sqrt`.
  @(require_results)
  sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
      root := intrinsics.sqrt(x)
      return root
  }
  // ln_f64 returns the natural logarithm of x in f64 precision, using the
  // FreeBSD msun `__ieee754_log` method described in the comment below.
  //
  // Special cases handled explicitly before the main computation:
  //   - NaN and +Inf are returned unchanged,
  //   - x < 0 returns nan_f64(),
  //   - x == 0 returns inf_f64(-1).
  @(require_results)
  ln_f64 :: proc "contextless" (x: f64) -> f64 {
      // The original C code, the long comment, and the constants
      // below are from FreeBSD's /usr/src/lib/msun/src/e_log.c
      // and came with this notice.
      //
      // ====================================================
      // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      //
      // Developed at SunPro, a Sun Microsystems, Inc. business.
      // Permission to use, copy, modify, and distribute this
      // software is freely granted, provided that this notice
      // is preserved.
      // ====================================================
      //
      // __ieee754_log(x)
      // Return the logarithm of x
      //
      // Method :
      //   1. Argument Reduction: find k and f such that
      //          x = 2**k * (1+f),
      //      where  sqrt(2)/2 < 1+f < sqrt(2) .
      //
      //   2. Approximation of log(1+f).
      //      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
      //               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
      //               = 2s + s*R
      //      We use a special Remez algorithm on [0,0.1716] to generate
      //      a polynomial of degree 14 to approximate R. The maximum error
      //      of this polynomial approximation is bounded by 2**-58.45. In
      //      other words,
      //          R(z) ~ L1*s**2 + L2*s**4 + L3*s**6 + L4*s**8
      //                 + L5*s**10 + L6*s**12 + L7*s**14
      //      (the values of L1 to L7 are listed in the program) and
      //          | L1*s**2 + ... + L7*s**14 - R(z) | <= 2**-58.45
      //
      //      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
      //      In order to guarantee error in log below 1ulp, we compute log by
      //          log(1+f) = f - s*(f - R)            (if f is not too large)
      //          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
      //
      //   3. Finally, log(x) = k*Ln2 + log(1+f).
      //                      = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
      //      Here Ln2 is split into two floating point number:
      //          Ln2_hi + Ln2_lo,
      //      where n*Ln2_hi is always exact for |n| < 2000.
      //
      // Special cases:
      //   log(x) is NaN with signal if x < 0 (including -INF) ;
      //   log(+INF) is +INF; log(0) is -INF with signal;
      //   log(NaN) is that NaN with no signal.
      //
      // Accuracy:
      //   according to an error analysis, the error is always less than
      //   1 ulp (unit in the last place).
      //
      // Constants:
      //   The hexadecimal values are the intended ones for the following
      //   constants. The decimal values may be used, provided that the
      //   compiler will convert from decimal to binary accurately enough
      //   to produce the hexadecimal values shown.
      LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01
      LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10
      L1 :: 0h3fe55555_55555593 // 6.666666666666735130e-01
      L2 :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01
      L3 :: 0h3fd24924_94229359 // 2.857142874366239149e-01
      L4 :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01
      L5 :: 0h3fc74664_96cb03de // 1.818357216161805012e-01
      L6 :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01
      L7 :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01
      // Special cases, checked in order; anything that falls through is a
      // finite value greater than zero.
      switch {
      case is_nan(x) || is_inf(x, 1):
          return x
      case x < 0:
          return nan_f64()
      case x == 0:
          return inf_f64(-1)
      }
      // reduce (step 1): split x into a fraction f1 and binary exponent ki.
      // NOTE(review): frexp is defined elsewhere; presumably f1 is in [0.5, 1).
      f1, ki := frexp(x)
      // Shift fractions below sqrt(2)/2 up by one binade so that 1+f stays in
      // the sqrt(2)/2 .. sqrt(2) window required by the polynomial.
      if f1 < SQRT_TWO/2 {
          f1 *= 2
          ki -= 1
      }
      f := f1 - 1
      k := f64(ki)
      // compute (step 2): R is evaluated as two interleaved Horner chains in s4.
      // t1 carries the L1, L3, L5, L7 terms and t2 the L2, L4, L6 terms.
      s := f / (2 + f)
      s2 := s * s
      s4 := s2 * s2
      t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
      t2 := s4 * (L2 + s4*(L4+s4*L6))
      R := t1 + t2
      hfsq := 0.5 * f * f
      // Step 3: combine k*Ln2 (as separate hi and lo parts) with log(1+f).
      // The grouping of this expression is part of the algorithm; do not reorder.
      return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f)
  }
  // ln_f16 returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f16.
  @(require_results)
  ln_f16 :: proc "contextless" (x: f16) -> f16 {
      wide := f64(x)
      return #force_inline f16(ln_f64(wide))
  }
  // ln_f32 returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f32.
  @(require_results)
  ln_f32 :: proc "contextless" (x: f32) -> f32 {
      wide := f64(x)
      return #force_inline f32(ln_f64(wide))
  }
  // ln_f16le returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f16le.
  @(require_results)
  ln_f16le :: proc "contextless" (x: f16le) -> f16le {
      wide := f64(x)
      return #force_inline f16le(ln_f64(wide))
  }
  // ln_f16be returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f16be.
  @(require_results)
  ln_f16be :: proc "contextless" (x: f16be) -> f16be {
      wide := f64(x)
      return #force_inline f16be(ln_f64(wide))
  }
  // ln_f32le returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f32le.
  @(require_results)
  ln_f32le :: proc "contextless" (x: f32le) -> f32le {
      wide := f64(x)
      return #force_inline f32le(ln_f64(wide))
  }
  // ln_f32be returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f32be.
  @(require_results)
  ln_f32be :: proc "contextless" (x: f32be) -> f32be {
      wide := f64(x)
      return #force_inline f32be(ln_f64(wide))
  }
  // ln_f64le returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f64le.
  @(require_results)
  ln_f64le :: proc "contextless" (x: f64le) -> f64le {
      native := f64(x)
      return #force_inline f64le(ln_f64(native))
  }
  // ln_f64be returns the natural logarithm of x. The argument is converted to
  // f64, evaluated by ln_f64, and the result is converted back to f64be.
  @(require_results)
  ln_f64be :: proc "contextless" (x: f64be) -> f64be {
      native := f64(x)
      return #force_inline f64be(ln_f64(native))
  }
  // ln is the procedure group for the natural logarithm. It groups the
  // f16/f32/f64 variants together with their little-endian (`le`) and
  // big-endian (`be`) counterparts, so a call to `ln(x)` is dispatched on the
  // type of x. Every non-f64 member computes its result through ln_f64.
  ln :: proc{
      ln_f16, ln_f16le, ln_f16be,
      ln_f32, ln_f32le, ln_f32be,
      ln_f64, ln_f64le, ln_f64be,
  }