math.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. {
  2. Implementation of mathematical routines for x86_64
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999-2005 by the Free Pascal development team
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {-------------------------------------------------------------------------
  12. Using functions from AMath/DAMath libraries, which are covered by the
  13. following license:
  14. (C) Copyright 2009-2013 Wolfgang Ehrhardt
  15. This software is provided 'as-is', without any express or implied warranty.
  16. In no event will the authors be held liable for any damages arising from
  17. the use of this software.
  18. Permission is granted to anyone to use this software for any purpose,
  19. including commercial applications, and to alter it and redistribute it
  20. freely, subject to the following restrictions:
  21. 1. The origin of this software must not be misrepresented; you must not
  22. claim that you wrote the original software. If you use this software in
  23. a product, an acknowledgment in the product documentation would be
  24. appreciated but is not required.
  25. 2. Altered source versions must be plainly marked as such, and must not be
  26. misrepresented as being the original software.
  27. 3. This notice may not be removed or altered from any source distribution.
  28. ----------------------------------------------------------------------------}
  29. {$push}
  30. {$codealign constmin=16}
  31. const
  32. FPC_ABSMASK_SINGLE: array[0..1] of qword=($7fffffff7fffffff,$7fffffff7fffffff); cvar; public;
  33. FPC_ABSMASK_DOUBLE: array[0..1] of qword=($7fffffffffffffff,$7fffffffffffffff); cvar; public;
  34. {$pop}
  35. FPC_LOG2E: Double = 1.4426950408889634073599246810019; { 1/log(2) }
  36. FPC_INFINITY_DOUBLE: QWord = $7ff0000000000000; { IEEE 754 bit representation of +oo for binary64 }
  37. FPC_MAXLOG_DOUBLE: Double = 709.78271289338399684324569237317; { log(2**1024) }
  38. FPC_MINLOG_DOUBLE: Double = -709.08956571282405153382846025171; { log(2**-1023) }
  39. {****************************************************************************
  40. FPU Control word
  41. ****************************************************************************}
  42. procedure Set8087CW(cw:word);
  43. begin
  44. default8087cw:=cw;
  45. asm
  46. fnclex
  47. fldcw cw
  48. end;
  49. end;
  50. function Get8087CW:word;assembler;
  51. var
  52. tmp: word;
  53. asm
  54. fnstcw tmp
  55. movw tmp,%ax
  56. andl $0xffff,%eax { clears bits 32-63 }
  57. end;
  58. procedure SetMXCSR(w : dword);
  59. begin
  60. defaultmxcsr:=w;
  61. asm
  62. {$ifdef FPUX86_HAS_AVXUNIT}
  63. vldmxcsr w
  64. {$else FPUX86_HAS_AVXUNIT}
  65. ldmxcsr w
  66. {$endif FPUX86_HAS_AVXUNIT}
  67. end;
  68. end;
  69. function GetMXCSR : dword;assembler;
  70. var
  71. _w : dword;
  72. asm
  73. {$ifdef FPUX86_HAS_AVXUNIT}
  74. vstmxcsr _w
  75. {$else FPUX86_HAS_AVXUNIT}
  76. stmxcsr _w
  77. {$endif FPUX86_HAS_AVXUNIT}
  78. movl _w,%eax
  79. end;
  80. function GetNativeFPUControlWord: TNativeFPUControlWord; {$if defined(SYSTEMINLINE)}inline;{$endif}
  81. begin
  82. result.cw8087:=Get8087CW;
  83. result.MXCSR:=GetMXCSR
  84. end;
  85. procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord); {$if defined(SYSTEMINLINE)}inline;{$endif}
  86. begin
  87. Set8087CW(cw.cw8087);
  88. SetMXCSR(cw.MXCSR);
  89. end;
  90. procedure SetSSECSR(w : dword);
  91. begin
  92. SetMXCSR(w);
  93. end;
  94. function GetSSECSR: dword;
  95. begin
  96. result:=GetMXCSR;
  97. end;
  98. {****************************************************************************
  99. EXTENDED data type routines
  100. ****************************************************************************}
  101. {$ifndef FPC_SYSTEM_HAS_ABS}
  102. {$define FPC_SYSTEM_HAS_ABS}
  103. function fpc_abs_real(d : ValReal) : ValReal;compilerproc;
  104. {$ifndef cpullvm}
  105. begin
  106. { Function is handled internal in the compiler }
  107. runerror(207);
  108. result:=0;
  109. {$else not cpullvm}
  110. assembler;
  111. asm
  112. fldt d
  113. fabs
  114. {$endif not cpullvm}
  115. end;
  116. {$endif FPC_SYSTEM_HAS_ABS}
  117. {$ifndef FPC_SYSTEM_HAS_SQR}
  118. {$define FPC_SYSTEM_HAS_SQR}
  119. function fpc_sqr_real(d : ValReal) : ValReal;compilerproc;
  120. {$ifndef cpullvm}
  121. begin
  122. { Function is handled internal in the compiler }
  123. runerror(207);
  124. result:=0;
  125. {$else not cpullvm}
  126. begin
  127. fpc_sqr_real:=d*d;
  128. {$endif not cpullvm}
  129. end;
  130. {$endif FPC_SYSTEM_HAS_SQR}
  131. {$ifndef FPC_SYSTEM_HAS_SQRT}
  132. {$define FPC_SYSTEM_HAS_SQRT}
  133. function fpc_sqrt_real(d : ValReal) : ValReal;compilerproc;
  134. {$ifndef cpullvm}
  135. begin
  136. { Function is handled internal in the compiler }
  137. runerror(207);
  138. result:=0;
  139. {$else not cpullvm}
  140. assembler;
  141. asm
  142. fldt d
  143. fsqrt
  144. {$endif not cpullvm}
  145. end;
  146. {$endif FPC_SYSTEM_HAS_SQRT}
  147. {$ifdef FPC_HAS_TYPE_EXTENDED}
  148. {$ifndef FPC_SYSTEM_HAS_ARCTAN}
  149. {$define FPC_SYSTEM_HAS_ARCTAN}
  150. function fpc_arctan_real(d : ValReal) : ValReal;compilerproc;
  151. {$ifndef cpullvm}
  152. begin
  153. { Function is handled internal in the compiler }
  154. runerror(207);
  155. result:=0;
  156. {$else not cpullvm}
  157. assembler;
  158. asm
  159. fldt d
  160. fld1
  161. fpatan
  162. {$endif not cpullvm}
  163. end;
  164. {$endif FPC_SYSTEM_HAS_ARCTAN}
  165. {$ifndef FPC_SYSTEM_HAS_LN}
  166. {$define FPC_SYSTEM_HAS_LN}
  167. function fpc_ln_real(d : ValReal) : ValReal;compilerproc;
  168. {$ifndef cpullvm}
  169. begin
  170. { Function is handled internal in the compiler }
  171. runerror(207);
  172. result:=0;
  173. {$else not cpullvm}
  174. assembler;
  175. asm
  176. fldln2
  177. fldt d
  178. fyl2x
  179. {$endif not cpullvm}
  180. end;
  181. {$endif FPC_SYSTEM_HAS_LN}
  182. {$ifndef FPC_SYSTEM_HAS_SIN}
  183. {$define FPC_SYSTEM_HAS_SIN}
  184. function fpc_sin_real(d : ValReal) : ValReal;compilerproc;
  185. {$ifndef cpullvm}
  186. begin
  187. { Function is handled internal in the compiler }
  188. runerror(207);
  189. result:=0;
  190. {$else not cpullvm}
  191. assembler;
  192. asm
  193. fldt d
  194. fsin
  195. {$endif not cpullvm}
  196. end;
  197. {$endif FPC_SYSTEM_HAS_SIN}
  198. {$ifndef FPC_SYSTEM_HAS_COS}
  199. {$define FPC_SYSTEM_HAS_COS}
  200. function fpc_cos_real(d : ValReal) : ValReal;compilerproc;
  201. {$ifndef cpullvm}
  202. begin
  203. { Function is handled internal in the compiler }
  204. runerror(207);
  205. result:=0;
  206. {$else not cpullvm}
  207. assembler;
  208. asm
  209. fldt d
  210. fcos
  211. {$endif not cpullvm}
  212. end;
  213. {$endif FPC_SYSTEM_HAS_COS}
  214. {$ifndef FPC_SYSTEM_HAS_EXP}
  215. {$define FPC_SYSTEM_HAS_EXP}
  216. { exp function adapted from AMath library (C) Copyright 2009-2013 Wolfgang Ehrhardt
  217. * translated into AT&T syntax
  218. + PIC support
  219. * return +Inf/0 for +Inf/-Inf input, instead of NaN }
  220. function fpc_exp_real(d : ValReal) : ValReal;assembler;compilerproc;
  221. const
  222. ln2hi: double=6.9314718036912382E-001;
  223. ln2lo: double=1.9082149292705877E-010;
  224. large: single=24576.0;
  225. two: single=2.0;
  226. half: single=0.5;
  227. asm
  228. fldt d
  229. fldl2e
  230. fmul %st(1),%st { z = d * log2(e) }
  231. frndint
  232. { Calculate frac(z) using modular arithmetic to avoid precision loss }
  233. fldl ln2hi(%rip)
  234. fmul %st(1),%st
  235. fsubrp %st,%st(2)
  236. fldl ln2lo(%rip)
  237. fmul %st(1),%st
  238. fsubrp %st,%st(2)
  239. fxch %st(1) { (d-int(z)*ln2_hi)-int(z)*ln2_lo }
  240. fldl2e
  241. fmulp %st,%st(1) { frac(z) }
  242. { Above calculation can yield |frac(z)|>1, particularly when rounding mode
  243. is not "round to nearest". f2xm1 is undefined in that case, so it's
  244. necessary to check }
  245. fld %st
  246. fabs
  247. fld1
  248. fcomip %st(1),%st(0)
  249. fstp %st
  250. jp .L3 { NaN }
  251. jae .L1 { |frac(z)| <= 1, good }
  252. fld %st(1)
  253. fabs
  254. flds large(%rip)
  255. fcomip %st(1),%st(0)
  256. fstp %st
  257. jb .L3 { int(z) >= 24576 }
  258. .L0:
  259. { Calculate 2**frac(z)-1 as N*(N+2), where N=2**(frac(z)/2)-1 }
  260. fmuls half(%rip)
  261. f2xm1
  262. fld %st
  263. fadds two(%rip)
  264. fmulp %st,%st(1)
  265. jmp .L2
  266. .L3:
  267. fstp %st { pop frac(z) and load 0 }
  268. fldz
  269. .L1:
  270. f2xm1
  271. .L2:
  272. fld1
  273. faddp %st,%st(1)
  274. fscale
  275. fstp %st(1)
  276. end;
  277. {$endif FPC_SYSTEM_HAS_EXP}
  278. {$ifndef FPC_SYSTEM_HAS_FRAC}
  279. {$define FPC_SYSTEM_HAS_FRAC}
  280. function fpc_frac_real(d : ValReal) : ValReal;assembler;compilerproc;
  281. var
  282. oldcw,newcw: word;
  283. asm
  284. fnstcw oldcw
  285. fldt d
  286. movw oldcw,%cx
  287. orw $0x0c00,%cx
  288. movw %cx,newcw
  289. fldcw newcw
  290. fld %st
  291. frndint
  292. fsubrp %st,%st(1)
  293. fwait
  294. fldcw oldcw
  295. end;
  296. {$endif FPC_SYSTEM_HAS_FRAC}
  297. {$ifndef FPC_SYSTEM_HAS_INT}
  298. {$define FPC_SYSTEM_HAS_INT}
  299. function fpc_int_real(d : ValReal) : ValReal;assembler;compilerproc;
  300. var
  301. oldcw,newcw: word;
  302. asm
  303. fnstcw oldcw
  304. movw oldcw,%cx
  305. orw $0x0c00,%cx
  306. movw %cx,newcw
  307. fldcw newcw
  308. fldt d
  309. frndint
  310. fwait
  311. fldcw oldcw
  312. end;
  313. {$endif FPC_SYSTEM_HAS_INT}
  314. {$ifndef FPC_SYSTEM_HAS_TRUNC}
  315. {$define FPC_SYSTEM_HAS_TRUNC}
  316. function fpc_trunc_real(d : ValReal) : int64;assembler;compilerproc;
  317. var
  318. oldcw,
  319. newcw : word;
  320. res : int64;
  321. asm
  322. fnstcw oldcw
  323. movw oldcw,%cx
  324. orw $0x0c00,%cx
  325. movw %cx,newcw
  326. fldcw newcw
  327. fldt d
  328. fistpq res
  329. fwait
  330. movq res,%rax
  331. fldcw oldcw
  332. end;
  333. {$endif FPC_SYSTEM_HAS_TRUNC}
  334. {$ifndef FPC_SYSTEM_HAS_ROUND}
  335. {$define FPC_SYSTEM_HAS_ROUND}
  336. function fpc_round_real(d : ValReal) : int64;assembler;compilerproc;
  337. var
  338. res : int64;
  339. asm
  340. fldt d
  341. fistpq res
  342. fwait
  343. movq res,%rax
  344. end;
  345. {$endif FPC_SYSTEM_HAS_ROUND}
  346. {$else FPC_HAS_TYPE_EXTENDED}
  347. {$ifndef FPC_SYSTEM_HAS_INT}
  348. {$define FPC_SYSTEM_HAS_INT}
  349. function fpc_int_real(d : ValReal) : ValReal;compilerproc; assembler; nostackframe;
  350. asm
  351. pextrw $3, %xmm0, %eax { eax = d[48:63] }
  352. and $0x7ff0,%ax
  353. cmp $0x4330,%ax
  354. jge .L0
  355. cvttsd2si %xmm0, %rax
  356. cvtsi2sd %rax, %xmm0
  357. .L0:
  358. end;
  359. {$endif FPC_SYSTEM_HAS_INT}
  360. {$ifndef FPC_SYSTEM_HAS_TRUNC}
  361. {$define FPC_SYSTEM_HAS_TRUNC}
  362. function fpc_trunc_real(d : ValReal) : int64;compilerproc; assembler; nostackframe;
  363. asm
  364. cvttsd2si %xmm0,%rax;
  365. end;
  366. {$endif FPC_SYSTEM_HAS_TRUNC}
  367. {$ifndef FPC_SYSTEM_HAS_ROUND}
  368. {$define FPC_SYSTEM_HAS_ROUND}
  369. function fpc_round_real(d : ValReal) : int64;compilerproc; assembler; nostackframe;
  370. asm
  371. cvtsd2si %xmm0,%rax;
  372. end;
  373. {$endif FPC_SYSTEM_HAS_ROUND}
  374. {$ifndef FPC_SYSTEM_HAS_FRAC}
  375. {$define FPC_SYSTEM_HAS_FRAC}
  376. function fpc_frac_real(d: ValReal) : ValReal;compilerproc; assembler; nostackframe;
  377. asm
  378. { Windows defines %xmm4 and %xmm5 as first non-parameter volatile registers;
  379. on SYSV systems all are considered as such, so use %xmm4 }
  380. pextrw $3, %xmm0, %eax { eax = d[48:63] }
  381. movapd %xmm0, %xmm4
  382. and $0x7ff0,%ax
  383. cmp $0x4330,%ax
  384. jge .L0
  385. cvttsd2si %xmm0, %rax
  386. cvtsi2sd %rax, %xmm4
  387. .L0:
  388. subsd %xmm4, %xmm0
  389. end;
  390. {$endif FPC_SYSTEM_HAS_FRAC}
  391. {$ifndef FPC_SYSTEM_HAS_EXP}
  392. {$define FPC_SYSTEM_HAS_EXP}
  393. {$push}
  394. {$codealign constmin=16}
  395. const
  396. FPC_M25_24_ARRAY: array[0..1] of Double = (-25.0, 24.0);
  397. FPC_300_252_ARRAY: array[0..1] of Double = (300.0, 252.0);
  398. FPC_M2100_1344_ARRAY: array[0..1] of Double = (-2100.0, 1344.0);
  399. FPC_8400_3024_ARRAY: array[0..1] of Double = (8400.0, 3024.0);
  400. {$pop}
  401. FPC_HALF_DOUBLE: Double = 0.5;
  402. FPC_ONE_DOUBLE: Double = 1.0;
  403. FPC_M15120_DOUBLE: Double = -15120.0;
  404. FPC_M5_DOUBLE: Double = -5.0;
  405. function fpc_exp_real(d : ValReal) : ValReal; compilerproc; assembler; nostackframe;
  406. {*****************************************************************}
  407. { Exponential Function }
  408. {*****************************************************************}
  409. { }
  410. { SYNOPSIS: }
  411. { }
  412. { double d, y, exp(); }
  413. { }
  414. { y = exp(d); }
  415. { }
  416. { DESCRIPTION: }
  417. { }
  418. { Returns e (2.71828...) raised to the d power. }
  419. { }
  420. { First calculate z = d log2 e, then break z into integer and }
  421. { fractional components k and f. }
  422. { }
  423. { d z k + f k f }
  424. { e = 2 => 2 = 2 2 }
  425. { }
  426. { 2^k can be calculated very quickly with bit manipulation, then }
  427. { convert 2^f to e^x via x = f ln 2 and use a Padé approximant to }
  428. { calculate e^x for x between -0.5 ln 2 and 0.5 ln 2 }
  429. {*****************************************************************}
  430. asm
  431. movsd FPC_LOG2E(%rip), %xmm4
  432. ucomisd FPC_MAXLOG_DOUBLE(%rip), %xmm0
  433. jp .LNaN { Propagate NaN into result }
  434. jae .LOverflow
  435. movsd FPC_ONE_DOUBLE(%rip), %xmm5
  436. xorpd %xmm3, %xmm3
  437. comisd FPC_MINLOG_DOUBLE(%rip), %xmm0
  438. ja .LNoUnderflow
  439. xorpd %xmm0, %xmm0 { Set result to zero }
  440. .LNaN:
  441. ret
  442. .LOverflow:
  443. movsd FPC_INFINITY_DOUBLE(%rip), %xmm0 { Set result to infinity }
  444. ret
  445. .balign 16
  446. .LNoUnderflow:
  447. movsd FPC_HALF_DOUBLE(%rip), %xmm2
  448. comisd %xmm3, %xmm0
  449. jnz .LNotZero
  450. movsd %xmm5, %xmm0 { e^0 = 1 }
  451. ret
  452. .balign 16
  453. .LNotZero:
  454. mulsd %xmm4, %xmm0
  455. addsd %xmm2, %xmm0 { Add 0.5 to make sure the fractional part falls between -0.5 and 0.5 }
  456. { %xmm0 won't be out of range due to the earlier checks }
  457. cvttsd2si %xmm0, %rax
  458. subsd %xmm2, %xmm0
  459. cvtsi2sd %rax, %xmm1
  460. movapd FPC_M25_24_ARRAY(%rip), %xmm5
  461. { Some slightly evil floating-point bit manipulation to calculate 2^k }
  462. addw $1023, %ax
  463. shlw $4, %ax
  464. subsd %xmm1, %xmm0 { %xmm0 now contains the fractional component }
  465. pinsrw $3,%eax, %xmm3 { Insert into the most-significant 16 bits }
  466. { %xmm3 now contains 2^k }
  467. { Calculate the Padé approximant that is:
  468. -5(x(x(x(x + 24) + 252) + 1344) + 3024)
  469. ----------------------------------------------
  470. x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120
  471. Aligned for easier calculation:
  472. -5(x(x(x(x + 24) + 252) + 1344) + 3024)
  473. ----------------------------------------------------
  474. x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120
  475. }
  476. movapd FPC_300_252_ARRAY(%rip), %xmm2 { 300, 252 }
  477. divsd %xmm4, %xmm0 { Reapply the factor of ln 2 to x }
  478. movapd FPC_M2100_1344_ARRAY(%rip), %xmm1 { -2100, 1344 }
  479. shufpd $0,%xmm0, %xmm0 { x, x }
  480. movapd %xmm0, %xmm4 { x, x }
  481. addpd %xmm5, %xmm0 { x - 25, x - 24 }
  482. mulpd %xmm4, %xmm0 { x(x - 25), x(x + 24) }
  483. addpd %xmm2, %xmm0 { x(x - 25) + 300, x(x + 24) + 252 }
  484. movapd FPC_8400_3024_ARRAY(%rip), %xmm2 { 8400, 3024 }
  485. mulpd %xmm4, %xmm0 { x(x(x - 25) + 300), x(x(x + 24) + 252) }
  486. addpd %xmm1, %xmm0 { x(x(x - 25) + 300) - 2100, x(x(x + 24) + 252) + 1344 }
  487. movsd FPC_M5_DOUBLE(%rip), %xmm1 { -5, 0 }
  488. mulpd %xmm4, %xmm0 { x(x(x(x - 25) + 300) - 2100), x(x(x(x + 24) + 252) + 1344) }
  489. shufpd $1,%xmm1, %xmm4 { x, -5 }
  490. movsd FPC_M15120_DOUBLE(%rip), %xmm1 { -15120, 0 }
  491. addpd %xmm2, %xmm0 { x(x(x(x - 25) + 300) - 2100) + 8400, x(x(x(x + 24) + 252) + 1344) + 3024 }
  492. movsd FPC_ONE_DOUBLE(%rip), %xmm2 { 1, 0 }
  493. mulpd %xmm4, %xmm0 { x(x(x(x(x - 25) + 300) - 2100) + 8400), -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
  494. addpd %xmm1, %xmm0 { x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120, -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
  495. movapd %xmm0, %xmm4 { x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120, -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
  496. shufpd $1,%xmm0, %xmm0 { -5(x(x(x(x + 24) + 252) + 1344) + 3024), x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120 }
  497. divsd %xmm4, %xmm0 { Padé approximant for e^x computed }
  498. mulsd %xmm3, %xmm0 { Apply the integer component calculated earlier }
  499. end;
  500. {$endif FPC_SYSTEM_HAS_EXP}
  501. {$endif FPC_HAS_TYPE_EXTENDED}