math.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2001 by the Free Pascal development team
  4. Implementation of mathematical routines (for extended type)
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {-------------------------------------------------------------------------
  12. Using functions from AMath/DAMath libraries, which are covered by the
  13. following license:
  14. (C) Copyright 2009-2013 Wolfgang Ehrhardt
  15. This software is provided 'as-is', without any express or implied warranty.
  16. In no event will the authors be held liable for any damages arising from
  17. the use of this software.
  18. Permission is granted to anyone to use this software for any purpose,
  19. including commercial applications, and to alter it and redistribute it
  20. freely, subject to the following restrictions:
  21. 1. The origin of this software must not be misrepresented; you must not
  22. claim that you wrote the original software. If you use this software in
  23. a product, an acknowledgment in the product documentation would be
  24. appreciated but is not required.
  25. 2. Altered source versions must be plainly marked as such, and must not be
  26. misrepresented as being the original software.
  27. 3. This notice may not be removed or altered from any source distribution.
  28. ----------------------------------------------------------------------------}
  29. {****************************************************************************
  30. FPU Control word
  31. ****************************************************************************}
  {$push}
  { Raise the minimum alignment of the typed constants below to 16 bytes;
    the surrounding push/pop keeps the setting local to this section. }
  {$codealign constmin=16}
  const
    { 128-bit pattern with bit 31 of every 32-bit lane cleared.
      Presumably ANDed with packed singles to strip their sign bits -
      the consumers are outside this file. }
    FPC_ABSMASK_SINGLE: array[0..1] of qword =
      ($7fffffff7fffffff, $7fffffff7fffffff); cvar; public;
    { 128-bit pattern with bit 63 of every 64-bit lane cleared
      (same idea for doubles). }
    FPC_ABSMASK_DOUBLE: array[0..1] of qword =
      ($7fffffffffffffff, $7fffffffffffffff); cvar; public;
  {$pop}
  { Makes cw the active x87 control word and remembers it in default8087cw. }
  procedure Set8087CW(cw:word);
    begin
      { The bookkeeping store is ordinary Pascal, hence PIC-safe.  cw is
        referenced from the asm statement below, so the compiler will not
        keep it in a register variable. }
      Default8087CW:=cw;
      asm
        fnclex                { discard pending x87 exception flags first }
        fldcw   cw            { then install the new control word }
      end;
    end;
  { Returns the current x87 control word. }
  function Get8087CW:word;assembler;
    asm
      pushl   $0              { zeroed dword scratch slot on the stack }
      fnstcw  (%esp)          { store the 16-bit control word into its low half }
      popl    %eax            { AX = control word, upper half of EAX = 0; slot released }
    end;
  { Makes w the active SSE control/status register (MXCSR) value and
    remembers it in defaultmxcsr. }
  procedure SetMXCSR(w : dword);
    begin
      DefaultMXCSR:=w;
  {$ifdef OLD_ASSEMBLER}
      { Old assemblers: go through a scratch stack slot and emit the
        instruction as raw bytes, which avoids a relocation on the
        ldmxcsr opcode. }
      asm
        mov     w,%eax
        subl    $4,%esp
        mov     %eax,(%esp)
        //ldmxcsr (%esp)
        .byte   0x0f,0xae,0x14,0x24
        addl    $4,%esp
      end;
  {$else OLD_ASSEMBLER}
      asm
        ldmxcsr w
      end;
  {$endif OLD_ASSEMBLER}
    end;
  { Returns the current value of the SSE control/status register (MXCSR).
    Reading does not modify the register. }
  function GetMXCSR : dword;
    var
      _w : dword;
    begin
  {$ifndef OLD_ASSEMBLER}
      asm
        stmxcsr _w
      end;
  {$else}
      asm
        { Use convoluted code to avoid relocation on
          stmxcsr opcode, and use .byte version }
        subl    $4,%esp
        //stmxcsr (%esp)
        { STMXCSR m32 is 0F AE /3: ModRM = 0x1c (mod=00, reg=011, rm=100),
          SIB = 0x24 (base=%esp, no index).  ModRM 0x14 would be /2, i.e.
          LDMXCSR, which would load MXCSR from the uninitialised slot
          instead of storing it. }
        .byte   0x0f,0xae,0x1c,0x24
        mov     (%esp),%eax
        addl    $4,%esp
        mov     %eax,_w
      end;
  {$endif OLD_ASSEMBLER}
      result:=_w;
    end;
  { Captures the x87 control word and, when SSE is available, MXCSR.
    Without SSE support the MXCSR field is set to -1 instead. }
  function GetNativeFPUControlWord: TNativeFPUControlWord; {$if defined(SYSTEMINLINE)}inline;{$endif}
    begin
      Result.cw8087:=Get8087CW;
      if not has_sse_support then
        Result.MXCSR:=-1
      else
        Result.MXCSR:=GetMXCSR;
    end;
  { Installs the x87 control word from cw and, unless cw.MXCSR equals -1,
    the MXCSR value as well. }
  procedure SetNativeFPUControlWord(const cw: TNativeFPUControlWord); {$if defined(SYSTEMINLINE)}inline;{$endif}
    begin
      Set8087CW(cw.cw8087);
      { -1 is the value GetNativeFPUControlWord stores when SSE is absent.
        NOTE(review): TNativeFPUControlWord is declared outside this file; if
        MXCSR is an unsigned dword, confirm this comparison can actually match. }
      if cw.MXCSR=-1 then
        exit;
      SetMXCSR(cw.MXCSR);
    end;
  { Alternative name for SetMXCSR; forwards w unchanged. }
  procedure SetSSECSR(w : dword);
    begin
      SetMXCSR(w);
    end;
  { Alternative name for GetMXCSR; returns its result unchanged. }
  function GetSSECSR: dword;
    begin
      GetSSECSR:=GetMXCSR;
    end;
  118. {****************************************************************************
  119. EXTENDED data type routines
  120. ****************************************************************************}
  {$define FPC_SYSTEM_HAS_ABS}
  { Placeholder for abs(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_abs_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_abs_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_SQR}
  { Placeholder for sqr(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_sqr_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_sqr_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_SQRT}
  { Placeholder for sqrt(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_sqrt_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_sqrt_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_ARCTAN}
  { Placeholder for arctan(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_arctan_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_arctan_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_LN}
  { Placeholder for ln(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_ln_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_ln_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_SIN}
  { Placeholder for sin(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_sin_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_sin_real:=0;
    end;
  {$define FPC_SYSTEM_HAS_COS}
  { Placeholder for cos(): the compiler handles this function internally,
    so the body is not expected to run.  If it does, runtime error 207 is
    raised. }
  function fpc_cos_real(d : ValReal) : ValReal;compilerproc;
    begin
      RunError(207);
      fpc_cos_real:=0;
    end;
  {$ifdef OLD_ASSEMBLER}
    {$define DISABLE_PIC_IN_EXP_REAL}
  {$endif}
  {$define FPC_SYSTEM_HAS_EXP}
  { exp function adapted from AMath library (C) Copyright 2009-2013 Wolfgang Ehrhardt
    * translated into AT&T syntax
    + PIC support
    * return +Inf/0 for +Inf/-Inf input, instead of NaN

    Outline: z = d*log2(e) is split into int(z) and frac(z); the result is
    2**frac(z) scaled by 2**int(z).  Unless DISABLE_PIC_IN_EXP_REAL is
    defined, the local constants are addressed relative to .LPIC through
    %ecx. }
  function fpc_exp_real(d : ValReal) : ValReal;assembler;compilerproc;
    const
      { ln(2) split into a high and a low part for the reduction below. }
      ln2hi: double=6.9314718036912382E-001;
      ln2lo: double=1.9082149292705877E-010;
      { Bound on |int(z)| above which frac(z) is simply forced to zero. }
      large: single=24576.0;
      two: single=2.0;
      half: single=0.5;
    asm
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      { call/pop pair: %ecx = run-time address of .LPIC }
      call        .LPIC
  .LPIC:
      pop         %ecx
  {$endif not DISABLE_PIC_IN_EXP_REAL}
      fldt        d
      fldl2e
      fmul        %st(1),%st            { z = d * log2(e) }
      frndint                           { st = int(z), st(1) = d }
      { Calculate frac(z) using modular arithmetic to avoid precision loss. }
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      fldl        ln2hi-.LPIC(%ecx)
  {$else}
      fldl        ln2hi
  {$endif}
      fmul        %st(1),%st            { int(z)*ln2_hi }
      fsubrp      %st,%st(2)            { d-int(z)*ln2_hi }
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      fldl        ln2lo-.LPIC(%ecx)
  {$else}
      fldl        ln2lo
  {$endif}
      fmul        %st(1),%st            { int(z)*ln2_lo }
      fsubrp      %st,%st(2)
      fxch        %st(1)                { (d-int(z)*ln2_hi)-int(z)*ln2_lo }
      fldl2e
      fmulp       %st,%st(1)            { frac(z); int(z) sits in st(1) }
      { The above code can result in |frac(z)|>1, particularly when rounding mode
        is not "round to nearest". f2xm1 is undefined in this case, so a check
        is necessary. Furthermore, frac(z) evaluates to NaN for d=+-Inf. }
      fld         %st
      fabs
      fld1
      fcompp                            { compare 1 with |frac(z)|, pop both }
      fstsw       %ax
      sahf
      jp          .LZeroFrac            { NaN }
      jae         .LDirect              { frac(z) <= 1 }
      fld         %st(1)
      fabs
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      fcomps      large-.LPIC(%ecx)
  {$else}
      fcomps      large
  {$endif}
      fstsw       %ax
      sahf
      jb          .LHalved              { int(z) < 24576 }
  .LZeroFrac:
      fstp        %st                   { zero out frac(z), hard way because }
      fldz                              { "fsub %st,%st" does not work for NaN }
      jmp         .LDirect
  .LHalved:
      { Calculate 2**frac(z)-1 as N*(N+2), where N=2**(frac(z)/2)-1 }
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      fmuls       half-.LPIC(%ecx)
  {$else}
      fmuls       half
  {$endif}
      f2xm1                             { N }
      fld         %st
  {$ifndef DISABLE_PIC_IN_EXP_REAL}
      fadds       two-.LPIC(%ecx)
  {$else}
      fadds       two
  {$endif}
      fmulp       %st,%st(1)            { N*(N+2) }
      jmp         .LFinish
  .LDirect:
      f2xm1                             { 2**frac(z)-1 }
  .LFinish:
      fld1
      faddp       %st,%st(1)            { 2**frac(z) }
      fscale                            { scale by 2**int(z) }
      fstp        %st(1)                { drop int(z), leave the result in st }
    end;
  {$define FPC_SYSTEM_HAS_FRAC}
  { Fractional part of d, computed with integer bit operations on the
    argument's stack image followed by one x87 subtraction. }
  function fpc_frac_real(d : ValReal) : ValReal;assembler;nostackframe;compilerproc;
  { [esp + 4 .. esp + 13] = d. }
    asm
      { Extended exponent bias is 16383 and mantissa is 63 bits not counting explicit 1. In memory:

          bit 0, byte 0                bit 64, byte 8
          |                            |
          M0 M1 ... M61 M62 1          E14 E13 ... E1 E0 S
                                       \_______________/
                                       E = 16383 + exponent

        Numbers with E < 16383 have abs < 1 so frac = itself;
        Numbers with E >= 16383 + 63 = 16446 have frac = 0, except for E = 32767 (Inf, NaN) that have frac = NaN.
        Numbers with 16383 <= E < 16383 + 63 have (16383 + 63 - E) mantissa bits after the point.
        Zero them manually instead of changing and restoring the control word.
        FISTTP + FILD is faster but FISTTP is a SSE3 instruction despite its appearance. }
      movzwl  12(%esp), %ecx
      and     $0x7FFF, %ecx           { ecx = E }
      sub     $16383, %ecx            { ecx = E - 16383 = exponent. }
      jb      .LReturnArg             { exponent < 0: abs(number) < 1, frac is the number itself. }
      sub     $63, %ecx
      jae     .LHugeOrSpecial         { exponent >= 63: no fraction bits, or Inf/NaN. }
      fldt    4(%esp)                 { st = d, loaded before its stack image is modified }
      neg     %ecx                    { ecx = 63 - exponent = number of bottom mantissa bits that must be zeroed. }
      or      $-1, %eax               { eax = all ones, so "eax shl N" will have N bottom zeros. }
      shl     %cl, %eax               { This shifts by ecx mod 32. }
      shr     $5, %ecx                { 0: mask the low mantissa dword with eax; 1: mask the high dword and zero the low one. }
      and     4(%esp,%ecx,4), %eax
      movl    $0, 4(%esp)             { If ecx = 0, gets instantly overwritten instead of branching. }
      mov     %eax, 4(%esp,%ecx,4)
      fldt    4(%esp)                 { st = d with fraction bits cleared, st(1) = d }
      fsubrp  %st(0), %st(1)          { Matches Intel-syntax "fsubp st(1), st(0)": st = d - cleared value. }
      ret     $12
  .LReturnArg:
      fldt    4(%esp)
      ret     $12
  .LHugeOrSpecial:
      cmp     $(16384 - 63), %ecx     { E = MAX, number is Inf/NaN? }
      je      .LInfOrNaN
      fldz
      ret     $12
  .LInfOrNaN:
      { Infinity is handled explicitly instead of jumping to the fldt + fsubrp + ret
        sequence, which would subtract Inf/NaN from itself and give NaN anyway:
        such a subtraction is likely to be very slow even on newer CPUs whose SSE
        units handle infinities/NaNs at full speed.
        frac(Inf) = 0 might be preferable, but the x86-64 version returns NaN too. }
      mov     8(%esp), %eax           { Check if mantissa bits 0:62 are all zeros. }
      shl     $1, %eax                { Ignore bit 63. }
      or      4(%esp), %eax
      jnz     .LReturnArg             { Not all zeros, NaN; return itself. }
      { 32-bit qNaN; per the original author, loading it with flds gave the same
        bit pattern as an actual subtraction of two infinities on their CPU. }
      movl    $0xFFC00000, 4(%esp)
      flds    4(%esp)
      { No explicit ret on this path - presumably the compiler-generated exit
        code supplies it. }
    end;
  {$define FPC_SYSTEM_HAS_INT}
  { Integer part of d (fraction bits removed), computed by clearing the
    fractional mantissa bits directly in the argument's stack image. }
  function fpc_int_real(d : ValReal) : ValReal;assembler;nostackframe;compilerproc;
  { [esp + 4 .. esp + 13] = d. }
    asm
      { See fpc_frac_real for the bit layout this relies on. }
      movzwl  12(%esp), %ecx
      and     $0x7FFF, %ecx           { ecx = E }
      sub     $16383, %ecx            { ecx = E - 16383 = exponent. }
      jb      .LReturnZero            { exponent < 0: abs(number) < 1, int is 0 (assuming its sign is not important). }
      sub     $63, %ecx
      jae     .LLoadArg               { exponent >= 63: too large to have a fraction, or Inf/NaN; int is the number itself. }
      neg     %ecx                    { ecx = 63 - exponent = number of bottom mantissa bits that must be zeroed. }
      or      $-1, %eax               { eax = all ones, so "eax shl N" will have N bottom zeros. }
      shl     %cl, %eax               { This shifts by ecx mod 32. }
      shr     $5, %ecx                { 0: mask the low mantissa dword with eax; 1: mask the high dword and zero the low one. }
      and     4(%esp,%ecx,4), %eax
      movl    $0, 4(%esp)             { If ecx = 0, gets instantly overwritten instead of branching. }
      mov     %eax, 4(%esp,%ecx,4)
  .LLoadArg:
      fldt    4(%esp)                 { return the (possibly modified) argument image }
      ret     $12
  .LReturnZero:
      fldz
      { No explicit ret on this path - presumably the compiler-generated exit
        code supplies it. }
    end;
  {$define FPC_SYSTEM_HAS_TRUNC}
  { Converts d to a 64-bit integer, returned in EDX:EAX.  The x87 control
    word is changed for the conversion and put back afterwards. }
  function fpc_trunc_real(d : ValReal) : int64;assembler;compilerproc;
    asm
      subl    $12,%esp                { scratch: control word at (%esp), qword result at 4(%esp) }
      fldt    d
      fnstcw  (%esp)
      movw    (%esp),%cx              { cx = control word on entry }
      orw     $0x0f00,(%esp)          { set bits 8..11: precision and rounding fields all ones (round toward zero) }
      fldcw   (%esp)
      movw    %cx,(%esp)              { slot now holds the entry value again, ready for the restore }
      fistpq  4(%esp)                 { store the integer with the modified control word in effect }
      fldcw   (%esp)                  { restore the entry control word }
      fwait
      movl    4(%esp),%eax            { low dword }
      movl    8(%esp),%edx            { high dword }
      { The 12 scratch bytes are not released here - presumably the
        compiler-generated frame epilogue resets %esp. }
    end;
  {$define FPC_SYSTEM_HAS_ROUND}
  { keep for bootstrapping with 2.0.x }
  { Converts d to a 64-bit integer with fistp, i.e. using whatever rounding
    mode the x87 control word currently selects; result in EDX:EAX. }
  function fpc_round_real(d : ValReal) : int64;compilerproc;assembler;
    var
      res : int64;
    asm
      fldt    d
      fistpq  res                     { store as 64-bit integer and pop }
      fwait
      movl    res,%eax                { low dword }
      movl    res+4,%edx              { high dword }
    end;