1 year ago · e5b47310c8
--- a/rtl/i386/math.inc
+++ b/rtl/i386/math.inc
@@ -293,39 +293,90 @@ const
 
															     {$define FPC_SYSTEM_HAS_FRAC}
														
 
															-    function fpc_frac_real(d : ValReal) : ValReal;assembler;compilerproc;
														
 
															+    function fpc_frac_real(d : ValReal) : ValReal;assembler;nostackframe;compilerproc;
														
 
															+      { [esp + 4 .. esp + 13] = d. }
														
 
															       asm
														
 
															-        subl $4,%esp
														
 
															-        fnstcw (%esp)
														
 
															-        fwait
														
 
															-        movw (%esp),%cx
														
 
															-        orw $0x0f00,(%esp)
														
 
															-        fldcw (%esp)
														
 
															-        fldt d
														
 
															-        frndint
														
 
															-        fldt d
														
 
															-        fsub %st(1),%st
														
 
															-        fstp %st(1)
														
 
															-        movw %cx,(%esp)
														
 
															-        fldcw (%esp)
														
 
															+        { Extended exponent bias is 16383 and mantissa is 63 bits not counting explicit 1. In memory:
														
 
															+
														
 
															+          bit 0, byte 0       bit 64, byte 8
														
 
															+          ↓                   ↓
														
 
															+          M0 M1 ... M61 M62 1 E14 E13 ... E1 E0 S
														
 
															+                              └───────────────┘
														
 
															+                              E = 16383 + exponent
														
 
															+
														
 
															+          Numbers with E < 16383 have abs < 1 so frac = itself;
														
 
															+          Numbers with E ≥ 16383 + 63 = 16446 have frac = 0, except for E = 32767 (Inf, NaN) that have frac = NaN.
														
 
															+
														
 
															+          Numbers with 16383 ≤ E < 16383 + 63 have (16383 + 63 - E) mantissa bits after the point.
														
 
															+          Zero them manually instead of changing and restoring the control word.
														
 
															+          FISTTP + FILD is faster but FISTTP is an SSE3 instruction despite its appearance. :( }
														
 
															+
														
 
															+        movzwl  12(%esp), %ecx
														
 
															+        and     $0x7FFF, %ecx { ecx = E }
														
 
															+        sub     $16383, %ecx { ecx = E - 16383 = exponent. }
														
 
															+        jb      .LLoad { exponent < 0 ⇒ abs(number) < 1 ⇒ frac is the number itself. }
														
 
															+        sub     $63, %ecx
														
 
															+        jae     .LZeroOrSpecial
														
 
															+
														
 
															+        fldt    4(%esp)
														
 
															+        neg     %ecx { ecx = 63 - exponent = number of mantissa bits after point = number of bottom mantissa bits that must be zeroed. }
														
 
															+        or      $-1, %eax { eax = all ones, so “eax shl N” will have N bottom zeros. }
														
 
															+        shl     %cl, %eax { This shifts by ecx mod 32. }
														
 
															+        shr     $5, %ecx { 0 if first 32 bits must be masked by eax, 1 if second 32 bits must be masked by eax and first 32 bits must be zeroed. }
														
 
															+        and     4(%esp,%ecx,4), %eax
														
 
															+        movl    $0, 4(%esp) { If ecx = 0, gets instantly overwritten instead of branching. }
														
 
															+        mov     %eax, 4(%esp,%ecx,4)
														
 
															+        fldt    4(%esp)
														
 
															+        fsubrp  %st(0), %st(1) { st(1) := st(1) - st(0), then pop. AT&T syntax swaps the fsub/fsubr mnemonics when the destination is not %st(0), so this matches fsubP st(1), st(0) in Intel syntax. }
														
 
															+        ret     $12
														
 
															+
														
 
															+.LLoad:
														
 
															+        fldt    4(%esp)
														
 
															+        ret     $12
														
 
															+
														
 
															+.LZeroOrSpecial:
														
 
															+        cmp     $(16384 - 63), %ecx { E = MAX, number is Inf/NaN? }
														
 
															+        je      .LInfNaN
														
 
															+        fldz
														
 
															+        ret     $12
														
 
															+
														
 
															+.LInfNaN:
														
 
															+        { Bother a bit to explicitly handle infinity instead of jumping to fldt + fsubrp + ret that would conveniently subtract Inf/NaN from itself and give NaN.
														
 
															+          Such subtracting is likely to be very slow even on newer CPUs whose SSE units handle infinities/NaNs at full speed.
														
 
															+          I’d prefer frac(Inf) = 0, but x86-64 version returns NaN too. }
														
 
															+        mov     8(%esp), %eax { Check if mantissa bits 0:62 are all zeros. }
														
 
															+        shl     $1, %eax { Ignore bit 63. }
														
 
															+        or      4(%esp), %eax
														
 
															+        jnz     .LLoad { Not all zeros, NaN; return itself. }
														
 
															+        movl    $0xFFC00000, 4(%esp) { 32-bit qNaN that, when loaded with flds on my CPU, produces the same bitpattern as actual subtraction of two infinities. ^^" }
														
 
															+        flds    4(%esp)
														
 
															       end;
														
 
															     {$define FPC_SYSTEM_HAS_INT}
														
 
															-    function fpc_int_real(d : ValReal) : ValReal;assembler;compilerproc;
														
 
															+    function fpc_int_real(d : ValReal) : ValReal;assembler;nostackframe;compilerproc;
														
 
															+      { [esp + 4 .. esp + 13] = d. }
														
 
															       asm
														
 
															-        subl $4,%esp
														
 
															-        fnstcw (%esp)
														
 
															-        fwait
														
 
															-        movw (%esp),%cx
														
 
															-        orw $0x0f00,(%esp)
														
 
															-        fldcw (%esp)
														
 
															-        fwait
														
 
															-        fldt d
														
 
															-        frndint
														
 
															-        fwait
														
 
															-        movw %cx,(%esp)
														
 
															-        fldcw (%esp)
														
 
															+        { See fpc_frac_real. }
														
 
															+        movzwl  12(%esp), %ecx
														
 
															+        and     $0x7FFF, %ecx { ecx = E }
														
 
															+        sub     $16383, %ecx { ecx = E - 16383 = exponent. }
														
 
															+        jb      .LZero { exponent < 0 ⇒ abs(number) < 1 ⇒ int is 0 (assuming its sign is not important). }
														
 
															+        sub     $63, %ecx
														
 
															+        jae     .LReload { exponent ≥ 63 ⇒ the number is either too large to have a fraction or an Inf/NaN ⇒ int is the number itself. }
														
 
															+
														
 
															+        neg     %ecx { ecx = 63 - exponent = number of mantissa bits after point = number of bottom mantissa bits that must be zeroed. }
														
 
															+        or      $-1, %eax { eax = all ones, so “eax shl N” will have N bottom zeros. }
														
 
															+        shl     %cl, %eax { This shifts by ecx mod 32. }
														
 
															+        shr     $5, %ecx { 0 if first 32 bits must be masked by eax, 1 if second 32 bits must be masked by eax and first 32 bits must be zeroed. }
														
 
															+        and     4(%esp,%ecx,4), %eax
														
 
															+        movl    $0, 4(%esp) { If ecx = 0, gets instantly overwritten instead of branching. }
														
 
															+        mov     %eax, 4(%esp,%ecx,4)
														
 
															+.LReload:
														
 
															+        fldt    4(%esp)
														
 
															+        ret     $12
														
 
															+.LZero:
														
 
															+        fldz
														
 
															       end;