8 months ago · baa6f2a1ef
--- a/rtl/x86_64/math.inc
+++ b/rtl/x86_64/math.inc
@@ -44,6 +44,11 @@ const
 
				   FPC_ABSMASK_SINGLE: array[0..1] of qword=($7fffffff7fffffff,$7fffffff7fffffff); cvar; public;
			
 
				   FPC_ABSMASK_DOUBLE: array[0..1] of qword=($7fffffffffffffff,$7fffffffffffffff); cvar; public;
			
 
				 {$pop}
			
 
				+  FPC_LOG2E: Double   =  1.4426950408889634073599246810019; {  1/log(2)    }
			
 
				+
			
 
				+  FPC_INFINITY_DOUBLE: QWord = $7ff0000000000000;     { IEEE 754 bit representation of +oo for binary64 }
			
 
				+  FPC_MAXLOG_DOUBLE: Double  =  709.08956571282405153382846025171; { log(2**1023)  }
			
 
				+  FPC_MINLOG_DOUBLE: Double  = -709.78271289338399684324569237317; { log(2**-1024) }
			
 
				 
			
 
				 {****************************************************************************
			
 
				                             FPU Control word
			
@@ -433,4 +438,127 @@ const
 
				       end;
			
 
				     {$endif FPC_SYSTEM_HAS_FRAC}
			
 
				 
			
 
				+    {$ifndef FPC_SYSTEM_HAS_EXP}
			
 
				+    {$define FPC_SYSTEM_HAS_EXP}
			
 
				+
			
 
				+{$push}
			
 
				+{$codealign constmin=16}
			
 
				+  const
			
 
				+    FPC_M25_24_ARRAY:      array[0..1] of Double = (-25.0,   24.0);
			
 
				+    FPC_300_252_ARRAY:     array[0..1] of Double = (300.0,   252.0);
			
 
				+    FPC_M2100_1344_ARRAY:  array[0..1] of Double = (-2100.0, 1344.0);
			
 
				+    FPC_8400_3024_ARRAY:   array[0..1] of Double = (8400.0,  3024.0);
			
 
				+{$pop}
			
 
				+    FPC_HALF_DOUBLE: Double = 0.5;
			
 
				+    FPC_ONE_DOUBLE: Double = 1.0;
			
 
				+    FPC_M15120_DOUBLE: Double = -15120.0;
			
 
				+    FPC_M5_DOUBLE: Double = -5.0;
			
 
				+
			
 
				+    function fpc_exp_real(d : ValReal) : ValReal; compilerproc; assembler; nostackframe;
			
 
				+    {*****************************************************************}
			
 
				+    { Exponential Function                                            }
			
 
				+    {*****************************************************************}
			
 
				+    {                                                                 }
			
 
				+    { SYNOPSIS:                                                       }
			
 
				+    {                                                                 }
			
 
				+    { double d, y, exp();                                             }
			
 
				+    {                                                                 }
			
 
				+    { y = exp(d);                                                     }
			
 
				+    {                                                                 }
			
 
				+    { DESCRIPTION:                                                    }
			
 
				+    {                                                                 }
			
 
				+    { Returns e (2.71828...) raised to the d power.                   }
			
 
				+    {                                                                 }
			
 
				+    { First calculate z = d log2 e, then break z into integer and     }
			
 
				+    { fractional components k and f.                                  }
			
 
				+    {                                                                 }
			
 
				+    {     d    z      k + f    k f                                    }
			
 
				+    {    e  = 2   => 2      = 2 2                                     }
			
 
				+    {                                                                 }
			
 
				+    { 2^k can be calculated very quickly with bit manipulation, then  }
			
 
				+    { convert 2^f to e^x via x = f ln 2 and use a Padé approximant to }
			
 
				+    { calculate e^x for x between -0.5 ln 2 and 0.5 ln 2              }
			
 
				+    {*****************************************************************}
			
 
				+      asm
			
 
				+        movsd     FPC_LOG2E(%rip), %xmm4
			
 
				+        ucomisd   FPC_MAXLOG_DOUBLE(%rip), %xmm0
			
 
				+        jp        .LNaN { Propagate NaN into result }
			
 
				+        jae       .LOverflow
			
 
				+        movsd     FPC_ONE_DOUBLE(%rip), %xmm5
			
 
				+        xorpd     %xmm3, %xmm3
			
 
				+        comisd    FPC_MINLOG_DOUBLE(%rip), %xmm0
			
 
				+        ja        .LNoUnderflow
			
 
				+        xorpd     %xmm0, %xmm0 { Set result to zero }
			
 
				+    .LNaN:
			
 
				+        ret
			
 
				+    .LOverflow:
			
 
				+        movsd     FPC_INFINITY_DOUBLE(%rip), %xmm0 { Set result to infinity }
			
 
				+        ret
			
 
				+        .balign   16
			
 
				+    .LNoUnderflow:
			
 
				+        movsd     FPC_HALF_DOUBLE(%rip), %xmm2
			
 
				+        comisd    %xmm3, %xmm0
			
 
				+        jnz .LNotZero
			
 
				+        movsd     %xmm5, %xmm0 { e^0 = 1 }
			
 
				+        ret
			
 
				+    .LNotZero:
			
 
				+        mulsd     %xmm4, %xmm0
			
 
				+        addsd     %xmm2, %xmm0 { Add 0.5 to make sure the fractional part falls between -0.5 and 0.5 }
			
 
				+        { %xmm0 won't be out of range due to the earlier checks }
			
 
				+        cvttsd2si %xmm0, %rax
			
 
				+        subsd     %xmm2, %xmm0
			
 
				+        cvtsi2sd  %rax,  %xmm1
			
 
				+
			
 
				+        movapd    FPC_M25_24_ARRAY(%rip), %xmm5
			
 
				+
			
 
				+        { Some slightly evil floating-point bit manipulation to calculate 2^k }
			
 
				+        addw      $1023, %ax
			
 
				+        shlw      $4,    %ax
			
 
				+        subsd     %xmm1, %xmm0 { %xmm0 now contains the fractional component }
			
 
				+        pinsrw $3,%eax,  %xmm3 { Insert into the most-significant 16 bits }
			
 
				+                               { %xmm3 now contains 2^k }
			
 
				+
			
 
				+        { Calculate the Padé approximant that is:
			
 
				+
			
 
				+            -5(x(x(x(x + 24) + 252) + 1344) + 3024)
			
 
				+        ----------------------------------------------
			
 
				+        x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120
			
 
				+
			
 
				+        Aligned for easier calculation:
			
 
				+
			
 
				+             -5(x(x(x(x + 24) + 252) + 1344) + 3024)
			
 
				+         ----------------------------------------------------
			
 
				+              x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120
			
 
				+        }
			
 
				+        movapd    FPC_300_252_ARRAY(%rip), %xmm2 { 300, 252 }
			
 
				+        divsd     %xmm4, %xmm0 { Reapply the factor of ln 2 to x }
			
 
				+
			
 
				+        movapd    FPC_M2100_1344_ARRAY(%rip), %xmm1 { -2100, 1344 }
			
 
				+        shufpd $0,%xmm0, %xmm0 { x, x }
			
 
				+        movapd    %xmm0, %xmm4 { x, x }
			
 
				+
			
 
				+        addpd     %xmm5, %xmm0 { x - 25, x - 24 }
			
 
				+        mulpd     %xmm4, %xmm0 { x(x - 25), x(x + 24) }
			
 
				+        addpd     %xmm2, %xmm0 { x(x - 25) + 300, x(x + 24) + 252 }
			
 
				+        movapd    FPC_8400_3024_ARRAY(%rip), %xmm2 { 8400, 3024 }
			
 
				+        mulpd     %xmm4, %xmm0 { x(x(x - 25) + 300), x(x(x + 24) + 252) }
			
 
				+        addpd     %xmm1, %xmm0 { x(x(x - 25) + 300) - 2100, x(x(x + 24) + 252) + 1344 }
			
 
				+        movsd     FPC_M5_DOUBLE(%rip), %xmm1 { -5, 0 }
			
 
				+        mulpd     %xmm4, %xmm0 { x(x(x(x - 25) + 300) - 2100), x(x(x(x + 24) + 252) + 1344) }
			
 
				+        shufpd $1,%xmm1, %xmm4 { x, -5 }
			
 
				+        movsd     FPC_M15120_DOUBLE(%rip), %xmm1 { -15120, 0 }
			
 
				+        addpd     %xmm2, %xmm0 { x(x(x(x - 25) + 300) - 2100) + 8400, x(x(x(x + 24) + 252) + 1344) + 3024 }
			
 
				+        movsd     FPC_ONE_DOUBLE(%rip), %xmm2 { 1, 0 }
			
 
				+        mulpd     %xmm4, %xmm0 { x(x(x(x(x - 25) + 300) - 2100) + 8400), -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
			
 
				+        addpd     %xmm1, %xmm0 { x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120, -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
			
 
				+
			
 
				+        movapd    %xmm0, %xmm4 { x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120, -5(x(x(x(x + 24) + 252) + 1344) + 3024) }
			
 
				+        shufpd $1,%xmm0, %xmm0 { -5(x(x(x(x + 24) + 252) + 1344) + 3024), x(x(x(x(x - 25) + 300) - 2100) + 8400) - 15120 }
			
 
				+
			
 
				+        divsd     %xmm4, %xmm0 { Padé approximant for e^x computed }
			
 
				+
			
 
				+        mulsd     %xmm3, %xmm0 { Apply the integer component calculated earlier }
			
 
				+      end;
			
 
				+    {$endif FPC_SYSTEM_HAS_EXP}
			
 
				+
			
 
				 {$endif FPC_HAS_TYPE_EXTENDED}