Selaa lähdekoodia

* continued to work on float128 stuff

git-svn-id: trunk@9117 -
florian 17 vuotta sitten
vanhempi
commit
e3085015f7
3 muutettua tiedostoa jossa 92 lisäystä ja 5 poistoa
  1. 1 1
      .gitattributes
  2. 67 1
      rtl/inc/softfpu.pp
  3. 24 3
      rtl/inc/ufloat128.pp

+ 1 - 1
.gitattributes

@@ -4659,7 +4659,6 @@ rtl/inc/except.inc svneol=native#text/plain
 rtl/inc/fexpand.inc svneol=native#text/plain
 rtl/inc/file.inc svneol=native#text/plain
 rtl/inc/filerec.inc svneol=native#text/plain
-rtl/inc/float128.pp svneol=native#text/plain
 rtl/inc/generic.inc svneol=native#text/plain
 rtl/inc/genmath.inc svneol=native#text/plain
 rtl/inc/genset.inc svneol=native#text/plain
@@ -4714,6 +4713,7 @@ rtl/inc/threadh.inc svneol=native#text/plain
 rtl/inc/threadvr.inc svneol=native#text/plain
 rtl/inc/typefile.inc svneol=native#text/plain
 rtl/inc/ucomplex.pp svneol=native#text/plain
+rtl/inc/ufloat128.pp svneol=native#text/plain
 rtl/inc/varerror.inc svneol=native#text/plain
 rtl/inc/variant.inc svneol=native#text/plain
 rtl/inc/varianth.inc svneol=native#text/plain

+ 67 - 1
rtl/inc/softfpu.pp

@@ -70,7 +70,7 @@ these four paragraphs for those parts of this code that are retained.
 *}
 
 { $define FPC_SOFTFLOAT_FLOATX80}
-{ $define FPC_SOFTFLOAT_FLOAT128}
+{$define FPC_SOFTFLOAT_FLOAT128}
 
 { the softfpu unit can be also embedded directly into the system unit }
 
@@ -436,6 +436,7 @@ function float128_to_int64(a: float128): int64;
 function float128_to_int64_round_to_zero(a: float128): int64;
 function float128_to_float32(a: float128): float32;
 function float128_to_float64(a: float128): float64;
+function float64_to_float128( a : float64) : float128;
 {$ifdef FPC_SOFTFLOAT_FLOAT80}
 function float128_to_floatx80(a: float128): floatx80;
 {$endif FPC_SOFTFLOAT_FLOAT80}
@@ -1892,6 +1893,17 @@ Begin
 
 End;
 
+function float64ToCommonNaN( a : float64 ) : commonNaNT;
+Var
+    z : commonNaNT;
+Begin
+    if ( float64_is_signaling_nan( a )<>0 ) then
+        float_raise( float_flag_invalid );
+    z.sign := a.high shr 31;
+    shortShift64Left( a.high, a.low, 12, z.high, z.low );
+    result := z;
+
+End;
 {*
 -------------------------------------------------------------------------------
 Returns the result of converting the canonical NaN `a' to the double-
@@ -2473,6 +2485,13 @@ Function extractFloat64Frac1(a: float64): bits32;
     extractFloat64Frac1 := a.low;
   End;
 
+
+{$define FPC_SYSTEM_HAS_extractFloat64Frac}
+Function extractFloat64Frac(a: float64): bits64;
+  Begin
+    extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF;
+  End;
+
 {*
 -------------------------------------------------------------------------------
 Returns the exponent bits of the double-precision floating-point value `a'.
@@ -2537,6 +2556,16 @@ Procedure normalizeFloat64Subnormal(
       End;
   End;
 
+procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64);
+var
+  shiftCount : int8;
+begin
+    shiftCount := countLeadingZeros64( aSig ) - 11;
+    zSigPtr := aSig shl shiftCount;
+    zExpPtr := 1 - shiftCount;
+end;
+
+
 {*
 -------------------------------------------------------------------------------
 Packs the sign `zSign', the exponent `zExp', and the significand formed by
@@ -8397,6 +8426,43 @@ begin
 
 end;
 
+{----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------}
+
+function float64_to_float128( a : float64) : float128;
+var
+    aSign : flag;
+    aExp : int16;
+    aSig, zSig0, zSig1 : bits64;
+begin
+    aSig := extractFloat64Frac( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    if ( aExp = $7FF ) then begin
+        if ( aSig<>0 ) then
+          result:=commonNaNToFloat128( float64ToCommonNaN( a ) );
+        result:=packFloat128( aSign, $7FFF, 0, 0 );
+        exit;
+    end;
+    if ( aExp = 0 ) then begin
+        if ( aSig = 0 ) then
+          begin
+            result:=packFloat128( aSign, 0, 0, 0 );
+            exit;
+          end;
+
+        normalizeFloat64Subnormal( aSig, aExp, aSig );
+        dec(aExp);
+    end;
+    shift128Right( aSig, 0, 4, zSig0, zSig1 );
+    result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 );
+
+end;
+
 {$endif FPC_SOFTFLOAT_FLOAT128}
 
 {$endif not(defined(fpc_softfpu_interface))}

+ 24 - 3
rtl/inc/float128.pp → rtl/inc/ufloat128.pp

@@ -13,13 +13,17 @@
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
  **********************************************************************}
-unit float128;
+{$inline on}
+unit ufloat128;
 
   interface
 
     uses
       softfpu;
 
+    type
+      float128 = softfpu.float128;
+
     operator+ (const f1,f2 : float128) result : float128;inline;
     operator* (const f1,f2 : float128) result : float128;inline;
     operator- (const f1,f2 : float128) result : float128;inline;
@@ -29,9 +33,25 @@ unit float128;
 
     operator :=(const source : float128) dest : double;inline;
 
+    procedure DumpFloat128(const f : float128);
 
   implementation
 
+    procedure DumpFloat128(const f : float128);
+      type
+        ta = packed array[0..15] of byte;
+      var
+        i : longint;
+      begin
+        for i:=15 downto 0 do
+          begin
+            write(hexstr(ta(f)[i],2));
+            if i<15 then
+              write(' ');
+          end;
+      end;
+
+
     operator+ (const f1,f2 : float128) result : float128;inline;
       begin
         result:=float128_add(f1,f2);
@@ -58,13 +78,14 @@ unit float128;
 
     operator :=(const source : double) dest : float128;inline;
       begin
-        dest:=float64_to_float128(source);
+        dest:=float64_to_float128(float64(source));
       end;
 
 
     operator :=(const source : float128) dest : double;inline;
       begin
-        dest:=float128_to_float64(source);
+        dest:=double(float128_to_float64(source));
       end;
 
+
 end.