Browse Source

+ raw conversion of float80 and float128 support commited

git-svn-id: trunk@5700 -
florian 18 years ago
parent
commit
454120a504
1 changed files with 2148 additions and 0 deletions
  1. 2148 0
      rtl/inc/softfpu.pp

+ 2148 - 0
rtl/inc/softfpu.pp

@@ -28,6 +28,39 @@ Derivative works are acceptable, even for commercial purposes, so long as
 include prominent notice akin to these four paragraphs for those parts of
 this code that are retained.
 
+===============================================================================
+The float80 and float128 part is translated from the softfloat package
+by Florian Klaempfl and contained the following copyright notice
+===============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+
 ===============================================================================
 *}
 
@@ -4658,6 +4691,2121 @@ Begin
     int64_to_float64:= float_result;
 End;
 
+
+{$ifdef FPC_SOFTFLOAT_FLOATX80}
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_int32(a: floatx80): int32;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
+    shiftCount := $4037 - aExp;
+    if ( shiftCount <= 0 ) shiftCount := 1;
+    shift64RightJamming( aSig, shiftCount, &aSig );
+    result := roundAndPackInt32( aSign, aSig );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_int32_round_to_zero(a: floatx80): int32;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( $401E < aExp ) begin
+        if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
+        goto invalid;
+    end;
+    else if ( aExp < $3FFF ) begin
+        if ( aExp or aSig ) float_exception_flags or= float_flag_inexact;
+        result := 0;
+    end;
+    shiftCount := $403E - aExp;
+    savedASig := aSig;
+    aSig >>= shiftCount;
+    z := aSig;
+    if ( aSign ) z := - z;
+    if ( ( z < 0 ) xor aSign ) begin
+ invalid:
+        float_raise( float_flag_invalid );
+        result := aSign ? (sbits32) $80000000 : $7FFFFFFF;
+    end;
+    if ( ( aSig shl shiftCount ) <> savedASig ) begin
+        float_exception_flags or= float_flag_inexact;
+    end;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN,
+| the largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_int64(a: floatx80): int64;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, aSigExtra;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    shiftCount := $403E - aExp;
+    if ( shiftCount <= 0 ) begin
+        if ( shiftCount ) begin
+            float_raise( float_flag_invalid );
+            if (    ! aSign
+                 or (    ( aExp = $7FFF )
+                      and ( aSig <> LIT64( $8000000000000000 ) ) )
+               ) begin
+                result := LIT64( $7FFFFFFFFFFFFFFF );
+            end;
+            result := (sbits64) LIT64( $8000000000000000 );
+        end;
+        aSigExtra := 0;
+    end;
+    else begin
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    end;
+    result := roundAndPackInt64( aSign, aSig, aSigExtra );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_int64_round_to_zero(a: floatx80): int64;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig;
+    int64 z;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    shiftCount := aExp - $403E;
+    if ( 0 <= shiftCount ) begin
+        aSig &= LIT64( $7FFFFFFFFFFFFFFF );
+        if ( ( a.high <> $C03E ) or aSig ) begin
+            float_raise( float_flag_invalid );
+            if ( ! aSign or ( ( aExp = $7FFF ) and aSig ) ) begin
+                result := LIT64( $7FFFFFFFFFFFFFFF );
+            end;
+        end;
+        result := (sbits64) LIT64( $8000000000000000 );
+    end;
+    else if ( aExp < $3FFF ) begin
+        if ( aExp or aSig ) float_exception_flags or= float_flag_inexact;
+        result := 0;
+    end;
+    z := aSig>>( - shiftCount );
+    if ( (bits64) ( aSig shl ( shiftCount and 63 ) ) ) begin
+        float_exception_flags or= float_flag_inexact;
+    end;
+    if ( aSign ) z := - z;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_float32(a: floatx80): float32;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 aSig;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( aSig shl 1 ) ) begin
+            result := commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+        end;
+        result := packFloat32( aSign, $FF, 0 );
+    end;
+    shift64RightJamming( aSig, 33, &aSig );
+    if ( aExp or aSig ) aExp -= $3F81;
+    result := roundAndPackFloat32( aSign, aExp, aSig );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_float64(a: floatx80): float64;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 aSig, zSig;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( aSig shl 1 ) ) begin
+            result := commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+        end;
+        result := packFloat64( aSign, $7FF, 0 );
+    end;
+    shift64RightJamming( aSig, 1, &zSig );
+    if ( aExp or aSig ) aExp -= $3C01;
+    result := roundAndPackFloat64( aSign, aExp, zSig );
+
+end;
+
+{$ifdef FPC_SOFTFLOAT_FLOAT128}
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_to_float128(a: floatx80): float128;
+begin
+    flag aSign;
+    int16 aExp;
+    bits64 aSig, zSig0, zSig1;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) begin
+        result := commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+    end;
+    shift128Right( aSig shl 1, 0, 16, &zSig0, &zSig1 );
+    result := packFloat128( aSign, aExp, zSig0, zSig1 );
+
+end;
+
+{$endif FPC_SOFTFLOAT_FLOAT128}
+
+{*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and Returns the result as an extended quadruple-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_round_to_int(a: floatx80): floatx80;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+
+    aExp := extractFloatx80Exp( a );
+    if ( $403E <= aExp ) begin
+        if ( ( aExp = $7FFF ) and (bits64) ( extractFloatx80Frac( a ) shl 1 ) ) begin
+            result := propagateFloatx80NaN( a, a );
+        end;
+        result := a;
+    end;
+    if ( aExp < $3FFF ) begin
+        if (    ( aExp = 0 )
+             and ( (bits64) ( extractFloatx80Frac( a ) shl 1 ) = 0 ) ) begin
+            result := a;
+        end;
+        float_exception_flags or= float_flag_inexact;
+        aSign := extractFloatx80Sign( a );
+        switch ( float_rounding_mode ) begin
+         case float_round_nearest_even:
+            if ( ( aExp = $3FFE ) and (bits64) ( extractFloatx80Frac( a ) shl 1 )
+               ) begin
+                result :=
+                    packFloatx80( aSign, $3FFF, LIT64( $8000000000000000 ) );
+            end;
+            break;
+         case float_round_down:
+            result :=
+                  aSign ?
+                      packFloatx80( 1, $3FFF, LIT64( $8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 );
+         case float_round_up:
+            result :=
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, $3FFF, LIT64( $8000000000000000 ) );
+        end;
+        result := packFloatx80( aSign, 0, 0 );
+    end;
+    lastBitMask := 1;
+    lastBitMask  shl = $403E - aExp;
+    roundBitsMask := lastBitMask - 1;
+    z := a;
+    roundingMode := float_rounding_mode;
+    if ( roundingMode = float_round_nearest_even ) begin
+        z.low += lastBitMask>>1;
+        if ( ( z.low and roundBitsMask ) = 0 ) z.low &= ~ lastBitMask;
+    end;
+    else if ( roundingMode <> float_round_to_zero ) begin
+        if ( extractFloatx80Sign( z ) xor ( roundingMode = float_round_up ) ) begin
+            z.low += roundBitsMask;
+        end;
+    end;
+    z.low &= ~ roundBitsMask;
+    if ( z.low = 0 ) begin
+        ++z.high;
+        z.low := LIT64( $8000000000000000 );
+    end;
+    if ( z.low <> a.low ) float_exception_flags or= float_flag_inexact;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function addFloatx80Sigs(a: floatx80; b: floatx80, flag zSign ): floatx80;
+begin
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    bSig := extractFloatx80Frac( b );
+    bExp := extractFloatx80Exp( b );
+    expDiff := aExp - bExp;
+    if ( 0 < expDiff ) begin
+        if ( aExp = $7FFF ) begin
+            if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+            result := a;
+        end;
+        if ( bExp = 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp := aExp;
+    end;
+    else if ( expDiff < 0 ) begin
+        if ( bExp = $7FFF ) begin
+            if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+            result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
+        end;
+        if ( aExp = 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp := bExp;
+    end;
+    else begin
+        if ( aExp = $7FFF ) begin
+            if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
+                result := propagateFloatx80NaN( a, b );
+            end;
+            result := a;
+        end;
+        zSig1 := 0;
+        zSig0 := aSig + bSig;
+        if ( aExp = 0 ) begin
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        end;
+        zExp := aExp;
+        goto shiftRight1;
+    end;
+    zSig0 := aSig + bSig;
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 or= LIT64( $8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    result :=
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function subFloatx80Sigs(a: floatx80; b: floatx80, flag zSign ): floatx80;
+begin
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    bSig := extractFloatx80Frac( b );
+    bExp := extractFloatx80Exp( b );
+    expDiff := aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
+            result := propagateFloatx80NaN( a, b );
+        end;
+        float_raise( float_flag_invalid );
+        z.low := floatx80_default_nan_low;
+        z.high := floatx80_default_nan_high;
+        result := z;
+    end;
+    if ( aExp = 0 ) begin
+        aExp := 1;
+        bExp := 1;
+    end;
+    zSig1 := 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    result := packFloatx80( float_rounding_mode = float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp = $7FFF ) begin
+        if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        result := packFloatx80( zSign xor 1, $7FFF, LIT64( $8000000000000000 ) );
+    end;
+    if ( aExp = 0 ) ++expDiff;
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+    zExp := bExp;
+    zSign  xor = 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        result := a;
+    end;
+    if ( bExp = 0 ) --expDiff;
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+    zExp := aExp;
+ normalizeRoundAndPack:
+    result :=
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_add(a: floatx80; b: floatx80): floatx80;
+begin
+    flag aSign, bSign;
+
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign = bSign ) begin
+        result := addFloatx80Sigs( a, b, aSign );
+    end;
+    else begin
+        result := subFloatx80Sigs( a, b, aSign );
+    end;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_sub(a: floatx80; b: floatx80 ): floatx80;
+begin
+    flag aSign, bSign;
+
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign = bSign ) begin
+        result := subFloatx80Sigs( a, b, aSign );
+    end;
+    else begin
+        result := addFloatx80Sigs( a, b, aSign );
+    end;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_mul(a: floatx80; b: floatx80): floatx80;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    bSig := extractFloatx80Frac( b );
+    bExp := extractFloatx80Exp( b );
+    bSign := extractFloatx80Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FFF ) begin
+        if (    (bits64) ( aSig shl 1 )
+             or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
+            result := propagateFloatx80NaN( a, b );
+        end;
+        if ( ( bExp or bSig ) = 0 ) goto invalid;
+        result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        if ( ( aExp or aSig ) = 0 ) begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := floatx80_default_nan_low;
+            z.high := floatx80_default_nan_high;
+            result := z;
+        end;
+        result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
+    end;
+    if ( aExp = 0 ) begin
+        if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    end;
+    if ( bExp = 0 ) begin
+        if ( bSig = 0 ) result := packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    end;
+    zExp := aExp + bExp - $3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    if ( 0 < (sbits64) zSig0 ) begin
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    end;
+    result :=
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_div(a: floatx80; b: floatx80 ): floatx80;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+
+    aSig := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    bSig := extractFloatx80Frac( b );
+    bExp := extractFloatx80Exp( b );
+    bSign := extractFloatx80Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        if ( bExp = $7FFF ) begin
+            if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+            goto invalid;
+        end;
+        result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        result := packFloatx80( zSign, 0, 0 );
+    end;
+    if ( bExp = 0 ) begin
+        if ( bSig = 0 ) begin
+            if ( ( aExp or aSig ) = 0 ) begin
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low := floatx80_default_nan_low;
+                z.high := floatx80_default_nan_high;
+                result := z;
+            end;
+            float_raise( float_flag_divbyzero );
+            result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
+        end;
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    end;
+    if ( aExp = 0 ) begin
+        if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    end;
+    zExp := aExp - bExp + $3FFE;
+    rem1 := 0;
+    if ( bSig <= aSig ) begin
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    end;
+    zSig0 := estimateDiv128To64( aSig, rem1, bSig );
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) begin
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    end;
+    zSig1 := estimateDiv128To64( rem1, 0, bSig );
+    if ( (bits64) ( zSig1 shl 1 ) <= 8 ) begin
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) begin
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        end;
+        zSig1 or= ( ( rem1 or rem2 ) <> 0 );
+    end;
+    result :=
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_rem(a: floatx80; b: floatx80 ): floatx80;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    bSig := extractFloatx80Frac( b );
+    bExp := extractFloatx80Exp( b );
+    bSign := extractFloatx80Sign( b );
+    if ( aExp = $7FFF ) begin
+        if (    (bits64) ( aSig0 shl 1 )
+             or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
+            result := propagateFloatx80NaN( a, b );
+        end;
+        goto invalid;
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
+        result := a;
+    end;
+    if ( bExp = 0 ) begin
+        if ( bSig = 0 ) begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := floatx80_default_nan_low;
+            z.high := floatx80_default_nan_high;
+            result := z;
+        end;
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    end;
+    if ( aExp = 0 ) begin
+        if ( (bits64) ( aSig0 shl 1 ) = 0 ) result := a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    end;
+    bSig or= LIT64( $8000000000000000 );
+    zSign := aSign;
+    expDiff := aExp - bExp;
+    aSig1 := 0;
+    if ( expDiff < 0 ) begin
+        if ( expDiff < -1 ) result := a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff := 0;
+    end;
+    q := ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) begin
+        q := estimateDiv128To64( aSig0, aSig1, bSig );
+        q := ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    end;
+    expDiff += 64;
+    if ( 0 < expDiff ) begin
+        q := estimateDiv128To64( aSig0, aSig1, bSig );
+        q := ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q shl ( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) begin
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        end;
+    end;
+    else begin
+        term1 := 0;
+        term0 := bSig;
+    end;
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         or (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              and ( q and 1 ) )
+       ) begin
+        aSig0 := alternateASig0;
+        aSig1 := alternateASig1;
+        zSign := ! zSign;
+    end;
+    result :=
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_sqrt(a: floatx80): floatx80;
+begin
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    floatx80 z;
+
+    aSig0 := extractFloatx80Frac( a );
+    aExp := extractFloatx80Exp( a );
+    aSign := extractFloatx80Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( (bits64) ( aSig0 shl 1 ) ) result := propagateFloatx80NaN( a, a );
+        if ( ! aSign ) result := a;
+        goto invalid;
+    end;
+    if ( aSign ) begin
+        if ( ( aExp or aSig0 ) = 0 ) result := a;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low := floatx80_default_nan_low;
+        z.high := floatx80_default_nan_high;
+        result := z;
+    end;
+    if ( aExp = 0 ) begin
+        if ( aSig0 = 0 ) result := packFloatx80( 0, 0, 0 );
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    end;
+    zExp := ( ( aExp - $3FFF )>>1 ) + $3FFF;
+    zSig0 := estimateSqrt32( aExp, aSig0>>32 );
+    shift128Right( aSig0, 0, 2 + ( aExp and 1 ), &aSig0, &aSig1 );
+    zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
+    doubleZSig0 := zSig0 shl 1;
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) begin
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128( rem0, rem1, zSig0>>63, doubleZSig0 or 1, &rem0, &rem1 );
+    end;
+    zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 and LIT64( $3FFFFFFFFFFFFFFF ) ) <= 5 ) begin
+        if ( zSig1 = 0 ) zSig1 := 1;
+        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) begin
+            --zSig1;
+            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+            term3 or= 1;
+            term2 or= doubleZSig0;
+            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        end;
+        zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
+    end;
+    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 or= doubleZSig0;
+    result :=
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_eq(a: floatx80; b: floatx80 ): flag;
+begin
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        if (    floatx80_is_signaling_nan( a )
+             or floatx80_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    result :=
+           ( a.low = b.low )
+        and (    ( a.high = b.high )
+             or (    ( a.low = 0 )
+                  and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
+           );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_le(a: floatx80; b: floatx80 ): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            or (    ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 = 0 );
+    end;
+    result :=
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_lt(a: floatx80; b: floatx80 ): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            and (    ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 <> 0 );
+    end;
+    result :=
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_eq_signaling(a: floatx80; b: floatx80 ): flag;
+begin
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    result :=
+           ( a.low = b.low )
+        and (    ( a.high = b.high )
+             or (    ( a.low = 0 )
+                  and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
+           );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+| do not cause an exception.  Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_le_quiet(a: floatx80; b: floatx80 ): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        if (    floatx80_is_signaling_nan( a )
+             or floatx80_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            or (    ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 = 0 );
+    end;
+    result :=
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+| an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function floatx80_lt_quiet(a: floatx80; b: floatx80 ): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
+         or (    ( extractFloatx80Exp( b ) = $7FFF )
+              and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
+       ) begin
+        if (    floatx80_is_signaling_nan( a )
+             or floatx80_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    aSign := extractFloatx80Sign( a );
+    bSign := extractFloatx80Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            and (    ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 <> 0 );
+    end;
+    result :=
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+end;
+
+{$endif FPC_SOFTFLOAT_FLOATX80}
+
+
+{$ifdef FPC_SOFTFLOAT_FLOAT128}
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function float128_to_int32(a: float128): int32;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( ( aExp = $7FFF ) and ( aSig0 or aSig1 ) ) aSign := 0;
+    if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
+    aSig0 or= ( aSig1 <> 0 );
+    shiftCount := $4028 - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+    result := roundAndPackInt32( aSign, aSig0 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*}
+
+function float128_to_int32_round_to_zero(a: float128): int32;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1, savedASig;
+    int32 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    aSig0 or= ( aSig1 <> 0 );
+    if ( $401E < aExp ) begin
+        if ( ( aExp = $7FFF ) and aSig0 ) aSign := 0;
+        goto invalid;
+    end;
+    else if ( aExp < $3FFF ) begin
+        if ( aExp or aSig0 ) float_exception_flags or= float_flag_inexact;
+        result := 0;
+    end;
+    aSig0 or= LIT64( $0001000000000000 );
+    shiftCount := $402F - aExp;
+    savedASig := aSig0;
+    aSig0 >>= shiftCount;
+    z := aSig0;
+    if ( aSign ) z := - z;
+    if ( ( z < 0 ) xor aSign ) begin
+ invalid:
+        float_raise( float_flag_invalid );
+        result := aSign ? (sbits32) $80000000 : $7FFFFFFF;
+    end;
+    if ( ( aSig0 shl shiftCount ) <> savedASig ) begin
+        float_exception_flags or= float_flag_inexact;
+    end;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*}
+
+function float128_to_int64(a: float128): int64;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
+    shiftCount := $402F - aExp;
+    if ( shiftCount <= 0 ) begin
+        if ( $403E < aExp ) begin
+            float_raise( float_flag_invalid );
+            if (    ! aSign
+                 or (    ( aExp = $7FFF )
+                      and ( aSig1 or ( aSig0 <> LIT64( $0001000000000000 ) ) )
+                    )
+               ) begin
+                result := LIT64( $7FFFFFFFFFFFFFFF );
+            end;
+            result := (sbits64) LIT64( $8000000000000000 );
+        end;
+        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+    end;
+    else begin
+        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+    end;
+    result := roundAndPackInt64( aSign, aSig0, aSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*}
+
+function float128_to_int64_round_to_zero(a: float128): int64;
+begin
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;
+    int64 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
+    shiftCount := aExp - $402F;
+    if ( 0 < shiftCount ) begin
+        if ( $403E <= aExp ) begin
+            aSig0 &= LIT64( $0000FFFFFFFFFFFF );
+            if (    ( a.high = LIT64( $C03E000000000000 ) )
+                 and ( aSig1 < LIT64( $0002000000000000 ) ) ) begin
+                if ( aSig1 ) float_exception_flags or= float_flag_inexact;
+            end;
+            else begin
+                float_raise( float_flag_invalid );
+                if ( ! aSign or ( ( aExp = $7FFF ) and ( aSig0 or aSig1 ) ) ) begin
+                    result := LIT64( $7FFFFFFFFFFFFFFF );
+                end;
+            end;
+            result := (sbits64) LIT64( $8000000000000000 );
+        end;
+        z := ( aSig0 shl shiftCount ) or ( aSig1>>( ( - shiftCount ) and 63 ) );
+        if ( (bits64) ( aSig1 shl shiftCount ) ) begin
+            float_exception_flags or= float_flag_inexact;
+        end;
+    end;
+    else begin
+        if ( aExp < $3FFF ) begin
+            if ( aExp or aSig0 or aSig1 ) begin
+                float_exception_flags or= float_flag_inexact;
+            end;
+            result := 0;
+        end;
+        z := aSig0>>( - shiftCount );
+        if (    aSig1
+             or ( shiftCount and (bits64) ( aSig0 shl ( shiftCount and 63 ) ) ) ) begin
+            float_exception_flags or= float_flag_inexact;
+        end;
+    end;
+    if ( aSign ) z := - z;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_to_float32(a: float128): float32;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+    bits32 zSig;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) begin
+            result := commonNaNToFloat32( float128ToCommonNaN( a ) );
+        end;
+        result := packFloat32( aSign, $FF, 0 );
+    end;
+    aSig0 or= ( aSig1 <> 0 );
+    shift64RightJamming( aSig0, 18, &aSig0 );
+    zSig := aSig0;
+    if ( aExp or zSig ) begin
+        zSig or= $40000000;
+        aExp -= $3F81;
+    end;
+    result := roundAndPackFloat32( aSign, aExp, zSig );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_to_float64(a: float128): float64;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) begin
+            result := commonNaNToFloat64( float128ToCommonNaN( a ) );
+        end;
+        result := packFloat64( aSign, $7FF, 0 );
+    end;
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+    aSig0 or= ( aSig1 <> 0 );
+    if ( aExp or aSig0 ) begin
+        aSig0 or= LIT64( $4000000000000000 );
+        aExp -= $3C01;
+    end;
+    result := roundAndPackFloat64( aSign, aExp, aSig0 );
+
+end;
+
+{$ifdef FPC_SOFTFLOAT_FLOATX80}
+
+{*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_to_floatx80(a: float128): floatx80;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) begin
+            result := commonNaNToFloatx80( float128ToCommonNaN( a ) );
+        end;
+        result := packFloatx80( aSign, $7FFF, LIT64( $8000000000000000 ) );
+    end;
+    if ( aExp = 0 ) begin
+        if ( ( aSig0 or aSig1 ) = 0 ) result := packFloatx80( aSign, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    end;
+    else begin
+        aSig0 or= LIT64( $0001000000000000 );
+    end;
+    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+    result := roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
+
+end;
+
+{$endif FPC_SOFTFLOAT_FLOATX80}
+
+{*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| Returns the result as a quadruple-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_round_to_int(a: float128): float128;
+begin
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float128 z;
+
+    aExp := extractFloat128Exp( a );
+    if ( $402F <= aExp ) begin
+        if ( $406F <= aExp ) begin
+            if (    ( aExp = $7FFF )
+                 and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) )
+               ) begin
+                result := propagateFloat128NaN( a, a );
+            end;
+            result := a;
+        end;
+        lastBitMask := 1;
+        lastBitMask := ( lastBitMask shl ( $406E - aExp ) ) shl 1;
+        roundBitsMask := lastBitMask - 1;
+        z := a;
+        roundingMode := float_rounding_mode;
+        if ( roundingMode = float_round_nearest_even ) begin
+            if ( lastBitMask ) begin
+                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+                if ( ( z.low and roundBitsMask ) = 0 ) z.low &= ~ lastBitMask;
+            end;
+            else begin
+                if ( (sbits64) z.low < 0 ) begin
+                    ++z.high;
+                    if ( (bits64) ( z.low shl 1 ) = 0 ) z.high &= ~1;
+                end;
+            end;
+        end;
+        else if ( roundingMode <> float_round_to_zero ) begin
+            if (   extractFloat128Sign( z )
+                 xor ( roundingMode = float_round_up ) ) begin
+                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+            end;
+        end;
+        z.low &= ~ roundBitsMask;
+    end;
+    else begin
+        if ( aExp < $3FFF ) begin
+            if ( ( ( (bits64) ( a.high shl 1 ) ) or a.low ) = 0 ) result := a;
+            float_exception_flags or= float_flag_inexact;
+            aSign := extractFloat128Sign( a );
+            switch ( float_rounding_mode ) begin
+             case float_round_nearest_even:
+                if (    ( aExp = $3FFE )
+                     and (   extractFloat128Frac0( a )
+                          or extractFloat128Frac1( a ) )
+                   ) begin
+                    result := packFloat128( aSign, $3FFF, 0, 0 );
+                end;
+                break;
+             case float_round_down:
+                result :=
+                      aSign ? packFloat128( 1, $3FFF, 0, 0 )
+                    : packFloat128( 0, 0, 0, 0 );
+             case float_round_up:
+                result :=
+                      aSign ? packFloat128( 1, 0, 0, 0 )
+                    : packFloat128( 0, $3FFF, 0, 0 );
+            end;
+            result := packFloat128( aSign, 0, 0, 0 );
+        end;
+        lastBitMask := 1;
+        lastBitMask  shl = $402F - aExp;
+        roundBitsMask := lastBitMask - 1;
+        z.low := 0;
+        z.high := a.high;
+        roundingMode := float_rounding_mode;
+        if ( roundingMode = float_round_nearest_even ) begin
+            z.high += lastBitMask>>1;
+            if ( ( ( z.high and roundBitsMask ) or a.low ) = 0 ) begin
+                z.high &= ~ lastBitMask;
+            end;
+        end;
+        else if ( roundingMode <> float_round_to_zero ) begin
+            if (   extractFloat128Sign( z )
+                 xor ( roundingMode = float_round_up ) ) begin
+                z.high or= ( a.low <> 0 );
+                z.high += roundBitsMask;
+            end;
+        end;
+        z.high &= ~ roundBitsMask;
+    end;
+    if ( ( z.low <> a.low ) or ( z.high <> a.high ) ) begin
+        float_exception_flags or= float_flag_inexact;
+    end;
+    result := z;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function addFloat128Sigs( float128 a, float128 b, flag zSign ): float128;
+begin
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int32 expDiff;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    bSig1 := extractFloat128Frac1( b );
+    bSig0 := extractFloat128Frac0( b );
+    bExp := extractFloat128Exp( b );
+    expDiff := aExp - bExp;
+    if ( 0 < expDiff ) begin
+        if ( aExp = $7FFF ) begin
+            if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
+            result := a;
+        end;
+        if ( bExp = 0 ) begin
+            --expDiff;
+        end;
+        else begin
+            bSig0 or= LIT64( $0001000000000000 );
+        end;
+        shift128ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+        zExp := aExp;
+    end;
+    else if ( expDiff < 0 ) begin
+        if ( bExp = $7FFF ) begin
+            if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+            result := packFloat128( zSign, $7FFF, 0, 0 );
+        end;
+        if ( aExp = 0 ) begin
+            ++expDiff;
+        end;
+        else begin
+            aSig0 or= LIT64( $0001000000000000 );
+        end;
+        shift128ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+        zExp := bExp;
+    end;
+    else begin
+        if ( aExp = $7FFF ) begin
+            if ( aSig0 or aSig1 or bSig0 or bSig1 ) begin
+                result := propagateFloat128NaN( a, b );
+            end;
+            result := a;
+        end;
+        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+        if ( aExp = 0 ) result := packFloat128( zSign, 0, zSig0, zSig1 );
+        zSig2 := 0;
+        zSig0 or= LIT64( $0002000000000000 );
+        zExp := aExp;
+        goto shiftRight1;
+    end;
+    aSig0 or= LIT64( $0001000000000000 );
+    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    --zExp;
+    if ( zSig0 < LIT64( $0002000000000000 ) ) goto roundAndPack;
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(
+        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+    result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function subFloat128Sigs( float128 a, float128 b, flag zSign ): float128;
+begin
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int32 expDiff;
+    float128 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    bSig1 := extractFloat128Frac1( b );
+    bSig0 := extractFloat128Frac0( b );
+    bExp := extractFloat128Exp( b );
+    expDiff := aExp - bExp;
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 or bSig0 or bSig1 ) begin
+            result := propagateFloat128NaN( a, b );
+        end;
+        float_raise( float_flag_invalid );
+        z.low := float128_default_nan_low;
+        z.high := float128_default_nan_high;
+        result := z;
+    end;
+    if ( aExp = 0 ) begin
+        aExp := 1;
+        bExp := 1;
+    end;
+    if ( bSig0 < aSig0 ) goto aBigger;
+    if ( aSig0 < bSig0 ) goto bBigger;
+    if ( bSig1 < aSig1 ) goto aBigger;
+    if ( aSig1 < bSig1 ) goto bBigger;
+    result := packFloat128( float_rounding_mode = float_round_down, 0, 0, 0 );
+ bExpBigger:
+    if ( bExp = $7FFF ) begin
+        if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+        result := packFloat128( zSign xor 1, $7FFF, 0, 0 );
+    end;
+    if ( aExp = 0 ) begin
+        ++expDiff;
+    end;
+    else begin
+        aSig0 or= LIT64( $4000000000000000 );
+    end;
+    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+    bSig0 or= LIT64( $4000000000000000 );
+ bBigger:
+    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zExp := bExp;
+    zSign  xor = 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
+        result := a;
+    end;
+    if ( bExp = 0 ) begin
+        --expDiff;
+    end;
+    else begin
+        bSig0 or= LIT64( $4000000000000000 );
+    end;
+    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+    aSig0 or= LIT64( $4000000000000000 );
+ aBigger:
+    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    zExp := aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    result := normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_add(a: float128; b: float128): float128;
+begin
+    flag aSign, bSign;
+
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign = bSign ) begin
+        result := addFloat128Sigs( a, b, aSign );
+    end;
+    else begin
+        result := subFloat128Sigs( a, b, aSign );
+    end;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_sub(a: float128; b: float128): float128;
+begin
+    flag aSign, bSign;
+
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign = bSign ) begin
+        result := subFloat128Sigs( a, b, aSign );
+    end;
+    else begin
+        result := addFloat128Sigs( a, b, aSign );
+    end;
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_mul(a: float128; b: float128): float128;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+    float128 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    bSig1 := extractFloat128Frac1( b );
+    bSig0 := extractFloat128Frac0( b );
+    bExp := extractFloat128Exp( b );
+    bSign := extractFloat128Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FFF ) begin
+        if (    ( aSig0 or aSig1 )
+             or ( ( bExp = $7FFF ) and ( bSig0 or bSig1 ) ) ) begin
+            result := propagateFloat128NaN( a, b );
+        end;
+        if ( ( bExp or bSig0 or bSig1 ) = 0 ) goto invalid;
+        result := packFloat128( zSign, $7FFF, 0, 0 );
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+        if ( ( aExp or aSig0 or aSig1 ) = 0 ) begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := float128_default_nan_low;
+            z.high := float128_default_nan_high;
+            result := z;
+        end;
+        result := packFloat128( zSign, $7FFF, 0, 0 );
+    end;
+    if ( aExp = 0 ) begin
+        if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    end;
+    if ( bExp = 0 ) begin
+        if ( ( bSig0 or bSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    end;
+    zExp := aExp + bExp - $4000;
+    aSig0 or= LIT64( $0001000000000000 );
+    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zSig2 or= ( zSig3 <> 0 );
+    if ( LIT64( $0002000000000000 ) <= zSig0 ) begin
+        shift128ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+        ++zExp;
+    end;
+    result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_div(a: float128; b: float128): float128;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float128 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    bSig1 := extractFloat128Frac1( b );
+    bSig0 := extractFloat128Frac0( b );
+    bExp := extractFloat128Exp( b );
+    bSign := extractFloat128Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
+        if ( bExp = $7FFF ) begin
+            if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+            goto invalid;
+        end;
+        result := packFloat128( zSign, $7FFF, 0, 0 );
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+        result := packFloat128( zSign, 0, 0, 0 );
+    end;
+    if ( bExp = 0 ) begin
+        if ( ( bSig0 or bSig1 ) = 0 ) begin
+            if ( ( aExp or aSig0 or aSig1 ) = 0 ) begin
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low := float128_default_nan_low;
+                z.high := float128_default_nan_high;
+                result := z;
+            end;
+            float_raise( float_flag_divbyzero );
+            result := packFloat128( zSign, $7FFF, 0, 0 );
+        end;
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    end;
+    if ( aExp = 0 ) begin
+        if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    end;
+    zExp := aExp - bExp + $3FFD;
+    shortShift128Left(
+        aSig0 or LIT64( $0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+    shortShift128Left(
+        bSig0 or LIT64( $0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) begin
+        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+        ++zExp;
+    end;
+    zSig0 := estimateDiv128To64( aSig0, aSig1, bSig0 );
+    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+    while ( (sbits64) rem0 < 0 ) begin
+        --zSig0;
+        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+    end;
+    zSig1 := estimateDiv128To64( rem1, rem2, bSig0 );
+    if ( ( zSig1 and $3FFF ) <= 4 ) begin
+        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) begin
+            --zSig1;
+            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+        end;
+        zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
+    end;
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+    result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_rem(a: float128; b: float128): float128;
+begin
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+    sbits64 sigMean0;
+    float128 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    bSig1 := extractFloat128Frac1( b );
+    bSig0 := extractFloat128Frac0( b );
+    bExp := extractFloat128Exp( b );
+    bSign := extractFloat128Sign( b );
+    if ( aExp = $7FFF ) begin
+        if (    ( aSig0 or aSig1 )
+             or ( ( bExp = $7FFF ) and ( bSig0 or bSig1 ) ) ) begin
+            result := propagateFloat128NaN( a, b );
+        end;
+        goto invalid;
+    end;
+    if ( bExp = $7FFF ) begin
+        if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
+        result := a;
+    end;
+    if ( bExp = 0 ) begin
+        if ( ( bSig0 or bSig1 ) = 0 ) begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := float128_default_nan_low;
+            z.high := float128_default_nan_high;
+            result := z;
+        end;
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    end;
+    if ( aExp = 0 ) begin
+        if ( ( aSig0 or aSig1 ) = 0 ) result := a;
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    end;
+    expDiff := aExp - bExp;
+    if ( expDiff < -1 ) result := a;
+    shortShift128Left(
+        aSig0 or LIT64( $0001000000000000 ),
+        aSig1,
+        15 - ( expDiff < 0 ),
+        &aSig0,
+        &aSig1
+    );
+    shortShift128Left(
+        bSig0 or LIT64( $0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    q := le128( bSig0, bSig1, aSig0, aSig1 );
+    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    expDiff -= 64;
+    while ( 0 < expDiff ) begin
+        q := estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q := ( 4 < q ) ? q - 4 : 0;
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+        expDiff -= 61;
+    end;
+    if ( -64 < expDiff ) begin
+        q := estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q := ( 4 < q ) ? q - 4 : 0;
+        q >>= - expDiff;
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+        expDiff += 52;
+        if ( expDiff < 0 ) begin
+            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+        end;
+        else begin
+            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+        end;
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+    end;
+    else begin
+        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+    end;
+    do begin
+        alternateASig0 := aSig0;
+        alternateASig1 := aSig1;
+        ++q;
+        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    end; while ( 0 <= (sbits64) aSig0 );
+    add128(
+        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
+    if (    ( sigMean0 < 0 )
+         or ( ( ( sigMean0 or sigMean1 ) = 0 ) and ( q and 1 ) ) ) begin
+        aSig0 := alternateASig0;
+        aSig1 := alternateASig1;
+    end;
+    zSign := ( (sbits64) aSig0 < 0 );
+    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+    result :=
+        normalizeRoundAndPackFloat128( aSign xor zSign, bExp - 4, aSig0, aSig1 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_sqrt(a: float128): float128;
+begin
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float128 z;
+
+    aSig1 := extractFloat128Frac1( a );
+    aSig0 := extractFloat128Frac0( a );
+    aExp := extractFloat128Exp( a );
+    aSign := extractFloat128Sign( a );
+    if ( aExp = $7FFF ) begin
+        if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, a );
+        if ( ! aSign ) result := a;
+        goto invalid;
+    end;
+    if ( aSign ) begin
+        if ( ( aExp or aSig0 or aSig1 ) = 0 ) result := a;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low := float128_default_nan_low;
+        z.high := float128_default_nan_high;
+        result := z;
+    end;
+    if ( aExp = 0 ) begin
+        if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( 0, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    end;
+    zExp := ( ( aExp - $3FFF )>>1 ) + $3FFE;
+    aSig0 or= LIT64( $0001000000000000 );
+    zSig0 := estimateSqrt32( aExp, aSig0>>17 );
+    shortShift128Left( aSig0, aSig1, 13 - ( aExp and 1 ), &aSig0, &aSig1 );
+    zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
+    doubleZSig0 := zSig0 shl 1;
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) begin
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128( rem0, rem1, zSig0>>63, doubleZSig0 or 1, &rem0, &rem1 );
+    end;
+    zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 and $1FFF ) <= 5 ) begin
+        if ( zSig1 = 0 ) zSig1 := 1;
+        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) begin
+            --zSig1;
+            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+            term3 or= 1;
+            term2 or= doubleZSig0;
+            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+        end;
+        zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
+    end;
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+    result := roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_eq(a: float128; b: float128): flag;
+begin
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        if (    float128_is_signaling_nan( a )
+             or float128_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    result :=
+           ( a.low = b.low )
+        and (    ( a.high = b.high )
+             or (    ( a.low = 0 )
+                  and ( (bits64) ( ( a.high or b.high ) shl 1 ) = 0 ) )
+           );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_le(a: float128; b: float128): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            or (    ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 = 0 );
+    end;
+    result :=
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_lt(a: float128; b: float128): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            and (    ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 <> 0 );
+    end;
+    result :=
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_eq_signaling(a: float128; b: float128): flag;
+begin
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        float_raise( float_flag_invalid );
+        result := 0;
+    end;
+    result :=
+           ( a.low = b.low )
+        and (    ( a.high = b.high )
+             or (    ( a.low = 0 )
+                  and ( (bits64) ( ( a.high or b.high ) shl 1 ) = 0 ) )
+           );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_le_quiet(a: float128; b: float128): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        if (    float128_is_signaling_nan( a )
+             or float128_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            or (    ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 = 0 );
+    end;
+    result :=
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+end;
+
+{*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*}
+
+function float128_lt_quiet(a: float128; b: float128): flag;
+begin
+    flag aSign, bSign;
+
+    if (    (    ( extractFloat128Exp( a ) = $7FFF )
+              and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
+         or (    ( extractFloat128Exp( b ) = $7FFF )
+              and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
+       ) begin
+        if (    float128_is_signaling_nan( a )
+             or float128_is_signaling_nan( b ) ) begin
+            float_raise( float_flag_invalid );
+        end;
+        result := 0;
+    end;
+    aSign := extractFloat128Sign( a );
+    bSign := extractFloat128Sign( b );
+    if ( aSign <> bSign ) begin
+        result :=
+               aSign
+            and (    ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
+                 <> 0 );
+    end;
+    result :=
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+end;
+
+{$endif FPC_SOFTFLOAT_FLOAT128}
+
 {$endif not(defined(fpc_softfpu_interface))}
 
 {$if not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}