|
@@ -28,6 +28,39 @@ Derivative works are acceptable, even for commercial purposes, so long as
|
|
|
include prominent notice akin to these four paragraphs for those parts of
|
|
|
this code that are retained.
|
|
|
|
|
|
+===============================================================================
|
|
|
+The float80 and float128 part is translated from the softfloat package
|
|
|
+by Florian Klaempfl and contained the following copyright notice
|
|
|
+===============================================================================
|
|
|
+
|
|
|
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
|
|
|
+Package, Release 2b.
|
|
|
+
|
|
|
+Written by John R. Hauser. This work was made possible in part by the
|
|
|
+International Computer Science Institute, located at Suite 600, 1947 Center
|
|
|
+Street, Berkeley, California 94704. Funding was partially provided by the
|
|
|
+National Science Foundation under grant MIP-9311980. The original version
|
|
|
+of this code was written as part of a project to build a fixed-point vector
|
|
|
+processor in collaboration with the University of California at Berkeley,
|
|
|
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
|
|
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
|
|
+arithmetic/SoftFloat.html'.
|
|
|
+
|
|
|
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
|
|
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
|
|
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
|
|
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
|
|
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
|
|
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
|
|
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
|
|
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
|
|
+
|
|
|
+Derivative works are acceptable, even for commercial purposes, so long as
|
|
|
+(1) the source code for the derivative work includes prominent notice that
|
|
|
+the work is derivative, and (2) the source code includes prominent notice with
|
|
|
+these four paragraphs for those parts of this code that are retained.
|
|
|
+
|
|
|
+
|
|
|
===============================================================================
|
|
|
*}
|
|
|
|
|
@@ -4658,6 +4691,2121 @@ Begin
|
|
|
int64_to_float64:= float_result;
|
|
|
End;
|
|
|
|
|
|
+
|
|
|
+{$ifdef FPC_SOFTFLOAT_FLOATX80}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the 32-bit two's complement integer format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic---which means in particular that the conversion
|
|
|
+| is rounded according to the current rounding mode. If `a' is a NaN, the
|
|
|
+| largest positive integer is returned. Otherwise, if the conversion
|
|
|
+| overflows, the largest integer with the same sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_int32(a: floatx80): int32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
|
|
|
+ shiftCount := $4037 - aExp;
|
|
|
+ if ( shiftCount <= 0 ) shiftCount := 1;
|
|
|
+ shift64RightJamming( aSig, shiftCount, &aSig );
|
|
|
+ result := roundAndPackInt32( aSign, aSig );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the 32-bit two's complement integer format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic, except that the conversion is always rounded
|
|
|
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
|
|
|
+| Otherwise, if the conversion overflows, the largest integer with the same
|
|
|
+| sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_int32_round_to_zero(a: floatx80): int32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig, savedASig;
|
|
|
+ int32 z;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( $401E < aExp ) begin
|
|
|
+ if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ else if ( aExp < $3FFF ) begin
|
|
|
+ if ( aExp or aSig ) float_exception_flags or= float_flag_inexact;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ shiftCount := $403E - aExp;
|
|
|
+ savedASig := aSig;
|
|
|
+ aSig >>= shiftCount;
|
|
|
+ z := aSig;
|
|
|
+ if ( aSign ) z := - z;
|
|
|
+ if ( ( z < 0 ) xor aSign ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := aSign ? (sbits32) $80000000 : $7FFFFFFF;
|
|
|
+ end;
|
|
|
+ if ( ( aSig shl shiftCount ) <> savedASig ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the 64-bit two's complement integer format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic---which means in particular that the conversion
|
|
|
+| is rounded according to the current rounding mode. If `a' is a NaN,
|
|
|
+| the largest positive integer is returned. Otherwise, if the conversion
|
|
|
+| overflows, the largest integer with the same sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_int64(a: floatx80): int64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig, aSigExtra;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ shiftCount := $403E - aExp;
|
|
|
+ if ( shiftCount <= 0 ) begin
|
|
|
+ if ( shiftCount ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ if ( ! aSign
|
|
|
+ or ( ( aExp = $7FFF )
|
|
|
+ and ( aSig <> LIT64( $8000000000000000 ) ) )
|
|
|
+ ) begin
|
|
|
+ result := LIT64( $7FFFFFFFFFFFFFFF );
|
|
|
+ end;
|
|
|
+ result := (sbits64) LIT64( $8000000000000000 );
|
|
|
+ end;
|
|
|
+ aSigExtra := 0;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
|
|
|
+ end;
|
|
|
+ result := roundAndPackInt64( aSign, aSig, aSigExtra );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the 64-bit two's complement integer format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic, except that the conversion is always rounded
|
|
|
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
|
|
|
+| Otherwise, if the conversion overflows, the largest integer with the same
|
|
|
+| sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_int64_round_to_zero(a: floatx80): int64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig;
|
|
|
+ int64 z;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ shiftCount := aExp - $403E;
|
|
|
+ if ( 0 <= shiftCount ) begin
|
|
|
+ aSig &= LIT64( $7FFFFFFFFFFFFFFF );
|
|
|
+ if ( ( a.high <> $C03E ) or aSig ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ if ( ! aSign or ( ( aExp = $7FFF ) and aSig ) ) begin
|
|
|
+ result := LIT64( $7FFFFFFFFFFFFFFF );
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ result := (sbits64) LIT64( $8000000000000000 );
|
|
|
+ end;
|
|
|
+ else if ( aExp < $3FFF ) begin
|
|
|
+ if ( aExp or aSig ) float_exception_flags or= float_flag_inexact;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ z := aSig>>( - shiftCount );
|
|
|
+ if ( (bits64) ( aSig shl ( shiftCount and 63 ) ) ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ if ( aSign ) z := - z;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the single-precision floating-point format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_float32(a: floatx80): float32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 ) ) begin
|
|
|
+ result := commonNaNToFloat32( floatx80ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ result := packFloat32( aSign, $FF, 0 );
|
|
|
+ end;
|
|
|
+ shift64RightJamming( aSig, 33, &aSig );
|
|
|
+ if ( aExp or aSig ) aExp -= $3F81;
|
|
|
+ result := roundAndPackFloat32( aSign, aExp, aSig );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the double-precision floating-point format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_float64(a: floatx80): float64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig, zSig;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 ) ) begin
|
|
|
+ result := commonNaNToFloat64( floatx80ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ result := packFloat64( aSign, $7FF, 0 );
|
|
|
+ end;
|
|
|
+ shift64RightJamming( aSig, 1, &zSig );
|
|
|
+ if ( aExp or aSig ) aExp -= $3C01;
|
|
|
+ result := roundAndPackFloat64( aSign, aExp, zSig );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$ifdef FPC_SOFTFLOAT_FLOAT128}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the extended double-precision floating-
|
|
|
+| point value `a' to the quadruple-precision floating-point format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_to_float128(a: floatx80): float128;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int16 aExp;
|
|
|
+ bits64 aSig, zSig0, zSig1;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) begin
|
|
|
+ result := commonNaNToFloat128( floatx80ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ shift128Right( aSig shl 1, 0, 16, &zSig0, &zSig1 );
|
|
|
+ result := packFloat128( aSign, aExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$endif FPC_SOFTFLOAT_FLOAT128}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Rounds the extended double-precision floating-point value `a' to an integer,
|
|
|
+| and Returns the result as an extended quadruple-precision floating-point
|
|
|
+| value. The operation is performed according to the IEC/IEEE Standard for
|
|
|
+| Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_round_to_int(a: floatx80): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 lastBitMask, roundBitsMask;
|
|
|
+ int8 roundingMode;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ if ( $403E <= aExp ) begin
|
|
|
+ if ( ( aExp = $7FFF ) and (bits64) ( extractFloatx80Frac( a ) shl 1 ) ) begin
|
|
|
+ result := propagateFloatx80NaN( a, a );
|
|
|
+ end;
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( aExp < $3FFF ) begin
|
|
|
+ if ( ( aExp = 0 )
|
|
|
+ and ( (bits64) ( extractFloatx80Frac( a ) shl 1 ) = 0 ) ) begin
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ switch ( float_rounding_mode ) begin
|
|
|
+ case float_round_nearest_even:
|
|
|
+ if ( ( aExp = $3FFE ) and (bits64) ( extractFloatx80Frac( a ) shl 1 )
|
|
|
+ ) begin
|
|
|
+ result :=
|
|
|
+ packFloatx80( aSign, $3FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ break;
|
|
|
+ case float_round_down:
|
|
|
+ result :=
|
|
|
+ aSign ?
|
|
|
+ packFloatx80( 1, $3FFF, LIT64( $8000000000000000 ) )
|
|
|
+ : packFloatx80( 0, 0, 0 );
|
|
|
+ case float_round_up:
|
|
|
+ result :=
|
|
|
+ aSign ? packFloatx80( 1, 0, 0 )
|
|
|
+ : packFloatx80( 0, $3FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ result := packFloatx80( aSign, 0, 0 );
|
|
|
+ end;
|
|
|
+ lastBitMask := 1;
|
|
|
+ lastBitMask shl = $403E - aExp;
|
|
|
+ roundBitsMask := lastBitMask - 1;
|
|
|
+ z := a;
|
|
|
+ roundingMode := float_rounding_mode;
|
|
|
+ if ( roundingMode = float_round_nearest_even ) begin
|
|
|
+ z.low += lastBitMask>>1;
|
|
|
+ if ( ( z.low and roundBitsMask ) = 0 ) z.low &= ~ lastBitMask;
|
|
|
+ end;
|
|
|
+ else if ( roundingMode <> float_round_to_zero ) begin
|
|
|
+ if ( extractFloatx80Sign( z ) xor ( roundingMode = float_round_up ) ) begin
|
|
|
+ z.low += roundBitsMask;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ z.low &= ~ roundBitsMask;
|
|
|
+ if ( z.low = 0 ) begin
|
|
|
+ ++z.high;
|
|
|
+ z.low := LIT64( $8000000000000000 );
|
|
|
+ end;
|
|
|
+ if ( z.low <> a.low ) float_exception_flags or= float_flag_inexact;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of adding the absolute values of the extended double-
|
|
|
+| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
|
|
|
+| negated before being returned. `zSign' is ignored if the result is a NaN.
|
|
|
+| The addition is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function addFloatx80Sigs(a: floatx80; b: floatx80, flag zSign ): floatx80;
|
|
|
+begin
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig0, zSig1;
|
|
|
+ int32 expDiff;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ bSig := extractFloatx80Frac( b );
|
|
|
+ bExp := extractFloatx80Exp( b );
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ if ( 0 < expDiff ) begin
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) --expDiff;
|
|
|
+ shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
|
|
+ zExp := aExp;
|
|
|
+ end;
|
|
|
+ else if ( expDiff < 0 ) begin
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) ++expDiff;
|
|
|
+ shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
|
|
+ zExp := bExp;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
|
|
|
+ result := propagateFloatx80NaN( a, b );
|
|
|
+ end;
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ zSig1 := 0;
|
|
|
+ zSig0 := aSig + bSig;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
|
|
|
+ goto roundAndPack;
|
|
|
+ end;
|
|
|
+ zExp := aExp;
|
|
|
+ goto shiftRight1;
|
|
|
+ end;
|
|
|
+ zSig0 := aSig + bSig;
|
|
|
+ if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
|
|
|
+ shiftRight1:
|
|
|
+ shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
|
|
|
+ zSig0 or= LIT64( $8000000000000000 );
|
|
|
+ ++zExp;
|
|
|
+ roundAndPack:
|
|
|
+ result :=
|
|
|
+ roundAndPackFloatx80(
|
|
|
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of subtracting the absolute values of the extended
|
|
|
+| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
|
|
|
+| difference is negated before being returned. `zSign' is ignored if the
|
|
|
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function subFloatx80Sigs(a: floatx80; b: floatx80, flag zSign ): floatx80;
|
|
|
+begin
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig0, zSig1;
|
|
|
+ int32 expDiff;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ bSig := extractFloatx80Frac( b );
|
|
|
+ bExp := extractFloatx80Exp( b );
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ if ( 0 < expDiff ) goto aExpBigger;
|
|
|
+ if ( expDiff < 0 ) goto bExpBigger;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
|
|
|
+ result := propagateFloatx80NaN( a, b );
|
|
|
+ end;
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := floatx80_default_nan_low;
|
|
|
+ z.high := floatx80_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ aExp := 1;
|
|
|
+ bExp := 1;
|
|
|
+ end;
|
|
|
+ zSig1 := 0;
|
|
|
+ if ( bSig < aSig ) goto aBigger;
|
|
|
+ if ( aSig < bSig ) goto bBigger;
|
|
|
+ result := packFloatx80( float_rounding_mode = float_round_down, 0, 0 );
|
|
|
+ bExpBigger:
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := packFloatx80( zSign xor 1, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) ++expDiff;
|
|
|
+ shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
|
|
+ bBigger:
|
|
|
+ sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
|
|
|
+ zExp := bExp;
|
|
|
+ zSign xor = 1;
|
|
|
+ goto normalizeRoundAndPack;
|
|
|
+ aExpBigger:
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) --expDiff;
|
|
|
+ shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
|
|
+ aBigger:
|
|
|
+ sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
|
|
|
+ zExp := aExp;
|
|
|
+ normalizeRoundAndPack:
|
|
|
+ result :=
|
|
|
+ normalizeRoundAndPackFloatx80(
|
|
|
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of adding the extended double-precision floating-point
|
|
|
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_add(a: floatx80; b: floatx80): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign = bSign ) begin
|
|
|
+ result := addFloatx80Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ result := subFloatx80Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of subtracting the extended double-precision floating-
|
|
|
+| point values `a' and `b'. The operation is performed according to the
|
|
|
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_sub(a: floatx80; b: floatx80 ): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign = bSign ) begin
|
|
|
+ result := subFloatx80Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ result := addFloatx80Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of multiplying the extended double-precision floating-
|
|
|
+| point values `a' and `b'. The operation is performed according to the
|
|
|
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_mul(a: floatx80; b: floatx80): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig0, zSig1;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSig := extractFloatx80Frac( b );
|
|
|
+ bExp := extractFloatx80Exp( b );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ zSign := aSign xor bSign;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 )
|
|
|
+ or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
|
|
|
+ result := propagateFloatx80NaN( a, b );
|
|
|
+ end;
|
|
|
+ if ( ( bExp or bSig ) = 0 ) goto invalid;
|
|
|
+ result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ if ( ( aExp or aSig ) = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := floatx80_default_nan_low;
|
|
|
+ z.high := floatx80_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
|
|
|
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( bSig = 0 ) result := packFloatx80( zSign, 0, 0 );
|
|
|
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
|
|
|
+ end;
|
|
|
+ zExp := aExp + bExp - $3FFE;
|
|
|
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
|
|
|
+ if ( 0 < (sbits64) zSig0 ) begin
|
|
|
+ shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
|
|
|
+ --zExp;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ roundAndPackFloatx80(
|
|
|
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of dividing the extended double-precision floating-point
|
|
|
+| value `a' by the corresponding value `b'. The operation is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_div(a: floatx80; b: floatx80 ): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig0, zSig1;
|
|
|
+ bits64 rem0, rem1, rem2, term0, term1, term2;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aSig := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSig := extractFloatx80Frac( b );
|
|
|
+ bExp := extractFloatx80Exp( b );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ zSign := aSign xor bSign;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := packFloatx80( zSign, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( bSig = 0 ) begin
|
|
|
+ if ( ( aExp or aSig ) = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := floatx80_default_nan_low;
|
|
|
+ z.high := floatx80_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ float_raise( float_flag_divbyzero );
|
|
|
+ result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
|
|
|
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
|
|
|
+ end;
|
|
|
+ zExp := aExp - bExp + $3FFE;
|
|
|
+ rem1 := 0;
|
|
|
+ if ( bSig <= aSig ) begin
|
|
|
+ shift128Right( aSig, 0, 1, &aSig, &rem1 );
|
|
|
+ ++zExp;
|
|
|
+ end;
|
|
|
+ zSig0 := estimateDiv128To64( aSig, rem1, bSig );
|
|
|
+ mul64To128( bSig, zSig0, &term0, &term1 );
|
|
|
+ sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
|
|
|
+ while ( (sbits64) rem0 < 0 ) begin
|
|
|
+ --zSig0;
|
|
|
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
|
|
|
+ end;
|
|
|
+ zSig1 := estimateDiv128To64( rem1, 0, bSig );
|
|
|
+ if ( (bits64) ( zSig1 shl 1 ) <= 8 ) begin
|
|
|
+ mul64To128( bSig, zSig1, &term1, &term2 );
|
|
|
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
|
|
|
+ while ( (sbits64) rem1 < 0 ) begin
|
|
|
+ --zSig1;
|
|
|
+ add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
|
|
|
+ end;
|
|
|
+ zSig1 or= ( ( rem1 or rem2 ) <> 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ roundAndPackFloatx80(
|
|
|
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the remainder of the extended double-precision floating-point value
|
|
|
+| `a' with respect to the corresponding value `b'. The operation is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_rem(a: floatx80; b: floatx80 ): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, expDiff;
|
|
|
+ bits64 aSig0, aSig1, bSig;
|
|
|
+ bits64 q, term0, term1, alternateASig0, alternateASig1;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aSig0 := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSig := extractFloatx80Frac( b );
|
|
|
+ bExp := extractFloatx80Exp( b );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig0 shl 1 )
|
|
|
+ or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
|
|
|
+ result := propagateFloatx80NaN( a, b );
|
|
|
+ end;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( bSig = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := floatx80_default_nan_low;
|
|
|
+ z.high := floatx80_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( (bits64) ( aSig0 shl 1 ) = 0 ) result := a;
|
|
|
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
|
|
|
+ end;
|
|
|
+ bSig or= LIT64( $8000000000000000 );
|
|
|
+ zSign := aSign;
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ aSig1 := 0;
|
|
|
+ if ( expDiff < 0 ) begin
|
|
|
+ if ( expDiff < -1 ) result := a;
|
|
|
+ shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
|
|
|
+ expDiff := 0;
|
|
|
+ end;
|
|
|
+ q := ( bSig <= aSig0 );
|
|
|
+ if ( q ) aSig0 -= bSig;
|
|
|
+ expDiff -= 64;
|
|
|
+ while ( 0 < expDiff ) begin
|
|
|
+ q := estimateDiv128To64( aSig0, aSig1, bSig );
|
|
|
+ q := ( 2 < q ) ? q - 2 : 0;
|
|
|
+ mul64To128( bSig, q, &term0, &term1 );
|
|
|
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
|
|
|
+ shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
|
|
|
+ expDiff -= 62;
|
|
|
+ end;
|
|
|
+ expDiff += 64;
|
|
|
+ if ( 0 < expDiff ) begin
|
|
|
+ q := estimateDiv128To64( aSig0, aSig1, bSig );
|
|
|
+ q := ( 2 < q ) ? q - 2 : 0;
|
|
|
+ q >>= 64 - expDiff;
|
|
|
+ mul64To128( bSig, q shl ( 64 - expDiff ), &term0, &term1 );
|
|
|
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
|
|
|
+ shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
|
|
|
+ while ( le128( term0, term1, aSig0, aSig1 ) ) begin
|
|
|
+ ++q;
|
|
|
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ term1 := 0;
|
|
|
+ term0 := bSig;
|
|
|
+ end;
|
|
|
+ sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
|
|
|
+ if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
|
|
|
+ or ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
|
|
|
+ and ( q and 1 ) )
|
|
|
+ ) begin
|
|
|
+ aSig0 := alternateASig0;
|
|
|
+ aSig1 := alternateASig1;
|
|
|
+ zSign := ! zSign;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ normalizeRoundAndPackFloatx80(
|
|
|
+ 80, zSign, bExp + expDiff, aSig0, aSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the square root of the extended double-precision floating-point
|
|
|
+| value `a'. The operation is performed according to the IEC/IEEE Standard
|
|
|
+| for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_sqrt(a: floatx80): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
|
|
|
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
|
|
|
+ floatx80 z;
|
|
|
+
|
|
|
+ aSig0 := extractFloatx80Frac( a );
|
|
|
+ aExp := extractFloatx80Exp( a );
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( (bits64) ( aSig0 shl 1 ) ) result := propagateFloatx80NaN( a, a );
|
|
|
+ if ( ! aSign ) result := a;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ if ( aSign ) begin
|
|
|
+ if ( ( aExp or aSig0 ) = 0 ) result := a;
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := floatx80_default_nan_low;
|
|
|
+ z.high := floatx80_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( aSig0 = 0 ) result := packFloatx80( 0, 0, 0 );
|
|
|
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
|
|
|
+ end;
|
|
|
+ zExp := ( ( aExp - $3FFF )>>1 ) + $3FFF;
|
|
|
+ zSig0 := estimateSqrt32( aExp, aSig0>>32 );
|
|
|
+ shift128Right( aSig0, 0, 2 + ( aExp and 1 ), &aSig0, &aSig1 );
|
|
|
+ zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
|
|
|
+ doubleZSig0 := zSig0 shl 1;
|
|
|
+ mul64To128( zSig0, zSig0, &term0, &term1 );
|
|
|
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
|
|
|
+ while ( (sbits64) rem0 < 0 ) begin
|
|
|
+ --zSig0;
|
|
|
+ doubleZSig0 -= 2;
|
|
|
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 or 1, &rem0, &rem1 );
|
|
|
+ end;
|
|
|
+ zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
|
|
|
+ if ( ( zSig1 and LIT64( $3FFFFFFFFFFFFFFF ) ) <= 5 ) begin
|
|
|
+ if ( zSig1 = 0 ) zSig1 := 1;
|
|
|
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
|
|
|
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
|
|
|
+ mul64To128( zSig1, zSig1, &term2, &term3 );
|
|
|
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
|
|
|
+ while ( (sbits64) rem1 < 0 ) begin
|
|
|
+ --zSig1;
|
|
|
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
|
|
|
+ term3 or= 1;
|
|
|
+ term2 or= doubleZSig0;
|
|
|
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
|
|
|
+ end;
|
|
|
+ zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
|
|
|
+ end;
|
|
|
+ shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
|
|
|
+ zSig0 or= doubleZSig0;
|
|
|
+ result :=
|
|
|
+ roundAndPackFloatx80(
|
|
|
+ floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is
|
|
|
+| equal to the corresponding value `b', and 0 otherwise. The comparison is
|
|
|
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_eq(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ if ( floatx80_is_signaling_nan( a )
|
|
|
+ or floatx80_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ ( a.low = b.low )
|
|
|
+ and ( ( a.high = b.high )
|
|
|
+ or ( ( a.low = 0 )
|
|
|
+ and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
|
|
|
+ );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is
|
|
|
+| less than or equal to the corresponding value `b', and 0 otherwise. The
|
|
|
+| comparison is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_le(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ or ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ = 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? le128( b.high, b.low, a.high, a.low )
|
|
|
+ : le128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is
|
|
|
+| less than the corresponding value `b', and 0 otherwise. The comparison
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_lt(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ and ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ <> 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? lt128( b.high, b.low, a.high, a.low )
|
|
|
+ : lt128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is equal
|
|
|
+| to the corresponding value `b', and 0 otherwise. The invalid exception is
|
|
|
+| raised if either operand is a NaN. Otherwise, the comparison is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_eq_signaling(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ ( a.low = b.low )
|
|
|
+ and ( ( a.high = b.high )
|
|
|
+ or ( ( a.low = 0 )
|
|
|
+ and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
|
|
|
+ );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is less
|
|
|
+| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
|
|
|
+| do not cause an exception. Otherwise, the comparison is performed according
|
|
|
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_le_quiet(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ if ( floatx80_is_signaling_nan( a )
|
|
|
+ or floatx80_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ or ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ = 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? le128( b.high, b.low, a.high, a.low )
|
|
|
+ : le128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the extended double-precision floating-point value `a' is less
|
|
|
+| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
|
|
|
+| an exception. Otherwise, the comparison is performed according to the
|
|
|
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function floatx80_lt_quiet(a: floatx80; b: floatx80 ): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloatx80Exp( a ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
|
|
|
+ or ( ( extractFloatx80Exp( b ) = $7FFF )
|
|
|
+ and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
|
|
|
+ ) begin
|
|
|
+ if ( floatx80_is_signaling_nan( a )
|
|
|
+ or floatx80_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloatx80Sign( a );
|
|
|
+ bSign := extractFloatx80Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ and ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ <> 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? lt128( b.high, b.low, a.high, a.low )
|
|
|
+ : lt128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$endif FPC_SOFTFLOAT_FLOATX80}
|
|
|
+
|
|
|
+
|
|
|
+{$ifdef FPC_SOFTFLOAT_FLOAT128}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the 32-bit two's complement integer format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic---which means in particular that the conversion is rounded
|
|
|
+| according to the current rounding mode. If `a' is a NaN, the largest
|
|
|
+| positive integer is returned. Otherwise, if the conversion overflows, the
|
|
|
+| largest integer with the same sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_int32(a: float128): int32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( ( aExp = $7FFF ) and ( aSig0 or aSig1 ) ) aSign := 0;
|
|
|
+ if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ aSig0 or= ( aSig1 <> 0 );
|
|
|
+ shiftCount := $4028 - aExp;
|
|
|
+ if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
|
|
|
+ result := roundAndPackInt32( aSign, aSig0 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the 32-bit two's complement integer format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic, except that the conversion is always rounded toward zero. If
|
|
|
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
|
|
+| conversion overflows, the largest integer with the same sign as `a' is
|
|
|
+| returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_int32_round_to_zero(a: float128): int32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig0, aSig1, savedASig;
|
|
|
+ int32 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ aSig0 or= ( aSig1 <> 0 );
|
|
|
+ if ( $401E < aExp ) begin
|
|
|
+ if ( ( aExp = $7FFF ) and aSig0 ) aSign := 0;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ else if ( aExp < $3FFF ) begin
|
|
|
+ if ( aExp or aSig0 ) float_exception_flags or= float_flag_inexact;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ shiftCount := $402F - aExp;
|
|
|
+ savedASig := aSig0;
|
|
|
+ aSig0 >>= shiftCount;
|
|
|
+ z := aSig0;
|
|
|
+ if ( aSign ) z := - z;
|
|
|
+ if ( ( z < 0 ) xor aSign ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := aSign ? (sbits32) $80000000 : $7FFFFFFF;
|
|
|
+ end;
|
|
|
+ if ( ( aSig0 shl shiftCount ) <> savedASig ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the 64-bit two's complement integer format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic---which means in particular that the conversion is rounded
|
|
|
+| according to the current rounding mode. If `a' is a NaN, the largest
|
|
|
+| positive integer is returned. Otherwise, if the conversion overflows, the
|
|
|
+| largest integer with the same sign as `a' is returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_int64(a: float128): int64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ shiftCount := $402F - aExp;
|
|
|
+ if ( shiftCount <= 0 ) begin
|
|
|
+ if ( $403E < aExp ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ if ( ! aSign
|
|
|
+ or ( ( aExp = $7FFF )
|
|
|
+ and ( aSig1 or ( aSig0 <> LIT64( $0001000000000000 ) ) )
|
|
|
+ )
|
|
|
+ ) begin
|
|
|
+ result := LIT64( $7FFFFFFFFFFFFFFF );
|
|
|
+ end;
|
|
|
+ result := (sbits64) LIT64( $8000000000000000 );
|
|
|
+ end;
|
|
|
+ shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ result := roundAndPackInt64( aSign, aSig0, aSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the 64-bit two's complement integer format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic, except that the conversion is always rounded toward zero.
|
|
|
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
|
|
|
+| the conversion overflows, the largest integer with the same sign as `a' is
|
|
|
+| returned.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_int64_round_to_zero(a: float128): int64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+ int64 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp ) aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ shiftCount := aExp - $402F;
|
|
|
+ if ( 0 < shiftCount ) begin
|
|
|
+ if ( $403E <= aExp ) begin
|
|
|
+ aSig0 &= LIT64( $0000FFFFFFFFFFFF );
|
|
|
+ if ( ( a.high = LIT64( $C03E000000000000 ) )
|
|
|
+ and ( aSig1 < LIT64( $0002000000000000 ) ) ) begin
|
|
|
+ if ( aSig1 ) float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ if ( ! aSign or ( ( aExp = $7FFF ) and ( aSig0 or aSig1 ) ) ) begin
|
|
|
+ result := LIT64( $7FFFFFFFFFFFFFFF );
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ result := (sbits64) LIT64( $8000000000000000 );
|
|
|
+ end;
|
|
|
+ z := ( aSig0 shl shiftCount ) or ( aSig1>>( ( - shiftCount ) and 63 ) );
|
|
|
+ if ( (bits64) ( aSig1 shl shiftCount ) ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ if ( aExp < $3FFF ) begin
|
|
|
+ if ( aExp or aSig0 or aSig1 ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ z := aSig0>>( - shiftCount );
|
|
|
+ if ( aSig1
|
|
|
+ or ( shiftCount and (bits64) ( aSig0 shl ( shiftCount and 63 ) ) ) ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ if ( aSign ) z := - z;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the single-precision floating-point format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_float32(a: float128): float32;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+ bits32 zSig;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) begin
|
|
|
+ result := commonNaNToFloat32( float128ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ result := packFloat32( aSign, $FF, 0 );
|
|
|
+ end;
|
|
|
+ aSig0 or= ( aSig1 <> 0 );
|
|
|
+ shift64RightJamming( aSig0, 18, &aSig0 );
|
|
|
+ zSig := aSig0;
|
|
|
+ if ( aExp or zSig ) begin
|
|
|
+ zSig or= $40000000;
|
|
|
+ aExp -= $3F81;
|
|
|
+ end;
|
|
|
+ result := roundAndPackFloat32( aSign, aExp, zSig );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the double-precision floating-point format. The conversion
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_float64(a: float128): float64;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) begin
|
|
|
+ result := commonNaNToFloat64( float128ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ result := packFloat64( aSign, $7FF, 0 );
|
|
|
+ end;
|
|
|
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
|
|
|
+ aSig0 or= ( aSig1 <> 0 );
|
|
|
+ if ( aExp or aSig0 ) begin
|
|
|
+ aSig0 or= LIT64( $4000000000000000 );
|
|
|
+ aExp -= $3C01;
|
|
|
+ end;
|
|
|
+ result := roundAndPackFloat64( aSign, aExp, aSig0 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$ifdef FPC_SOFTFLOAT_FLOATX80}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of converting the quadruple-precision floating-point
|
|
|
+| value `a' to the extended double-precision floating-point format. The
|
|
|
+| conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_to_floatx80(a: float128): floatx80;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig0, aSig1;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) begin
|
|
|
+ result := commonNaNToFloatx80( float128ToCommonNaN( a ) );
|
|
|
+ end;
|
|
|
+ result := packFloatx80( aSign, $7FFF, LIT64( $8000000000000000 ) );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( ( aSig0 or aSig1 ) = 0 ) result := packFloatx80( aSign, 0, 0 );
|
|
|
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ end;
|
|
|
+ shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
|
|
|
+ result := roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$endif FPC_SOFTFLOAT_FLOATX80}
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
|
|
|
+| Returns the result as a quadruple-precision floating-point value. The
|
|
|
+| operation is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_round_to_int(a: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 lastBitMask, roundBitsMask;
|
|
|
+ int8 roundingMode;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ if ( $402F <= aExp ) begin
|
|
|
+ if ( $406F <= aExp ) begin
|
|
|
+ if ( ( aExp = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) )
|
|
|
+ ) begin
|
|
|
+ result := propagateFloat128NaN( a, a );
|
|
|
+ end;
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ lastBitMask := 1;
|
|
|
+ lastBitMask := ( lastBitMask shl ( $406E - aExp ) ) shl 1;
|
|
|
+ roundBitsMask := lastBitMask - 1;
|
|
|
+ z := a;
|
|
|
+ roundingMode := float_rounding_mode;
|
|
|
+ if ( roundingMode = float_round_nearest_even ) begin
|
|
|
+ if ( lastBitMask ) begin
|
|
|
+ add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
|
|
|
+ if ( ( z.low and roundBitsMask ) = 0 ) z.low &= ~ lastBitMask;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ if ( (sbits64) z.low < 0 ) begin
|
|
|
+ ++z.high;
|
|
|
+ if ( (bits64) ( z.low shl 1 ) = 0 ) z.high &= ~1;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else if ( roundingMode <> float_round_to_zero ) begin
|
|
|
+ if ( extractFloat128Sign( z )
|
|
|
+ xor ( roundingMode = float_round_up ) ) begin
|
|
|
+ add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ z.low &= ~ roundBitsMask;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ if ( aExp < $3FFF ) begin
|
|
|
+ if ( ( ( (bits64) ( a.high shl 1 ) ) or a.low ) = 0 ) result := a;
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ switch ( float_rounding_mode ) begin
|
|
|
+ case float_round_nearest_even:
|
|
|
+ if ( ( aExp = $3FFE )
|
|
|
+ and ( extractFloat128Frac0( a )
|
|
|
+ or extractFloat128Frac1( a ) )
|
|
|
+ ) begin
|
|
|
+ result := packFloat128( aSign, $3FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ break;
|
|
|
+ case float_round_down:
|
|
|
+ result :=
|
|
|
+ aSign ? packFloat128( 1, $3FFF, 0, 0 )
|
|
|
+ : packFloat128( 0, 0, 0, 0 );
|
|
|
+ case float_round_up:
|
|
|
+ result :=
|
|
|
+ aSign ? packFloat128( 1, 0, 0, 0 )
|
|
|
+ : packFloat128( 0, $3FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ result := packFloat128( aSign, 0, 0, 0 );
|
|
|
+ end;
|
|
|
+ lastBitMask := 1;
|
|
|
+ lastBitMask shl = $402F - aExp;
|
|
|
+ roundBitsMask := lastBitMask - 1;
|
|
|
+ z.low := 0;
|
|
|
+ z.high := a.high;
|
|
|
+ roundingMode := float_rounding_mode;
|
|
|
+ if ( roundingMode = float_round_nearest_even ) begin
|
|
|
+ z.high += lastBitMask>>1;
|
|
|
+ if ( ( ( z.high and roundBitsMask ) or a.low ) = 0 ) begin
|
|
|
+ z.high &= ~ lastBitMask;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else if ( roundingMode <> float_round_to_zero ) begin
|
|
|
+ if ( extractFloat128Sign( z )
|
|
|
+ xor ( roundingMode = float_round_up ) ) begin
|
|
|
+ z.high or= ( a.low <> 0 );
|
|
|
+ z.high += roundBitsMask;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ z.high &= ~ roundBitsMask;
|
|
|
+ end;
|
|
|
+ if ( ( z.low <> a.low ) or ( z.high <> a.high ) ) begin
|
|
|
+ float_exception_flags or= float_flag_inexact;
|
|
|
+ end;
|
|
|
+ result := z;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of adding the absolute values of the quadruple-precision
|
|
|
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
|
|
|
+| before being returned. `zSign' is ignored if the result is a NaN.
|
|
|
+| The addition is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function addFloat128Sigs( float128 a, float128 b, flag zSign ): float128;
|
|
|
+begin
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
|
|
|
+ int32 expDiff;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ bSig1 := extractFloat128Frac1( b );
|
|
|
+ bSig0 := extractFloat128Frac0( b );
|
|
|
+ bExp := extractFloat128Exp( b );
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ if ( 0 < expDiff ) begin
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ --expDiff;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ bSig0 or= LIT64( $0001000000000000 );
|
|
|
+ end;
|
|
|
+ shift128ExtraRightJamming(
|
|
|
+ bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
|
|
|
+ zExp := aExp;
|
|
|
+ end;
|
|
|
+ else if ( expDiff < 0 ) begin
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := packFloat128( zSign, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ ++expDiff;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ end;
|
|
|
+ shift128ExtraRightJamming(
|
|
|
+ aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
|
|
|
+ zExp := bExp;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 or bSig0 or bSig1 ) begin
|
|
|
+ result := propagateFloat128NaN( a, b );
|
|
|
+ end;
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
|
+ if ( aExp = 0 ) result := packFloat128( zSign, 0, zSig0, zSig1 );
|
|
|
+ zSig2 := 0;
|
|
|
+ zSig0 or= LIT64( $0002000000000000 );
|
|
|
+ zExp := aExp;
|
|
|
+ goto shiftRight1;
|
|
|
+ end;
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
|
+ --zExp;
|
|
|
+ if ( zSig0 < LIT64( $0002000000000000 ) ) goto roundAndPack;
|
|
|
+ ++zExp;
|
|
|
+ shiftRight1:
|
|
|
+ shift128ExtraRightJamming(
|
|
|
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
|
|
|
+ roundAndPack:
|
|
|
+ result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of subtracting the absolute values of the quadruple-
|
|
|
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
|
|
|
+| difference is negated before being returned. `zSign' is ignored if the
|
|
|
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function subFloat128Sigs( float128 a, float128 b, flag zSign ): float128;
|
|
|
+begin
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
|
|
|
+ int32 expDiff;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ bSig1 := extractFloat128Frac1( b );
|
|
|
+ bSig0 := extractFloat128Frac0( b );
|
|
|
+ bExp := extractFloat128Exp( b );
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
|
|
|
+ shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
|
|
|
+ if ( 0 < expDiff ) goto aExpBigger;
|
|
|
+ if ( expDiff < 0 ) goto bExpBigger;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 or bSig0 or bSig1 ) begin
|
|
|
+ result := propagateFloat128NaN( a, b );
|
|
|
+ end;
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := float128_default_nan_low;
|
|
|
+ z.high := float128_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ aExp := 1;
|
|
|
+ bExp := 1;
|
|
|
+ end;
|
|
|
+ if ( bSig0 < aSig0 ) goto aBigger;
|
|
|
+ if ( aSig0 < bSig0 ) goto bBigger;
|
|
|
+ if ( bSig1 < aSig1 ) goto aBigger;
|
|
|
+ if ( aSig1 < bSig1 ) goto bBigger;
|
|
|
+ result := packFloat128( float_rounding_mode = float_round_down, 0, 0, 0 );
|
|
|
+ bExpBigger:
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := packFloat128( zSign xor 1, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ ++expDiff;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ aSig0 or= LIT64( $4000000000000000 );
|
|
|
+ end;
|
|
|
+ shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
|
|
|
+ bSig0 or= LIT64( $4000000000000000 );
|
|
|
+ bBigger:
|
|
|
+ sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
|
|
|
+ zExp := bExp;
|
|
|
+ zSign xor = 1;
|
|
|
+ goto normalizeRoundAndPack;
|
|
|
+ aExpBigger:
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ --expDiff;
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ bSig0 or= LIT64( $4000000000000000 );
|
|
|
+ end;
|
|
|
+ shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
|
|
|
+ aSig0 or= LIT64( $4000000000000000 );
|
|
|
+ aBigger:
|
|
|
+ sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
|
+ zExp := aExp;
|
|
|
+ normalizeRoundAndPack:
|
|
|
+ --zExp;
|
|
|
+ result := normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of adding the quadruple-precision floating-point values
|
|
|
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
|
|
+| for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_add(a: float128; b: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign = bSign ) begin
|
|
|
+ result := addFloat128Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ result := subFloat128Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of subtracting the quadruple-precision floating-point
|
|
|
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_sub(a: float128; b: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign = bSign ) begin
|
|
|
+ result := subFloat128Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ result := addFloat128Sigs( a, b, aSign );
|
|
|
+ end;
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of multiplying the quadruple-precision floating-point
|
|
|
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_mul(a: float128; b: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSig1 := extractFloat128Frac1( b );
|
|
|
+ bSig0 := extractFloat128Frac0( b );
|
|
|
+ bExp := extractFloat128Exp( b );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ zSign := aSign xor bSign;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( ( aSig0 or aSig1 )
|
|
|
+ or ( ( bExp = $7FFF ) and ( bSig0 or bSig1 ) ) ) begin
|
|
|
+ result := propagateFloat128NaN( a, b );
|
|
|
+ end;
|
|
|
+ if ( ( bExp or bSig0 or bSig1 ) = 0 ) goto invalid;
|
|
|
+ result := packFloat128( zSign, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ if ( ( aExp or aSig0 or aSig1 ) = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := float128_default_nan_low;
|
|
|
+ z.high := float128_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ result := packFloat128( zSign, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
|
|
|
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( ( bSig0 or bSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
|
|
|
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
|
|
|
+ end;
|
|
|
+ zExp := aExp + bExp - $4000;
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
|
|
|
+ mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
|
|
|
+ add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
|
|
|
+ zSig2 or= ( zSig3 <> 0 );
|
|
|
+ if ( LIT64( $0002000000000000 ) <= zSig0 ) begin
|
|
|
+ shift128ExtraRightJamming(
|
|
|
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
|
|
|
+ ++zExp;
|
|
|
+ end;
|
|
|
+ result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the result of dividing the quadruple-precision floating-point value
|
|
|
+| `a' by the corresponding value `b'. The operation is performed according to
|
|
|
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_div(a: float128; b: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
|
|
|
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSig1 := extractFloat128Frac1( b );
|
|
|
+ bSig0 := extractFloat128Frac0( b );
|
|
|
+ bExp := extractFloat128Exp( b );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ zSign := aSign xor bSign;
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ result := packFloat128( zSign, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := packFloat128( zSign, 0, 0, 0 );
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( ( bSig0 or bSig1 ) = 0 ) begin
|
|
|
+ if ( ( aExp or aSig0 or aSig1 ) = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := float128_default_nan_low;
|
|
|
+ z.high := float128_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ float_raise( float_flag_divbyzero );
|
|
|
+ result := packFloat128( zSign, $7FFF, 0, 0 );
|
|
|
+ end;
|
|
|
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( zSign, 0, 0, 0 );
|
|
|
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ zExp := aExp - bExp + $3FFD;
|
|
|
+ shortShift128Left(
|
|
|
+ aSig0 or LIT64( $0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
|
|
|
+ shortShift128Left(
|
|
|
+ bSig0 or LIT64( $0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
|
|
|
+ if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) begin
|
|
|
+ shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
|
|
|
+ ++zExp;
|
|
|
+ end;
|
|
|
+ zSig0 := estimateDiv128To64( aSig0, aSig1, bSig0 );
|
|
|
+ mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
|
|
|
+ sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
|
|
|
+ while ( (sbits64) rem0 < 0 ) begin
|
|
|
+ --zSig0;
|
|
|
+ add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
|
|
|
+ end;
|
|
|
+ zSig1 := estimateDiv128To64( rem1, rem2, bSig0 );
|
|
|
+ if ( ( zSig1 and $3FFF ) <= 4 ) begin
|
|
|
+ mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
|
|
|
+ sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
|
|
|
+ while ( (sbits64) rem1 < 0 ) begin
|
|
|
+ --zSig1;
|
|
|
+ add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
|
|
|
+ end;
|
|
|
+ zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
|
|
|
+ end;
|
|
|
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
|
|
|
+ result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the remainder of the quadruple-precision floating-point value `a'
|
|
|
+| with respect to the corresponding value `b'. The operation is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_rem(a: float128; b: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int32 aExp, bExp, expDiff;
|
|
|
+ bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
|
|
|
+ bits64 allZero, alternateASig0, alternateASig1, sigMean1;
|
|
|
+ sbits64 sigMean0;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSig1 := extractFloat128Frac1( b );
|
|
|
+ bSig0 := extractFloat128Frac0( b );
|
|
|
+ bExp := extractFloat128Exp( b );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( ( aSig0 or aSig1 )
|
|
|
+ or ( ( bExp = $7FFF ) and ( bSig0 or bSig1 ) ) ) begin
|
|
|
+ result := propagateFloat128NaN( a, b );
|
|
|
+ end;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ if ( bExp = $7FFF ) begin
|
|
|
+ if ( bSig0 or bSig1 ) result := propagateFloat128NaN( a, b );
|
|
|
+ result := a;
|
|
|
+ end;
|
|
|
+ if ( bExp = 0 ) begin
|
|
|
+ if ( ( bSig0 or bSig1 ) = 0 ) begin
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := float128_default_nan_low;
|
|
|
+ z.high := float128_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( ( aSig0 or aSig1 ) = 0 ) result := a;
|
|
|
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ expDiff := aExp - bExp;
|
|
|
+ if ( expDiff < -1 ) result := a;
|
|
|
+ shortShift128Left(
|
|
|
+ aSig0 or LIT64( $0001000000000000 ),
|
|
|
+ aSig1,
|
|
|
+ 15 - ( expDiff < 0 ),
|
|
|
+ &aSig0,
|
|
|
+ &aSig1
|
|
|
+ );
|
|
|
+ shortShift128Left(
|
|
|
+ bSig0 or LIT64( $0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
|
|
|
+ q := le128( bSig0, bSig1, aSig0, aSig1 );
|
|
|
+ if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
|
|
|
+ expDiff -= 64;
|
|
|
+ while ( 0 < expDiff ) begin
|
|
|
+ q := estimateDiv128To64( aSig0, aSig1, bSig0 );
|
|
|
+ q := ( 4 < q ) ? q - 4 : 0;
|
|
|
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
|
|
|
+ shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
|
|
|
+ shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
|
|
|
+ sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
|
|
|
+ expDiff -= 61;
|
|
|
+ end;
|
|
|
+ if ( -64 < expDiff ) begin
|
|
|
+ q := estimateDiv128To64( aSig0, aSig1, bSig0 );
|
|
|
+ q := ( 4 < q ) ? q - 4 : 0;
|
|
|
+ q >>= - expDiff;
|
|
|
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
|
|
|
+ expDiff += 52;
|
|
|
+ if ( expDiff < 0 ) begin
|
|
|
+ shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
|
|
|
+ sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ else begin
|
|
|
+ shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
|
|
|
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
|
|
|
+ end;
|
|
|
+ do begin
|
|
|
+ alternateASig0 := aSig0;
|
|
|
+ alternateASig1 := aSig1;
|
|
|
+ ++q;
|
|
|
+ sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
|
|
|
+ end; while ( 0 <= (sbits64) aSig0 );
|
|
|
+ add128(
|
|
|
+ aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
|
|
|
+ if ( ( sigMean0 < 0 )
|
|
|
+ or ( ( ( sigMean0 or sigMean1 ) = 0 ) and ( q and 1 ) ) ) begin
|
|
|
+ aSig0 := alternateASig0;
|
|
|
+ aSig1 := alternateASig1;
|
|
|
+ end;
|
|
|
+ zSign := ( (sbits64) aSig0 < 0 );
|
|
|
+ if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
|
|
|
+ result :=
|
|
|
+ normalizeRoundAndPackFloat128( aSign xor zSign, bExp - 4, aSig0, aSig1 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns the square root of the quadruple-precision floating-point value `a'.
|
|
|
+| The operation is performed according to the IEC/IEEE Standard for Binary
|
|
|
+| Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_sqrt(a: float128): float128;
|
|
|
+begin
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, zExp;
|
|
|
+ bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
|
|
|
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
|
|
|
+ float128 z;
|
|
|
+
|
|
|
+ aSig1 := extractFloat128Frac1( a );
|
|
|
+ aSig0 := extractFloat128Frac0( a );
|
|
|
+ aExp := extractFloat128Exp( a );
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ if ( aExp = $7FFF ) begin
|
|
|
+ if ( aSig0 or aSig1 ) result := propagateFloat128NaN( a, a );
|
|
|
+ if ( ! aSign ) result := a;
|
|
|
+ goto invalid;
|
|
|
+ end;
|
|
|
+ if ( aSign ) begin
|
|
|
+ if ( ( aExp or aSig0 or aSig1 ) = 0 ) result := a;
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ z.low := float128_default_nan_low;
|
|
|
+ z.high := float128_default_nan_high;
|
|
|
+ result := z;
|
|
|
+ end;
|
|
|
+ if ( aExp = 0 ) begin
|
|
|
+ if ( ( aSig0 or aSig1 ) = 0 ) result := packFloat128( 0, 0, 0, 0 );
|
|
|
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
|
|
|
+ end;
|
|
|
+ zExp := ( ( aExp - $3FFF )>>1 ) + $3FFE;
|
|
|
+ aSig0 or= LIT64( $0001000000000000 );
|
|
|
+ zSig0 := estimateSqrt32( aExp, aSig0>>17 );
|
|
|
+ shortShift128Left( aSig0, aSig1, 13 - ( aExp and 1 ), &aSig0, &aSig1 );
|
|
|
+ zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
|
|
|
+ doubleZSig0 := zSig0 shl 1;
|
|
|
+ mul64To128( zSig0, zSig0, &term0, &term1 );
|
|
|
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
|
|
|
+ while ( (sbits64) rem0 < 0 ) begin
|
|
|
+ --zSig0;
|
|
|
+ doubleZSig0 -= 2;
|
|
|
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 or 1, &rem0, &rem1 );
|
|
|
+ end;
|
|
|
+ zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
|
|
|
+ if ( ( zSig1 and $1FFF ) <= 5 ) begin
|
|
|
+ if ( zSig1 = 0 ) zSig1 := 1;
|
|
|
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
|
|
|
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
|
|
|
+ mul64To128( zSig1, zSig1, &term2, &term3 );
|
|
|
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
|
|
|
+ while ( (sbits64) rem1 < 0 ) begin
|
|
|
+ --zSig1;
|
|
|
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
|
|
|
+ term3 or= 1;
|
|
|
+ term2 or= doubleZSig0;
|
|
|
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
|
|
|
+ end;
|
|
|
+ zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
|
|
|
+ end;
|
|
|
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
|
|
|
+ result := roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
|
|
|
+| the corresponding value `b', and 0 otherwise. The comparison is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_eq(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ if ( float128_is_signaling_nan( a )
|
|
|
+ or float128_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ ( a.low = b.low )
|
|
|
+ and ( ( a.high = b.high )
|
|
|
+ or ( ( a.low = 0 )
|
|
|
+ and ( (bits64) ( ( a.high or b.high ) shl 1 ) = 0 ) )
|
|
|
+ );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
|
|
|
+| or equal to the corresponding value `b', and 0 otherwise. The comparison
|
|
|
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
|
+| Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_le(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ or ( ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ = 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? le128( b.high, b.low, a.high, a.low )
|
|
|
+ : le128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
|
|
|
+| the corresponding value `b', and 0 otherwise. The comparison is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_lt(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ and ( ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ <> 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? lt128( b.high, b.low, a.high, a.low )
|
|
|
+ : lt128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
|
|
|
+| the corresponding value `b', and 0 otherwise. The invalid exception is
|
|
|
+| raised if either operand is a NaN. Otherwise, the comparison is performed
|
|
|
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_eq_signaling(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ ( a.low = b.low )
|
|
|
+ and ( ( a.high = b.high )
|
|
|
+ or ( ( a.low = 0 )
|
|
|
+ and ( (bits64) ( ( a.high or b.high ) shl 1 ) = 0 ) )
|
|
|
+ );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
|
|
|
+| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
|
|
|
+| cause an exception. Otherwise, the comparison is performed according to the
|
|
|
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_le_quiet(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ if ( float128_is_signaling_nan( a )
|
|
|
+ or float128_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ or ( ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ = 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? le128( b.high, b.low, a.high, a.low )
|
|
|
+ : le128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{*----------------------------------------------------------------------------
|
|
|
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
|
|
|
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
|
|
|
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
|
|
|
+| Standard for Binary Floating-Point Arithmetic.
|
|
|
+*----------------------------------------------------------------------------*}
|
|
|
+
|
|
|
+function float128_lt_quiet(a: float128; b: float128): flag;
|
|
|
+begin
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloat128Exp( a ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) ) )
|
|
|
+ or ( ( extractFloat128Exp( b ) = $7FFF )
|
|
|
+ and ( extractFloat128Frac0( b ) or extractFloat128Frac1( b ) ) )
|
|
|
+ ) begin
|
|
|
+ if ( float128_is_signaling_nan( a )
|
|
|
+ or float128_is_signaling_nan( b ) ) begin
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ end;
|
|
|
+ result := 0;
|
|
|
+ end;
|
|
|
+ aSign := extractFloat128Sign( a );
|
|
|
+ bSign := extractFloat128Sign( b );
|
|
|
+ if ( aSign <> bSign ) begin
|
|
|
+ result :=
|
|
|
+ aSign
|
|
|
+ and ( ( ( (bits64) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
|
|
|
+ <> 0 );
|
|
|
+ end;
|
|
|
+ result :=
|
|
|
+ aSign ? lt128( b.high, b.low, a.high, a.low )
|
|
|
+ : lt128( a.high, a.low, b.high, b.low );
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+{$endif FPC_SOFTFLOAT_FLOAT128}
|
|
|
+
|
|
|
{$endif not(defined(fpc_softfpu_interface))}
|
|
|
|
|
|
{$if not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
|