Browse Source

* bugfix for int64 to float conversion

carl 23 years ago
parent
commit
c0a2149c38
1 changed files with 90 additions and 94 deletions
  1. 90 94
      rtl/inc/softfpu.pp

+ 90 - 94
rtl/inc/softfpu.pp

@@ -6,7 +6,7 @@ to pascal was done by Carl Eric Codere in 2002 ([email protected]).
 ===============================================================================
 ===============================================================================
 
 
 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
-Arithmetic Package, Release 2a. 
+Arithmetic Package, Release 2a.
 
 
 Written by John R. Hauser.  This work was made possible in part by the
 Written by John R. Hauser.  This work was made possible in part by the
 International Computer Science Institute, located at Suite 600, 1947 Center
 International Computer Science Institute, located at Suite 600, 1947 Center
@@ -15,7 +15,7 @@ National Science Foundation under grant MIP-9311980.  The original version
 of this code was written as part of a project to build a fixed-point vector
 of this code was written as part of a project to build a fixed-point vector
 processor in collaboration with the University of California at Berkeley,
 processor in collaboration with the University of California at Berkeley,
 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
-is available through the Web page 
+is available through the Web page
 `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
 `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
 
 
 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
@@ -36,7 +36,7 @@ unit softfpu;
 { Overflow checking must be disabled,
 { Overflow checking must be disabled,
   since some operations expect overflow!
   since some operations expect overflow!
 }
 }
-{$Q-}  
+{$Q-}
 
 
 interface
 interface
 
 
@@ -69,7 +69,7 @@ TYPE
   uint64 = qword;
   uint64 = qword;
   bits64 = qword;
   bits64 = qword;
   sbits64 = int64;
   sbits64 = int64;
-  
+
 {$ifdef ENDIAN_LITTLE}
 {$ifdef ENDIAN_LITTLE}
   float64 = packed record
   float64 = packed record
     low: bits32;
     low: bits32;
@@ -98,7 +98,7 @@ the corresponding value `b', and 0 otherwise.  The comparison is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_lt(a: float64;b: float64): flag; 
+Function float64_lt(a: float64;b: float64): flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns 1 if the double-precision floating-point value `a' is less than
 Returns 1 if the double-precision floating-point value `a' is less than
@@ -107,7 +107,7 @@ is performed according to the IEC/IEEE Standard for Binary Floating-Point
 Arithmetic.
 Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_le(a: float64;b: float64): flag; 
+Function float64_le(a: float64;b: float64): flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns 1 if the double-precision floating-point value `a' is equal to
 Returns 1 if the double-precision floating-point value `a' is equal to
@@ -115,7 +115,7 @@ the corresponding value `b', and 0 otherwise.  The comparison is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_eq(a: float64;b: float64): flag; 
+Function float64_eq(a: float64;b: float64): flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the square root of the double-precision floating-point value `a'.
 Returns the square root of the double-precision floating-point value `a'.
@@ -123,7 +123,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_sqrt( a: float64; var out: float64 ); 
+Procedure float64_sqrt( a: float64; var out: float64 );
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the remainder of the double-precision floating-point value `a'
 Returns the remainder of the double-precision floating-point value `a'
@@ -131,7 +131,7 @@ with respect to the corresponding value `b'.  The operation is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_rem(a: float64; b : float64; var out: float64); 
+Procedure float64_rem(a: float64; b : float64; var out: float64);
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of dividing the double-precision floating-point value `a'
 Returns the result of dividing the double-precision floating-point value `a'
@@ -139,7 +139,7 @@ by the corresponding value `b'.  The operation is performed according to the
 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_div(a: float64; b : float64 ; var out: float64 ); 
+Procedure float64_div(a: float64; b : float64 ; var out: float64 );
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of multiplying the double-precision floating-point values
 Returns the result of multiplying the double-precision floating-point values
@@ -147,7 +147,7 @@ Returns the result of multiplying the double-precision floating-point values
 for Binary Floating-Point Arithmetic.
 for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_mul( a: float64; b:float64; Var out: float64); 
+Procedure float64_mul( a: float64; b:float64; Var out: float64);
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of subtracting the double-precision floating-point values
 Returns the result of subtracting the double-precision floating-point values
@@ -155,7 +155,7 @@ Returns the result of subtracting the double-precision floating-point values
 for Binary Floating-Point Arithmetic.
 for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_sub(a: float64; b : float64; var out: float64); 
+Procedure float64_sub(a: float64; b : float64; var out: float64);
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of adding the double-precision floating-point values `a'
 Returns the result of adding the double-precision floating-point values `a'
@@ -163,7 +163,7 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
 Binary Floating-Point Arithmetic.
 Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_add( a: float64; b : float64; Var out : float64); 
+Procedure float64_add( a: float64; b : float64; Var out : float64);
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Rounds the double-precision floating-point value `a' to an integer,
 Rounds the double-precision floating-point value `a' to an integer,
@@ -172,7 +172,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float64_round_to_int(a: float64; var out: float64 ); 
+Procedure float64_round_to_int(a: float64; var out: float64 );
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the double-precision floating-point value
 Returns the result of converting the double-precision floating-point value
@@ -181,7 +181,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-Point
 Arithmetic.
 Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_to_float32(a: float64 ): float32; 
+Function float64_to_float32(a: float64 ): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the double-precision floating-point value
 Returns the result of converting the double-precision floating-point value
@@ -193,7 +193,7 @@ the conversion overflows, the largest integer with the same sign as `a' is
 returned.
 returned.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_to_int32_round_to_zero(a: float64 ): int32; 
+Function float64_to_int32_round_to_zero(a: float64 ): int32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the double-precision floating-point value
 Returns the result of converting the double-precision floating-point value
@@ -205,7 +205,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
 largest integer with the same sign as `a' is returned.
 largest integer with the same sign as `a' is returned.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float64_to_int32(a: float64): int32; 
+Function float64_to_int32(a: float64): int32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns 1 if the single-precision floating-point value `a' is less than
 Returns 1 if the single-precision floating-point value `a' is less than
@@ -213,7 +213,7 @@ the corresponding value `b', and 0 otherwise.  The comparison is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_lt( a:float32 ; b : float32): flag; 
+Function float32_lt( a:float32 ; b : float32): flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns 1 if the single-precision floating-point value `a' is less than
 Returns 1 if the single-precision floating-point value `a' is less than
@@ -222,7 +222,7 @@ is performed according to the IEC/IEEE Standard for Binary Floating-Point
 Arithmetic.
 Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_le( a: float32; b : float32 ):flag; 
+Function float32_le( a: float32; b : float32 ):flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns 1 if the single-precision floating-point value `a' is equal to
 Returns 1 if the single-precision floating-point value `a' is equal to
@@ -230,7 +230,7 @@ the corresponding value `b', and 0 otherwise.  The comparison is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_eq( a:float32; b:float32): flag; 
+Function float32_eq( a:float32; b:float32): flag;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the square root of the single-precision floating-point value `a'.
 Returns the square root of the single-precision floating-point value `a'.
@@ -238,7 +238,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_sqrt(a: float32 ): float32; 
+Function float32_sqrt(a: float32 ): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the remainder of the single-precision floating-point value `a'
 Returns the remainder of the single-precision floating-point value `a'
@@ -246,7 +246,7 @@ with respect to the corresponding value `b'.  The operation is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_rem(a: float32; b: float32 ):float32; 
+Function float32_rem(a: float32; b: float32 ):float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of dividing the single-precision floating-point value `a'
 Returns the result of dividing the single-precision floating-point value `a'
@@ -254,7 +254,7 @@ by the corresponding value `b'.  The operation is performed according to the
 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_div(a: float32;b: float32 ): float32; 
+Function float32_div(a: float32;b: float32 ): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of multiplying the single-precision floating-point values
 Returns the result of multiplying the single-precision floating-point values
@@ -262,7 +262,7 @@ Returns the result of multiplying the single-precision floating-point values
 for Binary Floating-Point Arithmetic.
 for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_mul(a: float32; b: float32 ) : float32; 
+Function float32_mul(a: float32; b: float32 ) : float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of subtracting the single-precision floating-point values
 Returns the result of subtracting the single-precision floating-point values
@@ -270,7 +270,7 @@ Returns the result of subtracting the single-precision floating-point values
 for Binary Floating-Point Arithmetic.
 for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_sub( a: float32 ; b:float32 ): float32; 
+Function float32_sub( a: float32 ; b:float32 ): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of adding the single-precision floating-point values `a'
 Returns the result of adding the single-precision floating-point values `a'
@@ -278,7 +278,7 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
 Binary Floating-Point Arithmetic.
 Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_add( a: float32; b:float32 ): float32; 
+Function float32_add( a: float32; b:float32 ): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Rounds the single-precision floating-point value `a' to an integer,
 Rounds the single-precision floating-point value `a' to an integer,
@@ -287,7 +287,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_round_to_int( a: float32): float32; 
+Function float32_round_to_int( a: float32): float32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the single-precision floating-point value
 Returns the result of converting the single-precision floating-point value
@@ -296,7 +296,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-Point
 Arithmetic.
 Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure float32_to_float64( a : float32; var out: Float64); 
+Procedure float32_to_float64( a : float32; var out: Float64);
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the single-precision floating-point value
 Returns the result of converting the single-precision floating-point value
@@ -308,7 +308,7 @@ the conversion overflows, the largest integer with the same sign as `a' is
 returned.
 returned.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_to_int32_round_to_zero( a: Float32 ): int32; 
+Function float32_to_int32_round_to_zero( a: Float32 ): int32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the single-precision floating-point value
 Returns the result of converting the single-precision floating-point value
@@ -320,7 +320,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
 largest integer with the same sign as `a' is returned.
 largest integer with the same sign as `a' is returned.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function float32_to_int32( a : float32) : int32; 
+Function float32_to_int32( a : float32) : int32;
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the 32-bit two's complement integer `a' to
 Returns the result of converting the 32-bit two's complement integer `a' to
@@ -328,7 +328,7 @@ the double-precision floating-point format.  The conversion is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Procedure int32_to_float64( a: int32; var c: float64 ); 
+Procedure int32_to_float64( a: int32; var c: float64 );
 {*
 {*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 Returns the result of converting the 32-bit two's complement integer `a' to
 Returns the result of converting the 32-bit two's complement integer `a' to
@@ -336,7 +336,7 @@ the single-precision floating-point format.  The conversion is performed
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
-Function int32_to_float32( a: int32): float32; 
+Function int32_to_float32( a: int32): float32;
 
 
 {*----------------------------------------------------------------------------
 {*----------------------------------------------------------------------------
 | Returns the result of converting the 64-bit two's complement integer `a'
 | Returns the result of converting the 64-bit two's complement integer `a'
@@ -367,28 +367,28 @@ Software IEC/IEEE floating-point rounding mode.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 *}
 *}
 {
 {
-Round to nearest. 
-This is the default mode. It should be used unless there is a specific 
-need for one of the others. In this mode results are rounded to the 
-nearest representable value. If the result is midway between two 
-representable values, the even representable is chosen. Even here 
-means the lowest-order bit is zero. This rounding mode prevents 
-statistical bias and guarantees numeric stability: round-off errors 
-in a lengthy calculation will remain smaller than half of FLT_EPSILON. 
-
-Round toward plus Infinity. 
-All results are rounded to the smallest representable value which is 
-greater than the result. 
-
-Round toward minus Infinity. 
-All results are rounded to the largest representable value which is 
-less than the result. 
-
-Round toward zero. 
-All results are rounded to the largest representable value whose 
-magnitude is less than that of the result. In other words, if the 
-result is negative it is rounded up; if it is positive, it is 
-rounded down. 
+Round to nearest.
+This is the default mode. It should be used unless there is a specific
+need for one of the others. In this mode results are rounded to the
+nearest representable value. If the result is midway between two
+representable values, the even representable is chosen. Even here
+means the lowest-order bit is zero. This rounding mode prevents
+statistical bias and guarantees numeric stability: round-off errors
+in a lengthy calculation will remain smaller than half of FLT_EPSILON.
+
+Round toward plus Infinity.
+All results are rounded to the smallest representable value which is
+greater than the result.
+
+Round toward minus Infinity.
+All results are rounded to the largest representable value which is
+less than the result.
+
+Round toward zero.
+All results are rounded to the largest representable value whose
+magnitude is less than that of the result. In other words, if the
+result is negative it is rounded up; if it is positive, it is
+rounded down.
 }
 }
     float_round_nearest_even = 0;
     float_round_nearest_even = 0;
     float_round_down         = 1;
     float_round_down         = 1;
@@ -443,7 +443,7 @@ Begin
   float_exception_flags := float_exception_flags or i;
   float_exception_flags := float_exception_flags or i;
   if (float_exception_flags and float_flag_invalid) <> 0 then
   if (float_exception_flags and float_flag_invalid) <> 0 then
      RunError(207)
      RunError(207)
-  else  
+  else
   if (float_exception_flags and float_flag_divbyzero) <> 0 then
   if (float_exception_flags and float_flag_divbyzero) <> 0 then
      RunError(200)
      RunError(200)
   else
   else
@@ -479,7 +479,7 @@ var
 Begin
 Begin
     if ( count = 0 ) then
     if ( count = 0 ) then
         z := a
         z := a
-   else 
+   else
     if ( count < 32 ) then
     if ( count < 32 ) then
     Begin
     Begin
         z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
         z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
@@ -557,7 +557,7 @@ Begin
         z1 := a1;
         z1 := a1;
         z0 := a0;
         z0 := a0;
     End
     End
-   else 
+   else
     if ( count < 32 ) then
     if ( count < 32 ) then
     Begin
     Begin
         z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
         z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
@@ -569,7 +569,7 @@ Begin
         Begin
         Begin
             z1 := a0 OR bits32( a1 <> 0 );
             z1 := a0 OR bits32( a1 <> 0 );
         End
         End
-       else 
+       else
         if ( count < 64 ) Then
         if ( count < 64 ) Then
         Begin
         Begin
             z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
             z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
@@ -1081,7 +1081,7 @@ End;
 function countLeadingZeros64( a : bits64): int8;
 function countLeadingZeros64( a : bits64): int8;
 var
 var
  shiftcount : int8;
  shiftcount : int8;
-Begin 
+Begin
     shiftCount := 0;
     shiftCount := 0;
     if ( a <  (bits64(1)  shl 32 )) then
     if ( a <  (bits64(1)  shl 32 )) then
         shiftCount := shiftcount + 32
         shiftCount := shiftcount + 32
@@ -1441,7 +1441,7 @@ End;
   sign : flag;
   sign : flag;
   high, low : bits32;
   high, low : bits32;
  end;
  end;
- 
+
 (*----------------------------------------------------------------------------
 (*----------------------------------------------------------------------------
 | The pattern for a default generated single-precision NaN.
 | The pattern for a default generated single-precision NaN.
 *----------------------------------------------------------------------------*)
 *----------------------------------------------------------------------------*)
@@ -1464,7 +1464,7 @@ function float32_is_signaling_nan(a: float32):flag;
  begin
  begin
    float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and ( (a and $003FFFFF)<>0 );
    float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and ( (a and $003FFFFF)<>0 );
  end;
  end;
- 
+
 (*----------------------------------------------------------------------------
 (*----------------------------------------------------------------------------
 | Returns the result of converting the single-precision floating-point NaN
 | Returns the result of converting the single-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
@@ -1490,7 +1490,7 @@ function CommonNanToFloat32(a : CommonNaNT): float32;
  begin
  begin
     CommonNanToFloat32:= ( ( (bits32) a.sign ) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
     CommonNanToFloat32:= ( ( (bits32) a.sign ) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
  end;
  end;
- 
+
 (*----------------------------------------------------------------------------
 (*----------------------------------------------------------------------------
 | Takes two single-precision floating-point values `a' and `b', one of which
 | Takes two single-precision floating-point values `a' and `b', one of which
 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
@@ -1607,7 +1607,7 @@ var
         c := a;
         c := a;
  end;
  end;
 
 
-{$ENDIF} 
+{$ENDIF}
 
 
 (****************************************************************************)
 (****************************************************************************)
 (*                        END ENDIAN SPECIFIC CODE                          *)
 (*                        END ENDIAN SPECIFIC CODE                          *)
@@ -4573,14 +4573,14 @@ Begin
       begin
       begin
         int64_to_float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
         int64_to_float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
       end
       end
-    else 
+    else
        begin
        begin
         shiftCount := shiftCount + 7;
         shiftCount := shiftCount + 7;
         if ( shiftCount < 0 ) then
         if ( shiftCount < 0 ) then
           begin
           begin
             intval.low := int64rec(AbsA).low;
             intval.low := int64rec(AbsA).low;
             intval.high := int64rec(AbsA).high;
             intval.high := int64rec(AbsA).high;
-            shift64RightJamming( intval.low, intval.high, - shiftCount, 
+            shift64RightJamming( intval.low, intval.high, - shiftCount,
                intval.low, intval.high);
                intval.low, intval.high);
             int64rec(absA).low := intval.low;
             int64rec(absA).low := intval.low;
             int64rec(absA).high := intval.high;
             int64rec(absA).high := intval.high;
@@ -4597,51 +4597,47 @@ End;
 | to the double-precision floating-point format.  The conversion is performed
 | to the double-precision floating-point format.  The conversion is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*}
 *----------------------------------------------------------------------------*}
-
 function int64_to_float64( a: int64 ): float64;
 function int64_to_float64( a: int64 ): float64;
 var
 var
  zSign : flag;
  zSign : flag;
  float_result : float64;
  float_result : float64;
  intval : int64rec;
  intval : int64rec;
+ AbsA : bits64;
+ shiftcount : int8;
+ zSig0, zSig1 : bits32;
 Begin
 Begin
     if ( a = 0 ) then
     if ( a = 0 ) then
-      begin
-        int64_to_float64.low := 0;
-        int64_to_float64.high := 0;
-        exit;
+      Begin
+       packFloat64( 0, 0, 0, 0, float_result );
+       exit;
       end;
       end;
-    if ( a =  sbits64 ( 1 shl 64 ) ) then
-      begin
-        packFloat64(1, $43E, 0, 0, float_result);
-        int64_to_float64 := float_result;
-        exit;
-      end;  
-    if a < 0 then
-      zSign := flag(TRUE)
+    zSign := flag( a < 0 );
+    if ZSign<>0 then
+      AbsA := -a
     else
     else
-      zSign := flag(FALSE);
-    if zSign<>0 then 
-      a := -a;
-    if zSign <> 0 then
-     begin
-       a:=-a;
-       intval.low := int64rec(a).low;
-       intval.high := int64rec(a).high;
-       normalizeRoundAndPackFloat64( zSign, $43C, intval.low, intval.high , float_result )
-     end
+      AbsA := a;
+    shiftCount := countLeadingZeros64( absA ) - 11;
+    if ( 0 <= shiftCount ) then
+      Begin
+        absA := absA shl shiftcount;
+        zSig0:=int64rec(absA).high;
+        zSig1:=int64rec(absA).low;
+      End
     else
     else
-     begin 
-       intval.low := int64rec(a).low;
-       intval.high := int64rec(a).high;
-       normalizeRoundAndPackFloat64( zSign, $43C, intval.low, intval.high , float_result );
-     end;
+      Begin
+        shift64Right( absA, 0, - shiftCount, zSig0, zSig1 );
+      End;
+    packFloat64( zSign, $432 - shiftCount, zSig0, zSig1, float_result );
     int64_to_float64:= float_result;
     int64_to_float64:= float_result;
 End;
 End;
 
 
 end.
 end.
 {
 {
    $Log$
    $Log$
-   Revision 1.3  2002-10-12 20:24:22  carl
+   Revision 1.4  2002-10-13 15:47:39  carl
+      * bugfix for int64 to float conversion
+
+   Revision 1.3  2002/10/12 20:24:22  carl
      + int64_to_float conversion routines
      + int64_to_float conversion routines
 
 
    Revision 1.2  2002/10/08 20:07:08  carl
    Revision 1.2  2002/10/08 20:07:08  carl