Browse Source

* first revision of FPU emulation

carl 23 years ago
parent
commit
bf6a6b4d46
1 changed files with 4494 additions and 0 deletions
  1. 4494 0
      rtl/inc/softfpu.pp

+ 4494 - 0
rtl/inc/softfpu.pp

@@ -0,0 +1,4494 @@
+
+{*
+===============================================================================
+The original notice of the softfloat package is shown below. The conversion
+to pascal was done by Carl Eric Codere in 2002 ([email protected]).
+===============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-Point
+Arithmetic Package, Release 2a. 
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page 
+`http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these four paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*}
+
+unit softfpu;
+
+interface
+
+
+{
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point types.
+-------------------------------------------------------------------------------
+}
+TYPE
+  float32 = longint;
+
+  flag = byte;
+  uint8 = byte;
+  int8 = shortint;
+  uint16 = word;
+  int16 = integer;
+  uint32 = longint;
+  int32 = longint;
+
+  bits8 = byte;
+  sbits8 = shortint;
+  bits16 = word;
+  sbits16 = integer;
+  sbits32 = longint;
+  bits32 = longint;
+{$ifdef ENDIAN_LITTLE}
+  float64 = packed record
+    low: longword;
+    high: longword;
+  end;
+{$else}
+ float64 = packed record
+   high,low : longword;
+ end;
+
+{$endif}
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_lt(a: float64;b: float64): flag; 
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_le(a: float64;b: float64): flag; 
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_eq(a: float64;b: float64): flag; 
+{*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_sqrt( a: float64; var out: float64 ); 
+{*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_rem(a: float64; b : float64; var out: float64); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_div(a: float64; b : float64 ; var out: float64 ); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_mul( a: float64; b:float64; Var out: float64); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_sub(a: float64; b : float64; var out: float64); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the double-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_add( a: float64; b : float64; Var out : float64); 
+{*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer,
+and returns the result as a double-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_round_to_int(a: float64; var out: float64 ); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_float32(a: float64 ): float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic, except that the conversion is always rounded toward zero.
+If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+the conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_int32_round_to_zero(a: float64 ): int32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_int32(a: float64): int32; 
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_lt( a:float32 ; b : float32): flag; 
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_le( a: float32; b : float32 ):flag; 
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_eq( a:float32; b:float32): flag; 
+{*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_sqrt(a: float32 ): float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_rem(a: float32; b: float32 ):float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_div(a: float32;b: float32 ): float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_mul(a: float32; b: float32 ) : float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_sub( a: float32 ; b:float32 ): float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_add( a: float32; b:float32 ): float32; 
+{*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer,
+and returns the result as a single-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_round_to_int( a: float32): float32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the double-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float32_to_float64( a : float32; var out: Float64); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic, except that the conversion is always rounded toward zero.
+If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+the conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*}
+Function float32_to_int32_round_to_zero( a: Float32 ): int32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*}
+Function float32_to_int32( a : float32) : int32; 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the double-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure int32_to_float64( a: int32; var c: float64 ); 
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the single-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function int32_to_float32( a: int32): float32; 
+
+
+CONST
+{-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point underflow tininess-detection mode.
+-------------------------------------------------------------------------------
+*}
+    float_tininess_after_rounding  = 0;
+    float_tininess_before_rounding = 1;
+
+{*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point rounding mode.
+-------------------------------------------------------------------------------
+*}
+{
+Round to nearest. 
+This is the default mode. It should be used unless there is a specific 
+need for one of the others. In this mode results are rounded to the 
+nearest representable value. If the result is midway between two 
+representable values, the even representable is chosen. Even here 
+means the lowest-order bit is zero. This rounding mode prevents 
+statistical bias and guarantees numeric stability: round-off errors 
+in a lengthy calculation will remain smaller than half of FLT_EPSILON. 
+
+Round toward plus Infinity. 
+All results are rounded to the smallest representable value which is 
+greater than the result. 
+
+Round toward minus Infinity. 
+All results are rounded to the largest representable value which is 
+less than the result. 
+
+Round toward zero. 
+All results are rounded to the largest representable value whose 
+magnitude is less than that of the result. In other words, if the 
+result is negative it is rounded up; if it is positive, it is 
+rounded down. 
+}
+    float_round_nearest_even = 0;
+    float_round_down         = 1;
+    float_round_up           = 2;
+    float_round_to_zero      = 3;
+
+{*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point exception flags.
+-------------------------------------------------------------------------------
+*}
+    float_flag_invalid   =  1;
+    float_flag_divbyzero =  4;
+    float_flag_overflow  =  8;
+    float_flag_underflow = 16;
+    float_flag_inexact   = 32;
+
+{*
+-------------------------------------------------------------------------------
+Floating-point rounding mode and exception flags.
+-------------------------------------------------------------------------------
+*}
+const
+ float_rounding_mode : Byte = float_round_nearest_even;
+ float_exception_flags : Byte = 0;
+
+{*
+-------------------------------------------------------------------------------
+Underflow tininess-detection mode, statically initialized to default value.
+(The declaration in `softfloat.h' must match the `int8' type here.)
+-------------------------------------------------------------------------------
+*}
+
+const float_detect_tininess: int8 = float_tininess_after_rounding;
+
+
+
+
+implementation
+
+
+{*
+-------------------------------------------------------------------------------
+Raises the exceptions specified by `flags'.  Floating-point traps can be
+defined here if desired.  It is currently not possible for such a trap
+to substitute a result value.  If traps are not implemented, this routine
+should be simply `float_exception_flags |= flags;'.
+-------------------------------------------------------------------------------
+*}
+procedure float_raise( i: shortint );
+Begin
+  float_exception_flags := float_exception_flags or i;
+  if (float_exception_flags and float_flag_invalid) <> 0 then
+     RunError(207)
+  else  
+  if (float_exception_flags and float_flag_divbyzero) <> 0 then
+     RunError(200)
+  else
+  if (float_exception_flags and float_flag_overflow) <> 0 then
+     RunError(205)
+  else
+  if (float_exception_flags and float_flag_underflow) <> 0 then
+     RunError(206);
+end;
+
+
+(*****************************************************************************)
+(*----------------------------------------------------------------------------*)
+(* Primitive arithmetic functions, including multi-word arithmetic, and       *)
+(* division and square root approximations.  (Can be specialized to target if *)
+(* desired.)                                                                  *)
+(* ---------------------------------------------------------------------------*)
+(*****************************************************************************)
+
+{*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 32, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*}
+Procedure shift32RightJamming( a: bits32 ; count: int16 ; VAR zPtr :bits32);
+var
+  z: Bits32;
+Begin
+    if ( count = 0 ) then
+        z := a
+   else 
+    if ( count < 32 ) then
+    Begin
+        z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
+    End
+   else
+    Begin
+        z := bits32( a <> 0 );
+    End;
+    zPtr := z;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' can be arbitrarily large; in particular, if `count' is greater
+than 64, the result will be 0.  The result is broken into two 32-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ shift64Right(
+     a0 :bits32; a1: bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32);
+Var
+  z0, z1: bits32;
+  negCount : int8;
+Begin
+    negCount := ( - count ) AND 31;
+
+    if ( count = 0 ) then
+    Begin
+        z1 := a1;
+        z0 := a0;
+    End
+    else if ( count < 32 ) then
+    Begin
+        z1 := ( a0 shl negCount ) OR ( a1 shr count );
+        z0 := a0 shr count;
+    End
+   else
+    Begin
+        if (count < 64) then
+          z1 := ( a0 shr ( count AND 31 ) )
+        else
+          z1 := 0;
+        z0 := 0;
+    End;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1.  The value of `count' can be arbitrarily large;
+in particular, if `count' is greater than 64, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero.  The result is broken into two 32-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ shift64RightJamming(
+     a0:bits32; a1: bits32; count:int16; VAR Z0Ptr :bits32;VAR z1Ptr: bits32 );
+VAR
+    z0, z1 : bits32;
+    negCount : int8;
+Begin
+    negCount := ( - count ) AND 31;
+
+    if ( count = 0 ) then
+    Begin
+        z1 := a1;
+        z0 := a0;
+    End
+   else 
+    if ( count < 32 ) then
+    Begin
+        z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
+        z0 := a0 shr count;
+    End
+   else
+    Begin
+        if ( count = 32 ) then
+        Begin
+            z1 := a0 OR bits32( a1 <> 0 );
+        End
+       else 
+        if ( count < 64 ) Then
+        Begin
+            z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
+        End
+       else
+        Begin
+            z1 := bits32( ( a0 OR a1 ) <> 0 );
+        End;
+        z0 := 0;
+    End;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
+by 32 _plus_ the number of bits given in `count'.  The shifted result is
+at most 64 nonzero bits; these are broken into two 32-bit pieces which are
+stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+off form a third 32-bit result as follows:  The _last_ bit shifted off is
+the most-significant bit of the extra result, and the other 31 bits of the
+extra result are all zero if and only if _all_but_the_last_ bits shifted off
+were all zero.  This extra result is stored in the location pointed to by
+`z2Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0', `a1', and `a2' are considered
+to form a fixed-point value with binary point between `a1' and `a2'.  This
+fixed-point value is shifted right by the number of bits given in `count',
+and the integer part of the result is returned at the locations pointed to
+by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+corrupted as described above, and is returned at the location pointed to by
+`z2Ptr'.)
+-------------------------------------------------------------------------------
+}
+Procedure
+ shift64ExtraRightJamming(
+     a0: bits32;
+     a1: bits32;
+     a2: bits32;
+     count: int16;
+     VAR z0Ptr: bits32;
+     VAR z1Ptr: bits32;
+     VAR z2Ptr: bits32
+ );
+Var
+    z0, z1, z2: bits32;
+    negCount : int8;
+Begin
+    negCount := ( - count ) AND 31;
+
+    if ( count = 0 ) then
+    Begin
+        z2 := a2;
+        z1 := a1;
+        z0 := a0;
+    End
+   else
+    Begin
+        if ( count < 32 ) Then
+        Begin
+            z2 := a1 shl negCount;
+            z1 := ( a0 shl negCount ) OR ( a1 shr count );
+            z0 := a0 shr count;
+        End
+       else
+        Begin
+            if ( count = 32 ) then
+            Begin
+                z2 := a1;
+                z1 := a0;
+            End
+           else
+            Begin
+                a2 := a2 or a1;
+                if ( count < 64 ) then
+                Begin
+                    z2 := a0 shl negCount;
+                    z1 := a0 shr ( count AND 31 );
+                End
+               else
+                Begin
+                    if count = 64 then
+                       z2 := a0
+                    else
+                       z2 := bits32(a0 <> 0);
+                    z1 := 0;
+                End;
+            End;
+            z0 := 0;
+        End;
+        z2 := z2 or bits32( a2 <> 0 );
+    End;
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' must be less than 32.  The result is broken into two 32-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ shortShift64Left(
+     a0:bits32; a1:bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
+Begin
+
+    z1Ptr := a1 shl count;
+    if count = 0 then
+      z0Ptr := a0
+    else
+      z0Ptr := ( a0 shl count ) OR ( a1 shr ( ( - count ) AND 31 ) );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'.  Any bits shifted off are lost.
+The value of `count' must be less than 32.  The result is broken into three
+32-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ shortShift96Left(
+     a0: bits32;
+     a1: bits32;
+     a2: bits32;
+     count: int16;
+     VAR z0Ptr: bits32;
+     VAR z1Ptr: bits32;
+     VAR z2Ptr: bits32
+ );
+Var
+    z0, z1, z2: bits32;
+    negCount: int8;
+Begin
+    z2 := a2 shl count;
+    z1 := a1 shl count;
+    z0 := a0 shl count;
+    if ( 0 < count ) then
+    Begin
+        negCount := ( ( - count ) AND 31 );
+        z1 := z1 or (a2 shr negCount);
+        z0 := z0 or (a1 shr negCount);
+    End;
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
+value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
+any carry out is lost.  The result is broken into two 32-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ add64(
+     a0:bits32; a1:bits32; b0:bits32; b1:bits32; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
+Var
+    z1: bits32;
+Begin
+    z1 := a1 + b1;
+    z1Ptr := z1;
+    z0Ptr := a0 + b0 + bits32( z1 < a1 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
+96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+modulo 2^96, so any carry out is lost.  The result is broken into three
+32-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ add96(
+     a0: bits32;
+     a1: bits32;
+     a2: bits32;
+     b0: bits32;
+     b1: bits32;
+     b2: bits32;
+     VAR z0Ptr: bits32;
+     VAR z1Ptr: bits32;
+     VAR z2Ptr: bits32
+ );
+var
+    z0, z1, z2: bits32;
+    carry0, carry1: int8;
+Begin
+    z2 := a2 + b2;
+    carry1 := int8( z2 < a2 );
+    z1 := a1 + b1;
+    carry0 := int8( z1 < a1 );
+    z0 := a0 + b0;
+    z1 := z1 + carry1;
+    z0 := z0 + bits32( z1 < carry1 );
+    z0 := z0 + carry0;
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
+64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+2^64, so any borrow out (carry out) is lost.  The result is broken into two
+32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ sub64(
+     a0: bits32; a1 : bits32; b0 :bits32; b1: bits32; VAR z0Ptr:bits32; VAR z1Ptr: bits32 );
+Begin
+    z1Ptr := a1 - b1;
+    z0Ptr := a0 - b0 - bits32( a1 < b1 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
+the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
+is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
+into three 32-bit pieces which are stored at the locations pointed to by
+`z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ sub96(
+     a0:bits32;
+     a1:bits32;
+     a2:bits32;
+     b0:bits32;
+     b1:bits32;
+     b2:bits32;
+     VAR z0Ptr:bits32;
+     VAR z1Ptr:bits32;
+     VAR z2Ptr:bits32
+ );
+Var
+    z0, z1, z2: bits32;
+    borrow0, borrow1: int8;
+Begin
+    z2 := a2 - b2;
+    borrow1 := int8( a2 < b2 );
+    z1 := a1 - b1;
+    borrow0 := int8( a1 < b1 );
+    z0 := a0 - b0;
+    z0 := z0 - bits32( z1 < borrow1 );
+    z1 := z1 - borrow1;
+    z0 := z0 -borrow0;
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
+into two 32-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure mul32To64( a:bits32; b:bits32; VAR z0Ptr: bits32; VAR z1Ptr
+:bits32 );
+Var
+    aHigh, aLow, bHigh, bLow: bits16;
+    z0, zMiddleA, zMiddleB, z1: bits32;
+Begin
+    aLow := a;
+    aHigh := a shr 16;
+    bLow := b;
+    bHigh := b shr 16;
+    z1 := ( bits32( aLow) ) * bLow;
+    zMiddleA := ( bits32 (aLow) ) * bHigh;
+    zMiddleB := ( bits32 (aHigh) ) * bLow;
+    z0 := ( bits32 (aHigh) ) * bHigh;
+    zMiddleA := zMiddleA + zMiddleB;
+    z0 := z0 + ( ( bits32 ( zMiddleA < zMiddleB ) ) shl 16 ) + ( zMiddleA shr 16 );
+    zMiddleA := zmiddleA shl 16;
+    z1 := z1 + zMiddleA;
+    z0 := z0 + bits32( z1 < zMiddleA );
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
+to obtain a 96-bit product.  The product is broken into three 32-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ mul64By32To96(
+     a0:bits32;
+     a1:bits32;
+     b:bits32;
+     VAR z0Ptr:bits32;
+     VAR z1Ptr:bits32;
+     VAR z2Ptr:bits32
+ );
+Var
+    z0, z1, z2, more1: bits32;
+Begin
+    mul32To64( a1, b, z1, z2 );
+    mul32To64( a0, b, z0, more1 );
+    add64( z0, more1, 0, z1, z0, z1 );
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
+64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
+product.  The product is broken into four 32-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ mul64To128(
+     a0:bits32;
+     a1:bits32;
+     b0:bits32;
+     b1:bits32;
+     VAR z0Ptr:bits32;
+     VAR z1Ptr:bits32;
+     VAR z2Ptr:bits32;
+     VAR z3Ptr:bits32
+ );
+Var
+    z0, z1, z2, z3: bits32;
+    more1, more2: bits32;
+Begin
+
+    mul32To64( a1, b1, z2, z3 );
+    mul32To64( a1, b0, z1, more2 );
+    add64( z1, more2, 0, z2, z1, z2 );
+    mul32To64( a0, b0, z0, more1 );
+    add64( z0, more1, 0, z1, z0, z1 );
+    mul32To64( a0, b1, more1, more2 );
+    add64( more1, more2, 0, z2, more1, z2 );
+    add64( z0, z1, 0, more1, z0, z1 );
+    z3Ptr := z3;
+    z2Ptr := z2;
+    z1Ptr := z1;
+    z0Ptr := z0;
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns an approximation to the 32-bit integer quotient obtained by dividing
+`b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
+divisor `b' must be at least 2^31.  If q is the exact quotient truncated
+toward zero, the approximation returned lies between q and q + 2 inclusive.
+If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
+unsigned integer is returned.
+-------------------------------------------------------------------------------
+*}
+Function estimateDiv64To32( a0:bits32; a1: bits32; b:bits32): bits32;
+Var
+    b0, b1: bits32;
+    rem0, rem1, term0, term1: bits32;
+    z: bits32;
+Begin
+    if ( b <= a0 ) then
+    Begin
+       estimateDiv64To32 := $FFFFFFFF;
+       exit;
+    End;
+    b0 := b shr 16;
+    if ( b0 shl 16 <= a0 ) then
+       z:= $FFFF0000
+     else
+       z:= ( a0 div b0 ) shl 16;
+    mul32To64( b, z, term0, term1 );
+    sub64( a0, a1, term0, term1, rem0, rem1 );
+    while ( ( sbits32 (rem0) ) < 0 ) do
+    Begin
+        z := z - $10000;
+        b1 := b shl 16;
+        add64( rem0, rem1, b0, b1, rem0, rem1 );
+    End;
+    rem0 := ( rem0 shl 16 ) OR ( rem1 shr 16 );
+    if ( b0 shl 16 <= rem0 ) then
+      z := z or $FFFF
+    else
+      z := z or (rem0 div b0);
+    estimateDiv64To32 := z;
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns an approximation to the square root of the 32-bit significand given
+by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*}
+Function estimateSqrt32( aExp: int16; a: bits32 ): bits32;
+    const sqrtOddAdjustments: array[0..15] of bits16 = (
+        $0004, $0022, $005D, $00B1, $011D, $019F, $0236, $02E0,
+        $039C, $0468, $0545, $0631, $072B, $0832, $0946, $0A67
+    );
+    const sqrtEvenAdjustments: array[0..15] of bits16 = (
+        $0A2D, $08AF, $075A, $0629, $051A, $0429, $0356, $029E,
+        $0200, $0179, $0109, $00AF, $0068, $0034, $0012, $0002
+    );
+Var
+    index: int8;
+    z: bits32;
+Begin
+
+    index := ( a shr 27 ) AND 15;
+    if ( aExp AND 1 ) <> 0  then
+    Begin
+        z := $4000 + ( a shr 17 ) - sqrtOddAdjustments[ index ];
+        z := ( ( a div z ) shl 14 ) + ( z shl 15 );
+        a := a shr 1;
+    End
+    else
+    Begin
+        z := $8000 + ( a shr 17 ) - sqrtEvenAdjustments[ index ];
+        z := a div z + z;
+        if ( $20000 <= z ) then
+          z := $FFFF8000
+        else
+          z := ( z shl 15 );
+        if ( z <= a ) then
+        Begin
+           estimateSqrt32 := bits32 ( ( sbits32 (a )) shr 1 );
+           exit;
+        End;
+    End;
+    estimateSqrt32 := ( ( estimateDiv64To32( a, 0, z ) ) shr 1 ) + ( z shr 1 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit of
+`a'.  If `a' is zero, 32 is returned.
+-------------------------------------------------------------------------------
+*}
+Function countLeadingZeros32( a:bits32 ): int8;
+
+    const countLeadingZerosHigh:array[0..255] of int8 = (
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    );
+Var
+    shiftCount: int8;
+Begin
+
+    shiftCount := 0;
+    if ( a < $10000 ) then
+    Begin
+        shiftCount := shiftcount + 16;
+        a := a shl 16;
+    End;
+    if ( a < $1000000 ) then
+    Begin
+        shiftCount := shiftcount + 8;
+        a := a shl 8;
+    end;
+    shiftCount := shiftcount + countLeadingZerosHigh[ a shr 24 ];
+    countLeadingZeros32:= shiftCount;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
+equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*}
+Function eq64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
+Begin
+    eq64 :=  flag( a0 = b0 ) and flag( a1 = b1 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
+than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*}
+Function le64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
+Begin
+
+    le64:= flag( a0 < b0 ) or flag( ( a0 = b0 ) and ( a1 <= b1 ) );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
+than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*}
+Function lt64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
+Begin
+    lt64 := flag( a0 < b0 ) or flag( ( a0 = b0 ) and ( a1 < b1 ) );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
+equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*}
+Function ne64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
+Begin
+    ne64:= flag( a0 <> b0 ) or flag( a1 <> b1 );
+End;
+
+(*****************************************************************************)
+(*                      End Low-Level arithmetic                             *)
+(*****************************************************************************)
+
+
+
+{*
+-------------------------------------------------------------------------------
+Functions and definitions to determine:  (1) whether tininess for underflow
+is detected before or after rounding by default, (2) what (if anything)
+happens when exceptions are raised, (3) how signaling NaNs are distinguished
+from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
+are propagated from function inputs to output.  These details are ENDIAN
+specific
+-------------------------------------------------------------------------------
+*}
+{$IFDEF ENDIAN_LITTLE}
+{*
+-------------------------------------------------------------------------------
+Internal canonical NaN format.
+-------------------------------------------------------------------------------
+*}
+TYPE
+
+
+ commonNaNT = packed record
+   sign: flag;
+   high, low : bits32;
+ end;
+
+{*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN.
+-------------------------------------------------------------------------------
+*}
+const float32_default_nan = $FFC00000;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*}
+Function float32_is_nan( a : float32 ): flag;
+Begin
+
+    float32_is_nan:= flag( $FF000000 < bits32 ( a shl 1 ) );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*}
+Function float32_is_signaling_nan( a : float32  ): flag;
+Begin
+
+    float32_is_signaling_nan := flag
+      ( ( ( a shr 22 ) and $1FF ) = $1FE ) and( a and $003FFFFF );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*}
+Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT  );
+var
+    z : commonNaNT ;
+Begin
+    if ( float32_is_signaling_nan( a ) <> 0) then
+       float_raise( float_flag_invalid );
+    z.sign := a shr 31;
+    z.low := 0;
+    z.high := a shl 9;
+    c := z;
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the single-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*}
+Function commonNaNToFloat32( a : commonNaNT ): float32;
+Begin
+    commonNaNToFloat32 := ( ( bits32 (a.sign) ) shl 31 ) or $7FC00000 or ( a.high shr 9 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Takes two single-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*}
+Function propagateFloat32NaN( a : float32 ; b: float32 ): float32;
+Var
+    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
+label returnLargerSignificand;
+Begin
+    aIsNaN := float32_is_nan( a );
+    aIsSignalingNaN := float32_is_signaling_nan( a );
+    bIsNaN := float32_is_nan( b );
+    bIsSignalingNaN := float32_is_signaling_nan( b );
+    a := a or $00400000;
+    b := b or $00400000;
+    if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
+        float_raise( float_flag_invalid );
+    if ( aIsSignalingNaN )<> 0  then
+    Begin
+        if ( bIsSignalingNaN ) <>  0 then
+          goto returnLargerSignificand;
+        if bIsNan <> 0 then
+          propagateFloat32NaN := b
+        else
+          propagateFloat32NaN := a;
+        exit;
+    End
+    else if ( aIsNaN <> 0) then
+    Begin
+        if ( bIsSignalingNaN or not bIsNaN )<> 0 then
+        Begin
+           propagateFloat32NaN := a;
+           exit;
+        End;
+ returnLargerSignificand:
+        if ( bits32 ( a shl 1 ) < bits32 ( b shl 1 ) ) then
+        Begin
+           propagateFloat32NaN := b;
+           exit;
+        End;
+        if ( bits32 ( b shl 1 ) < bits32 ( a shl 1 ) ) then
+        Begin
+           propagateFloat32NaN :=  a;
+        End;
+        if a < b then
+          propagateFloat32NaN := a
+        else
+          propagateFloat32NaN := b;
+        exit;
+    End
+    else
+    Begin
+        propagateFloat32NaN := b;
+        exit;
+    End;
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN.  The `high' and
+`low' values hold the most- and least-significant bits, respectively.
+-------------------------------------------------------------------------------
+*}
+const
+    float64_default_nan_high = $FFF80000;
+    float64_default_nan_low  = $00000000;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*}
+Function float64_is_nan( a : float64 ) : flag;
+Begin
+
+    float64_is_nan :=
+           flag( $FFE00000 <= bits32 ( a.high shl 1 ) )
+        and ( a.low or ( a.high and $000FFFFF ) );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*}
+Function float64_is_signaling_nan( a : float64 ): flag;
+Begin
+
+    float64_is_signaling_nan :=
+           flag( ( ( a.high shr 19 ) and $FFF ) = $FFE )
+        and ( a.low or ( a.high and $0007FFFF ) );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*}
+Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
+Var
+    z : commonNaNT;
+Begin
+    if ( float64_is_signaling_nan( a )<>0 ) then
+        float_raise( float_flag_invalid );
+    z.sign := a.high shr 31;
+    shortShift64Left( a.high, a.low, 12, z.high, z.low );
+    c := z;
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the double-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*}
+Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64  );
+Var
+    z: float64;
+Begin
+    shift64Right( a.high, a.low, 12, z.high, z.low );
+    z.high := z.high or ( ( bits32 (a.sign) ) shl 31 ) or $7FF80000;
+    c := z;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Takes two double-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*}
+Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
+Var
+    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
+    label returnLargerSignificand;
+Begin
+    aIsNaN := float64_is_nan( a );
+    aIsSignalingNaN := float64_is_signaling_nan( a );
+    bIsNaN := float64_is_nan( b );
+    bIsSignalingNaN := float64_is_signaling_nan( b );
+    a.high := a.high or $00080000;
+    b.high := b.high or $00080000;
+    if ( aIsSignalingNaN or bIsSignalingNaN )<> 0 then
+        float_raise( float_flag_invalid );
+    if ( aIsSignalingNaN )<>0 then
+    Begin
+        if ( bIsSignalingNaN )<>0 then
+            goto returnLargerSignificand;
+        if bIsNan <> 0 then
+           c := b
+        else
+           c := a;
+        exit;
+    End
+    else if ( aIsNaN )<> 0 then
+    Begin
+        if ( bIsSignalingNaN or not bIsNaN ) <> 0 then
+        Begin
+          c := a;
+           exit;
+        End;
+ returnLargerSignificand:
+        if ( lt64( a.high shl 1, a.low, b.high shl 1, b.low ) ) <> 0 then
+        Begin
+           c := b;
+           exit;
+        End;
+        if ( lt64( b.high shl 1, b.low, a.high shl 1, a.low ) ) <> 0 then
+        Begin
+           c := a;
+           exit;
+        End;
+        if a.high < b.high then
+         c := a
+        else
+         c := b;
+        exit;
+    End
+    else
+    Begin
+        c := b;
+        exit;
+    End;
+End;
+{$ELSE}
+{ Big endian code }
+(*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*)
+ commonNANT = packed record
+  sign : flag;
+  high, low : bits32;
+ end;
+ 
+(*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*)
+const float32_default_nan = $7FFFFFFF;
+
+(*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*)
+function float32_is_nan(a:  float32): flag;
+begin
+    float32_is_nan := flag( 0xFF000000 < (bits32) ( a shl 1 ) );
+end;
+
+(*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*)
+function float32_is_signaling_nan(a: float32):flag;
+ begin
+   float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and ( (a and $003FFFFF)<>0 );
+ end;
+ 
+(*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*)
+Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT  );
+ var 
+  z: commonNANT;
+ begin
+   if float32_is_signaling_nan(a) then
+      float_raise(float_flag_invalid);
+   z.sign := a shr 31;
+   z.low := 0;
+   z.high := a shl 9;
+   c:=z;
+ end;
+
+(*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*)
+function CommonNanToFloat32(a : CommonNaNT): float32;
+ begin
+    CommonNanToFloat32:= ( ( (bits32) a.sign ) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
+ end;
+ 
+(*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*)
+function  propagateFloat32NaN( a: float32 ; b: float32): float32;
+ var
+  aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
+ begin
+    aIsNaN := float32_is_nan( a );
+    aIsSignalingNaN := float32_is_signaling_nan( a );
+    bIsNaN := float32_is_nan( b );
+    bIsSignalingNaN := float32_is_signaling_nan( b );
+    a := a or $00400000;
+    b := b or $00400000;
+    if ( aIsSignalingNaN or bIsSignalingNaN ) then
+       float_raise( float_flag_invalid );
+    if bIsSignalingNaN<>0 then
+        propagateFloat32Nan := b
+    else if aIsSignalingNan<>0 then
+        propagateFloat32Nan := a
+    else if bIsNan<>0 then
+        propagateFloat32Nan := b
+    else
+        propagateFloat32Nan := a;
+ end;
+
+
+(*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*)
+const
+    float64_default_nan_high = $7FFFFFFF;
+    float64_default_nan_low  = $FFFFFFFF;
+
+(*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*)
+
+function float64_is_nan(a: float64): flag;
+ begin
+    float_64_is_nan := flag
+           ( $FFE00000 <= (bits32) ( a.high shl 1 ) )
+        and ( (a.low<>0) or (( a.high and $000FFFFF )<>0) );
+ end;
+
+(*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*)
+function float64_is_signaling_nan( a:float64): flag;
+ begin
+    float64_is_signaling_nan := flag
+           ( ( ( a.high shr 19 ) and $FFF ) = $FFE )
+        and ( (a.low<>0) or (( a.high and 0x0007FFFF )<>0) );
+
+ end;
+
+(*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*)
+Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
+ var
+   z : commonNaNT;
+ begin
+    if ( float64_is_signaling_nan( a )<>0 ) then
+        float_raise( float_flag_invalid );
+    z.sign = a.high shr 31;
+    shortShift64Left( a.high, a.low, 12, z.high, z.low );
+    c:=z;
+ end;
+
+(*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*)
+Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64  );
+ var
+  z: float64;
+ begin
+    shift64Right( a.high, a.low, 12, z.high, z.low );
+    z.high := z.high or ( ( (bits32) a.sign )<<31 ) | 0x7FF80000;
+    c:=z;
+ end;
+ 
+(*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*)
+Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
+var
+ aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN : flag;
+ begin
+    aIsNaN := float64_is_nan( a );
+    aIsSignalingNaN := float64_is_signaling_nan( a );
+    bIsNaN := float64_is_nan( b );
+    bIsSignalingNaN := float64_is_signaling_nan( b );
+    a.high := a.high or $00080000;
+    b.high := b.high or $00080000;
+    if ( aIsSignalingNaN<>0 or bIsSignalingNaN<>0 ) then
+       float_raise( float_flag_invalid );
+    if bIsSignalingNaN<>0 then
+        c := b
+    else if aIsSignalingNan<>0 then
+        c := a
+    else if bIsNan<>0 then
+        c := b
+    else
+        c := a;
+ end;
+
+{$ENDIF} 
+
+(****************************************************************************)
+(*                        END ENDIAN SPECIFIC CODE                          *)
+(****************************************************************************)
+
+
+{*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function ExtractFloat32Frac(a : Float32) : Bits32;
+ Begin
+    ExtractFloat32Frac := A AND $007FFFFF;
+ End;
+
+
+{*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat32Exp( a: float32 ): Int16;
+  Begin
+    extractFloat32Exp := (a shr 23) AND $FF;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the sign bit of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat32Sign( a: float32 ): Flag;
+  Begin
+    extractFloat32Sign := a shr 31;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Normalizes the subnormal single-precision floating-point value represented
+by the denormalized significand `aSig'.  The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*}
+Procedure normalizeFloat32Subnormal( aSig : bits32; VAR zExpPtr: Int16; VAR zSigPtr :bits32);
+ Var
+   ShiftCount : BYTE;
+ Begin
+
+    shiftCount := countLeadingZeros32( aSig ) - 8;
+    zSigPtr := aSig shl shiftCount;
+    zExpPtr := 1 - shiftCount;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+single-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*}
+Function packFloat32( zSign: Flag; zExp : Int16; zSig: Bits32 ): Float32;
+ Begin
+
+    packFloat32 := ( ( bits32( zSign) ) shl 31 ) + ( ( bits32 (zExp) ) shl 23 )
+      + zSig;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the single-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  However, if the abstract value is too large, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal single-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 30
+and 29, which is 7 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function roundAndPackFloat32( zSign : Flag; zExp : Int16; zSig : Bits32 ) : float32;
+ Var
+   roundingMode : BYTE;
+   roundNearestEven : Flag;
+   roundIncrement, roundBits : BYTE;
+   IsTiny : Flag;
+ Begin
+    roundingMode := float_rounding_mode;
+    if (roundingMode = float_round_nearest_even) then
+      Begin
+        roundNearestEven := Flag(TRUE);
+      end
+    else
+       roundNearestEven := Flag(FALSE);
+    roundIncrement := $40;
+    if ( Boolean(roundNearestEven)  = FALSE)  then
+      Begin
+        if ( roundingMode = float_round_to_zero ) Then
+          Begin
+            roundIncrement := 0;
+          End
+        else
+          Begin
+            roundIncrement := $7F;
+            if ( zSign <> 0 ) then
+              Begin
+                if roundingMode = float_round_up then roundIncrement := 0;
+              End
+            else
+              Begin
+                if roundingMode = float_round_down then roundIncrement := 0;
+              End;
+         End
+      End;
+    roundBits := zSig AND $7F;
+    if ($FD <= bits16 (zExp) ) then
+     Begin
+        if (( $FD < zExp ) OR  ( zExp = $FD ) AND ( sbits32 ( zSig + roundIncrement ) < 0 ) ) then
+          Begin
+             float_raise( float_flag_overflow OR float_flag_inexact );
+             roundAndPackFloat32:=packFloat32( zSign, $FF, 0 ) - Flag( roundIncrement = 0 );
+             exit;
+          End;
+        if ( zExp < 0 ) then
+          Begin
+            isTiny :=
+                   flag(( float_detect_tininess = float_tininess_before_rounding )
+                OR ( zExp < -1 )
+                OR ( (zSig + roundIncrement) < $80000000 ));
+            shift32RightJamming( zSig, - zExp, zSig );
+            zExp := 0;
+            roundBits := zSig AND $7F;
+            if ( (isTiny = flag(TRUE)) and (roundBits<>0) ) then
+               float_raise( float_flag_underflow );
+          End;
+    End;
+    if ( roundBits )<> 0 then
+       float_exception_flags := float_flag_inexact OR float_exception_flags;
+    zSig := ( zSig + roundIncrement ) shr 7;
+    zSig := zSig AND not bits32( bits32( ( roundBits XOR $40 ) = 0 ) and roundNearestEven );
+    if ( zSig = 0 ) then zExp := 0;
+    roundAndPackFloat32 := packFloat32( zSign, zExp, zSig );
+    exit;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  This routine is just like
+`roundAndPackFloat32' except that `zSig' does not have to be normalized.
+Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+floating-point exponent.
+-------------------------------------------------------------------------------
+*}
+Function normalizeRoundAndPackFloat32( zSign: flag; zExp: int16; zSig:bits32 ): float32;
+  Var
+    ShiftCount : int8;
+  Begin
+    shiftCount := countLeadingZeros32( zSig ) - 1;
+    normalizeRoundAndPackFloat32 := roundAndPackFloat32( zSign, zExp - shiftCount, zSig shl shiftCount );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the least-significant 32 fraction bits of the double-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat64Frac( a: float64 ): bits32;
+  Begin
+    extractFloat64Frac := a.low;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the most-significant 20 fraction bits of the double-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat64Frac0(a: float64): bits32;
+  Begin
+    extractFloat64Frac0 := a.high and $000FFFFF;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the least-significant 32 fraction bits of the double-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat64Frac1(a: float64): bits32;
+  Begin
+    extractFloat64Frac1 := a.low;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat64Exp(a: float64): int16;
+ Begin
+    extractFloat64Exp:= ( a.high shr 20 ) AND $7FF;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the sign bit of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*}
+Function extractFloat64Sign(a: float64) : flag;
+ Begin
+    extractFloat64Sign := a.high shr 31;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value represented
+by the denormalized significand formed by the concatenation of `aSig0' and
+`aSig1'.  The normalized exponent is stored at the location pointed to by
+`zExpPtr'.  The most significant 21 bits of the normalized significand are
+stored at the location pointed to by `zSig0Ptr', and the least significant
+32 bits of the normalized significand are stored at the location pointed to
+by `zSig1Ptr'.
+-------------------------------------------------------------------------------
+*}
+Procedure normalizeFloat64Subnormal(
+     aSig0: bits32;
+     aSig1: bits32;
+     VAR zExpPtr : Int16;
+     VAR zSig0Ptr : Bits32;
+     VAR zSig1Ptr : Bits32
+ );
+ Var
+  ShiftCount : Int8;
+ Begin
+    if ( aSig0 = 0 ) then
+      Begin
+        shiftCount := countLeadingZeros32( aSig1 ) - 11;
+        if ( shiftCount < 0 ) then
+          Begin
+            zSig0Ptr := aSig1 shr ( - shiftCount );
+            zSig1Ptr := aSig1 shl ( shiftCount AND 31 );
+          End
+        else
+           Begin
+            zSig0Ptr := aSig1 shl shiftCount;
+            zSig1Ptr := 0;
+           End;
+        zExpPtr := - shiftCount - 31;
+      End
+    else
+      Begin
+        shiftCount := countLeadingZeros32( aSig0 ) - 11;
+        shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+        zExpPtr := 1 - shiftCount;
+      End;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', the exponent `zExp', and the significand formed by
+the concatenation of `zSig0' and `zSig1' into a double-precision floating-
+point value, returning the result.  After being shifted into the proper
+positions, the three fields `zSign', `zExp', and `zSig0' are simply added
+together to form the most significant 32 bits of the result.  This means
+that any integer portion of `zSig0' will be added into the exponent.  Since
+a properly normalized significand will have an integer portion equal to 1,
+the `zExp' input should be 1 less than the desired result exponent whenever
+`zSig0' and `zSig1' concatenated form a complete, normalized significand.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ packFloat64( zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1 : Bits32; VAR c : float64);
+ var
+    z: Float64;
+ Begin
+
+    z.low := zSig1;
+    z.high := ( ( bits32 (zSign) ) shl 31 ) + ( ( bits32 (zExp) ) shl 20 ) + zSig0;
+    c := z;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0', `zSig1',
+and `zSig2', and returns the proper double-precision floating-point value
+corresponding to the abstract input.  Ordinarily, the abstract value is
+simply rounded and packed into the double-precision format, with the inexact
+exception raised if the abstract input cannot be represented exactly.
+However, if the abstract value is too large, the overflow and inexact
+exceptions are raised and an infinity or maximal finite value is returned.
+If the abstract value is too small, the input value is rounded to a
+subnormal number, and the underflow and inexact exceptions are raised if the
+abstract input cannot be represented exactly as a subnormal double-precision
+floating-point number.
+    The input significand must be normalized or smaller.  If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding.  In the
+usual case that the input significand is normalized, `zExp' must be 1 less
+than the ``true'' floating-point exponent.  The handling of underflow and
+overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ roundAndPackFloat64(
+     zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1: Bits32; zSig2: Bits32; Var c: Float64 );
+ Var
+   roundingMode : Int8;
+   roundNearestEven, increment, isTiny : Flag;
+ Begin
+
+    roundingMode := float_rounding_mode;
+    roundNearestEven := flag( roundingMode = float_round_nearest_even );
+    increment := flag( sbits32 (zSig2) < 0 );
+    if ( roundNearestEven  = flag(FALSE) ) then
+      Begin
+        if ( roundingMode = float_round_to_zero ) then
+            increment := 0
+        else
+          Begin
+            if ( zSign )<> 0 then
+              Begin
+                increment := flag( roundingMode = float_round_down ) and zSig2;
+              End
+            else
+              Begin
+                increment := flag( roundingMode = float_round_up ) and zSig2;
+              End
+          End
+      End;
+    if ( $7FD <= bits16 (zExp) ) then
+      Begin
+        if (( $7FD < zExp )
+             or (( zExp = $7FD )
+                  and (eq64( $001FFFFF, $FFFFFFFF, zSig0, zSig1 )<>0)
+                  and (increment<>0)
+                )
+           ) then
+           Begin
+            float_raise( float_flag_overflow OR  float_flag_inexact );
+            if (( roundingMode = float_round_to_zero )
+                 or ( (zSign<>0) and ( roundingMode = float_round_up ) )
+                 or ( (zSign = 0) and ( roundingMode = float_round_down ) )
+               ) then
+              Begin
+                packFloat64( zSign, $7FE, $000FFFFF, $FFFFFFFF, c );
+                exit;
+              End;
+            packFloat64( zSign, $7FF, 0, 0, c );
+            exit;
+           End;
+        if ( zExp < 0 ) then
+           Begin
+            isTiny :=
+                   flag( float_detect_tininess = float_tininess_before_rounding )
+                or flag( zExp < -1 )
+                or  flag(increment = 0)
+                or flag(lt64( zSig0, zSig1, $001FFFFF, $FFFFFFFF)<>0);
+            shift64ExtraRightJamming(
+                zSig0, zSig1, zSig2, - zExp, zSig0, zSig1, zSig2 );
+            zExp := 0;
+            if ( isTiny<>0) and (zSig2<>0 ) then float_raise( float_flag_underflow );
+            if ( roundNearestEven )<>0 then
+              Begin
+                increment := flag( sbits32 (zSig2) < 0 );
+              End
+            else
+              Begin
+                if ( zSign )<>0 then
+                  Begin
+                    increment := flag( roundingMode = float_round_down ) and zSig2;
+                  End
+                else
+                  Begin
+                    increment := flag( roundingMode = float_round_up ) and zSig2;
+                  End
+              End;
+        End;
+    End;
+    if ( zSig2 )<>0 then
+       float_exception_flags := float_exception_flags OR  float_flag_inexact;
+    if ( increment )<>0 then
+      Begin
+        add64( zSig0, zSig1, 0, 1, zSig0, zSig1 );
+        {!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!}
+        zSig1 := zSig1 and not ( flag( zSig2 + zSig2 = 0 ) and roundNearestEven );
+      End
+    else
+      Begin
+        if ( ( zSig0 or zSig1 ) = 0 ) then zExp := 0;
+      End;
+    packFloat64( zSign, zExp, zSig0, zSig1, c );
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand formed by the concatenation of `zSig0' and `zSig1', and
+returns the proper double-precision floating-point value corresponding
+to the abstract input.  This routine is just like `roundAndPackFloat64'
+except that the input significand has fewer bits and does not have to be
+normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*}
+Procedure
+ normalizeRoundAndPackFloat64(
+     zSign:flag; zExp:int16; zSig0:bits32; zSig1:bits32; VAR c: float64 );
+ Var
+   shiftCount : int8;
+   zSig2 : bits32;
+ Begin
+
+    if ( zSig0 = 0 ) then
+     Begin
+        zSig0 := zSig1;
+        zSig1 := 0;
+        zExp := zExp -32;
+     End;
+    shiftCount := countLeadingZeros32( zSig0 ) - 11;
+    if ( 0 <= shiftCount ) then
+      Begin
+        zSig2 := 0;
+        shortShift64Left( zSig0, zSig1, shiftCount, zSig0, zSig1 );
+      End
+    else
+      Begin
+        shift64ExtraRightJamming
+          (zSig0, zSig1, 0, - shiftCount, zSig0, zSig1, zSig2 );
+      End;
+    zExp := zExp - shiftCount;
+    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, c );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the single-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function int32_to_float32( a: int32): float32;
+ Var
+  zSign : Flag;
+ Begin
+
+    if ( a = 0 ) then
+      Begin
+       int32_to_float32 := 0;
+       exit;
+      End;
+    if ( a = sbits32 ($80000000) ) then
+      Begin
+       int32_to_float32 := packFloat32( 1, $9E, 0 );
+       exit;
+      end;
+    zSign := flag( a < 0 );
+    If zSign<>0 then
+      a := -a;
+    int32_to_float32:=
+      normalizeRoundAndPackFloat32( zSign, $9C, a );
+ End;
+
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the double-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure int32_to_float64( a: int32; var c: float64 );
+  var
+    zSign : flag;
+    absA : bits32;
+    shiftCount : int8;
+    zSig0, zSig1 : bits32;
+  Begin
+
+    if ( a = 0 ) then
+      Begin
+       packFloat64( 0, 0, 0, 0, c );
+       exit;
+      end;
+    zSign := flag( a < 0 );
+    if ZSign<>0 then
+      AbsA := -a
+    else
+      AbsA := a;
+    shiftCount := countLeadingZeros32( absA ) - 11;
+    if ( 0 <= shiftCount ) then
+      Begin
+        zSig0 := absA shl shiftCount;
+        zSig1 := 0;
+      End
+    else
+      Begin
+        shift64Right( absA, 0, - shiftCount, zSig0, zSig1 );
+      End;
+    packFloat64( zSign, $412 - shiftCount, zSig0, zSig1,c );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*}
+Function float32_to_int32( a : float32) : int32;
+  Var
+    aSign: flag;
+    aExp, shiftCount: int16;
+    aSig, aSigExtra: bits32;
+    z: int32;
+    roundingMode: int8;
+  Begin
+
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    shiftCount := aExp - $96;
+    if ( 0 <= shiftCount ) then
+      Begin
+        if ( $9E <= aExp ) then
+          Begin
+            if ( a <> $CF000000 ) then
+              Begin
+                float_raise( float_flag_invalid );
+                if ( (aSign=0) or ( ( aExp = $FF ) and (aSig<>0) ) ) then
+                  Begin
+                    float32_to_int32 := $7FFFFFFF;
+                    exit;
+                  End;
+              End;
+            float32_to_int32 := sbits32 ($80000000);
+            exit;
+          End;
+        z := ( aSig or $00800000 ) shl shiftCount;
+        if ( aSign<>0 ) then z := - z;
+      End
+    else
+      Begin
+        if ( aExp < $7E ) then
+          Begin
+            aSigExtra := aExp OR aSig;
+            z := 0;
+          End
+        else
+         Begin
+            aSig := aSig OR $00800000;
+            aSigExtra := aSig shl ( shiftCount and 31 );
+            z := aSig shr ( - shiftCount );
+         End;
+        if ( aSigExtra<>0 ) then
+          float_exception_flags := float_exception_flags
+             or float_flag_inexact;
+        roundingMode := float_rounding_mode;
+        if ( roundingMode = float_round_nearest_even ) then
+          Begin
+            if ( sbits32 (aSigExtra) < 0 ) then
+              Begin
+                Inc(z);
+                if ( bits32 ( aSigExtra shl 1 ) = 0 ) then
+                  z := z and not 1;
+              End;
+              if ( aSign<>0 ) then
+                z := - z;
+          End
+        else
+          Begin
+            aSigExtra := flag( aSigExtra <> 0 );
+            if ( aSign<>0 ) then
+             Begin
+                z := z + (flag( roundingMode = float_round_down ) and aSigExtra);
+                z := - z;
+             End
+            else
+             Begin
+                z := z + (flag( roundingMode = float_round_up ) and aSigExtra);
+             End
+          End;
+      End;
+   float32_to_int32 := z;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic, except that the conversion is always rounded toward zero.
+If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+the conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*}
+Function float32_to_int32_round_to_zero( a: Float32 ): int32;
+ Var
+    aSign : flag;
+    aExp, shiftCount : int16;
+    aSig : bits32;
+    z : int32;
+ Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    shiftCount := aExp - $9E;
+    if ( 0 <= shiftCount ) then
+      Begin
+        if ( a <> $CF000000 ) then
+          Begin
+            float_raise( float_flag_invalid );
+            if ( (aSign=0) or ( ( aExp = $FF ) and (aSig<>0) ) ) then
+              Begin
+                float32_to_int32_round_to_zero := $7FFFFFFF;
+                exit;
+              end;
+          End;
+        float32_to_int32_round_to_zero:= sbits32 ($80000000);
+        exit;
+      End
+    else
+      if ( aExp <= $7E ) then
+      Begin
+        if ( aExp or aSig )<>0 then
+           float_exception_flags :=
+             float_exception_flags or float_flag_inexact;
+        float32_to_int32_round_to_zero := 0;
+        exit;
+      End;
+    aSig := ( aSig or $00800000 ) shl 8;
+    z := aSig shr ( - shiftCount );
+    if ( bits32 ( aSig shl ( shiftCount and 31 ) )<> 0 ) then
+      Begin
+           float_exception_flags :=
+             float_exception_flags or float_flag_inexact;
+      End;
+    if ( aSign<>0 ) then z := - z;
+    float32_to_int32_round_to_zero := z;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the double-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float32_to_float64( a : float32; var out: Float64);
+  Var
+    aSign : flag;
+    aExp : int16;
+    aSig, zSig0, zSig1: bits32;
+    tmp : CommonNanT;
+  Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    if ( aExp = $FF ) then
+      Begin
+        if ( aSig<>0 ) then
+          Begin
+            float32ToCommonNaN(a, tmp);
+            commonNaNToFloat64(tmp , out);
+            exit;
+          End;
+          packFloat64( aSign, $7FF, 0, 0, out );
+          exit;
+      End;
+    if ( aExp = 0 ) then
+      Begin
+        if ( aSig = 0 ) then
+          Begin
+            packFloat64( aSign, 0, 0, 0, out );
+            exit;
+          end;
+        normalizeFloat32Subnormal( aSig, aExp, aSig );
+        Dec(aExp);
+      End;
+    shift64Right( aSig, 0, 3, zSig0, zSig1 );
+    packFloat64( aSign, aExp + $380, zSig0, zSig1, out );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer,
+and returns the result as a single-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_round_to_int( a: float32): float32;
+  Var
+    aSign: flag;
+    aExp: int16;
+    lastBitMask, roundBitsMask: bits32;
+    roundingMode: int8;
+    z: float32;
+  Begin
+    aExp := extractFloat32Exp( a );
+    if ( $96 <= aExp ) then
+     Begin
+        if ( ( aExp = $FF ) and (extractFloat32Frac( a )<>0) ) then
+          Begin
+            float32_round_to_int:= propagateFloat32NaN( a, a );
+            exit;
+          End;
+        float32_round_to_int:=a;
+        exit;
+     End;
+    if ( aExp <= $7E ) then
+      Begin
+        if ( bits32 ( a shl 1 ) = 0 ) then
+          Begin
+             float32_round_to_int:=a;
+             exit;
+          end;
+        float_exception_flags
+          := float_exception_flags OR  float_flag_inexact;
+        aSign := extractFloat32Sign( a );
+
+        case ( float_rounding_mode ) of
+         float_round_nearest_even:
+            Begin
+              if ( ( aExp = $7E ) and (extractFloat32Frac( a )<>0) ) then
+                Begin
+                  float32_round_to_int := packFloat32( aSign, $7F, 0 );
+                  exit;
+                End;
+            End;
+         float_round_down:
+            Begin
+              if aSign <> 0 then
+                 float32_round_to_int := $BF800000
+              else
+                 float32_round_to_int := 0;
+              exit;
+            End;
+         float_round_up:
+            Begin
+              if aSign <> 0 then
+                 float32_round_to_int := $80000000
+              else
+                 float32_round_to_int := $3F800000;
+              exit;
+            End;
+        end;
+        float32_round_to_int := packFloat32( aSign, 0, 0 );
+      End;
+    lastBitMask := 1;
+    {_____________________________!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!}
+    lastBitMask := lastBitMask shl ($96 - aExp);
+    roundBitsMask := lastBitMask - 1;
+    z := a;
+    roundingMode := float_rounding_mode;
+    if ( roundingMode = float_round_nearest_even ) then
+      Begin
+        z := z + (lastBitMask shr 1);
+        if ( ( z and roundBitsMask ) = 0 ) then
+           z := z and not lastBitMask;
+      End
+    else if ( roundingMode <> float_round_to_zero ) then
+      Begin
+        if ( (extractFloat32Sign( z ) xor flag(roundingMode = float_round_up ))<>0 ) then
+          Begin
+            z := z + roundBitsMask;
+          End;
+      End;
+    z := z and not roundBitsMask;
+    if ( z <> a ) then
+      float_exception_flags := float_exception_flags or float_flag_inexact;
+    float32_round_to_int := z;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function addFloat32Sigs( a:float32; b: float32; zSign:flag ): float32;
+  Var
+    aExp, bExp, zExp: int16;
+    aSig, bSig, zSig: bits32;
+    expDiff: int16;
+    label roundAndPack;
+  Begin
+    aSig:=extractFloat32Frac( a );
+    aExp:=extractFloat32Exp( a );
+    bSig:=extractFloat32Frac( b );
+    bExp := extractFloat32Exp( b );
+    expDiff := aExp - bExp;
+    aSig := aSig shl 6;
+    bSig := bSig shl 6;
+    if ( 0 < expDiff ) then
+    Begin
+        if ( aExp = $FF ) then
+          Begin
+            if ( aSig <> 0) then
+              Begin
+                addFloat32Sigs := propagateFloat32NaN( a, b );
+                exit;
+              End;
+            addFloat32Sigs := a;
+            exit;
+          End;
+        if ( bExp = 0 ) then
+          Begin
+             Dec(expDiff);
+          End
+        else
+          Begin
+            bSig := bSig or $20000000;
+          End;
+        shift32RightJamming( bSig, expDiff, bSig );
+        zExp := aExp;
+    End
+    else
+    If ( expDiff < 0 ) then
+      Begin
+        if ( bExp = $FF ) then
+        Begin
+            if ( bSig<>0 ) then
+              Begin
+                addFloat32Sigs := propagateFloat32NaN( a, b );
+                exit;
+              end;
+
+            addFloat32Sigs := packFloat32( zSign, $FF, 0 );
+            exit;
+        End;
+        if ( aExp = 0 ) then
+          Begin
+            Inc(expDiff);
+          End
+        else
+          Begin
+            aSig := aSig OR $20000000;
+          End;
+        shift32RightJamming( aSig, - expDiff, aSig );
+        zExp := bExp;
+    End
+    else
+    Begin
+        if ( aExp = $FF ) then
+        Begin
+            if ( aSig OR  bSig )<> 0 then
+              Begin
+                addFloat32Sigs := propagateFloat32NaN( a, b );
+                exit;
+              end;
+            addFloat32Sigs := a;
+            exit;
+        End;
+        if ( aExp = 0 ) then
+          Begin
+             addFloat32Sigs := packFloat32( zSign, 0, ( aSig + bSig ) shr 6 );
+             exit;
+          end;
+        zSig := $40000000 + aSig + bSig;
+        zExp := aExp;
+        goto roundAndPack;
+    End;
+    aSig := aSig OR $20000000;
+    zSig := ( aSig + bSig ) shl 1;
+    Dec(zExp);
+    if ( sbits32 (zSig) < 0 ) then
+      Begin
+        zSig := aSig + bSig;
+        Inc(zExp);
+      End;
+ roundAndPack:
+    addFloat32Sigs := roundAndPackFloat32( zSign, zExp, zSig );
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the single-
+precision floating-point values `a' and `b'.  If `zSign' is 1, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function subFloat32Sigs( a:float32; b:float32; zSign:flag ): float32;
+  Var
+    aExp, bExp, zExp: int16;
+    aSig, bSig, zSig: bits32;
+    expDiff : int16;
+    label aExpBigger;
+    label bExpBigger;
+    label aBigger;
+    label bBigger;
+    label normalizeRoundAndPack;
+  Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    bSig := extractFloat32Frac( b );
+    bExp := extractFloat32Exp( b );
+    expDiff := aExp - bExp;
+    aSig := aSig shl 7;
+    bSig := bSig shl 7;
+    if ( 0 < expDiff ) then goto aExpBigger;
+    if ( expDiff < 0 ) then goto bExpBigger;
+    if ( aExp = $FF ) then
+    Begin
+        if ( aSig OR  bSig )<> 0 then
+          Begin
+           subFloat32Sigs := propagateFloat32NaN( a, b );
+           exit;
+          End;
+        float_raise( float_flag_invalid );
+        subFloat32Sigs := float32_default_nan;
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        aExp := 1;
+        bExp := 1;
+    End;
+    if ( bSig < aSig ) Then goto aBigger;
+    if ( aSig < bSig ) Then goto bBigger;
+    subFloat32Sigs := packFloat32( flag(float_rounding_mode = float_round_down), 0, 0 );
+    exit;
+ bExpBigger:
+    if ( bExp = $FF ) then
+    Begin
+        if ( bSig<>0 ) then
+        Begin
+          subFloat32Sigs := propagateFloat32NaN( a, b );
+          exit;
+        End;
+        subFloat32Sigs := packFloat32( zSign XOR 1, $FF, 0 );
+        exit;
+    End;
+    if ( aExp = 0 ) then
+      Begin
+        Inc(expDiff);
+      End
+    else
+      Begin
+        aSig := aSig OR $40000000;
+      End;
+    shift32RightJamming( aSig, - expDiff, aSig );
+    bSig := bSig OR $40000000;
+ bBigger:
+    zSig := bSig - aSig;
+    zExp := bExp;
+    zSign := zSign xor 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp = $FF ) then
+      Begin
+        if ( aSig <> 0) then
+          Begin
+            subFloat32Sigs := propagateFloat32NaN( a, b );
+            exit;
+          End;
+        subFloat32Sigs := a;
+        exit;
+      End;
+    if ( bExp = 0 ) then
+      Begin
+        Dec(expDiff);
+      End
+    else
+      Begin
+        bSig := bSig OR $40000000;
+      End;
+    shift32RightJamming( bSig, expDiff, bSig );
+    aSig := aSig OR $40000000;
+ aBigger:
+    zSig := aSig - bSig;
+    zExp := aExp;
+ normalizeRoundAndPack:
+    Dec(zExp);
+    subFloat32Sigs := normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_add( a: float32; b:float32 ): float32;
+  Var
+    aSign, bSign: Flag;
+  Begin
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign = bSign ) then
+      Begin
+        float32_add := addFloat32Sigs( a, b, aSign );
+      End
+    else
+      Begin
+        float32_add := subFloat32Sigs( a, b, aSign );
+      End;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_sub( a: float32 ; b:float32 ): float32;
+  Var
+    aSign, bSign: flag;
+  Begin
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign = bSign ) then
+      Begin
+        float32_sub := subFloat32Sigs( a, b, aSign );
+      End
+    else
+      Begin
+        float32_sub := addFloat32Sigs( a, b, aSign );
+      End;
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_mul(a: float32; b: float32 ) : float32;
+
+  Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, zExp : int16;
+    aSig, bSig, zSig0, zSig1: bits32;
+  Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    bSig := extractFloat32Frac( b );
+    bExp := extractFloat32Exp( b );
+    bSign := extractFloat32Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $FF ) then
+    Begin
+        if ( (aSig<>0) OR ( ( bExp = $FF ) AND  (bSig<>0) ) ) then
+        Begin
+            float32_mul := propagateFloat32NaN( a, b );
+        End;
+        if ( ( bExp OR  bSig ) = 0 ) then
+        Begin
+            float_raise( float_flag_invalid );
+            float32_mul := float32_default_nan;
+            exit;
+        End;
+        float32_mul := packFloat32( zSign, $FF, 0 );
+        exit;
+    End;
+    if ( bExp = $FF ) then
+    Begin
+        if ( bSig <> 0 ) then
+        Begin
+           float32_mul := propagateFloat32NaN( a, b );
+           exit;
+        End;
+        if ( ( aExp OR  aSig ) = 0 ) then
+        Begin
+            float_raise( float_flag_invalid );
+            float32_mul := float32_default_nan;
+            exit;
+        End;
+        float32_mul := packFloat32( zSign, $FF, 0 );
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( aSig = 0 ) then
+        Begin
+           float32_mul := packFloat32( zSign, 0, 0 );
+           exit;
+        End;
+        normalizeFloat32Subnormal( aSig, aExp, aSig );
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        if ( bSig = 0 ) then
+         Begin
+           float32_mul := packFloat32( zSign, 0, 0 );
+           exit;
+         End;
+        normalizeFloat32Subnormal( bSig, bExp, bSig );
+    End;
+    zExp := aExp + bExp - $7F;
+    aSig := ( aSig OR  $00800000 ) shl 7;
+    bSig := ( bSig OR  $00800000 ) shl 8;
+    mul32To64( aSig, bSig, zSig0, zSig1 );
+    zSig0 := zSig0 OR bits32( zSig1 <> 0 );
+    if ( 0 <= sbits32 ( zSig0 shl 1 ) ) then
+    Begin
+        zSig0 := zSig0 shl 1;
+        Dec(zExp);
+    End;
+    float32_mul := roundAndPackFloat32( zSign, zExp, zSig0 );
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_div(a: float32;b: float32 ): float32;
+  Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, zExp: int16;
+    aSig, bSig, zSig, rem0, rem1, term0, term1: bits32;
+  Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    bSig := extractFloat32Frac( b );
+    bExp := extractFloat32Exp( b );
+    bSign := extractFloat32Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $FF ) then
+      Begin
+        if ( aSig <> 0 ) then
+        Begin
+           float32_div := propagateFloat32NaN( a, b );
+           exit;
+        End;
+        if ( bExp = $FF ) then
+        Begin
+            if ( bSig <> 0) then
+            Begin
+              float32_div := propagateFloat32NaN( a, b );
+            End;
+            float_raise( float_flag_invalid );
+            float32_div := float32_default_nan;
+            exit;
+        End;
+        float32_div := packFloat32( zSign, $FF, 0 );
+        exit;
+      End;
+    if ( bExp = $FF ) then
+    Begin
+        if ( bSig <> 0) then
+        Begin
+          float32_div := propagateFloat32NaN( a, b );
+          exit;
+        End;
+        float32_div := packFloat32( zSign, 0, 0 );
+        exit;
+    End;
+    if ( bExp = 0 ) Then
+    Begin
+        if ( bSig = 0 ) Then
+        Begin
+            if ( ( aExp OR  aSig ) = 0 ) then
+            Begin
+                float_raise( float_flag_invalid );
+                float32_div := float32_default_nan;
+                exit;
+            End;
+            float_raise( float_flag_divbyzero );
+            float32_div := packFloat32( zSign, $FF, 0 );
+            exit;
+        End;
+        normalizeFloat32Subnormal( bSig, bExp, bSig );
+    End;
+    if ( aExp = 0 ) Then
+    Begin
+        if ( aSig = 0 ) Then
+        Begin
+          float32_div := packFloat32( zSign, 0, 0 );
+          exit;
+        End;
+        normalizeFloat32Subnormal( aSig, aExp, aSig );
+    End;
+    zExp := aExp - bExp + $7D;
+    aSig := ( aSig OR  $00800000 ) shl 7;
+    bSig := ( bSig OR  $00800000 ) shl 8;
+    if ( bSig <= ( aSig + aSig ) ) then
+    Begin
+        aSig := aSig shr 1;
+        Inc(zExp);
+    End;
+    zSig := estimateDiv64To32( aSig, 0, bSig );
+    if ( ( zSig and $3F ) <= 2 ) then
+    Begin
+        mul32To64( bSig, zSig, term0, term1 );
+        sub64( aSig, 0, term0, term1, rem0, rem1 );
+        while ( sbits32 (rem0) < 0 ) do
+        Begin
+            Dec(zSig);
+            add64( rem0, rem1, 0, bSig, rem0, rem1 );
+        End;
+        zSig := zSig or bits32( rem1 <> 0 );
+    End;
+    float32_div := roundAndPackFloat32( zSign, zExp, zSig );
+
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_rem(a: float32; b: float32 ):float32;
+  Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, expDiff: int16;
+    aSig, bSig, q, allZero, alternateASig: bits32;
+    sigMean: sbits32;
+  Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    bSig := extractFloat32Frac( b );
+    bExp := extractFloat32Exp( b );
+    bSign := extractFloat32Sign( b );
+    if ( aExp = $FF ) then
+    Begin
+        if ( (aSig<>0) OR ( ( bExp = $FF ) AND  (bSig <>0)) ) then
+        Begin
+            float32_rem := propagateFloat32NaN( a, b );
+            exit;
+        End;
+        float_raise( float_flag_invalid );
+        float32_rem := float32_default_nan;
+        exit;
+    End;
+    if ( bExp = $FF ) then
+    Begin
+        if ( bSig <> 0 ) then
+        Begin
+          float32_rem := propagateFloat32NaN( a, b );
+          exit;
+        End;
+        float32_rem := a;
+        exit;
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        if ( bSig = 0 ) then
+        Begin
+            float_raise( float_flag_invalid );
+            float32_rem := float32_default_nan;
+            exit;
+        End;
+        normalizeFloat32Subnormal( bSig, bExp, bSig );
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( aSig = 0 ) then
+        Begin
+           float32_rem := a;
+           exit;
+        End;
+        normalizeFloat32Subnormal( aSig, aExp, aSig );
+    End;
+    expDiff := aExp - bExp;
+    aSig := ( aSig OR  $00800000 ) shl 8;
+    bSig := ( bSig OR  $00800000 ) shl 8;
+    if ( expDiff < 0 ) then
+    Begin
+        if ( expDiff < -1 ) then
+        Begin
+           float32_rem := a;
+           exit;
+        End;
+        aSig := aSig shr 1;
+    End;
+    q := bits32( bSig <= aSig );
+    if ( q <> 0) then
+       aSig := aSig - bSig;
+    expDiff := expDiff - 32;
+    while ( 0 < expDiff ) do
+    Begin
+        q := estimateDiv64To32( aSig, 0, bSig );
+        if (2 < q) then
+         q := q - 2
+        else
+         q := 0;
+        aSig := - ( ( bSig shr 2 ) * q );
+        expDiff := expDiff - 30;
+    End;
+    expDiff := expDiff + 32;
+    if ( 0 < expDiff ) then
+    Begin
+        q := estimateDiv64To32( aSig, 0, bSig );
+        if (2 < q) then
+         q := q - 2
+        else
+         q := 0;
+        q := q shr (32 - expDiff);
+        bSig := bSig shr 2;
+        aSig := ( ( aSig shr 1 ) shl ( expDiff - 1 ) ) - bSig * q;
+    End
+    else
+    Begin
+        aSig := aSig shr 2;
+        bSig := bSig shr 2;
+    End;
+    Repeat
+        alternateASig := aSig;
+        Inc(q);
+        aSig := aSig - bSig;
+    Until not ( 0 <= sbits32 (aSig) );
+    sigMean := aSig + alternateASig;
+    if ( ( sigMean < 0 ) OR ( ( sigMean = 0 ) AND  (( q and 1 )<>0) ) ) then
+    Begin
+        aSig := alternateASig;
+    End;
+    zSign := flag( sbits32 (aSig) < 0 );
+    if ( zSign<>0 ) then
+      aSig := - aSig;
+    float32_rem := normalizeRoundAndPackFloat32( aSign xor zSign, bExp, aSig );
+  End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_sqrt(a: float32 ): float32;
+Var
+    aSign : flag;
+    aExp, zExp : int16;
+    aSig, zSig, rem0, rem1, term0, term1: bits32;
+    label roundAndPack;
+Begin
+    aSig := extractFloat32Frac( a );
+    aExp := extractFloat32Exp( a );
+    aSign := extractFloat32Sign( a );
+    if ( aExp = $FF ) then
+    Begin
+        if ( aSig <> 0) then
+        Begin
+           float32_sqrt := propagateFloat32NaN( a, 0 );
+           exit;
+        End;
+        if ( aSign = 0) then
+        Begin
+          float32_sqrt := a;
+          exit;
+        End;
+        float_raise( float_flag_invalid );
+        float32_sqrt := float32_default_nan;
+        exit;
+    End;
+    if ( aSign <> 0) then
+    Begin
+        if ( ( aExp OR  aSig ) = 0 ) then
+        Begin
+           float32_sqrt := a;
+           exit;
+        End;
+        float_raise( float_flag_invalid );
+        float32_sqrt := float32_default_nan;
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( aSig = 0 ) then
+        Begin
+           float32_sqrt := 0;
+           exit;
+        End;
+        normalizeFloat32Subnormal( aSig, aExp, aSig );
+    End;
+    zExp := ( ( aExp - $7F ) shr 1 ) + $7E;
+    aSig := ( aSig OR  $00800000 ) shl 8;
+    zSig := estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig and $7F ) <= 5 ) then
+    Begin
+        if ( zSig < 2 ) then
+        Begin
+            zSig := $7FFFFFFF;
+            goto roundAndPack;
+        End
+        else
+        Begin
+            aSig  := aSig shr (aExp and 1);
+            mul32To64( zSig, zSig, term0, term1 );
+            sub64( aSig, 0, term0, term1, rem0, rem1 );
+            while ( sbits32 (rem0) < 0 ) do
+            Begin
+                Dec(zSig);
+                shortShift64Left( 0, zSig, 1, term0, term1 );
+                term1 := term1 or 1;
+                add64( rem0, rem1, term0, term1, rem0, rem1 );
+            End;
+            zSig := zSig OR bits32( ( rem0 OR  rem1 ) <> 0 );
+        End;
+    End;
+    shift32RightJamming( zSig, 1, zSig );
+ roundAndPack:
+    float32_sqrt := roundAndPackFloat32( 0, zExp, zSig );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_eq( a:float32; b:float32): flag;
+Begin
+    if ((( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0))
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
+       ) then
+    Begin
+        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
+        Begin
+            float_raise( float_flag_invalid );
+        End;
+        float32_eq := 0;
+        exit;
+    End;
+    float32_eq := flag( a = b ) OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_le( a: float32; b : float32 ):flag;
+var
+    aSign, bSign: flag;
+Begin
+
+    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float32_le := 0;
+        exit;
+    End;
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+       float32_le :=  aSign OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
+       exit;
+    End;
+    float32_le := flag(flag( a = b ) OR flag( aSign xor flag( a < b ) ));
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_lt( a:float32 ; b : float32): flag;
+var
+    aSign, bSign: flag;
+Begin
+
+    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a ) <>0))
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b ) <>0) )
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float32_lt :=0;
+        exit;
+    End;
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+       float32_lt := aSign AND  flag( bits32 ( ( a OR  b ) shl 1 ) <> 0 );
+       exit;
+    End;
+    float32_lt := flag(flag( a <> b ) AND  flag( aSign xor flag( a < b ) ));
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The invalid exception is
+raised if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_eq_signaling( a: float32; b: float32) : flag;
+Begin
+
+    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a ) <> 0))
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b ) <> 0))
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float32_eq_signaling := 0;
+        exit;
+    End;
+    float32_eq_signaling := (flag( a = b ) OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 ));
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_le_quiet( a: float32 ; b : float32 ): flag;
+Var
+    aSign, bSign: flag;
+    aExp, bExp: int16;
+Begin
+    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
+       ) then
+    Begin
+        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
+        Begin
+            float_raise( float_flag_invalid );
+        End;
+        float32_le_quiet := 0;
+        exit;
+    End;
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+       float32_le_quiet := aSign OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
+       exit;
+    End;
+    float32_le_quiet := flag(flag( a = b ) OR flag( aSign xor flag( a < b ) ));
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float32_lt_quiet( a: float32 ; b: float32 ): flag;
+Var
+   aSign, bSign: flag;
+Begin
+    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
+         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
+       ) then
+    Begin
+        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
+        Begin
+            float_raise( float_flag_invalid );
+        End;
+        float32_lt_quiet := 0;
+        exit;
+    End;
+    aSign := extractFloat32Sign( a );
+    bSign := extractFloat32Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+        float32_lt_quiet := aSign AND  flag( bits32 ( ( a OR  b ) shl 1 ) <> 0 );
+        exit;
+    End;
+    float32_lt_quiet := flag(flag( a <> b ) AND  ( aSign xor flag( a < b ) ));
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_int32(a: float64): int32;
+var
+    aSign: flag;
+    aExp, shiftCount: int16;
+    aSig0, aSig1, absZ, aSigExtra: bits32;
+    z: int32;
+    roundingMode: int8;
+    label invalid;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    shiftCount := aExp - $413;
+    if ( 0 <= shiftCount ) then
+    Begin
+        if ( $41E < aExp ) then
+        Begin
+            if ( ( aExp = $7FF ) AND  (( aSig0 OR  aSig1 )<>0) ) then
+               aSign := 0;
+            goto invalid;
+        End;
+        shortShift64Left(
+            aSig0 OR  $00100000, aSig1, shiftCount, absZ, aSigExtra );
+        if ( $80000000 < absZ ) then
+          goto invalid;
+    End
+    else
+    Begin
+        aSig1 := flag( aSig1 <> 0 );
+        if ( aExp < $3FE ) then
+        Begin
+            aSigExtra := aExp OR  aSig0 OR  aSig1;
+            absZ := 0;
+        End
+        else
+        Begin
+            aSig0 := aSig0 OR $00100000;
+            aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR  aSig1;
+            absZ := aSig0 shr ( - shiftCount );
+        End;
+    End;
+    roundingMode := float_rounding_mode;
+    if ( roundingMode = float_round_nearest_even ) then
+    Begin
+        if ( sbits32(aSigExtra) < 0 ) then
+        Begin
+            Inc(absZ);
+            if ( bits32 ( aSigExtra shl 1 ) = 0 ) then
+               absZ :=  absZ and not 1;
+        End;
+        if aSign <> 0 then
+          z := - absZ
+        else
+          z := absZ;
+    End
+    else
+    Begin
+        aSigExtra := bits32( aSigExtra <> 0 );
+        if ( aSign <> 0) then
+        Begin
+            z := - (   absZ
+                    + ( int32( roundingMode = float_round_down ) and aSigExtra ) );
+        End
+        else
+        Begin
+            z := absZ + ( int32( roundingMode = float_round_up ) and aSigExtra );
+        End
+    End;
+    if ( (( aSign xor flag( z < 0 ) )<>0) AND  (z<>0) ) then
+    Begin
+ invalid:
+        float_raise( float_flag_invalid );
+        if (aSign <> 0 ) then
+          float64_to_int32 := sbits32 ($80000000)
+        else
+          float64_to_int32 :=  $7FFFFFFF;
+        exit;
+    End;
+    if ( aSigExtra <> 0) then
+       float_exception_flags := float_exception_flags or float_flag_inexact;
+    float64_to_int32 := z;
+End;
+
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic, except that the conversion is always rounded toward zero.
+If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+the conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_int32_round_to_zero(a: float64 ): int32;
+Var
+    aSign: flag;
+    aExp, shiftCount: int16;
+    aSig0, aSig1, absZ, aSigExtra: bits32;
+    z: int32;
+    label invalid;
+ Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    shiftCount := aExp - $413;
+    if ( 0 <= shiftCount ) then
+    Begin
+        if ( $41E < aExp ) then
+        Begin
+            if ( ( aExp = $7FF ) AND  (( aSig0 OR  aSig1 )<>0) ) then
+               aSign := 0;
+            goto invalid;
+        End;
+        shortShift64Left(
+            aSig0 OR  $00100000, aSig1, shiftCount, absZ, aSigExtra );
+    End
+    else
+    Begin
+        if ( aExp < $3FF ) then
+        Begin
+            if ( aExp OR  aSig0 OR  aSig1 )<>0 then
+            Begin
+                float_exception_flags :=
+                  float_exception_flags or float_flag_inexact;
+            End;
+            float64_to_int32_round_to_zero := 0;
+            exit;
+        End;
+        aSig0 := aSig0 or $00100000;
+        aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR  aSig1;
+        absZ := aSig0 shr ( - shiftCount );
+    End;
+    if aSign <> 0 then
+      z := - absZ
+    else
+      z := absZ;
+    if ( (( aSign xor flag( z < 0 )) <> 0) AND  (z<>0) ) then
+    Begin
+ invalid:
+        float_raise( float_flag_invalid );
+        if (aSign <> 0) then
+          float64_to_int32_round_to_zero := sbits32 ($80000000)
+        else
+          float64_to_int32_round_to_zero :=  $7FFFFFFF;
+        exit;
+    End;
+    if ( aSigExtra <> 0) then
+       float_exception_flags := float_exception_flags or float_flag_inexact;
+    float64_to_int32_round_to_zero := z;
+ End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_to_float32(a: float64 ): float32;
+Var
+    aSign: flag;
+    aExp: int16;
+    aSig0, aSig1, zSig: bits32;
+    allZero: bits32;
+    tmp : CommonNanT;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    if ( aExp = $7FF ) then
+    Begin
+        if ( aSig0 OR  aSig1 ) <> 0 then
+        Begin
+            float64ToCommonNaN( a, tmp );
+            float64_to_float32 := commonNaNToFloat32( tmp );
+            exit;
+        End;
+        float64_to_float32 := packFloat32( aSign, $FF, 0 );
+        exit;
+    End;
+    shift64RightJamming( aSig0, aSig1, 22, allZero, zSig );
+    if ( aExp <> 0) then
+      zSig := zSig OR $40000000;
+    float64_to_float32 := roundAndPackFloat32( aSign, aExp - $381, zSig );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer,
+and returns the result as a double-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_round_to_int(a: float64; var out: float64 );
+Var
+    aSign: flag;
+    aExp: int16;
+    lastBitMask, roundBitsMask: bits32;
+    roundingMode: int8;
+    z: float64;
+Begin
+    aExp := extractFloat64Exp( a );
+    if ( $413 <= aExp ) then
+    Begin
+        if ( $433 <= aExp ) then
+        Begin
+            if (    ( aExp = $7FF )
+                 AND
+            (
+            ( extractFloat64Frac0( a ) OR  extractFloat64Frac1( a )
+            ) <>0)
+            )  then
+            Begin
+                propagateFloat64NaN( a, a, out );
+                exit;
+            End;
+            out := a;
+            exit;
+        End;
+        lastBitMask := 1;
+        lastBitMask := ( lastBitMask shl ( $432 - aExp ) ) shl 1;
+        roundBitsMask := lastBitMask - 1;
+        z := a;
+        roundingMode := float_rounding_mode;
+        if ( roundingMode = float_round_nearest_even ) then
+        Begin
+            if ( lastBitMask <> 0) then
+            Begin
+                add64( z.high, z.low, 0, lastBitMask shr 1, z.high, z.low );
+                if ( ( z.low and roundBitsMask ) = 0 ) then
+                   z.low := z.low and not lastBitMask;
+            End
+            else
+            Begin
+                if ( sbits32 (z.low) < 0 ) then
+                Begin
+                    Inc(z.high);
+                    if ( bits32 ( z.low shl 1 ) = 0 ) then
+                      z.high := z.high and not 1;
+                End;
+            End;
+        End
+        else if ( roundingMode <> float_round_to_zero ) then
+        Begin
+            if (   extractFloat64Sign( z )
+                 xor flag( roundingMode = float_round_up ) )<> 0 then
+            Begin
+                add64( z.high, z.low, 0, roundBitsMask, z.high, z.low );
+            End;
+        End;
+        z.low := z.low and not roundBitsMask;
+    End
+    else
+    Begin
+        if ( aExp <= $3FE ) then
+        Begin
+            if ( ( ( bits32 ( a.high shl 1 ) ) OR  a.low ) = 0 ) then
+            Begin
+                out := a;
+                exit;
+            End;
+            float_exception_flags := float_exception_flags or
+               float_flag_inexact;
+            aSign := extractFloat64Sign( a );
+            case ( float_rounding_mode ) of
+             float_round_nearest_even:
+               Begin
+                if (    ( aExp = $3FE )
+                     AND  ( (extractFloat64Frac0( a ) OR  extractFloat64Frac1( a ) )<>0)
+                   ) then
+                Begin
+                    packFloat64( aSign, $3FF, 0, 0, out );
+                    exit;
+                End;
+
+               End;
+               float_round_down:
+                Begin
+                  if aSign<>0 then
+                   packFloat64( 1, $3FF, 0, 0, out )
+                  else
+                   packFloat64( 0, 0, 0, 0, out );
+                  exit;
+                End;
+             float_round_up:
+                Begin
+                  if aSign <> 0 then
+                   packFloat64( 1, 0, 0, 0, out )
+                  else
+                   packFloat64( 0, $3FF, 0, 0, out );
+                  exit;
+                End;
+            end;
+            packFloat64( aSign, 0, 0, 0, out );
+            exit;
+        End;
+        lastBitMask := 1;
+        lastBitMask := lastBitMask shl ($413 - aExp);
+        roundBitsMask := lastBitMask - 1;
+        z.low := 0;
+        z.high := a.high;
+        roundingMode := float_rounding_mode;
+        if ( roundingMode = float_round_nearest_even ) then
+        Begin
+            z.high := z.high + lastBitMask shr 1;
+            if ( ( ( z.high and roundBitsMask ) OR  a.low ) = 0 ) then
+            Begin
+                z.high := z.high and not lastBitMask;
+            End;
+        End
+        else if ( roundingMode <> float_round_to_zero ) then
+        Begin
+            if (   extractFloat64Sign( z )
+                 xor flag( roundingMode = float_round_up ) )<> 0 then
+            Begin
+                z.high := z.high or bits32( a.low <> 0 );
+                z.high := z.high + roundBitsMask;
+            End;
+        End;
+        z.high := z.high and not roundBitsMask;
+    End;
+    if ( ( z.low <> a.low ) OR ( z.high <> a.high ) ) then
+    Begin
+        float_exception_flags :=
+          float_exception_flags or float_flag_inexact;
+    End;
+    out := z;
+End;
+
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the double-precision
+floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure addFloat64Sigs( a:float64 ; b: float64 ; zSign:flag; Var out: float64 );
+Var
+    aExp, bExp, zExp: int16;
+    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
+    expDiff: int16;
+    label shiftRight1;
+    label roundAndPack;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    bSig1 := extractFloat64Frac1( b );
+    bSig0 := extractFloat64Frac0( b );
+    bExp := extractFloat64Exp( b );
+    expDiff := aExp - bExp;
+    if ( 0 < expDiff ) then
+    Begin
+        if ( aExp = $7FF ) then
+        Begin
+            if ( aSig0 OR  aSig1 ) <> 0 then
+            Begin
+              propagateFloat64NaN( a, b, out );
+              exit;
+            end;
+            out := a;
+            exit;
+        End;
+        if ( bExp = 0 ) then
+        Begin
+            Dec(expDiff);
+        End
+        else
+        Begin
+            bSig0 := bSig0 or $00100000;
+        End;
+        shift64ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, bSig0, bSig1, zSig2 );
+        zExp := aExp;
+    End
+    else if ( expDiff < 0 ) then
+    Begin
+        if ( bExp = $7FF ) then
+        Begin
+            if ( bSig0 OR  bSig1 ) <> 0 then
+            Begin
+               propagateFloat64NaN( a, b, out );
+               exit;
+            End;
+            packFloat64( zSign, $7FF, 0, 0, out );
+        End;
+        if ( aExp = 0 ) then
+        Begin
+            Inc(expDiff);
+        End
+        else
+        Begin
+            aSig0 := aSig0 or $00100000;
+        End;
+        shift64ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, aSig0, aSig1, zSig2 );
+        zExp := bExp;
+    End
+    else
+    Begin
+        if ( aExp = $7FF ) then
+        Begin
+            if ( aSig0 OR  aSig1 OR  bSig0 OR  bSig1 ) <> 0 then
+            Begin
+                propagateFloat64NaN( a, b, out );
+                exit;
+            End;
+            out := a;
+            exit;
+        End;
+        add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
+        if ( aExp = 0 ) then
+        Begin
+           packFloat64( zSign, 0, zSig0, zSig1, out );
+           exit;
+        End;
+        zSig2 := 0;
+        zSig0 := zSig0 or $00200000;
+        zExp := aExp;
+        goto shiftRight1;
+    End;
+    aSig0 := aSig0 or $00100000;
+    add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
+    Dec(zExp);
+    if ( zSig0 < $00200000 ) then
+       goto roundAndPack;
+    Inc(zExp);
+ shiftRight1:
+    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
+ roundAndPack:
+    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the double-
+precision floating-point values `a' and `b'.  If `zSign' is 1, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure subFloat64Sigs( a:float64; b: float64 ; zSign:flag; Var out: float64 );
+Var
+    aExp, bExp, zExp: int16;
+    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1: bits32;
+    expDiff: int16;
+    z: float64;
+    label aExpBigger;
+    label bExpBigger;
+    label aBigger;
+    label bBigger;
+    label normalizeRoundAndPack;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    bSig1 := extractFloat64Frac1( b );
+    bSig0 := extractFloat64Frac0( b );
+    bExp := extractFloat64Exp( b );
+    expDiff := aExp - bExp;
+    shortShift64Left( aSig0, aSig1, 10, aSig0, aSig1 );
+    shortShift64Left( bSig0, bSig1, 10, bSig0, bSig1 );
+    if ( 0 < expDiff ) then goto aExpBigger;
+    if ( expDiff < 0 ) then goto bExpBigger;
+    if ( aExp = $7FF ) then
+    Begin
+        if ( aSig0 OR  aSig1 OR  bSig0 OR  bSig1 ) <> 0 then
+        Begin
+            propagateFloat64NaN( a, b, out );
+            exit;
+        End;
+        float_raise( float_flag_invalid );
+        z.low := float64_default_nan_low;
+        z.high := float64_default_nan_high;
+        out := z;
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        aExp := 1;
+        bExp := 1;
+    End;
+    if ( bSig0 < aSig0 ) then goto aBigger;
+    if ( aSig0 < bSig0 ) then goto bBigger;
+    if ( bSig1 < aSig1 ) then goto aBigger;
+    if ( aSig1 < bSig1 ) then goto bBigger;
+    packFloat64( flag(float_rounding_mode = float_round_down), 0, 0, 0 , out);
+    exit;
+ bExpBigger:
+    if ( bExp = $7FF ) then
+    Begin
+        if ( bSig0 OR  bSig1 ) <> 0 then
+        Begin
+           propagateFloat64NaN( a, b, out );
+           exit;
+        End;
+        packFloat64( zSign xor 1, $7FF, 0, 0, out );
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        Inc(expDiff);
+    End
+    else
+    Begin
+        aSig0 := aSig0 or $40000000;
+    End;
+    shift64RightJamming( aSig0, aSig1, - expDiff, aSig0, aSig1 );
+    bSig0 := bSig0 or $40000000;
+ bBigger:
+    sub64( bSig0, bSig1, aSig0, aSig1, zSig0, zSig1 );
+    zExp := bExp;
+    zSign := zSign xor 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp = $7FF ) then
+    Begin
+        if ( aSig0 OR  aSig1 ) <> 0 then
+        Begin
+           propagateFloat64NaN( a, b, out );
+           exit;
+        End;
+        out :=  a;
+        exit;
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        Dec(expDiff);
+    End
+    else
+    Begin
+        bSig0 := bSig0 or $40000000;
+    End;
+    shift64RightJamming( bSig0, bSig1, expDiff, bSig0, bSig1 );
+    aSig0 := aSig0 or $40000000;
+ aBigger:
+    sub64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
+    zExp := aExp;
+ normalizeRoundAndPack:
+    Dec(zExp);
+    normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1, out );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of adding the double-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_add( a: float64; b : float64; Var out : float64);
+Var
+    aSign, bSign: flag;
+Begin
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign = bSign ) then
+    Begin
+         addFloat64Sigs( a, b, aSign, out );
+    End
+    else
+    Begin
+        subFloat64Sigs( a, b, aSign, out );
+    End;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_sub(a: float64; b : float64; var out: float64);
+Var
+    aSign, bSign: flag;
+Begin
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign = bSign ) then
+    Begin
+        subFloat64Sigs( a, b, aSign, out );
+    End
+    else
+    Begin
+        addFloat64Sigs( a, b, aSign, out );
+    End;
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_mul( a: float64; b:float64; Var out: float64);
+Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, zExp: int16;
+    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3: bits32;
+    z: float64;
+    label invalid;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    bSig1 := extractFloat64Frac1( b );
+    bSig0 := extractFloat64Frac0( b );
+    bExp := extractFloat64Exp( b );
+    bSign := extractFloat64Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FF ) then
+    Begin
+        if (    (( aSig0 OR  aSig1 ) <>0)
+             OR ( ( bExp = $7FF ) AND  (( bSig0 OR  bSig1 )<>0) ) ) then
+        Begin
+            propagateFloat64NaN( a, b, out );
+            exit;
+        End;
+        if ( ( bExp OR  bSig0 OR  bSig1 ) = 0 ) then goto invalid;
+        packFloat64( zSign, $7FF, 0, 0, out );
+        exit;
+    End;
+    if ( bExp = $7FF ) then
+    Begin
+        if ( bSig0 OR  bSig1 )<> 0 then
+        Begin
+          propagateFloat64NaN( a, b, out );
+          exit;
+        End;
+        if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := float64_default_nan_low;
+            z.high := float64_default_nan_high;
+            out := z;
+            exit;
+        End;
+        packFloat64( zSign, $7FF, 0, 0, out );
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( ( aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+           packFloat64( zSign, 0, 0, 0, out );
+           exit;
+        End;
+        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        if ( ( bSig0 OR  bSig1 ) = 0 ) then
+        Begin
+          packFloat64( zSign, 0, 0, 0, out );
+          exit;
+        End;
+        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
+    End;
+    zExp := aExp + bExp - $400;
+    aSig0 := aSig0 or $00100000;
+    shortShift64Left( bSig0, bSig1, 12, bSig0, bSig1 );
+    mul64To128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3 );
+    add64( zSig0, zSig1, aSig0, aSig1, zSig0, zSig1 );
+    zSig2 := zSig2 or flag( zSig3 <> 0 );
+    if ( $00200000 <= zSig0 ) then
+    Begin
+        shift64ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
+        Inc(zExp);
+    End;
+    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_div(a: float64; b : float64 ; var out: float64 );
+Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, zExp: int16;
+    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
+    rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
+    z: float64;
+    label invalid;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    bSig1 := extractFloat64Frac1( b );
+    bSig0 := extractFloat64Frac0( b );
+    bExp := extractFloat64Exp( b );
+    bSign := extractFloat64Sign( b );
+    zSign := aSign xor bSign;
+    if ( aExp = $7FF ) then
+    Begin
+        if ( aSig0 OR  aSig1 )<> 0 then
+        Begin
+           propagateFloat64NaN( a, b, out );
+           exit;
+        end;
+        if ( bExp = $7FF ) then
+        Begin
+            if ( bSig0 OR  bSig1 )<>0 then
+            Begin
+               propagateFloat64NaN( a, b, out );
+               exit;
+            End;
+            goto invalid;
+        End;
+        packFloat64( zSign, $7FF, 0, 0, out );
+        exit;
+    End;
+    if ( bExp = $7FF ) then
+    Begin
+        if ( bSig0 OR  bSig1 )<> 0 then
+        Begin
+          propagateFloat64NaN( a, b, out );
+          exit;
+        End;
+        packFloat64( zSign, 0, 0, 0, out );
+        exit;
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        if ( ( bSig0 OR  bSig1 ) = 0 ) then
+        Begin
+            if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
+            Begin
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low := float64_default_nan_low;
+                z.high := float64_default_nan_high;
+                out := z;
+                exit;
+            End;
+            float_raise( float_flag_divbyzero );
+            packFloat64( zSign, $7FF, 0, 0, out );
+            exit;
+        End;
+        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( ( aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+           packFloat64( zSign, 0, 0, 0, out );
+           exit;
+        End;
+        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
+    End;
+    zExp := aExp - bExp + $3FD;
+    shortShift64Left( aSig0 OR  $00100000, aSig1, 11, aSig0, aSig1 );
+    shortShift64Left( bSig0 OR  $00100000, bSig1, 11, bSig0, bSig1 );
+    if ( le64( bSig0, bSig1, aSig0, aSig1 )<>0 ) then
+    Begin
+        shift64Right( aSig0, aSig1, 1, aSig0, aSig1 );
+        Inc(zExp);
+    End;
+    zSig0 := estimateDiv64To32( aSig0, aSig1, bSig0 );
+    mul64By32To96( bSig0, bSig1, zSig0, term0, term1, term2 );
+    sub96( aSig0, aSig1, 0, term0, term1, term2, rem0, rem1, rem2 );
+    while ( sbits32 (rem0) < 0 ) do
+    Begin
+        Dec(zSig0);
+        add96( rem0, rem1, rem2, 0, bSig0, bSig1, rem0, rem1, rem2 );
+    End;
+    zSig1 := estimateDiv64To32( rem1, rem2, bSig0 );
+    if ( ( zSig1 and $3FF ) <= 4 ) then
+    Begin
+        mul64By32To96( bSig0, bSig1, zSig1, term1, term2, term3 );
+        sub96( rem1, rem2, 0, term1, term2, term3, rem1, rem2, rem3 );
+        while ( sbits32 (rem1) < 0 ) do
+        Begin
+            Dec(zSig1);
+            add96( rem1, rem2, rem3, 0, bSig0, bSig1, rem1, rem2, rem3 );
+        End;
+        zSig1 := zSig1 or flag( ( rem1 OR  rem2 OR  rem3 ) <> 0 );
+    End;
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, zSig0, zSig1, zSig2 );
+    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
+
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_rem(a: float64; b : float64; var out: float64);
+Var
+    aSign, bSign, zSign: flag;
+    aExp, bExp, expDiff: int16;
+    aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2: bits32;
+    allZero, alternateASig0, alternateASig1, sigMean1: bits32;
+    sigMean0: sbits32;
+    z: float64;
+    label invalid;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    bSig1 := extractFloat64Frac1( b );
+    bSig0 := extractFloat64Frac0( b );
+    bExp := extractFloat64Exp( b );
+    bSign := extractFloat64Sign( b );
+    if ( aExp = $7FF ) then
+    Begin
+        if ((( aSig0 OR  aSig1 )<>0)
+             OR ( ( bExp = $7FF ) AND  (( bSig0 OR  bSig1 )<>0) ) ) then
+        Begin
+            propagateFloat64NaN( a, b, out );
+            exit;
+        End;
+        goto invalid;
+    End;
+    if ( bExp = $7FF ) then
+    Begin
+        if ( bSig0 OR  bSig1 ) <> 0 then
+        Begin
+          propagateFloat64NaN( a, b, out );
+          exit;
+        End;
+        out := a;
+        exit;
+    End;
+    if ( bExp = 0 ) then
+    Begin
+        if ( ( bSig0 OR  bSig1 ) = 0 ) then
+        Begin
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low := float64_default_nan_low;
+            z.high := float64_default_nan_high;
+            out := z;
+            exit;
+        End;
+        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( ( aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+           out := a;
+           exit;
+        End;
+        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
+    End;
+    expDiff := aExp - bExp;
+    if ( expDiff < -1 ) then
+    Begin
+       out := a;
+       exit;
+    End;
+    shortShift64Left(
+        aSig0 OR  $00100000, aSig1, 11 - flag( expDiff < 0 ), aSig0, aSig1 );
+    shortShift64Left( bSig0 OR  $00100000, bSig1, 11, bSig0, bSig1 );
+    q := le64( bSig0, bSig1, aSig0, aSig1 );
+    if ( q )<>0 then
+       sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
+    expDiff := expDiff - 32;
+    while ( 0 < expDiff ) do
+    Begin
+        q := estimateDiv64To32( aSig0, aSig1, bSig0 );
+        if 4 < q then
+          q:= q - 4
+        else
+          q := 0;
+        mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
+        shortShift96Left( term0, term1, term2, 29, term1, term2, allZero );
+        shortShift64Left( aSig0, aSig1, 29, aSig0, allZero );
+        sub64( aSig0, 0, term1, term2, aSig0, aSig1 );
+        expDiff := expDiff - 29;
+    End;
+    if ( -32 < expDiff ) then
+    Begin
+        q := estimateDiv64To32( aSig0, aSig1, bSig0 );
+        if 4 < q then
+          q := q - 4
+        else
+          q := 0;
+        q := q shr (- expDiff);
+        shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
+        expDiff := expDiff + 24;
+        if ( expDiff < 0 ) then
+        Begin
+            shift64Right( aSig0, aSig1, - expDiff, aSig0, aSig1 );
+        End
+        else
+        Begin
+            shortShift64Left( aSig0, aSig1, expDiff, aSig0, aSig1 );
+        End;
+        mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
+        sub64( aSig0, aSig1, term1, term2, aSig0, aSig1 );
+    End
+    else
+    Begin
+        shift64Right( aSig0, aSig1, 8, aSig0, aSig1 );
+        shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
+    End;
+    Repeat
+        alternateASig0 := aSig0;
+        alternateASig1 := aSig1;
+        Inc(q);
+        sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
+    Until not ( 0 <= sbits32 (aSig0) );
+    add64(
+        aSig0, aSig1, alternateASig0, alternateASig1, sigMean0, sigMean1 );
+    if (    ( sigMean0 < 0 )
+         OR ( ( ( sigMean0 OR  sigMean1 ) = 0 ) AND  (( q AND 1 )<>0) ) ) then
+    Begin
+        aSig0 := alternateASig0;
+        aSig1 := alternateASig1;
+    End;
+    zSign := flag( sbits32 (aSig0) < 0 );
+    if ( zSign <> 0 ) then
+       sub64( 0, 0, aSig0, aSig1, aSig0, aSig1 );
+    normalizeRoundAndPackFloat64( aSign xor zSign, bExp - 4, aSig0, aSig1, out );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Procedure float64_sqrt( a: float64; var out: float64 );
+Var
+    aSign: flag;
+    aExp, zExp: int16;
+    aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0: bits32;
+    rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
+    z: float64;
+    label invalid;
+Begin
+    aSig1 := extractFloat64Frac1( a );
+    aSig0 := extractFloat64Frac0( a );
+    aExp := extractFloat64Exp( a );
+    aSign := extractFloat64Sign( a );
+    if ( aExp = $7FF ) then
+    Begin
+        if ( aSig0 OR  aSig1 ) <> 0 then
+        Begin
+           propagateFloat64NaN( a, a, out );
+           exit;
+        End;
+        if ( aSign = 0) then
+        Begin
+          out := a;
+          exit;
+        End;
+        goto invalid;
+    End;
+    if ( aSign <> 0 ) then
+    Begin
+        if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+           out := a;
+           exit;
+        End;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low := float64_default_nan_low;
+        z.high := float64_default_nan_high;
+        out := z;
+        exit;
+    End;
+    if ( aExp = 0 ) then
+    Begin
+        if ( ( aSig0 OR  aSig1 ) = 0 ) then
+        Begin
+           packFloat64( 0, 0, 0, 0, out );
+           exit;
+        End;
+        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
+    End;
+    zExp := ( ( aExp - $3FF ) shr 1 ) + $3FE;
+    aSig0 := aSig0 or $00100000;
+    shortShift64Left( aSig0, aSig1, 11, term0, term1 );
+    zSig0 := ( estimateSqrt32( aExp, term0 ) shr 1 ) + 1;
+    if ( zSig0 = 0 ) then
+       zSig0 := $7FFFFFFF;
+    doubleZSig0 := zSig0 + zSig0;
+    shortShift64Left( aSig0, aSig1, 9 - ( aExp and 1 ), aSig0, aSig1 );
+    mul32To64( zSig0, zSig0, term0, term1 );
+    sub64( aSig0, aSig1, term0, term1, rem0, rem1 );
+    while ( sbits32 (rem0) < 0 ) do
+    Begin
+        Dec(zSig0);
+        doubleZSig0 := doubleZSig0 - 2;
+        add64( rem0, rem1, 0, doubleZSig0 OR  1, rem0, rem1 );
+    End;
+    zSig1 := estimateDiv64To32( rem1, 0, doubleZSig0 );
+    if ( ( zSig1 and $1FF ) <= 5 ) then
+    Begin
+        if ( zSig1 = 0 ) then
+           zSig1 := 1;
+        mul32To64( doubleZSig0, zSig1, term1, term2 );
+        sub64( rem1, 0, term1, term2, rem1, rem2 );
+        mul32To64( zSig1, zSig1, term2, term3 );
+        sub96( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
+        while ( sbits32 (rem1) < 0 ) do
+        Begin
+            Dec(zSig1);
+            shortShift64Left( 0, zSig1, 1, term2, term3 );
+            term3 := term3 or 1;
+            term2 := term2 or doubleZSig0;
+            add96( rem1, rem2, rem3, 0, term2, term3, rem1, rem2, rem3 );
+        End;
+        zSig1 := zSig1 or bits32( ( rem1 OR  rem2 OR  rem3 ) <> 0 );
+    End;
+    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, zSig0, zSig1, zSig2 );
+    roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2, out );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_eq(a: float64; b: float64): flag;
+Begin
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        if ( (float64_is_signaling_nan( a )<>0) OR (float64_is_signaling_nan( b )<>0) ) then
+            float_raise( float_flag_invalid );
+        float64_eq := 0;
+        exit;
+    End;
+    float64_eq := flag(
+           ( a.low = b.low )
+        AND  (    ( a.high = b.high )
+             OR (    ( a.low = 0 )
+                  AND  ( bits32 ( ( a.high OR  b.high ) shl 1 ) = 0 ) )
+           ));
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-Point
+Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_le(a: float64;b: float64): flag;
+Var
+    aSign, bSign: flag;
+Begin
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float64_le := 0;
+        exit;
+    End;
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+        float64_le := flag(
+               (aSign <> 0)
+            OR (    ( ( bits32 ( ( a.high OR  b.high ) shl 1 ) ) OR  a.low OR  b.low )
+                 = 0 ));
+        exit;
+    End;
+    if aSign <> 0 then
+      float64_le := le64( b.high, b.low, a.high, a.low )
+    else
+      float64_le := le64( a.high, a.low, b.high, b.low );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_lt(a: float64;b: float64): flag;
+Var
+    aSign, bSign: flag;
+Begin
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float64_lt := 0;
+        exit;
+    End;
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+        float64_lt := flag(
+               (aSign <> 0)
+            AND  (    ( ( bits32 ( ( a.high OR  b.high ) shl 1 ) ) OR  a.low OR  b.low )
+                 <> 0 ));
+        exit;
+    End;
+    if aSign <> 0 then
+       float64_lt := lt64( b.high, b.low, a.high, a.low )
+    else
+       float64_lt := lt64( a.high, a.low, b.high, b.low );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The invalid exception is
+raised if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_eq_signaling( a: float64; b: float64): flag;
+Begin
+
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        float_raise( float_flag_invalid );
+        float64_eq_signaling := 0;
+        exit;
+    End;
+    float64_eq_signaling := flag(
+           ( a.low = b.low )
+        AND  (    ( a.high = b.high )
+             OR (    ( a.low = 0 )
+                  AND  ( bits32 ( ( a.high OR  b.high ) shl 1 ) = 0 ) )
+           ));
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_le_quiet(a: float64 ; b: float64 ): flag;
+Var
+    aSign, bSign : flag;
+Begin
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        if ( (float64_is_signaling_nan( a )<>0)  OR (float64_is_signaling_nan( b )<>0) ) then
+            float_raise( float_flag_invalid );
+        float64_le_quiet := 0;
+        exit;
+    End;
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+     float64_le_quiet := flag
+      ((aSign <> 0)
+            OR (    ( ( bits32 ( ( a.high OR  b.high ) shl 1 ) ) OR  a.low OR  b.low )
+                 = 0 ));
+        exit;
+    End;
+    if aSign <> 0 then
+      float64_le_quiet := le64( b.high, b.low, a.high, a.low )
+    else
+      float64_le_quiet := le64( a.high, a.low, b.high, b.low );
+End;
+
+{*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*}
+Function float64_lt_quiet(a: float64; b: float64 ): Flag;
+Var
+    aSign, bSign: flag;
+Begin
+    if
+         (
+                ( extractFloat64Exp( a ) = $7FF )
+            AND
+                (
+                    (extractFloat64Frac0( a )  OR  extractFloat64Frac1( a )) <>0
+                )
+         )
+         OR (
+                ( extractFloat64Exp( b ) = $7FF )
+           AND  (
+                    (extractFloat64Frac0( b ) OR  (extractFloat64Frac1( b )) <> 0
+                )
+           )
+       ) then
+    Begin
+        if ( (float64_is_signaling_nan( a )<>0) OR (float64_is_signaling_nan( b )<>0) ) then
+            float_raise( float_flag_invalid );
+        float64_lt_quiet := 0;
+        exit;
+    End;
+    aSign := extractFloat64Sign( a );
+    bSign := extractFloat64Sign( b );
+    if ( aSign <> bSign ) then
+    Begin
+      float64_lt_quiet := flag(
+               (aSign<>0)
+            AND  (    ( ( bits32 ( ( a.high OR  b.high ) shl 1 ) ) OR  a.low OR  b.low )
+                 <> 0 ));
+        exit;
+    End;
+    If aSign <> 0 then
+      float64_lt_quiet :=  lt64( b.high, b.low, a.high, a.low )
+    else
+      float64_lt_quiet := lt64( a.high, a.low, b.high, b.low );
+End;
+
+end.
+{
+   $Log$
+   Revision 1.1  2002-09-16 19:10:17  carl
+     * first revision of FPU emulation
+
+}