Browse Source

* split fpc_mul_<64 bit> into separate procedures with and without overflow checking

git-svn-id: trunk@35454 -
florian 8 years ago
parent
commit
7213a13081
7 changed files with 345 additions and 56 deletions
  1. 4 3
      compiler/nadd.pas
  2. 31 6
      rtl/arm/int64p.inc
  3. 26 10
      rtl/i386/int64p.inc
  4. 106 25
      rtl/i8086/int64p.inc
  5. 7 0
      rtl/inc/compproc.inc
  6. 120 6
      rtl/inc/int64.inc
  7. 51 6
      rtl/powerpc/int64p.inc

+ 4 - 3
compiler/nadd.pas

@@ -2944,9 +2944,7 @@ implementation
               end;
 
             { otherwise, create the parameters for the helper }
-            right := ccallparanode.create(
-              cordconstnode.create(ord(cs_check_overflow in current_settings.localswitches),pasbool8type,true),
-              ccallparanode.create(right,ccallparanode.create(left,nil)));
+            right :=   ccallparanode.create(right,ccallparanode.create(left,nil));
             left := nil;
             { only qword needs the unsigned code, the
               signed code is also used for currency }
@@ -2954,6 +2952,9 @@ implementation
               procname := 'fpc_mul_int64'
             else
               procname := 'fpc_mul_qword';
+            if cs_check_overflow in current_settings.localswitches then
+              procname := procname + '_checkoverflow';
+
             result := ccallnode.createintern(procname,right);
             right := nil;
           end;

+ 31 - 6
rtl/arm/int64p.inc

@@ -13,9 +13,38 @@
 
  **********************************************************************}
 
+{$ifndef VER3_0}
 {$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+function fpc_mul_qword(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+asm
+  stmfd sp!,{r4,r5,r6,r14}
+  mov r6,#0
+  // r4 = result lo, r5 = result hi
+{$ifdef ENDIAN_LITTLE}
+  // lo(f1)*lo(f2)
+  umull r4,r5,r0,r2
+  // lo(f1)*hi(f2)
+  umlal r5,r6,r0,r3
+  // hi(f1)*lo(f2)
+  umlal r5,r6,r1,r2
+  mov r0,r4
+  mov r1,r5
+{$else}
+  // lo(f1)*lo(f2)
+  umull r4,r5,r1,r3
+  // lo(f1)*hi(f2)
+  umlal r5,r6,r1,r2
+  // hi(f1)*lo(f2)
+  umlal r5,r6,r0,r3
+  mov r1,r4
+  mov r0,r5
+{$endif}
+  ldmfd sp!,{r4,r5,r6,r15}
+end;
+
+
+function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
 asm
   stmfd sp!,{r4,r5,r6,r14}
   mov r6,#0
@@ -51,10 +80,6 @@ asm
 {$endif}
   // no overflow?
   beq .Lexit
-  // checkoverflow?
-  ldr r2,[sp,#16]
-  cmp r2,#0
-  beq .Lexit
 
   mov r0,#215
   mov r1,fp
@@ -63,4 +88,4 @@ asm
   ldmfd sp!,{r4,r5,r6,r15}
 end;
 {$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
-
+{$endif VER3_0}

+ 26 - 10
rtl/i386/int64p.inc

@@ -361,15 +361,10 @@
             movl saveedi,%edi
       end;
 
+{$ifndef VER3_0}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-    { multiplies two qwords
-      the longbool for checkoverflow avoids a misaligned stack
-    }
-    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
-      var
-        overflowed : boolean;
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
       begin
-        overflowed:=false;
         { the following piece of code is taken from the
           AMD Athlon Processor x86 Code Optimization manual }
         asm
@@ -383,8 +378,6 @@
            mull %edx
            jmp .Lqwordmulready
         .Lqwordmultwomul:
-           cmpl $0,checkoverflow
-           jnz  .Loverflowchecked
            imul f1+4,%edx
            imul %eax,%ecx
            addl %edx,%ecx
@@ -393,6 +386,29 @@
         .Lqwordmulready:
            movl %eax,__RESULT
            movl %edx,__RESULT+4
+        .Lend:
+        end [ 'eax','edx','ecx'];
+      end;
+
+	
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      var
+        overflowed : boolean;
+      begin
+        overflowed:=false;
+        { the following piece of code is taken from the
+          AMD Athlon Processor x86 Code Optimization manual }
+        asm
+           movl f1+4,%edx
+           movl f2+4,%ecx
+           orl %ecx,%edx
+           movl f2,%edx
+           movl f1,%eax
+           jnz .Loverflowchecked
+           { if both upper dwords are =0 then it cannot overflow }
+           mull %edx
+           movl %eax,__RESULT
+           movl %edx,__RESULT+4
            jmp .Lend
 
         .Loverflowchecked:
@@ -431,4 +447,4 @@
         if overflowed then
           HandleErrorFrame(215,get_frame);
       end;
-
+{$endif VER3_0}

+ 106 - 25
rtl/i8086/int64p.inc

@@ -1,3 +1,4 @@
+
 {
     This file is part of the Free Pascal run time library.
     Copyright (c) 2013 by the Free Pascal development team
@@ -13,8 +14,9 @@
 
  **********************************************************************}
 
+{$ifndef VER3_0}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
+function fpc_mul_qword( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
 begin
 { routine contributed by Max Nazhalov
 
@@ -97,9 +99,6 @@ begin
     mov     word[result+6],dx
     mov     si,word[f1+4]
     mov     ax,word[f1+6]
-    mov     bx,word[checkoverflow]
-    or      bx,word[checkoverflow+2]
-    jnz     @@checked
     mov     di,word[f2]
     mul     di
     mov     cx,ax
@@ -124,74 +123,156 @@ begin
     adc     cx,dx
     add     word[result+4],bx
     adc     word[result+6],cx
-    jmp     @@done
-@@checked:
+  end [ 'ax','bx','cx','dx','si','di' ];
+end;
+
+
+function fpc_mul_qword_checkoverflow( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+begin
+{ routine contributed by Max Nazhalov
+
+64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0)
+
+//////// STEP 1; break-down to 32-bit multiplications, each of them generates 64-bit result:
+  (A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0)
+
+(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0:B0)
+ -- never overflows, forms the base of the final result, name it as "R64"
+
+(A3:A2*B3:B2) is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "((A2|A3)<>0)&&(B2|B3)<>0)"
+
+(A3:A2*B1:B0) and (A1:A0*B3:B2) are partially required for the final result
+ -- to be calculated on steps 2 and 3 as a correction for the "R64"
+
+//////// STEP 2; calculate "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each of them generates 32-bit result):
+  (A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0)
+
+((A3*B1)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "(A3<>0)&&(B1<>0)"
+
+((A3*B0)<<16)<<32: only low word of "A3*B0" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+((A2*B1)<<16)<<32: only low word of "A2*B1" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+(A2*B0)<<32: the whole dword is significand, name it as "X"
+ -- overflows if R64+(X<<32) produces C-flag
+
+//////// STEP 3; calculate "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each of them generates 32-bit result):
+  (A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2)
+
+((A1*B3)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "(A1<>0)&&(B3<>0)"
+
+((A1*B2)<<16)<<32: only low word of "A1*B2" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+((A0*B3)<<16)<<32: only low word "A0*B3" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+(A0*B2)<<32: the whole dword is significand, name it as "Y"
+ -- overflows if R64+(Y<<32) produces C-flag
+}
+  asm
+    mov     di,word[f1]
+    mov     bx,word[f1+2]
+    mov     si,word[f2]
+    mov     ax,word[f2+2]
+    push    bp
+    mov     cx,ax
+    mul     bx
+    xchg    ax,bx
+    mov     bp,dx
+    mul     si
+    xchg    ax,cx
+    add     bx,dx
+    adc     bp,0
+    mul     di
+    add     cx,ax
+    adc     bx,dx
+    adc     bp,0
+    mov     ax,di
+    mul     si
+    add     cx,dx
+    adc     bx,0
+    adc     bp,0
+    mov     dx,bp
+    pop     bp
+    mov     word[result],ax
+    mov     word[result+2],cx
+    mov     word[result+4],bx
+    mov     word[result+6],dx
+    mov     si,word[f1+4]
+    mov     ax,word[f1+6]
     mov     bx,word[f2+6]
     mov     cx,ax
     or      cx,si
     jz      @@nover1
     mov     cx,word[f2+4]
     or      cx,bx
-    jnz     @@done
+    jnz     @@overflow
 @@nover1:
     test    bx,bx
     jz      @@nover2
     mov     bx,word[f1+2]
     test    bx,bx
-    jnz     @@done
+    jnz     @@overflow
 @@nover2:
     test    ax,ax
     jz      @@nover3
     or      bx,word[f2+2]
-    jnz     @@done
+    jnz     @@overflow
 @@nover3:
     mov     di,word[f2]
     mul     di
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     mov     cx,ax
     mov     ax,word[f2+2]
     mul     si
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,di
     mul     si
     mov     bx,ax
     add     cx,dx
-    jc      @@done
+    jc      @@overflow
     mov     si,word[f2+4]
     mov     ax,word[f2+6]
     mov     di,word[f1]
     mul     di
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,word[f1+2]
     mul     si
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,di
     mul     si
     add     bx,ax
     adc     cx,dx
-    jc      @@done
+    jc      @@overflow
     add     word[result+4],bx
     adc     word[result+6],cx
-    jc      @@done
-    // checked and succeed
-    xor     ax,ax
-    mov     word[checkoverflow],ax
-    mov     word[checkoverflow+2],ax
+    jnc     @@done
+@@overflow:
+	call	FPC_OVERFLOW
 @@done:
   end [ 'ax','bx','cx','dx','si','di' ];
-  if checkoverflow then
-    HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
 end;
+{$endif VER3_0}
 
 
 {$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}

+ 7 - 0
rtl/inc/compproc.inc

@@ -602,8 +602,15 @@ function fpc_div_qword(n,z : qword) : qword; compilerproc;
 function fpc_mod_qword(n,z : qword) : qword; compilerproc;
 function fpc_div_int64(n,z : int64) : int64; compilerproc;
 function fpc_mod_int64(n,z : int64) : int64; compilerproc;
+{$ifdef VER3_0}
 function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
 function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
+{$else VER3_0}
+function fpc_mul_qword(f1,f2 : qword) : qword; compilerproc;
+function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword; compilerproc;
+function fpc_mul_int64(f1,f2 : int64) : int64; compilerproc;
+function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64; compilerproc;
+{$endif VER3_0}
 function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc;
 function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc;
 

+ 120 - 6
rtl/inc/int64.inc

@@ -1,3 +1,4 @@
+
 {
     This file is part of the Free Pascal run time library.
     Copyright (c) 1999-2000 by the Free Pascal development team
@@ -267,6 +268,7 @@
       end;
 {$endif FPC_SYSTEM_HAS_MOD_INT64}
 
+{$ifdef VER3_0}
 
 {$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
     { multiplies two qwords
@@ -304,24 +306,77 @@
       end;
 {$endif FPC_SYSTEM_HAS_MUL_QWORD}
 
+{$else VER3_0}
+
+{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+      var
+         bitpos : qword;
+         l : longint;
+      begin
+        result:=0;
+        bitpos:=1;
+
+        for l:=0 to 63 do
+          begin
+            if (f2 and bitpos)<>0 then
+              result:=result+f1;
+            f1:=f1 shl 1;
+            bitpos:=bitpos shl 1;
+          end;
+      end;
+
+
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      var
+         _f1,bitpos : qword;
+         l : longint;
+         f1overflowed : boolean;
+      begin
+        result:=0;
+        bitpos:=1;
+        f1overflowed:=false;
+
+        for l:=0 to 63 do
+          begin
+            if (f2 and bitpos)<>0 then
+              begin
+                _f1:=result;
+                result:=result+f1;
+
+                { if one of the operands is greater than the result an
+                  overflow occurs                                      }
+                if (f1overflowed or ((_f1<>0) and (f1<>0) and
+                  ((_f1>result) or (f1>result)))) then
+                  HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
+              end;
+            { when bootstrapping, we forget about overflow checking for qword :) }
+            f1overflowed:=f1overflowed or ((f1 and (qword(1) shl 63))<>0);
+            f1:=f1 shl 1;
+            bitpos:=bitpos shl 1;
+          end;
+      end;
+{$endif FPC_SYSTEM_HAS_MUL_QWORD}
+
+{$endif VER3_0}
 
 {$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
-    function fpc_mul_qword_compilerproc(f1,f2 : qword;checkoverflow : longbool) : qword; external name 'FPC_MUL_QWORD';
+    function fpc_mul_qword_compilerproc(f1,f2 : qword) : qword; external name 'FPC_MUL_QWORD';
 
     function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc;
       begin
-        fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2,false);
+        fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2);
       end;
 {$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
 
 
+{$ifdef VER3_0}
+
 {$ifndef FPC_SYSTEM_HAS_MUL_INT64}
     function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
-
       var
          sign : boolean;
          q1,q2,q3 : qword;
-
       begin
 {$ifdef EXCLUDE_COMPLEX_PROCS}
          runerror(219);
@@ -370,9 +425,68 @@
       end;
 {$endif FPC_SYSTEM_HAS_MUL_INT64}
 
+{$else VER3_0}
+
+{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
+    function fpc_mul_int64(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
+      begin
+        { there's no difference between signed and unsigned multiplication,
+          when the destination size is equal to the source size and overflow
+          checking is off }
+        { qword(f1)*qword(f2) is coded as a call to mulqword }
+        result:=int64(qword(f1)*qword(f2));
+      end;
+
+
+    function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64_CHECKOVERFLOW']; compilerproc;
+{$ifdef EXCLUDE_COMPLEX_PROCS}
+      begin
+        runerror(217);
+      end;
+{$else EXCLUDE_COMPLEX_PROCS}
+      var
+         sign : boolean;
+         q1,q2,q3 : qword;
+      begin
+        sign:=false;
+        if f1<0 then
+          begin
+            sign:=not(sign);
+            q1:=qword(-f1);
+          end
+        else
+          q1:=f1;
+        if f2<0 then
+          begin
+            sign:=not(sign);
+            q2:=qword(-f2);
+          end
+        else
+          q2:=f2;
+        { the q1*q2 is coded as call to mulqword }
+        q3:=q1*q2;
+
+        if (q1 <> 0) and (q2 <>0) and
+          ((q1>q3) or (q2>q3) or
+            { the bit 63 can be only set if we have $80000000 00000000 }
+            { and sign is true                                         }
+            (q3 shr 63<>0) and
+             ((q3<>qword(qword(1) shl 63)) or not(sign))
+            ) then
+          HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
+
+        if sign then
+          result:=-q3
+        else
+          result:=q3;
+      end;
+{$endif EXCLUDE_COMPLEX_PROCS}
+{$endif FPC_SYSTEM_HAS_MUL_INT64}
+
+{$endif VER3_0}
 
 {$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64}
-    function fpc_mul_int64_compilerproc(f1,f2 : int64;checkoverflow : longbool) : int64; external name 'FPC_MUL_INT64';
+    function fpc_mul_int64_compilerproc(f1,f2 : int64) : int64; external name 'FPC_MUL_INT64';
 
     function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc;
 {$ifdef EXCLUDE_COMPLEX_PROCS}
@@ -381,7 +495,7 @@
       end;
 {$else EXCLUDE_COMPLEX_PROCS}
       begin
-        fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2,false);
+        fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2);
       end;
 {$endif EXCLUDE_COMPLEX_PROCS}
 

+ 51 - 6
rtl/powerpc/int64p.inc

@@ -140,11 +140,9 @@
         mr   R4,R6
       end;
 
+{$ifndef VER3_0}	
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-    { multiplies two qwords
-      the longbool for checkoverflow avoids a misaligned stack
-    }
-    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
       assembler; nostackframe;
       asm
         // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
@@ -185,6 +183,54 @@
         add     r9,r9,r12
         cmplwi  cr7,r9,64   // is the sum now >= 64?
         cmplwi  cr1,r9,62   // or <= 62?
+
+      .LDone:
+        mullw   r4,r4,r6    // lsw of product of lsw's
+        mr      r3,r8       // get msw of product in correct register
+      end;
+	
+	
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      assembler; nostackframe;
+      asm
+        // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
+        //   res        f1        f2
+
+        or.     r10,r3,r5    // are both msw's 0?
+        mulhwu  r8,r4,r6    // msw of product of lsw's
+        xor     r0,r0,r0    // r0 := 0 for overflow checking
+        beq     .LDone      // if both msw's are zero, skip cross products
+        mullw   r9,r4,r5    // lsw of first cross-product
+        cntlzw  r11,r3      // count leading zeroes of msw1
+        cntlzw  r12,r5      // count leading zeroes of msw2
+        mullw   r7,r3,r6    // lsw of second cross-product
+        add     r12,r11,r12  // sum of leading zeroes
+        mr      r10,r8
+        or      r0,r12,r0    // maximise sum if no overflow checking, otherwise it remains
+        add     r8,r8,r9    // add
+        cmplwi  cr1,r0,64   // >= 64 leading zero bits in total? If so, no overflow
+        add     r8,r8,r7    // add
+        bge+    cr1,.LDone  // if the sum of leading zero's >= 64 (or checkoverflow was 0)
+                            // there's no overflow, otherwise more thorough check
+        add     r7,r7,r9
+        mulhwu  r3,r6,r3
+        addc    r7,r7,r10   // add the msw of the product of the lsw's, record carry
+        cntlzw  r9,r5
+        cntlzw  r10,r4      // get leading zeroes count of lsw f1
+        mulhwu  r5,r4,r5
+        addze   r3,r3
+        subfic  r0,r11,31   // if msw f1 = 0, then r0 := -1, else r0 >= 0
+        cntlzw  r7,r6
+        subfic  r11,r9,31   // same for f2
+        srawi   r0,r0,31    // if msw f1 = 0, then r0 := 1, else r0 := 0
+        srawi   r11,r11,31
+        and     r10,r10,r0    // if msw f1 <> 0, the leading zero count lsw f1 := 0
+        and     r9,r7,r11     // same for f2
+        or.     r5,r5,r3
+        add     r9,r9,r10    // add leading zero counts of lsw's to sum if appropriate
+        add     r9,r9,r12
+        cmplwi  cr7,r9,64   // is the sum now >= 64?
+        cmplwi  cr1,r9,62   // or <= 62?
         bge+    cr7,.LDone      // >= 64 leading zeroes -> no overflow
         ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
                             // for 63 zeroes, we need additional checks
@@ -198,5 +244,4 @@
         mullw   r4,r4,r6    // lsw of product of lsw's
         mr      r3,r8       // get msw of product in correct register
       end;
-
-
+{$endif VER3_0}