浏览代码

* split fpc_mul_<64 bit> into separate procedures with and without overflow checking

git-svn-id: trunk@35454 -
florian 8 年之前
父节点
当前提交
7213a13081
共有 7 个文件被更改,包括 345 次插入56 次删除
  1. 4 3
      compiler/nadd.pas
  2. 31 6
      rtl/arm/int64p.inc
  3. 26 10
      rtl/i386/int64p.inc
  4. 106 25
      rtl/i8086/int64p.inc
  5. 7 0
      rtl/inc/compproc.inc
  6. 120 6
      rtl/inc/int64.inc
  7. 51 6
      rtl/powerpc/int64p.inc

+ 4 - 3
compiler/nadd.pas

@@ -2944,9 +2944,7 @@ implementation
               end;
               end;
 
 
             { otherwise, create the parameters for the helper }
             { otherwise, create the parameters for the helper }
-            right := ccallparanode.create(
-              cordconstnode.create(ord(cs_check_overflow in current_settings.localswitches),pasbool8type,true),
-              ccallparanode.create(right,ccallparanode.create(left,nil)));
+            right :=   ccallparanode.create(right,ccallparanode.create(left,nil));
             left := nil;
             left := nil;
             { only qword needs the unsigned code, the
             { only qword needs the unsigned code, the
               signed code is also used for currency }
               signed code is also used for currency }
@@ -2954,6 +2952,9 @@ implementation
               procname := 'fpc_mul_int64'
               procname := 'fpc_mul_int64'
             else
             else
               procname := 'fpc_mul_qword';
               procname := 'fpc_mul_qword';
+            if cs_check_overflow in current_settings.localswitches then
+              procname := procname + '_checkoverflow';
+
             result := ccallnode.createintern(procname,right);
             result := ccallnode.createintern(procname,right);
             right := nil;
             right := nil;
           end;
           end;

+ 31 - 6
rtl/arm/int64p.inc

@@ -13,9 +13,38 @@
 
 
  **********************************************************************}
  **********************************************************************}
 
 
+{$ifndef VER3_0}
 {$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
 {$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+function fpc_mul_qword(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+asm
+  stmfd sp!,{r4,r5,r6,r14}
+  mov r6,#0
+  // r4 = result lo, r5 = result hi
+{$ifdef ENDIAN_LITTLE}
+  // lo(f1)*lo(f2)
+  umull r4,r5,r0,r2
+  // lo(f1)*hi(f2)
+  umlal r5,r6,r0,r3
+  // hi(f1)*lo(f2)
+  umlal r5,r6,r1,r2
+  mov r0,r4
+  mov r1,r5
+{$else}
+  // lo(f1)*lo(f2)
+  umull r4,r5,r1,r3
+  // lo(f1)*hi(f2)
+  umlal r5,r6,r1,r2
+  // hi(f1)*lo(f2)
+  umlal r5,r6,r0,r3
+  mov r1,r4
+  mov r0,r5
+{$endif}
+  ldmfd sp!,{r4,r5,r6,r15}
+end;
+
+
+function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
 asm
 asm
   stmfd sp!,{r4,r5,r6,r14}
   stmfd sp!,{r4,r5,r6,r14}
   mov r6,#0
   mov r6,#0
@@ -51,10 +80,6 @@ asm
 {$endif}
 {$endif}
   // no overflow?
   // no overflow?
   beq .Lexit
   beq .Lexit
-  // checkoverflow?
-  ldr r2,[sp,#16]
-  cmp r2,#0
-  beq .Lexit
 
 
   mov r0,#215
   mov r0,#215
   mov r1,fp
   mov r1,fp
@@ -63,4 +88,4 @@ asm
   ldmfd sp!,{r4,r5,r6,r15}
   ldmfd sp!,{r4,r5,r6,r15}
 end;
 end;
 {$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
 {$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
-
+{$endif VER3_0}

+ 26 - 10
rtl/i386/int64p.inc

@@ -361,15 +361,10 @@
             movl saveedi,%edi
             movl saveedi,%edi
       end;
       end;
 
 
+{$ifndef VER3_0}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-    { multiplies two qwords
-      the longbool for checkoverflow avoids a misaligned stack
-    }
-    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
-      var
-        overflowed : boolean;
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
       begin
       begin
-        overflowed:=false;
         { the following piece of code is taken from the
         { the following piece of code is taken from the
           AMD Athlon Processor x86 Code Optimization manual }
           AMD Athlon Processor x86 Code Optimization manual }
         asm
         asm
@@ -383,8 +378,6 @@
            mull %edx
            mull %edx
            jmp .Lqwordmulready
            jmp .Lqwordmulready
         .Lqwordmultwomul:
         .Lqwordmultwomul:
-           cmpl $0,checkoverflow
-           jnz  .Loverflowchecked
            imul f1+4,%edx
            imul f1+4,%edx
            imul %eax,%ecx
            imul %eax,%ecx
            addl %edx,%ecx
            addl %edx,%ecx
@@ -393,6 +386,29 @@
         .Lqwordmulready:
         .Lqwordmulready:
            movl %eax,__RESULT
            movl %eax,__RESULT
            movl %edx,__RESULT+4
            movl %edx,__RESULT+4
+        .Lend:
+        end [ 'eax','edx','ecx'];
+      end;
+
+	
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      var
+        overflowed : boolean;
+      begin
+        overflowed:=false;
+        { the following piece of code is taken from the
+          AMD Athlon Processor x86 Code Optimization manual }
+        asm
+           movl f1+4,%edx
+           movl f2+4,%ecx
+           orl %ecx,%edx
+           movl f2,%edx
+           movl f1,%eax
+           jnz .Loverflowchecked
+           { if both upper dwords are =0 then it cannot overflow }
+           mull %edx
+           movl %eax,__RESULT
+           movl %edx,__RESULT+4
            jmp .Lend
            jmp .Lend
 
 
         .Loverflowchecked:
         .Loverflowchecked:
@@ -431,4 +447,4 @@
         if overflowed then
         if overflowed then
           HandleErrorFrame(215,get_frame);
           HandleErrorFrame(215,get_frame);
       end;
       end;
-
+{$endif VER3_0}

+ 106 - 25
rtl/i8086/int64p.inc

@@ -1,3 +1,4 @@
+
 {
 {
     This file is part of the Free Pascal run time library.
     This file is part of the Free Pascal run time library.
     Copyright (c) 2013 by the Free Pascal development team
     Copyright (c) 2013 by the Free Pascal development team
@@ -13,8 +14,9 @@
 
 
  **********************************************************************}
  **********************************************************************}
 
 
+{$ifndef VER3_0}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
+function fpc_mul_qword( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
 begin
 begin
 { routine contributed by Max Nazhalov
 { routine contributed by Max Nazhalov
 
 
@@ -97,9 +99,6 @@ begin
     mov     word[result+6],dx
     mov     word[result+6],dx
     mov     si,word[f1+4]
     mov     si,word[f1+4]
     mov     ax,word[f1+6]
     mov     ax,word[f1+6]
-    mov     bx,word[checkoverflow]
-    or      bx,word[checkoverflow+2]
-    jnz     @@checked
     mov     di,word[f2]
     mov     di,word[f2]
     mul     di
     mul     di
     mov     cx,ax
     mov     cx,ax
@@ -124,74 +123,156 @@ begin
     adc     cx,dx
     adc     cx,dx
     add     word[result+4],bx
     add     word[result+4],bx
     adc     word[result+6],cx
     adc     word[result+6],cx
-    jmp     @@done
-@@checked:
+  end [ 'ax','bx','cx','dx','si','di' ];
+end;
+
+
+function fpc_mul_qword_checkoverflow( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+begin
+{ routine contributed by Max Nazhalov
+
+64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0)
+
+//////// STEP 1; break-down to 32-bit multiplications, each of them generates 64-bit result:
+  (A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0)
+
+(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0:B0)
+ -- never overflows, forms the base of the final result, name it as "R64"
+
+(A3:A2*B3:B2) is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "((A2|A3)<>0)&&(B2|B3)<>0)"
+
+(A3:A2*B1:B0) and (A1:A0*B3:B2) are partially required for the final result
+ -- to be calculated on steps 2 and 3 as a correction for the "R64"
+
+//////// STEP 2; calculate "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each of them generates 32-bit result):
+  (A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0)
+
+((A3*B1)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "(A3<>0)&&(B1<>0)"
+
+((A3*B0)<<16)<<32: only low word of "A3*B0" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+((A2*B1)<<16)<<32: only low word of "A2*B1" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+(A2*B0)<<32: the whole dword is significand, name it as "X"
+ -- overflows if R64+(X<<32) produces C-flag
+
+//////// STEP 3; calculate "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each of them generates 32-bit result):
+  (A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2)
+
+((A1*B3)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
+ -- always overflows if "<>0", so can be checked as "(A1<>0)&&(B3<>0)"
+
+((A1*B2)<<16)<<32: only low word of "A1*B2" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+((A0*B3)<<16)<<32: only low word "A0*B3" contributes to the final result if overflow is not checked.
+ -- overflows if the hi_word "<>0"
+ -- overflows if R64+(lo_word<<48) produces C-flag
+
+(A0*B2)<<32: the whole dword is significand, name it as "Y"
+ -- overflows if R64+(Y<<32) produces C-flag
+}
+  asm
+    mov     di,word[f1]
+    mov     bx,word[f1+2]
+    mov     si,word[f2]
+    mov     ax,word[f2+2]
+    push    bp
+    mov     cx,ax
+    mul     bx
+    xchg    ax,bx
+    mov     bp,dx
+    mul     si
+    xchg    ax,cx
+    add     bx,dx
+    adc     bp,0
+    mul     di
+    add     cx,ax
+    adc     bx,dx
+    adc     bp,0
+    mov     ax,di
+    mul     si
+    add     cx,dx
+    adc     bx,0
+    adc     bp,0
+    mov     dx,bp
+    pop     bp
+    mov     word[result],ax
+    mov     word[result+2],cx
+    mov     word[result+4],bx
+    mov     word[result+6],dx
+    mov     si,word[f1+4]
+    mov     ax,word[f1+6]
     mov     bx,word[f2+6]
     mov     bx,word[f2+6]
     mov     cx,ax
     mov     cx,ax
     or      cx,si
     or      cx,si
     jz      @@nover1
     jz      @@nover1
     mov     cx,word[f2+4]
     mov     cx,word[f2+4]
     or      cx,bx
     or      cx,bx
-    jnz     @@done
+    jnz     @@overflow
 @@nover1:
 @@nover1:
     test    bx,bx
     test    bx,bx
     jz      @@nover2
     jz      @@nover2
     mov     bx,word[f1+2]
     mov     bx,word[f1+2]
     test    bx,bx
     test    bx,bx
-    jnz     @@done
+    jnz     @@overflow
 @@nover2:
 @@nover2:
     test    ax,ax
     test    ax,ax
     jz      @@nover3
     jz      @@nover3
     or      bx,word[f2+2]
     or      bx,word[f2+2]
-    jnz     @@done
+    jnz     @@overflow
 @@nover3:
 @@nover3:
     mov     di,word[f2]
     mov     di,word[f2]
     mul     di
     mul     di
     test    dx,dx
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     mov     cx,ax
     mov     cx,ax
     mov     ax,word[f2+2]
     mov     ax,word[f2+2]
     mul     si
     mul     si
     test    dx,dx
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,di
     mov     ax,di
     mul     si
     mul     si
     mov     bx,ax
     mov     bx,ax
     add     cx,dx
     add     cx,dx
-    jc      @@done
+    jc      @@overflow
     mov     si,word[f2+4]
     mov     si,word[f2+4]
     mov     ax,word[f2+6]
     mov     ax,word[f2+6]
     mov     di,word[f1]
     mov     di,word[f1]
     mul     di
     mul     di
     test    dx,dx
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,word[f1+2]
     mov     ax,word[f1+2]
     mul     si
     mul     si
     test    dx,dx
     test    dx,dx
-    jnz     @@done
+    jnz     @@overflow
     add     cx,ax
     add     cx,ax
-    jc      @@done
+    jc      @@overflow
     mov     ax,di
     mov     ax,di
     mul     si
     mul     si
     add     bx,ax
     add     bx,ax
     adc     cx,dx
     adc     cx,dx
-    jc      @@done
+    jc      @@overflow
     add     word[result+4],bx
     add     word[result+4],bx
     adc     word[result+6],cx
     adc     word[result+6],cx
-    jc      @@done
-    // checked and succeed
-    xor     ax,ax
-    mov     word[checkoverflow],ax
-    mov     word[checkoverflow+2],ax
+    jnc     @@done
+@@overflow:
+	call	FPC_OVERFLOW
 @@done:
 @@done:
   end [ 'ax','bx','cx','dx','si','di' ];
   end [ 'ax','bx','cx','dx','si','di' ];
-  if checkoverflow then
-    HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
 end;
 end;
+{$endif VER3_0}
 
 
 
 
 {$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
 {$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}

+ 7 - 0
rtl/inc/compproc.inc

@@ -602,8 +602,15 @@ function fpc_div_qword(n,z : qword) : qword; compilerproc;
 function fpc_mod_qword(n,z : qword) : qword; compilerproc;
 function fpc_mod_qword(n,z : qword) : qword; compilerproc;
 function fpc_div_int64(n,z : int64) : int64; compilerproc;
 function fpc_div_int64(n,z : int64) : int64; compilerproc;
 function fpc_mod_int64(n,z : int64) : int64; compilerproc;
 function fpc_mod_int64(n,z : int64) : int64; compilerproc;
+{$ifdef VER3_0}
 function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
 function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
 function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
 function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
+{$else VER3_0}
+function fpc_mul_qword(f1,f2 : qword) : qword; compilerproc;
+function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword; compilerproc;
+function fpc_mul_int64(f1,f2 : int64) : int64; compilerproc;
+function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64; compilerproc;
+{$endif VER3_0}
 function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc;
 function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc;
 function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc;
 function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc;
 
 

+ 120 - 6
rtl/inc/int64.inc

@@ -1,3 +1,4 @@
+
 {
 {
     This file is part of the Free Pascal run time library.
     This file is part of the Free Pascal run time library.
     Copyright (c) 1999-2000 by the Free Pascal development team
     Copyright (c) 1999-2000 by the Free Pascal development team
@@ -267,6 +268,7 @@
       end;
       end;
 {$endif FPC_SYSTEM_HAS_MOD_INT64}
 {$endif FPC_SYSTEM_HAS_MOD_INT64}
 
 
+{$ifdef VER3_0}
 
 
 {$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
 {$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
     { multiplies two qwords
     { multiplies two qwords
@@ -304,24 +306,77 @@
       end;
       end;
 {$endif FPC_SYSTEM_HAS_MUL_QWORD}
 {$endif FPC_SYSTEM_HAS_MUL_QWORD}
 
 
+{$else VER3_0}
+
+{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+      var
+         bitpos : qword;
+         l : longint;
+      begin
+        result:=0;
+        bitpos:=1;
+
+        for l:=0 to 63 do
+          begin
+            if (f2 and bitpos)<>0 then
+              result:=result+f1;
+            f1:=f1 shl 1;
+            bitpos:=bitpos shl 1;
+          end;
+      end;
+
+
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      var
+         _f1,bitpos : qword;
+         l : longint;
+         f1overflowed : boolean;
+      begin
+        result:=0;
+        bitpos:=1;
+        f1overflowed:=false;
+
+        for l:=0 to 63 do
+          begin
+            if (f2 and bitpos)<>0 then
+              begin
+                _f1:=result;
+                result:=result+f1;
+
+                { if one of the operands is greater than the result an
+                  overflow occurs                                      }
+                if (f1overflowed or ((_f1<>0) and (f1<>0) and
+                  ((_f1>result) or (f1>result)))) then
+                  HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
+              end;
+            { when bootstrapping, we forget about overflow checking for qword :) }
+            f1overflowed:=f1overflowed or ((f1 and (qword(1) shl 63))<>0);
+            f1:=f1 shl 1;
+            bitpos:=bitpos shl 1;
+          end;
+      end;
+{$endif FPC_SYSTEM_HAS_MUL_QWORD}
+
+{$endif VER3_0}
 
 
 {$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
 {$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
-    function fpc_mul_qword_compilerproc(f1,f2 : qword;checkoverflow : longbool) : qword; external name 'FPC_MUL_QWORD';
+    function fpc_mul_qword_compilerproc(f1,f2 : qword) : qword; external name 'FPC_MUL_QWORD';
 
 
     function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc;
     function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc;
       begin
       begin
-        fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2,false);
+        fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2);
       end;
       end;
 {$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
 {$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
 
 
 
 
+{$ifdef VER3_0}
+
 {$ifndef FPC_SYSTEM_HAS_MUL_INT64}
 {$ifndef FPC_SYSTEM_HAS_MUL_INT64}
     function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
     function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
-
       var
       var
          sign : boolean;
          sign : boolean;
          q1,q2,q3 : qword;
          q1,q2,q3 : qword;
-
       begin
       begin
 {$ifdef EXCLUDE_COMPLEX_PROCS}
 {$ifdef EXCLUDE_COMPLEX_PROCS}
          runerror(219);
          runerror(219);
@@ -370,9 +425,68 @@
       end;
       end;
 {$endif FPC_SYSTEM_HAS_MUL_INT64}
 {$endif FPC_SYSTEM_HAS_MUL_INT64}
 
 
+{$else VER3_0}
+
+{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
+    function fpc_mul_int64(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
+      begin
+        { there's no difference between signed and unsigned multiplication,
+          when the destination size is equal to the source size and overflow
+          checking is off }
+        { qword(f1)*qword(f2) is coded as a call to mulqword }
+        result:=int64(qword(f1)*qword(f2));
+      end;
+
+
+    function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64_CHECKOVERFLOW']; compilerproc;
+{$ifdef EXCLUDE_COMPLEX_PROCS}
+      begin
+        runerror(217);
+      end;
+{$else EXCLUDE_COMPLEX_PROCS}
+      var
+         sign : boolean;
+         q1,q2,q3 : qword;
+      begin
+        sign:=false;
+        if f1<0 then
+          begin
+            sign:=not(sign);
+            q1:=qword(-f1);
+          end
+        else
+          q1:=f1;
+        if f2<0 then
+          begin
+            sign:=not(sign);
+            q2:=qword(-f2);
+          end
+        else
+          q2:=f2;
+        { the q1*q2 is coded as call to mulqword }
+        q3:=q1*q2;
+
+        if (q1 <> 0) and (q2 <>0) and
+          ((q1>q3) or (q2>q3) or
+            { the bit 63 can be only set if we have $80000000 00000000 }
+            { and sign is true                                         }
+            (q3 shr 63<>0) and
+             ((q3<>qword(qword(1) shl 63)) or not(sign))
+            ) then
+          HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
+
+        if sign then
+          result:=-q3
+        else
+          result:=q3;
+      end;
+{$endif EXCLUDE_COMPLEX_PROCS}
+{$endif FPC_SYSTEM_HAS_MUL_INT64}
+
+{$endif VER3_0}
 
 
 {$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64}
 {$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64}
-    function fpc_mul_int64_compilerproc(f1,f2 : int64;checkoverflow : longbool) : int64; external name 'FPC_MUL_INT64';
+    function fpc_mul_int64_compilerproc(f1,f2 : int64) : int64; external name 'FPC_MUL_INT64';
 
 
     function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc;
     function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc;
 {$ifdef EXCLUDE_COMPLEX_PROCS}
 {$ifdef EXCLUDE_COMPLEX_PROCS}
@@ -381,7 +495,7 @@
       end;
       end;
 {$else EXCLUDE_COMPLEX_PROCS}
 {$else EXCLUDE_COMPLEX_PROCS}
       begin
       begin
-        fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2,false);
+        fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2);
       end;
       end;
 {$endif EXCLUDE_COMPLEX_PROCS}
 {$endif EXCLUDE_COMPLEX_PROCS}
 
 

+ 51 - 6
rtl/powerpc/int64p.inc

@@ -140,11 +140,9 @@
         mr   R4,R6
         mr   R4,R6
       end;
       end;
 
 
+{$ifndef VER3_0}	
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
 {$define FPC_SYSTEM_HAS_MUL_QWORD}
-    { multiplies two qwords
-      the longbool for checkoverflow avoids a misaligned stack
-    }
-    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
+    function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
       assembler; nostackframe;
       assembler; nostackframe;
       asm
       asm
         // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
         // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
@@ -185,6 +183,54 @@
         add     r9,r9,r12
         add     r9,r9,r12
         cmplwi  cr7,r9,64   // is the sum now >= 64?
         cmplwi  cr7,r9,64   // is the sum now >= 64?
         cmplwi  cr1,r9,62   // or <= 62?
         cmplwi  cr1,r9,62   // or <= 62?
+
+      .LDone:
+        mullw   r4,r4,r6    // lsw of product of lsw's
+        mr      r3,r8       // get msw of product in correct register
+      end;
+	
+	
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      assembler; nostackframe;
+      asm
+        // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
+        //   res        f1        f2
+
+        or.     r10,r3,r5    // are both msw's 0?
+        mulhwu  r8,r4,r6    // msw of product of lsw's
+        xor     r0,r0,r0    // r0 := 0 for overflow checking
+        beq     .LDone      // if both msw's are zero, skip cross products
+        mullw   r9,r4,r5    // lsw of first cross-product
+        cntlzw  r11,r3      // count leading zeroes of msw1
+        cntlzw  r12,r5      // count leading zeroes of msw2
+        mullw   r7,r3,r6    // lsw of second cross-product
+        add     r12,r11,r12  // sum of leading zeroes
+        mr      r10,r8
+        or      r0,r12,r0    // maximise sum if no overflow checking, otherwise it remains
+        add     r8,r8,r9    // add
+        cmplwi  cr1,r0,64   // >= 64 leading zero bits in total? If so, no overflow
+        add     r8,r8,r7    // add
+        bge+    cr1,.LDone  // if the sum of leading zero's >= 64 (or checkoverflow was 0)
+                            // there's no overflow, otherwise more thorough check
+        add     r7,r7,r9
+        mulhwu  r3,r6,r3
+        addc    r7,r7,r10   // add the msw of the product of the lsw's, record carry
+        cntlzw  r9,r5
+        cntlzw  r10,r4      // get leading zeroes count of lsw f1
+        mulhwu  r5,r4,r5
+        addze   r3,r3
+        subfic  r0,r11,31   // if msw f1 = 0, then r0 := -1, else r0 >= 0
+        cntlzw  r7,r6
+        subfic  r11,r9,31   // same for f2
+        srawi   r0,r0,31    // if msw f1 = 0, then r0 := 1, else r0 := 0
+        srawi   r11,r11,31
+        and     r10,r10,r0    // if msw f1 <> 0, the leading zero count lsw f1 := 0
+        and     r9,r7,r11     // same for f2
+        or.     r5,r5,r3
+        add     r9,r9,r10    // add leading zero counts of lsw's to sum if appropriate
+        add     r9,r9,r12
+        cmplwi  cr7,r9,64   // is the sum now >= 64?
+        cmplwi  cr1,r9,62   // or <= 62?
         bge+    cr7,.LDone      // >= 64 leading zeroes -> no overflow
         bge+    cr7,.LDone      // >= 64 leading zeroes -> no overflow
         ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
         ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
                             // for 63 zeroes, we need additional checks
                             // for 63 zeroes, we need additional checks
@@ -198,5 +244,4 @@
         mullw   r4,r4,r6    // lsw of product of lsw's
         mullw   r4,r4,r6    // lsw of product of lsw's
         mr      r3,r8       // get msw of product in correct register
         mr      r3,r8       // get msw of product in correct register
       end;
       end;
-
-
+{$endif VER3_0}