ソースを参照

* made multiplying, dividing and mod'ing of int64 and qword processor
independent with compilerprocs (+ small optimizations by using shift/and
where possible)

Jonas Maebe 24 年 前
コミット
a39cd8a580
6 ファイル変更181 行追加124 行削除
  1. 8 49
      compiler/i386/n386add.pas
  2. 8 45
      compiler/i386/n386mat.pas
  3. 62 2
      compiler/nadd.pas
  4. 61 3
      compiler/nmat.pas
  5. 13 1
      rtl/inc/compproc.inc
  6. 29 24
      rtl/inc/int64.inc

+ 8 - 49
compiler/i386/n386add.pas

@@ -1235,54 +1235,8 @@ interface
 
                    if nodetype=muln then
                      begin
-                        regstopush := $ff;
-                        remove_non_regvars_from_loc(location,regstopush);
-                        remove_non_regvars_from_loc(right.location,regstopush);
-                        
-                        { ugly hack because in *this* case, the pushed register }
-                        { must not be allocated later on (JM)                   }
-                        unusedregisters:=unused;
-                        usablecount:=usablereg32;
-                        pushusedregisters(pushedreg,regstopush);
-                        unused:=unusedregisters;
-                        usablereg32:=usablecount;
-                        
-                        if cs_check_overflow in aktlocalswitches then
-                          push_int(1)
-                        else
-                          push_int(0);
-                        { the left operand is in hloc, because the
-                          location of left is location but location
-                          is already destroyed
-                          
-                          not anymore... I had to change this because the
-                          regalloc info was completely wrong otherwise (JM)
-                        }
-                        emit_pushq_loc(location);
-                        release_qword_loc(location);
-                        clear_location(location);
-                        emit_pushq_loc(right.location);
-                        release_qword_loc(right.location);
-                        saveregvars($ff);
-                        if torddef(resulttype.def).typ=u64bit then
-                          emitcall('FPC_MUL_QWORD')
-                        else
-                          emitcall('FPC_MUL_INT64');
-                        { make sure we don't overwrite any results (JM) }
-                        if R_EDX in unused then
-                          begin
-                             location.registerhigh:=getexplicitregister32(R_EDX);
-                             location.registerlow:=getexplicitregister32(R_EAX);
-                          end
-                        else
-                          begin
-                             location.registerlow:=getexplicitregister32(R_EAX);
-                             location.registerhigh:=getexplicitregister32(R_EDX);
-                          end;
-                        location.loc := LOC_REGISTER;
-                        emit_reg_reg(A_MOV,S_L,R_EAX,location.registerlow);
-                        emit_reg_reg(A_MOV,S_L,R_EDX,location.registerhigh);
-                        popusedregisters(pushedreg);
+                       { should be handled in pass_1 (JM) }
+                       internalerror(200109051);
                      end
                    else
                      begin
@@ -1890,7 +1844,12 @@ begin
 end.
 {
   $Log$
-  Revision 1.22  2001-09-04 11:38:55  jonas
+  Revision 1.23  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.22  2001/09/04 11:38:55  jonas
     + searchsystype() and searchsystype() functions in symtable
     * changed ninl and nadd to use these functions
     * i386 set comparison functions now return their results in al instead

+ 8 - 45
compiler/i386/n386mat.pas

@@ -88,50 +88,8 @@ implementation
 
          if is_64bitint(resulttype.def) then
            begin
-              regstopush := $ff;
-              remove_non_regvars_from_loc(location,regstopush);
-              remove_non_regvars_from_loc(right.location,regstopush);
-              
-              { ugly hack because in *this* case, the pushed register }
-              { must not be allocated later on (JM)                   }
-              unusedregisters:=unused;
-              usablecount:=usablereg32;
-              pushusedregisters(pushedreg,regstopush);
-              unused:=unusedregisters;
-              usablereg32:=usablecount;
-              
-              emit_pushq_loc(location);
-              release_qword_loc(location);
-              clear_location(location);
-              emit_pushq_loc(right.location);
-              release_qword_loc(right.location);
-              if torddef(resulttype.def).typ=u64bit then
-                typename:='QWORD'
-              else
-                typename:='INT64';
-              if nodetype=divn then
-                opname:='DIV_'
-              else
-                opname:='MOD_';
-              saveregvars($ff);
-              emitcall('FPC_'+opname+typename);
-
-              { make sure we don't overwrite any results (JM) }
-              if R_EDX in unused then
-                begin
-                   location.registerhigh:=getexplicitregister32(R_EDX);
-                   location.registerlow:=getexplicitregister32(R_EAX);
-                end
-              else
-                begin
-                   location.registerlow:=getexplicitregister32(R_EAX);
-                   location.registerhigh:=getexplicitregister32(R_EDX);
-                end;
-              location.loc:=LOC_REGISTER;
-              emit_reg_reg(A_MOV,S_L,R_EAX,location.registerlow);
-              emit_reg_reg(A_MOV,S_L,R_EDX,location.registerhigh);
-
-              popusedregisters(pushedreg);
+             { should be handled in pass_1 (JM) }
+             internalerror(200109052);
            end
          else
            begin
@@ -1058,7 +1016,12 @@ begin
 end.
 {
   $Log$
-  Revision 1.15  2001-08-29 12:03:23  jonas
+  Revision 1.16  2001-09-05 15:22:10  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.15  2001/08/29 12:03:23  jonas
     * fixed wrong regalloc info around FPC_MUL/DIV/MOD_INT64/QWORD calls
     * fixed partial result overwriting with the above calls too
 

+ 62 - 2
compiler/nadd.pas

@@ -39,6 +39,9 @@ interface
           { parts explicitely in the code generator (JM)    }
           function first_addstring: tnode; virtual;
           function first_addset: tnode; virtual;
+          { only implements "muln" nodes, the rest always has to be done in }
+          { the code generator for performance reasons (JM)                 }
+          function first_add64bitint: tnode; virtual;
        end;
        taddnodeclass = class of taddnode;
 
@@ -1265,6 +1268,53 @@ implementation
       end;
 
 
+    function taddnode.first_add64bitint: tnode;
+      var
+        procname: string[31];
+        temp: tnode;
+        power: longint;
+      begin
+        result := nil;
+        { create helper calls mul }
+        if nodetype <> muln then
+          exit;
+        
+        { make sure that if there is a constant, that it's on the right }
+        if left.nodetype = ordconstn then
+          begin
+            temp := right;
+            right := left;
+            left := temp;
+          end;
+
+        { can we use a shift instead of a mul? }
+        if (right.nodetype = ordconstn) and
+           ispowerof2(tordconstnode(right).value,power) then
+          begin
+            tordconstnode(right).value := power;
+            result := cshlshrnode.create(shln,left,right);
+            { left and right are reused }
+            left := nil;
+            right := nil;
+            { return firstpassed new node }
+            firstpass(result);
+            exit;
+          end;
+
+        { otherwise, create the parameters for the helper }
+        right := ccallparanode.create(
+          cordconstnode.create(ord(cs_check_overflow in aktlocalswitches),booltype),
+          ccallparanode.create(right,ccallparanode.create(left,nil)));
+        left := nil;
+        if torddef(resulttype.def).typ = s64bit then
+          procname := 'fpc_mul_int64'
+        else
+          procname := 'fpc_mul_qword';
+        result := ccallnode.createintern(procname,right);
+        right := nil;
+        firstpass(result);
+      end;
+
     function taddnode.pass_1 : tnode;
       var
          hp      : tnode;
@@ -1329,7 +1379,12 @@ implementation
                end
               { is there a 64 bit type ? }
              else if (torddef(ld).typ in [s64bit,u64bit]) then
-               calcregisters(self,2,0,0)
+               begin
+                 result := first_add64bitint;
+                 if assigned(result) then
+                   exit;
+                 calcregisters(self,2,0,0)
+               end
              { is there a cardinal? }
              else if (torddef(ld).typ=u32bit) then
                begin
@@ -1527,7 +1582,12 @@ begin
 end.
 {
   $Log$
-  Revision 1.38  2001-09-04 11:38:54  jonas
+  Revision 1.39  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.38  2001/09/04 11:38:54  jonas
     + searchsystype() and searchsystype() functions in symtable
     * changed ninl and nadd to use these functions
     * i386 set comparison functions now return their results in al instead

+ 61 - 3
compiler/nmat.pas

@@ -33,6 +33,10 @@ interface
        tmoddivnode = class(tbinopnode)
           function pass_1 : tnode;override;
           function det_resulttype:tnode;override;
+         protected
+          { override the following if you want to implement }
+          { parts explicitely in the code generator (JM)    }
+          function first_moddiv64bitint: tnode; virtual;
        end;
        tmoddivnodeclass = class of tmoddivnode;
 
@@ -67,14 +71,14 @@ implementation
 
     uses
       systems,tokens,
-      verbose,globals,
+      verbose,globals,cutils,
 {$ifdef support_mmx}
       globtype,
 {$endif}
       symconst,symtype,symtable,symdef,types,
       htypechk,pass_1,cpubase,cpuinfo,
       cgbase,
-      ncon,ncnv,ncal;
+      ncon,ncnv,ncal,nadd;
 
 {****************************************************************************
                               TMODDIVNODE
@@ -188,6 +192,52 @@ implementation
       end;
 
 
+    function tmoddivnode.first_moddiv64bitint: tnode;
+      var
+        procname: string[31];
+        power: longint;
+      begin
+        result := nil;
+        
+        { divide/mod an unsigned number by a constant which is a power of 2? }
+        if (right.nodetype = ordconstn) and
+           not is_signed(resulttype.def) and
+           ispowerof2(tordconstnode(right).value,power) then
+          begin
+            if nodetype = divn then
+              begin
+                tordconstnode(right).value := power;
+                result := cshlshrnode.create(shrn,left,right)
+              end
+            else
+              begin
+                dec(tordconstnode(right).value);
+                result := caddnode.create(andn,left,right);
+              end;
+            { left and right are reused }
+            left := nil;
+            right := nil;
+            firstpass(result);
+            exit;
+          end;
+          
+        { otherwise create a call to a helper }
+        if nodetype = divn then
+          procname := 'fpc_div_'
+        else
+          procname := 'fpc_mod_';
+        if is_signed(resulttype.def) then
+          procname := procname + 'int64'
+        else
+          procname := procname + 'qword';
+
+        result := ccallnode.createintern(procname,ccallparanode.create(left,
+          ccallparanode.create(right,nil)));
+        left := nil;
+        right := nil;
+        firstpass(result);
+      end;
+
     function tmoddivnode.pass_1 : tnode;
       begin
          result:=nil;
@@ -200,6 +250,9 @@ implementation
          if (left.resulttype.def.deftype=orddef) and (right.resulttype.def.deftype=orddef) and
             (is_64bitint(left.resulttype.def) or is_64bitint(right.resulttype.def)) then
            begin
+             result := first_moddiv64bitint;
+             if assigned(result) then
+               exit;
              calcregisters(self,2,0,0);
            end
          else
@@ -587,7 +640,12 @@ begin
 end.
 {
   $Log$
-  Revision 1.22  2001-09-02 21:12:07  peter
+  Revision 1.23  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.22  2001/09/02 21:12:07  peter
     * move class of definitions into type section for delphi
 
   Revision 1.21  2001/08/26 13:36:41  florian

+ 13 - 1
rtl/inc/compproc.inc

@@ -157,6 +157,13 @@ Function fpc_val_qword_shortstr(Const S: ShortString; var Code: ValSInt): QWord;
 procedure fpc_widestr_qword(v : qword;len : longint;var s : widestring); compilerproc;
 procedure fpc_widestr_int64(v : int64;len : longint;var s : widestring); compilerproc;
 
+function fpc_div_qword(n,z : qword) : qword; compilerproc;
+function fpc_mod_qword(n,z : qword) : qword; compilerproc;
+function fpc_div_int64(n,z : int64) : int64; compilerproc;
+function fpc_mod_int64(n,z : int64) : int64; compilerproc;
+function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
+function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
+
 function fpc_do_is(aclass : tclass;aobject : tobject) : boolean; compilerproc;
 procedure fpc_do_as(aclass : tclass;aobject : tobject); compilerproc;
 procedure fpc_intf_decr_ref(var i: pointer); compilerproc;
@@ -240,7 +247,12 @@ Procedure fpc_typed_read(TypeSize : Longint;var f : TypedFile;var Buf); compiler
 
 {
   $Log$
-  Revision 1.9  2001-09-04 11:38:55  jonas
+  Revision 1.10  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.9  2001/09/04 11:38:55  jonas
     + searchsystype() and searchsystype() functions in symtable
     * changed ninl and nadd to use these functions
     * i386 set comparison functions now return their results in al instead

+ 29 - 24
rtl/inc/int64.inc

@@ -50,14 +50,14 @@
          count_leading_zeros:=r;
       end;
 
-    function divqword(n,z : qword) : qword;[public,alias: 'FPC_DIV_QWORD'];
+    function fpc_div_qword(n,z : qword) : qword;[public,alias: 'FPC_DIV_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          shift,lzz,lzn : longint;
          { one : qword; }
 
       begin
-         divqword:=0;
+         fpc_div_qword:=0;
          if n=0 then
            HandleErrorFrame(200,get_frame);
          lzz:=count_leading_zeros(z);
@@ -73,20 +73,20 @@
            if z>=n then
              begin
                 z:=z-n;
-                divqword:=divqword+(qword(1) shl shift);
+                fpc_div_qword:=fpc_div_qword+(qword(1) shl shift);
              end;
            dec(shift);
            n:=n shr 1;
          until shift<0;
       end;
 
-    function modqword(n,z : qword) : qword;[public,alias: 'FPC_MOD_QWORD'];
+    function fpc_mod_qword(n,z : qword) : qword;[public,alias: 'FPC_MOD_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          shift,lzz,lzn : longint;
 
       begin
-         modqword:=0;
+         fpc_mod_qword:=0;
          if n=0 then
            HandleErrorFrame(200,get_frame);
          lzz:=count_leading_zeros(z);
@@ -96,7 +96,7 @@
          { the d is greater than the n            }
          if lzn<lzz then
            begin
-              modqword:=z;
+              fpc_mod_qword:=z;
               exit;
            end;
          shift:=lzn-lzz;
@@ -107,10 +107,10 @@
            dec(shift);
            n:=n shr 1;
          until shift<0;
-         modqword:=z;
+         fpc_mod_qword:=z;
       end;
 
-    function divint64(n,z : int64) : int64;[public,alias: 'FPC_DIV_INT64'];
+    function fpc_div_int64(n,z : int64) : int64;[public,alias: 'FPC_DIV_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          sign : boolean;
@@ -128,7 +128,7 @@
            begin
               // the c:=comp(...) is necessary to shut up the compiler
               c:=comp(comp(z)/comp(n));
-              divint64:=qword(c);
+              fpc_div_int64:=qword(c);
            end
          else
 {$endif}
@@ -151,13 +151,13 @@
 
               { the div is coded by the compiler as call to divqword }
               if sign then
-                divint64:=-(q1 div q2)
+                fpc_div_int64:=-(q1 div q2)
               else
-                divint64:=q1 div q2;
+                fpc_div_int64:=q1 div q2;
            end;
       end;
 
-    function modint64(n,z : int64) : int64;[public,alias: 'FPC_MOD_INT64'];
+    function fpc_mod_int64(n,z : int64) : int64;[public,alias: 'FPC_MOD_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          signed : boolean;
@@ -185,15 +185,15 @@
            zq:=z;
          r:=zq mod nq;
          if signed then
-           modint64:=-int64(r)
+           fpc_mod_int64:=-int64(r)
          else
-           modint64:=r;
+           fpc_mod_int64:=r;
       end;
 
     { multiplies two qwords
       the longbool for checkoverflow avoids a misaligned stack
     }
-    function mulqword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD'];
+    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          _f1,bitpos : qword;
@@ -226,12 +226,12 @@
                  movl %eax,r
                  movl %edx,r+4
               end;
-              mulqword:=r;
+              fpc_mul_qword:=r;
            end
          else
 {$endif i386}
            begin
-              mulqword:=0;
+              fpc_mul_qword:=0;
               bitpos:=1;
 
               // store f1 for overflow checking
@@ -240,7 +240,7 @@
               for l:=0 to 63 do
                 begin
                    if (f2 and bitpos)<>0 then
-                     mulqword:=mulqword+f1;
+                     fpc_mul_qword:=fpc_mul_qword+f1;
 
                    f1:=f1 shl 1;
                    bitpos:=bitpos shl 1;
@@ -249,7 +249,7 @@
               { if one of the operands is greater than the result an }
               { overflow occurs                                      }
               if checkoverflow and (_f1 <> 0) and (f2 <>0) and
-                 ((_f1>mulqword) or (f2>mulqword)) then
+                 ((_f1>fpc_mul_qword) or (f2>fpc_mul_qword)) then
                 HandleErrorFrame(215,get_frame);
            end;
       end;
@@ -261,7 +261,7 @@
          ... using the comp multiplication
        the longbool for checkoverflow avoids a misaligned stack
      }
-    function mulint64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64'];
+    function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}
 
       var
          sign : boolean;
@@ -277,7 +277,7 @@
            begin
               // the c:=comp(...) is necessary to shut up the compiler
               c:=comp(comp(f1)*comp(f2));
-              mulint64:=int64(c);
+              fpc_mul_int64:=int64(c);
            end
          else
 {$endif}
@@ -310,9 +310,9 @@
                 HandleErrorFrame(215,get_frame);
 
               if sign then
-                mulint64:=-q3
+                fpc_mul_int64:=-q3
               else
-                mulint64:=q3;
+                fpc_mul_int64:=q3;
            end;
       end;
 
@@ -508,7 +508,12 @@
 
 {
   $Log$
-  Revision 1.11  2001-08-13 12:40:16  jonas
+  Revision 1.12  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.11  2001/08/13 12:40:16  jonas
     * renamed some str(x,y) and val(x,y) helpers so the naming scheme is the
       same for all string types
     + added the str(x,y) and val(x,y,z) helpers for int64/qword to