Browse Source

+ Support (as target-independent as possible) optimization of division by constants:
The code generator gets two new methods, a_mul_reg_reg_pair and g_div_const_reg_reg. The first one is basically 32x32 to 64 bits multiplication (or any other size, with result having twice the size of arguments), which must be implemented for every target. The second one actually does the job, its default implementation taken from powerpc64 and is sufficiently good for all three-address targets.

+ Enabled optimized division for MIPS target, target-specific changes are under 30 lines.

git-svn-id: trunk@27904 -

sergei 11 years ago
parent
commit
cd27d64cd5
3 changed files with 103 additions and 9 deletions
  1. 70 0
      compiler/cgobj.pas
  2. 19 0
      compiler/mips/cgcpu.pas
  3. 14 9
      compiler/mips/ncpumat.pas

+ 70 - 0
compiler/cgobj.pas

@@ -249,6 +249,10 @@ unit cgobj;
           { bit scan instructions }
           { bit scan instructions }
           procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual; abstract;
           procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual; abstract;
 
 
+          { Multiplication with doubling result size.
+            dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
+          procedure a_mul_reg_reg_pair(list: TAsmList; size: tcgsize; src1,src2,dstlo,dsthi: TRegister);virtual;
+
           { fpu move instructions }
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
           procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
           procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
@@ -458,6 +462,9 @@ unit cgobj;
           { Generate code to exit an unwind-protected region. The default implementation
           { Generate code to exit an unwind-protected region. The default implementation
             produces a simple jump to destination label. }
             produces a simple jump to destination label. }
           procedure g_local_unwind(list: TAsmList; l: TAsmLabel);virtual;
           procedure g_local_unwind(list: TAsmList; l: TAsmLabel);virtual;
+          { Generate code for integer division by constant,
+            generic version is suitable for 3-address CPUs }
+          procedure g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister); virtual;
 
 
          protected
          protected
           function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
           function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
@@ -2501,6 +2508,69 @@ implementation
         internalerror(200807238);
         internalerror(200807238);
       end;
       end;
 
 
+
+    procedure tcg.a_mul_reg_reg_pair(list: TAsmList; size: TCgSize; src1,src2,dstlo,dsthi: TRegister);
+      begin
+        internalerror(2014060801);
+      end;
+
+
+    procedure tcg.g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister);
+      var
+        divreg: tregister;
+        magic: aInt;
+        u_magic: aWord;
+        u_shift: byte;
+        u_add: boolean;
+      begin
+        divreg:=getintregister(list,OS_INT);
+        if (size in [OS_S32,OS_S64]) then
+          begin
+            calc_divconst_magic_signed(tcgsize2size[size]*8,a,magic,u_shift);
+            { load magic value }
+            a_load_const_reg(list,OS_INT,magic,divreg);
+            { multiply, discarding low bits }
+            a_mul_reg_reg_pair(list,size,src,divreg,NR_NO,dst);
+            { add/subtract numerator }
+            if (a>0) and (magic<0) then
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,src,dst,dst)
+            else if (a<0) and (magic>0) then
+              a_op_reg_reg_reg(list,OP_SUB,OS_INT,src,dst,dst);
+            { shift shift places to the right (arithmetic) }
+            a_op_const_reg_reg(list,OP_SAR,OS_INT,u_shift,dst,dst);
+            { extract and add sign bit }
+            if (a>=0) then
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,tcgsize2size[size]*8-1,src,divreg)
+            else
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,tcgsize2size[size]*8-1,dst,divreg);
+            a_op_reg_reg_reg(list,OP_ADD,OS_INT,dst,divreg,dst);
+          end
+        else if (size in [OS_32,OS_64]) then
+          begin
+            calc_divconst_magic_unsigned(tcgsize2size[size]*8,a,u_magic,u_add,u_shift);
+            { load magic in divreg }
+            a_load_const_reg(list,OS_INT,tcgint(u_magic),divreg);
+            { multiply, discarding low bits }
+            a_mul_reg_reg_pair(list,size,src,divreg,NR_NO,dst);
+            if (u_add) then
+              begin
+                { Calculate "(numerator+result) shr u_shift", avoiding possible overflow }
+                a_op_reg_reg_reg(list,OP_SUB,OS_INT,dst,src,divreg);
+                { divreg=(numerator-result) }
+                a_op_const_reg_reg(list,OP_SHR,OS_INT,1,divreg,divreg);
+                { divreg=(numerator-result)/2 }
+                a_op_reg_reg_reg(list,OP_ADD,OS_INT,divreg,dst,divreg);
+                { divreg=(numerator+result)/2, already shifted by 1, so decrease u_shift. }
+                a_op_const_reg_reg(list,OP_SHR,OS_INT,u_shift-1,divreg,dst);
+              end
+            else
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,u_shift,dst,dst);
+          end
+        else
+          InternalError(2014060601);
+      end;
+
+
 {*****************************************************************************
 {*****************************************************************************
                                     TCG64
                                     TCG64
 *****************************************************************************}
 *****************************************************************************}

+ 19 - 0
compiler/mips/cgcpu.pas

@@ -76,6 +76,7 @@ type
     procedure g_flags2reg(list: tasmlist; size: TCgSize; const f: TResFlags; reg: tregister); override;
     procedure g_flags2reg(list: tasmlist; size: TCgSize; const f: TResFlags; reg: tregister); override;
     procedure a_jmp_always(List: tasmlist; l: TAsmLabel); override;
     procedure a_jmp_always(List: tasmlist; l: TAsmLabel); override;
     procedure a_jmp_name(list: tasmlist; const s: string); override;
     procedure a_jmp_name(list: tasmlist; const s: string); override;
+    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
     procedure g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef); override;
     procedure g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef); override;
     procedure g_overflowCheck_loc(List: tasmlist; const Loc: TLocation; def: TDef; ovloc: tlocation); override;
     procedure g_overflowCheck_loc(List: tasmlist; const Loc: TLocation; def: TDef; ovloc: tlocation); override;
     procedure g_proc_entry(list: tasmlist; localsize: longint; nostackframe: boolean); override;
     procedure g_proc_entry(list: tasmlist; localsize: longint; nostackframe: boolean); override;
@@ -1154,6 +1155,24 @@ procedure TCGMIPS.g_flags2reg(list: tasmlist; size: tcgsize; const f: tresflags;
   end;
   end;
 
 
 
 
+procedure TCGMIPS.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
+var
+  asmop: tasmop;
+begin
+  case size of
+    OS_32:  asmop:=A_MULTU;
+    OS_S32: asmop:=A_MULT;
+  else
+    InternalError(2014060802);
+  end;
+  list.concat(taicpu.op_reg_reg(asmop,src1,src2));
+  if (dstlo<>NR_NO) then
+    list.concat(taicpu.op_reg(A_MFLO,dstlo));
+  if (dsthi<>NR_NO) then
+    list.concat(taicpu.op_reg(A_MFHI,dsthi));
+end;
+
+
 procedure TCGMIPS.g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef);
 procedure TCGMIPS.g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef);
 begin
 begin
 // this is an empty procedure
 // this is an empty procedure

+ 14 - 9
compiler/mips/ncpumat.pas

@@ -86,16 +86,21 @@ begin
   numerator := left.location.Register;
   numerator := left.location.Register;
 
 
   if (nodetype = divn) and
   if (nodetype = divn) and
-    (right.nodetype = ordconstn) and
-    ispowerof2(tordconstnode(right).Value.svalue, power) then
+    (right.nodetype = ordconstn) then
   begin
   begin
-    tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
-    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
-    { if signed, tmpreg=right value-1, otherwise 0 }
-    cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
-    { add left value }
-    cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
-    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
+    if ispowerof2(tordconstnode(right).Value.svalue, power) then
+    begin
+      tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
+      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
+      { if signed, tmpreg=right value-1, otherwise 0 }
+      cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
+      { add left value }
+      cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
+      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
+    end
+    else
+      cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+        tordconstnode(right).value.svalue,numerator,location.register);
   end
   end
   else
   else
   begin
   begin