Browse Source

{ARM} Implement usage of generic division-by-const optimization

This utilizes the code committed in r27904 to convert a division by a constant
into a 32x32->64 bit multiplication on ARM.

git-svn-id: trunk@27929 -
masta 11 years ago
parent
commit
0cb1a129b3
2 changed files with 36 additions and 4 deletions
  1. 24 0
      compiler/arm/cgcpu.pas
  2. 12 4
      compiler/arm/narmmat.pas

+ 24 - 0
compiler/arm/cgcpu.pas

@@ -114,6 +114,7 @@ unit cgcpu;
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
+
       end;
 
       { tcgarm is shared between normal arm and thumb-2 }
@@ -133,6 +134,9 @@ unit cgcpu;
         procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
 
         procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
+
+        {Multiply two 32-bit registers into lo and hi 32-bit registers}
+        procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
       end;
 
       { normal arm cg }
@@ -1173,6 +1177,26 @@ unit cgcpu;
         maybeadjustresult(list,op,size,dst);
       end;
 
+    { Multiplies the two 32-bit registers src1 and src2 into the register pair
+      dstlo/dsthi using a single long-multiply instruction: UMULL for OS_32,
+      SMULL for OS_S32. Any other size raises InternalError 2014060802. }
+    procedure tcgarm.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
+    var
+      mulop: tasmop;
+    begin
+      list.concat(tai_comment.create(strpnew('tcgarm.a_mul_reg_reg_pair called')));
+      { Select the long-multiply opcode that matches the requested operand size }
+      if size=OS_32 then
+        mulop:=A_UMULL
+      else if size=OS_S32 then
+        mulop:=A_SMULL
+      else
+        InternalError(2014060802);
+      { A caller that does not need one half of the result may pass NR_NO for it.
+        The instruction still takes two destination registers, so a fresh integer
+        register is allocated for each omitted half (low half first). When only
+        dsthi is omitted, a plain 32x32=32 bit multiplication could be used instead. }
+      if dstlo=NR_NO then
+        dstlo:=getintregister(list,size);
+      if dsthi=NR_NO then
+        dsthi:=getintregister(list,size);
+      list.concat(taicpu.op_reg_reg_reg_reg(mulop,dstlo,dsthi,src1,src2));
+    end;
 
     function tbasecgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
       var

+ 12 - 4
compiler/arm/narmmat.pas

@@ -71,14 +71,19 @@ implementation
       var
         power  : longint;
       begin
+        {We can handle all cases of constant division}
         if not(cs_check_overflow in current_settings.localswitches) and
            (right.nodetype=ordconstn) and
            (nodetype=divn) and
-           (ispowerof2(tordconstnode(right).value,power) or
+           not(is_64bitint(resultdef)) and
+           {Only the ARM and Thumb-2 ISAs support umull and smull, which are required for the arbitrary division-by-const optimization}
+           (GenerateArmCode or
+            GenerateThumb2Code or
+            (ispowerof2(tordconstnode(right).value,power) or
             (tordconstnode(right).value=1) or
             (tordconstnode(right).value=int64(-1))
-           ) and
-           not(is_64bitint(resultdef)) then
+            )
+           ) then
           result:=nil
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=divn) and
@@ -173,7 +178,10 @@ implementation
                   end
                else
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
-             end;
+             end
+           else {Everything else is handled the generic code}
+             cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+               tordconstnode(right).value.svalue,numerator,resultreg);
          end;
 
 {