Explorar o código

+ div 2^n optimization for arm

git-svn-id: trunk@8864 -
florian hai 18 anos
pai
achega
c57f44d75b
Modificouse 1 ficheiro con 159 adicións e 9 borrados
  1. 159 9
      compiler/arm/narmmat.pas

+ 159 - 9
compiler/arm/narmmat.pas

@@ -29,11 +29,15 @@ interface
       node,nmat,ncgmat;
 
     type
+      tarmmoddivnode = class(tmoddivnode)  { ARM div/mod node; assigned to cmoddivnode in this unit's initialization }
+        function first_moddivint: tnode;override;  { returns nil for a non-64-bit div by a constant power of two, otherwise the inherited result }
+        procedure pass_generate_code;override;  { emits the inline code for div by an ordinal constant }
+      end;
+
       tarmnotnode = class(tcgnotnode)
         procedure second_boolean;override;
       end;
 
-
       tarmunaryminusnode = class(tcgunaryminusnode)
         procedure second_float;override;
       end;
@@ -43,15 +47,161 @@ implementation
 
     uses
       globtype,systems,
-      cutils,verbose,globals,
-      symconst,symdef,
+      cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
       cgbase,cgobj,cgutils,
-      pass_1,pass_2,procinfo,
+      pass_2,procinfo,
       ncon,
-      cpubase,cpuinfo,
-      ncgutil,cgcpu,cg64f32,rgobj;
+      cpubase,
+      ncgutil,cgcpu;
+
+{*****************************************************************************
+                             TARMMODDIVNODE
+*****************************************************************************}
+
+    { Decides whether this node keeps its div/mod form (result nil) or is
+      handed to the inherited first_moddivint.
+      The node is kept only for "div <ordinal constant>" where the constant is
+      a power of two and the result type is not a 64 bit integer. }
+    function tarmmoddivnode.first_moddivint: tnode;
+      var
+        shiftcount : longint;
+        keepnode   : boolean;
+      begin
+        keepnode:=false;
+        { only look at the constant once we know right really is an ordconstn }
+        if (right.nodetype=ordconstn) and (nodetype=divn) then
+          keepnode:=ispowerof2(tordconstnode(right).value,shiftcount) and
+            not(is_64bitint(resultdef));
+
+        if keepnode then
+          result:=nil
+        else
+          result:=inherited first_moddivint;
+      end;
+
+
+    { Emits ARM code for an integer div whose right operand is an ordinal
+      constant; the quotient ends up in location.register.
+
+      Both operands are run through secondpass and the numerator is forced
+      into a register. Only "div <constant>" produces instructions here: mod
+      by a constant and non-constant divisors emit nothing in this routine.
+      NOTE(review): presumably those cases never reach this point because
+      first_moddivint replaces them - confirm against the inherited method. }
+    procedure tarmmoddivnode.pass_generate_code;
+      var
+        power  : longint;
+        numerator,
+        helper1,
+        helper2,
+        resultreg  : tregister;
+        size       : Tcgsize;
+        so : tshifterop;
+
+       { Emits numerator div <constant right operand> into resultreg.
+         Handles the constants 1, -1 and powers of two; a zero divisor is an
+         internal error. Any other constant emits no code here. }
+       procedure genOrdConstNodeDiv;
+         begin
+           if tordconstnode(right).value=0 then
+             internalerror(2005061701)
+           else if tordconstnode(right).value=1 then
+             cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
+           else if (tordconstnode(right).value = int64(-1)) then
+             begin
+               // note: only in the signed case possible..., may overflow
+               // x div -1 = -x, i.e. 0 - x: RSB with an immediate 0 negates,
+               // whereas MVN would give not(x) = -x-1. The S postfix is only
+               // requested when overflow checking is enabled.
+               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,
+                 resultreg,numerator,0),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
+             end
+           else if ispowerof2(tordconstnode(right).value,power) then
+             begin
+               if (is_signed(right.resultdef)) then
+                 begin
+                    // signed case: bias negative numerators by 2^power-1 so
+                    // that the arithmetic shift rounds towards zero
+                    helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                    helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                    // helper1 := numerator asr 31 (all ones if negative, else 0)
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_ASR;
+                    so.shiftimm:=31;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_shifterop(A_MOV,helper1,numerator,so));
+                    // helper2 := numerator + (helper1 lsr (32-power))
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=32-power;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
+                    // resultreg := helper2 asr power
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_ASR;
+                    so.shiftimm:=power;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_shifterop(A_MOV,resultreg,helper2,so));
+                  end
+               else
+                 // unsigned case: a plain logical shift right is enough
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
+             end;
+         end;
+
+// NOTE(review): the disabled routine below uses A_ANDC, A_SUBFIC and A_SUBFE,
+// which look like PowerPC opcodes; it needs porting before it can be enabled
+// for ARM.
+{
+       procedure genOrdConstNodeMod;
+         var
+             modreg, maskreg, tempreg : tregister;
+         begin
+             if (tordconstnode(right).value = 0) then begin
+                 internalerror(2005061702);
+             end
+             else if (abs(tordconstnode(right).value.svalue) = 1) then
+             begin
+                // x mod +/-1 is always zero
+                cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, 0, resultreg);
+             end
+             else if (ispowerof2(tordconstnode(right).value, power)) then
+             begin
+                 if (is_signed(right.resultdef)) then begin
+
+                     tempreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
+                     maskreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
+                     modreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
+
+                     cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, abs(tordconstnode(right).value.svalue)-1, modreg);
+                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, maskreg);
+                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, numerator, modreg, tempreg);
+
+                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
+                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
+                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
+                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, modreg, maskreg, maskreg);
+                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_INT, maskreg, tempreg, resultreg);
+                 end else begin
+                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).value.svalue-1, numerator, resultreg);
+                 end;
+             end else begin
+                 genOrdConstNodeDiv();
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_MUL, OS_INT, tordconstnode(right).value.svalue, resultreg, resultreg);
+                 cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, resultreg, numerator, resultreg);
+             end;
+         end;
+}
+
+      begin
+        secondpass(left);
+        secondpass(right);
+        location_copy(location,left.location);
+
+        { put numerator in register }
+        size:=def_cgsize(left.resultdef);
+        location_force_reg(current_asmdata.CurrAsmList,left.location,
+          size,true);
+        location_copy(location,left.location);
+        numerator:=location.register;
+        resultreg:=location.register;
+        if location.loc=LOC_CREGISTER then
+          begin
+            { the numerator is in a LOC_CREGISTER: write the result to a
+              freshly allocated register instead of overwriting it }
+            location.loc := LOC_REGISTER;
+            location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
+            resultreg:=location.register;
+          end
+        else if (nodetype=modn) or (right.nodetype=ordconstn) then
+          begin
+            // for a modulus op, and for const nodes we need the result register
+            // to be an extra register
+            resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
+          end;
+
+        if right.nodetype=ordconstn then
+          begin
+            if nodetype=divn then
+              genOrdConstNodeDiv;
+            // mod by a constant: no code is generated here because
+            // genOrdConstNodeMod is still disabled (see above)
+          end;
+
+        location.register:=resultreg;
+
+        { unsigned division/module can only overflow in case of division by zero }
+        { (but checking this overflow flag is more convoluted than performing a  }
+        {  simple comparison with 0)                                             }
+        if is_signed(right.resultdef) then
+          cg.g_overflowcheck(current_asmdata.CurrAsmList,location,resultdef);
+      end;
 
 {*****************************************************************************
                                TARMNOTNODE
@@ -60,7 +210,6 @@ implementation
     procedure tarmnotnode.second_boolean;
       var
         hl : tasmlabel;
-        ins : taicpu;
       begin
         { if the location is LOC_JUMP, we do the secondpass after the
           labels are allocated
@@ -117,6 +266,7 @@ implementation
 
 
 begin
-   cnotnode:=tarmnotnode;
-   cunaryminusnode:=tarmunaryminusnode;
+  cmoddivnode:=tarmmoddivnode;  { assign the ARM div/mod node class; presumably cmoddivnode is the class reference div/mod nodes are created from - confirm }
+  cnotnode:=tarmnotnode;  { assign the ARM not node class }
+  cunaryminusnode:=tarmunaryminusnode;  { assign the ARM unary minus node class }
 end.