2
0
Эх сурвалжийг харах

* another patch from Roozbeh GHolizadeh
* improved concatcopy code generation
* improved constant loading
* improved multiplication code generation

git-svn-id: trunk@3471 -

florian 19 жил өмнө
parent
commit
8bd674af24

+ 96 - 42
compiler/arm/cgcpu.pas

@@ -459,6 +459,16 @@ unit cgcpu;
               it saves us a register }
               it saves us a register }
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
               a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
               a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
+            { for example : b=a*5 -> b=a*4+a with add instruction and shl }
+            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
+              begin
+                if l1>32 then{roozbeh does this ever happen?}
+                  internalerror(200308291);
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=l1;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
+              end
             else
             else
               begin
               begin
                 tmpreg:=getintregister(list,size);
                 tmpreg:=getintregister(list,size);
@@ -569,6 +579,7 @@ unit cgcpu;
           imm_shift : byte;
           imm_shift : byte;
           l : tasmlabel;
           l : tasmlabel;
           hr : treference;
           hr : treference;
+          tmpreg : tregister;
        begin
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
             internalerror(2002090902);
             internalerror(2002090902);
@@ -576,6 +587,23 @@ unit cgcpu;
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
           else if is_shifter_const(not(a),imm_shift) then
           else if is_shifter_const(not(a),imm_shift) then
             list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
             list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
+          { loading of constants with mov and orr }
+          {else [if (is_shifter_const(a-byte(a),imm_shift)) then
+            begin
+              }{ roozbeh:why using tmpreg later causes error in compiling of system.pp,and also those other similars}
+              {list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
+            end
+          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
+            end
+          else if (is_shifter_const(a-(longint(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((longint(a) shl 8) shr 8,imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(longint(a) shl 8)shr 8));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(longint(a) shl 8)shr 8));
+            end}
           else
           else
             begin
             begin
                reference_reset(hr);
                reference_reset(hr);
@@ -1377,6 +1405,9 @@ unit cgcpu;
 
 
 
 
     procedure tcgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : aint;aligned : boolean);
     procedure tcgarm.g_concatcopy_internal(list : TAsmList;const source,dest : treference;len : aint;aligned : boolean);
+      const
+        maxtmpreg=10;{roozbeh: can be reduced to 8 or lower if might conflick with reserved ones,also +2 is used becouse of regs required for referencing}
+
       var
       var
         srcref,dstref,usedtmpref,usedtmpref2:treference;
         srcref,dstref,usedtmpref,usedtmpref2:treference;
         srcreg,destreg,countreg,r,tmpreg,tmpreg2:tregister;
         srcreg,destreg,countreg,r,tmpreg,tmpreg2:tregister;
@@ -1384,6 +1415,8 @@ unit cgcpu;
         copysize:byte;
         copysize:byte;
         cgsize:Tcgsize;
         cgsize:Tcgsize;
         so:tshifterop;
         so:tshifterop;
+        tmpregisters:array[1..maxtmpreg]of tregister;
+        tmpregi,tmpregi2:byte;
 
 
       { will never be called with count<=4 }
       { will never be called with count<=4 }
       procedure genloop(count : aword;size : byte);
       procedure genloop(count : aword;size : byte);
@@ -1456,13 +1489,38 @@ unit cgcpu;
 
 
         if len=0 then
         if len=0 then
           exit;
           exit;
-        helpsize:=12;
+        helpsize:=12+maxtmpreg*4;//52 with maxtmpreg=10
         dstref:=dest;
         dstref:=dest;
         srcref:=source;
         srcref:=source;
         if cs_opt_size in aktoptimizerswitches then
         if cs_opt_size in aktoptimizerswitches then
           helpsize:=8;
           helpsize:=8;
         if (len<=helpsize) and aligned then
         if (len<=helpsize) and aligned then
           begin
           begin
+            tmpregi:=0;
+            srcreg:=getintregister(list,OS_ADDR);
+            a_loadaddr_ref_reg(list,source,srcreg);
+            reference_reset_base(srcref,srcreg,0);
+
+            while (len div  4 <> 0) and (tmpregi<=maxtmpreg) do
+              begin
+                inc(tmpregi);
+                tmpregisters[tmpregi]:=getintregister(list,OS_32);
+                a_load_ref_reg(list,OS_32,OS_32,srcref,tmpregisters[tmpregi]);
+                inc(srcref.offset,4);
+                dec(len,4);
+              end;
+
+            destreg:=getintregister(list,OS_ADDR);
+            a_loadaddr_ref_reg(list,dest,destreg);
+            reference_reset_base(dstref,destreg,0);
+            tmpregi2:=1;
+            while (tmpregi2<=tmpregi) do
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,tmpregisters[tmpregi2],dstref);
+                inc(dstref.offset,4);
+                inc(tmpregi2);
+              end;
+
             copysize:=4;
             copysize:=4;
             cgsize:=OS_32;
             cgsize:=OS_32;
             while len<>0 do
             while len<>0 do
@@ -1483,48 +1541,43 @@ unit cgcpu;
                 a_load_reg_ref(list,cgsize,cgsize,r,dstref);
                 a_load_reg_ref(list,cgsize,cgsize,r,dstref);
                 inc(srcref.offset,copysize);
                 inc(srcref.offset,copysize);
                 inc(dstref.offset,copysize);
                 inc(dstref.offset,copysize);
-              end;
+              end;{end of while}
           end
           end
         else
         else
           begin
           begin
             cgsize:=OS_32;
             cgsize:=OS_32;
-            if (len<=4) then
+            if (len<=4) then{len<=4 and not aligned}
               begin
               begin
                 r:=getintregister(list,cgsize);
                 r:=getintregister(list,cgsize);
-                case Len of
-                  1,2,3,4:
-                    begin
-                      usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
-                      if Len=1 then
-                        a_load_reg_ref(list,OS_8,OS_8,r,dstref)
-                      else
-                        begin
-                          tmpreg:=getintregister(list,cgsize);
-                          usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
-                          inc(usedtmpref.offset,1);
-                          a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                          inc(usedtmpref2.offset,1);
-                          a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
-                          if len>2 then
-                            begin
-                              inc(usedtmpref.offset,1);
-                              a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                              inc(usedtmpref2.offset,1);
-                              a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
-                              if len>3 then
-                                begin
-                                  inc(usedtmpref.offset,1);
-                                  a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                                  inc(usedtmpref2.offset,1);
-                                  a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
-                                end;
-                            end;
+                usedtmpref:=a_internal_load_ref_reg(list,OS_8,OS_8,srcref,r);
+                if Len=1 then
+                  a_load_reg_ref(list,OS_8,OS_8,r,dstref)
+                else
+                  begin
+                    tmpreg:=getintregister(list,cgsize);
+                    usedtmpref2:=a_internal_load_reg_ref(list,OS_8,OS_8,r,dstref);
+                    inc(usedtmpref.offset,1);
+                    a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                    inc(usedtmpref2.offset,1);
+                    a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
+                    if len>2 then
+                      begin
+                        inc(usedtmpref.offset,1);
+                        a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                        inc(usedtmpref2.offset,1);
+                        a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
+                        if len>3 then
+                          begin
+                            inc(usedtmpref.offset,1);
+                            a_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                            inc(usedtmpref2.offset,1);
+                            a_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref2);
+                          end;
                         end;
                         end;
-                    end;
-                end;
-              end
+                      end;
+            end{end of if len<=4}
             else
             else
-              begin
+              begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
                 destreg:=getintregister(list,OS_ADDR);
                 destreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,dest,destreg);
                 a_loadaddr_ref_reg(list,dest,destreg);
                 reference_reset_base(dstref,destreg,0);
                 reference_reset_base(dstref,destreg,0);
@@ -1535,14 +1588,15 @@ unit cgcpu;
 
 
                 countreg:=getintregister(list,OS_32);
                 countreg:=getintregister(list,OS_32);
 
 
-                if aligned then
-                  genloop(len,4)
-                else
-                  genloop(len,1);
-              end;
+//            if cs_opt_size in aktoptimizerswitches  then
+                { roozbeh : it seems loading 1 byte is faster becouse of caching/fetching(?) }
+                {if aligned then
+                genloop(len,4)
+                else}
+                genloop(len,1);
+            end;
           end;
           end;
-      end;
-
+    end;
 
 
     procedure tcgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : aint);
     procedure tcgarm.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : aint);
       begin
       begin