Explorar el Código

+ MIPS: more peephole optimizations (basically updated to the state of SPARC peephole).

git-svn-id: trunk@27990 -
sergei hace 11 años
padre
commit
a8e30043db
Se ha modificado 1 fichero con 149 adiciones y 4 borrados
  1. 149 4
      compiler/mips/aoptcpu.pas

+ 149 - 4
compiler/mips/aoptcpu.pas

@@ -32,6 +32,10 @@ unit aoptcpu;
 
     Type
       TCpuAsmOptimizer = class(TAsmOptimizer)  { peephole optimizer for the MIPS backend }
+        function GetNextInstructionUsingReg(Current: tai;  { scans forward from Current for an item touching reg; }
+          var Next: tai; reg: TRegister): Boolean;         { yields False and Next=nil when a call/jump is hit first }
+        function RegUsedAfterInstruction(reg: Tregister; p: tai;  { True when reg is still marked used after p and is not }
+          var AllUsedRegs: TAllUsedRegs): Boolean;                { merely overwritten by p or by the instruction after it }
         function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;  { opcode is the move variant to look for (A_MOVE, A_MOV_s, ...) }
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;  { CPU-specific pass 1; overrides the inherited method }
       End;
@@ -39,7 +43,7 @@ unit aoptcpu;
   Implementation
 
      uses
-       aasmcpu;
+       globals,aasmbase,aasmcpu,cpuinfo,verbose;
 
 
   function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
@@ -65,6 +69,87 @@ unit aoptcpu;
     end;
 
 
+  { Reports whether the instruction hp overwrites register reg.
+    The answer is false when hp is nil or is not an instruction, and also
+    for every opcode in the list below, none of which writes a register.
+    For all remaining opcodes operand 0 is regarded as the destination. }
+  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      instr: taicpu;
+    begin
+      result:=false;
+      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
+        exit;
+      instr:=taicpu(hp);
+
+      case instr.opcode of
+        { These instructions do not write into a register at all }
+        A_NOP,
+        A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
+        A_BA,A_BC,
+        A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
+          { result stays false }
+          ;
+      else
+        { anything else: written register, if any, is the first operand }
+        result:=(instr.ops>0) and
+          (instr.oper[0]^.typ=top_reg) and
+          (instr.oper[0]^.reg=reg);
+      end;
+    end;
+
+
+  { Reports whether the instruction hp reads register reg through one of its
+    source operands.
+    Operand 0 is skipped here (regLoadedWithNewValue treats it as the
+    destination); every later operand is examined, either as a plain register
+    or as the base/index register of a memory reference.
+    Returns false when hp is nil or is not an instruction. }
+  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      p: taicpu;
+      i: longint;
+    begin
+      result:=false;
+      if not (assigned(hp) and (hp.typ=ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      { MIPS three-operand instructions are laid out as dst,src1,src2, so all
+        operands from index 1 upwards are sources.  Restricting the register
+        test to index 1 only (a leftover of the SPARC src1,src2,dst layout)
+        would miss reads through src2, e.g. "addu $t0,$t1,$t0". }
+      for i:=1 to p.ops-1 do
+        begin
+          case p.oper[i]^.typ of
+            top_reg:
+              result:=(p.oper[i]^.reg=reg);
+            top_ref:
+              result:=
+                (p.oper[i]^.ref^.base=reg) or
+                (p.oper[i]^.ref^.index=reg);
+          end;
+          if result then exit; {Bailout if we found something}
+        end;
+    end;
+
+
+  { Walks forward from Current (via GetNextInstruction) and stops at the first
+    item that is not an instruction, that references reg, or that is a
+    call/jump.
+    Current: starting point; the search begins with the item after it.
+    Next:    receives the item the search stopped on, or nil if that item is
+             a call/jump.
+    Returns False when no further item exists or when a call/jump was reached
+    before any use of reg; True otherwise.  Note that on True, Next may be a
+    non-instruction item, so callers must still check its type. }
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    var Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
+        (is_calljmp(taicpu(Next).opcode));
+      { The loop can also terminate on a non-instruction item; only cast to
+        taicpu and inspect the opcode once the type has been confirmed. }
+      if Result and (Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) then
+        begin
+          result:=false;
+          next:=nil;
+        end;
+    end;
+
+
+  { Decides whether reg still carries a needed value after instruction p.
+    AllUsedRegs is first advanced past p.  The register counts as used only
+    if it is marked used there, p itself does not overwrite it, and the
+    instruction following p (if there is one) either reads it or does not
+    overwrite it. }
+  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
+    var AllUsedRegs: TAllUsedRegs): Boolean;
+    begin
+      result:=false;
+      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
+
+      { not live at all after p }
+      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
+        exit;
+      { p replaces the value, so the old contents are dead }
+      if regLoadedWithNewValue(reg,p) then
+        exit;
+      { nothing follows p: stay conservative and report the register as used }
+      if not GetNextInstruction(p,p) then
+        begin
+          result:=true;
+          exit;
+        end;
+      { p now denotes the following instruction }
+      result:=instructionLoadsFromReg(reg,p) or
+        not regLoadedWithNewValue(reg,p);
+    end;
+
+
   function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
     var
       next,hp1: tai;
@@ -79,9 +164,13 @@ unit aoptcpu;
         opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
       }
       result:=false;
-      if GetNextInstruction(p,next) and
+      if (taicpu(p).ops>1) and
+         GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
          MatchInstruction(next,opcode) and
-         MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
+         MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
+         { the destination register of mov cannot be used between p and next }
+         (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
+
         begin
           dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
           if assigned(dealloc) then
@@ -124,12 +213,49 @@ unit aoptcpu;
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       next,next2: tai;
+      TmpUsedRegs: TAllUsedRegs;
     begin
       result:=false;
       case p.typ of
         ait_instruction:
           begin
             case taicpu(p).opcode of
+              A_SLL:
+                begin
+                  { if this is a sign extension... }
+                  if (taicpu(p).oper[2]^.typ=top_const) and
+                    GetNextInstruction(p,next) and
+                    MatchInstruction(next,A_SRA) and
+                    IsSameReg(taicpu(p),taicpu(next)) and
+                    (taicpu(next).oper[2]^.typ=top_const) and
+                    (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
+                    (taicpu(next).oper[2]^.val=16) and
+                    { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
+                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
+                    MatchInstruction(next2,A_SH) and
+                    (taicpu(next2).oper[0]^.typ=top_reg) and
+                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+                    { the initial register may not be reused }
+                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
+                    begin
+                      CopyUsedRegs(TmpUsedRegs);
+                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                      UpdateUsedRegs(TmpUsedRegs, tai(next.next));
+                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then
+                        begin
+                          taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
+                          asml.remove(p);
+                          asml.remove(next);
+                          p.free;
+                          next.free;
+                          p:=next2;
+                        end;
+                      ReleaseUsedRegs(TmpUsedRegs);
+                    end
+                  else
+                    TryRemoveMov(p,A_MOVE);
+                end;
+
               A_SRL:
                 begin
                   { Remove 'andi' in sequences
@@ -185,6 +311,25 @@ unit aoptcpu;
                       next.free;
                       next2.free;
                     end
+                  { Remove zero extension if register is used only for byte/word memory store }
+                  else if (taicpu(p).oper[2]^.typ=top_const) and
+                    GetNextInstruction(p,next) and
+                    { 'and' binds tighter than 'or' in Pascal: the two mask/store
+                      alternatives must be grouped explicitly, otherwise the guards
+                      before them apply only to the byte case and the register
+                      checks after them only to the halfword case }
+                    (
+                      ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
+                      ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH))
+                    ) and
+                    { the store must write out exactly the register produced by p }
+                    (taicpu(next).oper[0]^.typ=top_reg) and
+                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
+                    begin
+                      CopyUsedRegs(TmpUsedRegs);
+                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                      { only safe when the masked value is not needed after the store }
+                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then
+                        begin
+                          { store the unmasked source register directly and drop p }
+                          taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
+                          asml.remove(p);
+                          p.free;
+                          p:=next;
+                        end;
+                      ReleaseUsedRegs(TmpUsedRegs);
+                    end
                   else
                     TryRemoveMov(p,A_MOVE);
                 end;
@@ -194,7 +339,7 @@ unit aoptcpu;
               A_SUB,A_SUBU,
               A_SRA,A_SRAV,
               A_SRLV,
-              A_SLL,A_SLLV,
+              A_SLLV,
               A_AND,A_OR,A_XOR,A_ORI,A_XORI:
                 TryRemoveMov(p,A_MOVE);