ソースを参照

* modified patch by J. Gareth Moreton: MovOpMov2Op/Lea2Add/Lea2Sub consolidation, resolves #37422

git-svn-id: trunk@45865 -
florian 5 年 前
コミット
55c4986c8d
1 ファイル変更139 行追加150 行削除
  1. 139 150
      compiler/x86/aoptx86.pas

+ 139 - 150
compiler/x86/aoptx86.pas

@@ -105,6 +105,10 @@ unit aoptx86;
         class function CanBeCMOV(p : tai) : boolean; static;
 
 
+        { Converts the LEA instruction to ADD/INC/SUB/DEC. Returns True if the
+          conversion was successful }
+        function ConvertLEA(const p : taicpu): Boolean;
+
         function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
 
         procedure DebugMsg(const s : string; p : tai);inline;
@@ -1773,6 +1777,62 @@ unit aoptx86;
       end;
 
 
+    { Rewrites "lea ofs(%reg),%reg" in place as INC/DEC/ADD/SUB on %reg and
+      returns True on success; p is left untouched when False is returned.
+      References carrying a symbol are rejected, since the constant operand
+      is built from "offset" alone and would silently drop the symbol.
+      NOTE(review): unlike LEA, the replacement instructions alter the CPU
+      flags; confirm that no caller relies on the flags surviving p. }
+    function TX86AsmOptimizer.ConvertLEA(const p: taicpu): Boolean;
+      var
+        l: asizeint;
+      begin
+        Result := False;
+
+        { Should have been checked previously }
+        if p.opcode <> A_LEA then
+          InternalError(2020072501);
+
+        { do not mess with the stack pointer as adjusting it by lea is
+          recommended, except if we optimize for size }
+        if (p.oper[1]^.reg=NR_STACK_POINTER_REG) and
+           not(cs_opt_size in current_settings.optimizerswitches) then
+          Exit;
+
+        { only "lea ofs(%reg),%reg" qualifies: the base must be the destination
+          register, there must be no index register and no symbolic part }
+        if (p.oper[0]^.ref^.base <> p.oper[1]^.reg) or
+           (p.oper[0]^.ref^.index <> NR_NO) or
+           Assigned(p.oper[0]^.ref^.symbol) then
+          Exit;
+
+        l:=p.oper[0]^.ref^.offset;
+        if (l=1) and UseIncDec then
+          begin
+            p.opcode:=A_INC;
+            p.loadreg(0,p.oper[1]^.reg);
+            p.ops:=1;
+            DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
+          end
+        else if (l=-1) and UseIncDec then
+          begin
+            p.opcode:=A_DEC;
+            p.loadreg(0,p.oper[1]^.reg);
+            p.ops:=1;
+            DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
+          end
+        { -2147483648 is kept as an ADD; presumably because its negation
+          does not fit into a signed 32-bit immediate }
+        else if (l<0) and (l<>-2147483648) then
+          begin
+            p.opcode:=A_SUB;
+            p.loadConst(0,-l);
+            DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
+          end
+        else
+          begin
+            p.opcode:=A_ADD;
+            p.loadConst(0,l);
+            DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
+          end;
+
+        Result := True;
+      end;
+
+
     function TX86AsmOptimizer.DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
       var
         CurrentReg, ReplaceReg: TRegister;
@@ -2755,14 +2815,59 @@ unit aoptx86;
             AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
             exit;
           end;
+
+        if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
+          begin
+            if MatchOpType(Taicpu(p),top_ref,top_reg) and
+               ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
+                 (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
+                ) or
+                (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
+                 (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
+                )
+               ) then
+               { mov reg1,ref
+                 lea reg2,[reg1,reg2]
+
+                 to
+
+                 add reg2,ref}
+              begin
+                TransferUsedRegs(TmpUsedRegs);
+                { reg1 may not be used afterwards }
+                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
+                  begin
+                    Taicpu(hp1).opcode:=A_ADD;
+                    Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
+                    DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
+                    RemoveCurrentp(p, hp1);
+                    result:=true;
+                    exit;
+                  end;
+              end;
+
+            { If the LEA instruction can be converted into an arithmetic instruction,
+              it may be possible to then fold it in the next optimisation, otherwise
+              there's nothing more that can be optimised here. }
+            if not ConvertLEA(taicpu(hp1)) then
+              Exit;
+
+          end;
+
         if (taicpu(p).oper[1]^.typ = top_reg) and
           (hp1.typ = ait_instruction) and
           GetNextInstruction(hp1, hp2) and
           MatchInstruction(hp2,A_MOV,[]) and
           (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
-          (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
-           ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
-            IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
+          (
+            IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg)
+{$ifdef x86_64}
+            or
+            (
+              (taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
+              IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ))
+            )
+{$endif x86_64}
           ) then
           begin
             if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
@@ -2911,6 +3016,7 @@ unit aoptx86;
                     hp2.Free;
                   end;
               end;
+
           end;
         if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
           GetNextInstruction(hp1, hp2) and
@@ -2932,37 +3038,6 @@ unit aoptx86;
             Result:=true;
             exit;
           end;
-
-        if MatchInstruction(hp1,A_LEA,[S_L]) and
-           MatchOpType(Taicpu(p),top_ref,top_reg) and
-           ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
-             (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
-            ) or
-            (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
-             (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
-            )
-           ) then
-           { mov reg1,ref
-             lea reg2,[reg1,reg2]
-
-             to
-
-             add reg2,ref}
-          begin
-            TransferUsedRegs(TmpUsedRegs);
-            { reg1 may not be used afterwards }
-            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
-              begin
-                Taicpu(hp1).opcode:=A_ADD;
-                Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
-                DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
-                asml.remove(p);
-                p.free;
-                p:=hp1;
-                result:=true;
-                exit;
-              end;
-          end;
       end;
 
 
@@ -3074,65 +3149,36 @@ unit aoptx86;
            (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
            (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
           begin
-            if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
-               (taicpu(p).oper[0]^.ref^.offset = 0) then
+            if (taicpu(p).oper[0]^.ref^.offset = 0) then
               begin
-                hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
-                  taicpu(p).oper[1]^.reg);
-                InsertLLItem(p.previous,p.next, hp1);
-                DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
-                p.free;
-                p:=hp1;
+                if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
+                  begin
+                    hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
+                      taicpu(p).oper[1]^.reg);
+                    InsertLLItem(p.previous,p.next, hp1);
+                    DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
+                    p.free;
+                    p:=hp1;
+                  end
+                else
+                  begin
+                    DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
+                    RemoveCurrentP(p);
+                  end;
                 Result:=true;
                 exit;
               end
-            else if (taicpu(p).oper[0]^.ref^.offset = 0) then
+            else if (
+              { continue to use lea to adjust the stack pointer,
+                it is the recommended way, but only if not optimizing for size }
+                (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
+                (cs_opt_size in current_settings.optimizerswitches)
+              ) and
+              ConvertLEA(taicpu(p)) then
               begin
-                DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
-                RemoveCurrentP(p);
                 Result:=true;
                 exit;
-              end
-            { continue to use lea to adjust the stack pointer,
-              it is the recommended way, but only if not optimizing for size }
-            else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
-              (cs_opt_size in current_settings.optimizerswitches) then
-              with taicpu(p).oper[0]^.ref^ do
-                if (base = taicpu(p).oper[1]^.reg) then
-                  begin
-                    l:=offset;
-                    if (l=1) and UseIncDec then
-                      begin
-                        taicpu(p).opcode:=A_INC;
-                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
-                        taicpu(p).ops:=1;
-                        DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
-                      end
-                    else if (l=-1) and UseIncDec then
-                      begin
-                        taicpu(p).opcode:=A_DEC;
-                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
-                        taicpu(p).ops:=1;
-                        DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
-                      end
-                    else
-                      begin
-                        if (l<0) and (l<>-2147483648) then
-                          begin
-                            taicpu(p).opcode:=A_SUB;
-                            taicpu(p).loadConst(0,-l);
-                            DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
-                          end
-                        else
-                          begin
-                            taicpu(p).opcode:=A_ADD;
-                            taicpu(p).loadConst(0,l);
-                            DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
-                          end;
-                      end;
-                    Result:=true;
-                    exit;
-                  end;
+              end;
           end;
         if GetNextInstruction(p,hp1) and
           MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
@@ -4617,70 +4663,6 @@ unit aoptx86;
 *)
                   end;
               end;
-          end
-        else if (taicpu(p).oper[0]^.typ = top_ref) and
-          (hp1.typ = ait_instruction) and
-          { while the GetNextInstruction(hp1,hp2) call could be factored out,
-            doing it separately in both branches allows to do the cheap checks
-            with low probability earlier }
-          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
-            GetNextInstruction(hp1,hp2) and
-            MatchInstruction(hp2,A_MOV,[])
-           ) or
-           ((taicpu(hp1).opcode=A_LEA) and
-             GetNextInstruction(hp1,hp2) and
-             MatchInstruction(hp2,A_MOV,[]) and
-            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
-             (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
-              ) or
-             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
-              taicpu(p).oper[1]^.reg) and
-             (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
-             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
-             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
-            ) and
-            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
-           )
-          ) and
-          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
-          (taicpu(hp2).oper[1]^.typ = top_ref) then
-          begin
-            TransferUsedRegs(TmpUsedRegs);
-            UpdateUsedRegs(TmpUsedRegs,tai(p.next));
-            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
-            if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
-              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
-              { change   mov            (ref), reg
-                         add/sub/or/... reg2/$const, reg
-                         mov            reg, (ref)
-                         # release reg
-                to       add/sub/or/... reg2/$const, (ref)    }
-              begin
-                case taicpu(hp1).opcode of
-                  A_INC,A_DEC,A_NOT,A_NEG :
-                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
-                  A_LEA :
-                    begin
-                      taicpu(hp1).opcode:=A_ADD;
-                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
-                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
-                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
-                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
-                      else
-                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
-                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
-                      DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
-                    end
-                  else
-                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
-                end;
-                asml.remove(p);
-                asml.remove(hp2);
-                p.free;
-                hp2.free;
-                p := hp1
-              end;
-            Exit;
 {$ifdef x86_64}
           end
         else if (taicpu(p).opsize = S_L) and
@@ -5382,7 +5364,14 @@ unit aoptx86;
         reg_and_hp1_is_instr:=(taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p,hp1) and
           (hp1.typ = ait_instruction);
+
         if reg_and_hp1_is_instr and
+          (
+            (taicpu(hp1).opcode <> A_LEA) or
+            { If the LEA instruction can be converted into an arithmetic instruction,
+              it may be possible to then fold it. }
+            ConvertLEA(taicpu(hp1))
+          ) and
           IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(hp1,hp2) and
           MatchInstruction(hp2,A_MOV,[]) and