Переглянути джерело

o patch by J. Gareth Moreton, resolves #36355
+ This patch serves to extend the JMP -> RET optimisation in OptPass2JMP by also doing the same
for JMP -> MOV/RET, since there are often cases where the result (e.g. EAX) is set just
prior to the function exiting.
* RemoveDeadCodeAfterJump will now drop out if it detects SEH information -
this stops exception information from being stripped if it is called on the final RET instruction.

git-svn-id: trunk@43592 -

florian 5 років тому
батько
коміт
af107ca8fe
2 змінені файли: 137 додано та 37 видалено
  1. 11 9
      compiler/aoptobj.pas
  2. 126 28
      compiler/x86/aoptx86.pas

+ 11 - 9
compiler/aoptobj.pas

@@ -1616,6 +1616,15 @@ Unit AoptObj;
 
 
     { Removes all instructions between an unconditional jump and the next label }
     { Removes all instructions between an unconditional jump and the next label }
     procedure TAOptObj.RemoveDeadCodeAfterJump(p: tai);
     procedure TAOptObj.RemoveDeadCodeAfterJump(p: tai);
+      const
+{$ifdef JVM}
+        TaiFence = SkipInstr + [ait_const, ait_realconst, ait_typedconst, ait_label, ait_jcatch];
+{$else JVM}
+        { Stop if it reaches SEH directive information in the form of
+          consts, which may occur if RemoveDeadCodeAfterJump is called on
+          the final RET instruction on x86, for example }
+        TaiFence = SkipInstr + [ait_const, ait_realconst, ait_typedconst, ait_label];
+{$endif JVM}
       var
       var
         hp1, hp2: tai;
         hp1, hp2: tai;
       begin
       begin
@@ -1624,12 +1633,7 @@ Unit AoptObj;
         }
         }
         while GetNextInstruction(p, hp1) and
         while GetNextInstruction(p, hp1) and
               (hp1 <> BlockEnd) and
               (hp1 <> BlockEnd) and
-              (hp1.typ <> ait_label)
-{$ifdef JVM}
-              and (hp1.typ <> ait_jcatch)
-{$endif}
-              do
-          if not(hp1.typ in ([ait_label]+skipinstr)) then
+              not (hp1.typ in TaiFence) do
             begin
             begin
               if (hp1.typ = ait_instruction) and
               if (hp1.typ = ait_instruction) and
                  taicpu(hp1).is_jmp and
                  taicpu(hp1).is_jmp and
@@ -1658,9 +1662,7 @@ Unit AoptObj;
                 end
                 end
               else
               else
                 p:=hp1;
                 p:=hp1;
-            end
-          else
-            Break;
+            end;
       end;
       end;
 
 
     { If hp is a label, strip it if its reference count is zero.  Repeat until
     { If hp is a label, strip it if its reference count is zero.  Repeat until

+ 126 - 28
compiler/x86/aoptx86.pas

@@ -93,6 +93,8 @@ unit aoptx86;
         function PostPeepholeOptLea(var p : tai) : Boolean;
         function PostPeepholeOptLea(var p : tai) : Boolean;
 
 
         procedure OptReferences;
         procedure OptReferences;
+
+        procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
       end;
       end;
 
 
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
@@ -3107,8 +3109,47 @@ unit aoptx86;
 {$endif x86_64}
 {$endif x86_64}
       begin
       begin
         Result:=false;
         Result:=false;
-        if MatchOpType(taicpu(p),top_reg,top_reg) and
-          GetNextInstruction(p, hp1) and
+        if not GetNextInstruction(p, hp1) then
+          Exit;
+
+        if MatchInstruction(hp1, A_JMP, [S_NO]) then
+          begin
+            { Sometimes the MOVs that OptPass2JMP produces can be improved
+              further, but we can't just put this jump optimisation in pass 1
+              because it tends to perform worse when conditional jumps are
+              nearby (e.g. when converting CMOV instructions). [Kit] }
+            if OptPass2JMP(hp1) then
+              { call OptPass1MOV once to potentially merge any MOVs that were created }
+              Result := OptPass1MOV(p)
+              { OptPass2MOV will now exit but will be called again if OptPass1MOV
+                returned True and the instruction is still a MOV, thus checking
+                the optimisations below }
+            else
+              { Since OptPass2JMP returned false, no optimisations were done to
+                the jump. Additionally, a label will definitely follow the jump
+                (although it may have become dead), so skip ahead as far as
+                possible }
+              begin
+                while (p <> hp1) do
+                  begin
+                    { Nothing changed between the MOV and the JMP, so
+                      don't bother with "UpdateUsedRegsAndOptimize" }
+                    UpdateUsedRegs(p);
+                    p := tai(p.Next);
+                  end;
+
+                { Use "UpdateUsedRegsAndOptimize" here though, because the
+                  label might now be dead and can be stripped out }
+                p := tai(UpdateUsedRegsAndOptimize(hp1).Next);
+
+                { If p is a label, then Result will be False and program flow
+                  will move onto the next list entry in "PeepHoleOptPass2" }
+                if (p = BlockEnd) or not (p.typ in [ait_align, ait_label]) then
+                  Result := True;
+
+              end;
+          end
+        else if MatchOpType(taicpu(p),top_reg,top_reg) and
 {$ifdef x86_64}
 {$ifdef x86_64}
           MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
           MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
 {$else x86_64}
 {$else x86_64}
@@ -3141,7 +3182,6 @@ unit aoptx86;
             exit;
             exit;
           end
           end
         else if MatchOpType(taicpu(p),top_reg,top_reg) and
         else if MatchOpType(taicpu(p),top_reg,top_reg) and
-          GetNextInstruction(p, hp1) and
 {$ifdef x86_64}
 {$ifdef x86_64}
           MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
           MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
 {$else x86_64}
 {$else x86_64}
@@ -3168,7 +3208,6 @@ unit aoptx86;
             exit;
             exit;
           end
           end
         else if (taicpu(p).oper[0]^.typ = top_ref) and
         else if (taicpu(p).oper[0]^.typ = top_ref) and
-          GetNextInstruction(p,hp1) and
           (hp1.typ = ait_instruction) and
           (hp1.typ = ait_instruction) and
           { while the GetNextInstruction(hp1,hp2) call could be factored out,
           { while the GetNextInstruction(hp1,hp2) call could be factored out,
             doing it separately in both branches allows to do the cheap checks
             doing it separately in both branches allows to do the cheap checks
@@ -3236,7 +3275,6 @@ unit aoptx86;
         else if (taicpu(p).opsize = S_L) and
         else if (taicpu(p).opsize = S_L) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           (
           (
-            GetNextInstruction(p, hp1) and
             MatchInstruction(hp1, A_MOV,[]) and
             MatchInstruction(hp1, A_MOV,[]) and
             (taicpu(hp1).opsize = S_L) and
             (taicpu(hp1).opsize = S_L) and
             (taicpu(hp1).oper[1]^.typ = top_reg)
             (taicpu(hp1).oper[1]^.typ = top_reg)
@@ -3365,40 +3403,100 @@ unit aoptx86;
       end;
       end;
 
 
 
 
+    procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
+      var
+        ThisLabel: TAsmLabel;
+      begin
+        ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
+        ThisLabel.decrefs;
+        taicpu(p).opcode := A_RET;
+        taicpu(p).is_jmp := false;
+        taicpu(p).ops := taicpu(ret_p).ops;
+        case taicpu(ret_p).ops of
+          0:
+            taicpu(p).clearop(0);
+          1:
+            taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
+          else
+            internalerror(2016041301);
+        end;
+
+        { If the original label is now dead, it might turn out that the label
+          immediately follows p.  As a result, everything beyond it, which will
+          be just some final register configuration and a RET instruction, is
+          now dead code. [Kit] }
+
+        { NOTE: This is much faster than introducing an OptPass2RET routine and
+          running RemoveDeadCodeAfterJump for each RET instruction, because
+          this optimisation rarely happens and most RETs appear at the end of
+          routines where there is nothing that can be stripped. [Kit] }
+        if not ThisLabel.is_used then
+          RemoveDeadCodeAfterJump(p);
+      end;
+
+
     function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
     function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
       var
       var
-        hp1 : tai;
+        hp1, hp2 : tai;
       begin
       begin
-        {
-          change
-                 jmp .L1
-                 ...
-             .L1:
-                 ret
-          into
-                 ret
-        }
         result:=false;
         result:=false;
         if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
         if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
           (taicpu(p).oper[0]^.ref^.index=NR_NO) then
           (taicpu(p).oper[0]^.ref^.index=NR_NO) then
           begin
           begin
             hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
             hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
-            if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
-              MatchInstruction(hp1,A_RET,[S_NO]) then
+            if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
               begin
               begin
-                tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
-                taicpu(p).opcode:=A_RET;
-                taicpu(p).is_jmp:=false;
-                taicpu(p).ops:=taicpu(hp1).ops;
-                case taicpu(hp1).ops of
-                  0:
-                    taicpu(p).clearop(0);
-                  1:
-                    taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
+                case taicpu(hp1).opcode of
+                  A_RET:
+                    {
+                      change
+                             jmp .L1
+                             ...
+                         .L1:
+                             ret
+                      into
+                             ret
+                    }
+                    begin
+                      ConvertJumpToRET(p, hp1);
+                      result:=true;
+                    end;
+                  A_MOV:
+                    {
+                      change
+                             jmp .L1
+                             ...
+                         .L1:
+                             mov ##, ##
+                             ret
+                      into
+                             mov ##, ##
+                             ret
+                    }
+                    { This optimisation tends to increase code size if the pass 1 MOV optimisations aren't
+                      re-run, so only do this particular optimisation if optimising for speed or when
+                      optimisations are very in-depth. [Kit] }
+                    if (current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size] then
+                      begin
+                        GetNextInstruction(hp1, hp2);
+                        if not Assigned(hp2) then
+                          Exit;
+
+                        if (hp2.typ in [ait_label, ait_align]) then
+                          SkipLabels(hp2,hp2);
+                        if Assigned(hp2) and MatchInstruction(hp2, A_RET, [S_NO]) then
+                          begin
+                            { Duplicate the MOV instruction }
+                            asml.InsertBefore(hp1.getcopy, p);
+
+                            { Now change the jump into a RET instruction }
+                            ConvertJumpToRET(p, hp2);
+                            result:=true;
+                          end;
+                      end;
                   else
                   else
-                    internalerror(2016041301);
+                    { Do nothing };
                 end;
                 end;
-                result:=true;
               end;
               end;
           end;
           end;
       end;
       end;