Browse Source

* x86: New jump/label tracking in CrossJump optimisations

J. Gareth "Curious Kit" Moreton 3 years ago
parent
commit
627fb9a25b
5 changed files with 264 additions and 11 deletions
  1. 7 0
      compiler/aoptobj.pas
  2. 15 0
      compiler/i386/aoptcpu.pas
  3. 225 9
      compiler/x86/aoptx86.pas
  4. 2 2
      compiler/x86/cpubase.pas
  5. 15 0
      compiler/x86_64/aoptcpu.pas

+ 7 - 0
compiler/aoptobj.pas

@@ -420,6 +420,9 @@ Unit AoptObj;
           each instruction. Useful for debugging the InstructionLoadsFromReg and
           each instruction. Useful for debugging the InstructionLoadsFromReg and
           other similar functions. }
           other similar functions. }
         procedure Debug_InsertInstrRegisterDependencyInfo; virtual;
         procedure Debug_InsertInstrRegisterDependencyInfo; virtual;
+      protected
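+        { False during the first iteration of Pass 1; set to True once an
+          iteration has finished with more work to do, so it is True on the
+          second and every later iteration }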
+        NotFirstIteration: Boolean;
       private
       private
         procedure DebugMsg(const s: string; p: tai);
         procedure DebugMsg(const s: string; p: tai);
       End;
       End;
@@ -2492,6 +2495,7 @@ Unit AoptObj;
         else
         else
           MaxCount := MaxPasses[1];
           MaxCount := MaxPasses[1];
 
 
+        NotFirstIteration := False;
         repeat
         repeat
           stoploop:=true;
           stoploop:=true;
           p := StartPoint;
           p := StartPoint;
@@ -2546,6 +2550,9 @@ Unit AoptObj;
 
 
           Inc(PassCount);
           Inc(PassCount);
 
 
+          if not stoploop then
+            NotFirstIteration := True;
+
         until stoploop or (PassCount >= MaxCount);
         until stoploop or (PassCount >= MaxCount);
       end;
       end;
 
 

+ 15 - 0
compiler/i386/aoptcpu.pas

@@ -283,6 +283,21 @@ unit aoptcpu;
           else
           else
             ;
             ;
         end;
         end;
+        { If this flag is set, force another run of pass 1 even if p wasn't
+          changed }
+        if aoc_ForceNewIteration in OptsToCheck then
+          begin
+            Exclude(OptsToCheck, aoc_ForceNewIteration);
+
+            if not Result then
+              begin
+                if not (p.typ in SkipInstr) then
+                  UpdateUsedRegs(p);
+
+                p := tai(p.Next);
+                Result := True;
+              end;
+          end;
       end;
       end;
 
 
 
 

+ 225 - 9
compiler/x86/aoptx86.pas

@@ -32,7 +32,7 @@ unit aoptx86;
   interface
   interface
 
 
     uses
     uses
-      globtype,
+      globtype,cclasses,
       cpubase,
       cpubase,
       aasmtai,aasmcpu,
       aasmtai,aasmcpu,
       cgbase,cgutils,
       cgbase,cgutils,
@@ -40,7 +40,8 @@ unit aoptx86;
 
 
     type
     type
       TOptsToCheck = (
       TOptsToCheck = (
-        aoc_MovAnd2Mov_3
+        aoc_MovAnd2Mov_3,
+        aoc_ForceNewIteration
       );
       );
 
 
       TX86AsmOptimizer = class(TAsmOptimizer)
       TX86AsmOptimizer = class(TAsmOptimizer)
@@ -57,7 +58,7 @@ unit aoptx86;
         { This version of GetNextInstructionUsingReg will look across conditional jumps,
         { This version of GetNextInstructionUsingReg will look across conditional jumps,
           potentially allowing further optimisation (although it might need to know if
           potentially allowing further optimisation (although it might need to know if
           it crossed a conditional jump. }
           it crossed a conditional jump. }
-        function GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
+        function GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var JumpTracking: TLinkedList; var CrossJump: Boolean): Boolean;
 
 
         {
         {
           In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
           In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
@@ -255,6 +256,33 @@ unit aoptx86;
 {$endif DEBUG_AOPTCPU}
 {$endif DEBUG_AOPTCPU}
       LIST_STEP_SIZE = 4;
       LIST_STEP_SIZE = 4;
 
 
+    type
+      { A single entry in a jump tracking list: a symbol together with a
+        counter that is advanced once per IncRefs call. }
+      TJumpTrackingItem = class(TLinkedListItem)
+      private
+        FSymbol: TAsmSymbol;
+        FRefs: LongInt;
+      public
+        { Creates an entry for ASymbol whose counter starts at zero }
+        constructor Create(ASymbol: TAsmSymbol);
+        { Advances the counter by one }
+        procedure IncRefs; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+        { Current value of the counter }
+        property Refs: LongInt read FRefs;
+        { The symbol that was handed to the constructor }
+        property Symbol: TAsmSymbol read FSymbol;
+      end;
+
+
+    constructor TJumpTrackingItem.Create(ASymbol: TAsmSymbol);
+      begin
+        inherited Create;
+        { Nothing has been counted against this symbol yet }
+        FRefs := 0;
+        FSymbol := ASymbol;
+      end;
+
+
+    procedure TJumpTrackingItem.IncRefs; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        FRefs := FRefs + 1;
+      end;
+
+
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
       begin
       begin
         result :=
         result :=
@@ -448,23 +476,198 @@ unit aoptx86;
     end;
     end;
 
 
 
 
-  function TX86AsmOptimizer.GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
+  function TX86AsmOptimizer.GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var JumpTracking: TLinkedList; var CrossJump: Boolean): Boolean;
+
+    { Records one more tracked jump to Symbol in JumpTracking.

+      If an entry for Symbol already exists it is taken out of the list and
+      re-appended; otherwise a new entry is created.  Either way the entry
+      ends up added with Concat and its reference counter is incremented. }
+    procedure TrackJump(Symbol: TAsmSymbol);
+      var
+        Search: TJumpTrackingItem;
+      begin
+        { See if an entry already exists in our jump tracking list
+          (faster to search backwards due to the higher chance of
+          matching destinations) }
+        Search := TJumpTrackingItem(JumpTracking.Last);
+
+        while Assigned(Search) do
+          begin
+            if Search.Symbol = Symbol then
+              begin
+                { Found it - unlink it so it can be re-added below, which
+                  puts it where the backwards search looks first }
+                JumpTracking.Remove(Search);
+                Break;
+              end;
+
+            Search := TJumpTrackingItem(Search.Previous);
+          end;
+
+        if not Assigned(Search) then
+          { No entry yet.  Build it from the Symbol parameter itself rather
+            than re-reading the jump operand of the enclosing routine's
+            Next, so the stored symbol is always the one that was searched
+            for, whatever Next refers to at the time of the call }
+          Search := TJumpTrackingItem.Create(Symbol);
+
+        JumpTracking.Concat(Search);
+        Search.IncRefs;
+      end;
+
+    { Returns True when Symbol has an entry in JumpTracking whose counted
+      references equal Symbol's own reference count (getrefs).  In that case
+      the entry is removed from the list and freed, and CrossJump is cleared
+      if the list has become empty.  Returns False, leaving the list
+      untouched, when Symbol is not tracked or the counts differ. }
+    function LabelAccountedFor(Symbol: TAsmSymbol): Boolean;
+      var
+        Item: TJumpTrackingItem;
+      begin
+        Result := False;
+
+        { Walk from the tail of the list towards the head until the entry
+          for this label is found (or the list is exhausted) }
+        Item := TJumpTrackingItem(JumpTracking.Last);
+        while Assigned(Item) and (Item.Symbol <> Symbol) do
+          Item := TJumpTrackingItem(Item.Previous);
+
+        { Label does not appear in the tracking list }
+        if not Assigned(Item) then
+          Exit;
+
+        { Some references to the label were not tracked, so the label can
+          be entered through other means - give up }
+        if Item.Symbol.getrefs <> Item.Refs then
+          Exit;
+
+        { Every reference is accounted for, so the entry is finished with }
+        JumpTracking.Remove(Item);
+        Item.Free;
+
+        { CrossJump cannot logically be False at this point, since a
+          conditional jump to this label must have been encountered
+          earlier for the entry to exist }
+        if not CrossJump then
+          InternalError(2022041710);
+
+        { An empty tracking list means there are no more cross jumps }
+        if JumpTracking.First = nil then
+          CrossJump := False;
+
+        Result := True;
+      end;
+
+    var
+      Next_Label: tai;
     begin
     begin
       { Note, CrossJump keeps its input value if a conditional jump is not found - it doesn't get set to False }
       { Note, CrossJump keeps its input value if a conditional jump is not found - it doesn't get set to False }
       Next := Current;
       Next := Current;
       repeat
       repeat
         Result := GetNextInstruction(Next,Next);
         Result := GetNextInstruction(Next,Next);
-        if Result and (Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) then
+        if not Result then
+          Break;
+
+        if Next.typ = ait_align then
+          Result := SkipAligns(Next, Next);
+
+        if (Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) then
           if is_calljmpuncondret(taicpu(Next).opcode) then
           if is_calljmpuncondret(taicpu(Next).opcode) then
             begin
             begin
+              if (taicpu(Next).opcode = A_JMP) and
+                { Remove dead code now to save time }
+                RemoveDeadCodeAfterJump(taicpu(Next)) then
+                { A jump was removed, but not the current instruction, and
+                  Result doesn't necessarily translate into an optimisation
+                  routine's Result, so use the "Force New Iteration" flag to
+                  mark a new pass }
+                Include(OptsToCheck, aoc_ForceNewIteration);
+
+              if not Assigned(JumpTracking) then
+                begin
+                  { Cross-label optimisations often causes other optimisations
+                    to perform worse because they're not given the chance to
+                    optimise locally.  In this case, don't do the cross-label
+                    optimisations yet, but flag them as a potential possibility
+                    for the next iteration of Pass 1 }
+                  if not NotFirstIteration then
+                    Include(OptsToCheck, aoc_ForceNewIteration);
+                end
+              else if IsJumpToLabel(taicpu(Next)) and
+                GetNextInstruction(Next, Next_Label) and
+                SkipAligns(Next_Label, Next_Label) then
+                begin
+
+                  { If we have JMP .lbl, and the label after it has all of its
+                    references tracked, then this is probably an if-else style of
+                    block and we can keep tracking.  If the label for this jump
+                    then appears later and is fully tracked, then it's the end
+                    of the if-else blocks and the code paths converge (thus
+                    marking the end of the cross-jump) }
+
+                  if (Next_Label.typ = ait_label) then
+                    begin
+                      if LabelAccountedFor(tai_label(Next_Label).labsym) then
+                        begin
+                          TrackJump(JumpTargetOp(taicpu(Next))^.ref^.symbol);
+                          Next := Next_Label;
+
+                          { CrossJump gets set to false by LabelAccountedFor if the
+                            list is completely emptied (as it indicates that all
+                            code paths have converged).  We could avoid this nuance
+                            by moving the TrackJump call to before the
+                            LabelAccountedFor call, but this is slower in situations
+                            where LabelAccountedFor would return False due to the
+                            creation of a new object that is not used and destroyed
+                            soon after. }
+                          CrossJump := True;
+                          Continue;
+                        end;
+                    end
+                  else if (Next_Label.typ <> ait_marker) then
+                    { We just did a RemoveDeadCodeAfterJump, so either we find
+                      a label, the end of the procedure or some kind of marker}
+                    InternalError(2022041720);
+                end;
+
               Result := False;
               Result := False;
               Exit;
               Exit;
             end
             end
           else
           else
-            CrossJump := True;
+            begin
+              if not Assigned(JumpTracking) then
+                begin
+                  { Cross-label optimisations often causes other optimisations
+                    to perform worse because they're not given the chance to
+                    optimise locally.  In this case, don't do the cross-label
+                    optimisations yet, but flag them as a potential possibility
+                    for the next iteration of Pass 1 }
+                  if not NotFirstIteration then
+                    Include(OptsToCheck, aoc_ForceNewIteration);
+                end
+              else if IsJumpToLabel(taicpu(Next)) then
+                TrackJump(JumpTargetOp(taicpu(Next))^.ref^.symbol)
+              else
+                { Conditional jumps should always be a jump to label }
+                InternalError(2022041701);
+
+              CrossJump := True;
+              Continue;
+            end;
+
+        if Next.typ = ait_label then
+          begin
+            if not Assigned(JumpTracking) then
+              begin
+                { Cross-label optimisations often causes other optimisations
+                  to perform worse because they're not given the chance to
+                  optimise locally.  In this case, don't do the cross-label
+                  optimisations yet, but flag them as a potential possibility
+                  for the next iteration of Pass 1 }
+                if not NotFirstIteration then
+                  Include(OptsToCheck, aoc_ForceNewIteration);
+
+              end
+            else if LabelAccountedFor(tai_label(Next).labsym) then
+              Continue;
+
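+            { If we reach here, we're at a label that is not fully accounted
+              for by the tracked jumps - either it isn't in the tracking list
+              or it has references besides the tracked ones (or JumpTracking
+              was nil) }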
+            Break;
+          end;
       until not Result or
       until not Result or
             not (cs_opt_level3 in current_settings.optimizerswitches) or
             not (cs_opt_level3 in current_settings.optimizerswitches) or
-            (Next.typ <> ait_instruction) or
+            not (Next.typ in [ait_label, ait_instruction]) or
             RegInInstruction(reg,Next);
             RegInInstruction(reg,Next);
     end;
     end;
 
 
@@ -2553,7 +2756,7 @@ unit aoptx86;
         SourceRef, TargetRef: TReference;
         SourceRef, TargetRef: TReference;
         MovAligned, MovUnaligned: TAsmOp;
         MovAligned, MovUnaligned: TAsmOp;
         ThisRef: TReference;
         ThisRef: TReference;
-
+        JumpTracking: TLinkedList;
       begin
       begin
         Result:=false;
         Result:=false;
 
 
@@ -3674,7 +3877,12 @@ unit aoptx86;
             TransferUsedRegs(TmpUsedRegs);
             TransferUsedRegs(TmpUsedRegs);
             UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
             UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
 
 
-            while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
+            if NotFirstIteration then
+              JumpTracking := TLinkedList.Create
+            else
+              JumpTracking := nil;
+
+            while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,JumpTracking,CrossJump) and
               { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
               { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
               (hp2.typ=ait_instruction) do
               (hp2.typ=ait_instruction) do
               begin
               begin
@@ -3690,6 +3898,7 @@ unit aoptx86;
                             DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p);
                             DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p);
                             RemoveCurrentp(p, hp1);
                             RemoveCurrentp(p, hp1);
                             Result := True;
                             Result := True;
+                            JumpTracking.Free;
                             Exit;
                             Exit;
                           end;
                           end;
                         { Can't go any further }
                         { Can't go any further }
@@ -3756,6 +3965,7 @@ unit aoptx86;
                                       DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
                                       DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
                                       RemoveCurrentP(p, hp1);
                                       RemoveCurrentP(p, hp1);
                                       Result:=true;
                                       Result:=true;
+                                      JumpTracking.Free;
                                       Exit;
                                       Exit;
                                     end;
                                     end;
                                 end
                                 end
@@ -3778,6 +3988,7 @@ unit aoptx86;
                                       DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
                                       DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
                                       RemoveCurrentP(p, hp1);
                                       RemoveCurrentP(p, hp1);
                                       Result:=true;
                                       Result:=true;
+                                      JumpTracking.Free;
                                       Exit;
                                       Exit;
                                     end;
                                     end;
 
 
@@ -3887,6 +4098,7 @@ unit aoptx86;
                         RemoveInstruction(hp2);
                         RemoveInstruction(hp2);
 
 
                         Result := True;
                         Result := True;
+                        JumpTracking.Free;
                         Exit;
                         Exit;
                       end;
                       end;
                   else
                   else
@@ -3921,6 +4133,8 @@ unit aoptx86;
                                 { We can remove the original MOV }
                                 { We can remove the original MOV }
                                 DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
                                 DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
                                 RemoveCurrentp(p, hp1);
                                 RemoveCurrentp(p, hp1);
+                                JumpTracking.Free;
+                                Result := True;
                                 Exit;
                                 Exit;
                               end;
                               end;
 
 
@@ -3938,6 +4152,8 @@ unit aoptx86;
                 Break;
                 Break;
               end;
               end;
 
 
+            JumpTracking.Free;
+
           end;
           end;
 
 
         if (aoc_MovAnd2Mov_3 in OptsToCheck) and
         if (aoc_MovAnd2Mov_3 in OptsToCheck) and

+ 2 - 2
compiler/x86/cpubase.pas

@@ -340,7 +340,7 @@ topsize2memsize: array[topsize] of integer =
     function reg2opsize(r:Tregister):topsize;
     function reg2opsize(r:Tregister):topsize;
     function reg_cgsize(const reg: tregister): tcgsize;
     function reg_cgsize(const reg: tregister): tcgsize;
     function is_calljmp(o:tasmop):boolean;
     function is_calljmp(o:tasmop):boolean;
-    function is_calljmpuncondret(o:tasmop):boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function is_calljmpuncondret(o:tasmop):boolean;
     procedure inverse_flags(var f: TResFlags);
     procedure inverse_flags(var f: TResFlags);
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function is_segment_reg(r:tregister):boolean;
     function is_segment_reg(r:tregister):boolean;
@@ -578,7 +578,7 @@ implementation
       end;
       end;
 
 
 
 
-    function is_calljmpuncondret(o:tasmop):boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function is_calljmpuncondret(o:tasmop):boolean;
       begin
       begin
         case o of
         case o of
           A_CALL,
           A_CALL,

+ 15 - 0
compiler/x86_64/aoptcpu.pas

@@ -202,6 +202,21 @@ uses
           else
           else
             ;
             ;
         end;
         end;
+        { If this flag is set, force another run of pass 1 even if p wasn't
+          changed }
+        if aoc_ForceNewIteration in OptsToCheck then
+          begin
+            Exclude(OptsToCheck, aoc_ForceNewIteration);
+
+            if not Result then
+              begin
+                if not (p.typ in SkipInstr) then
+                  UpdateUsedRegs(p);
+
+                p := tai(p.Next);
+                Result := True;
+              end;
+          end;
       end;
       end;