浏览代码

* x86: Separated out Jcc; STC/CLC optimisation and added additional peephole optimisations

J. Gareth "Curious Kit" Moreton 1 年之前
父节点
当前提交
698d432cce
共有 1 个文件被更改,包括 135 次插入83 次删除
  1. 135 83
      compiler/x86/aoptx86.pas

+ 135 - 83
compiler/x86/aoptx86.pas

@@ -222,6 +222,7 @@ unit aoptx86;
         function TrySwapMovOp(var p, hp1: tai): Boolean;
         function TrySwapMovCmp(var p, hp1: tai): Boolean;
         function TryCmpCMovOpts(var p, hp1: tai) : Boolean;
+        function TryJccStcClcOpt(var p, hp1: tai): Boolean;
 
         { Processor-dependent reference optimisation }
         class procedure OptimizeRefs(var p: taicpu); static;
@@ -8629,7 +8630,6 @@ unit aoptx86;
      var
        hp1, hp2, hp3, hp4, hp5: tai;
        ThisReg: TRegister;
-       TempBool: Boolean;
      begin
        Result := False;
        if not GetNextInstruction(p,hp1) then
@@ -8788,55 +8788,7 @@ unit aoptx86;
            exit;
          end
        else if MatchInstruction(hp1, A_CLC, A_STC, []) then
-         begin
-           {
-               j(c)   .L1
-               stc/clc
-             .L1:
-               jc/jnc .L2
-               (Flags deallocated)
-
-             Change to:
-               j)c)   .L1
-               jmp    .L2
-             .L1:
-               jc/jnc .L2
-
-             Then call DoJumpOptimizations to convert to:
-               j(nc)  .L2
-             .L1: (may become a dead label)
-               jc/jnc .L2
-           }
-           if GetNextInstruction(hp1, hp2) and
-             (hp2.typ = ait_label) and
-             (tai_label(hp2).labsym = TAsmLabel(taicpu(p).oper[0]^.ref^.symbol)) and
-             GetNextInstruction(hp2, hp3) and
-             MatchInstruction(hp3, A_Jcc, []) and
-             (
-               (
-                 (taicpu(hp3).condition = C_C) and
-                 (taicpu(hp1).opcode = A_STC)
-               ) or (
-                 (taicpu(hp3).condition = C_NC) and
-                 (taicpu(hp1).opcode = A_CLC)
-               )
-             ) and
-             { Make sure the flags aren't used again }
-             Assigned(FindRegDealloc(NR_DEFAULTFLAGS, tai(hp3.Next))) then
-             begin
-               taicpu(hp1).allocate_oper(1);
-               taicpu(hp1).ops := 1;
-               taicpu(hp1).loadsymbol(0, TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol), 0);
-               taicpu(hp1).opcode := A_JMP;
-               taicpu(hp1).is_jmp := True;
-
-               TempBool := True; { Prevent compiler warnings }
-               if DoJumpOptimizations(p, TempBool) then
-                 Result := True
-               else
-                 Include(OptsToCheck, aoc_ForceNewIteration);
-             end;
-         end
+         Result := TryJccStcClcOpt(p, hp1)
        else if (hp1.typ = ait_label) then
          Result := DoSETccLblRETOpt(p, tai_label(hp1));
      end;
@@ -9462,7 +9414,7 @@ unit aoptx86;
     var
       hp1, hp2, p_dist, hp1_dist: tai;
       JumpLabel: TAsmLabel;
-
+      Tmp: string;
     begin
       Result := False;
       { Look for:
@@ -9484,55 +9436,155 @@ unit aoptx86;
       if (hp1.typ = ait_instruction) and
         IsJumpToLabel(taicpu(hp1)) then
         begin
+          { Try the J(c); stc/clc optimisation first, since it would
+            otherwise be missed if the main optimisation takes place }
+          if (taicpu(hp1).opcode = A_JCC) and
+            GetNextInstruction(hp1, hp2) and
+            MatchInstruction(hp2, A_CLC, A_STC, []) and
+            TryJccStcClcOpt(hp1, hp2) then
+            begin
+              Result := True;
+              Exit;
+            end;
+
           hp2 := nil; { Suppress compiler warning }
           JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
           if Assigned(JumpLabel) and
             SetAndTest(getlabelwithsym(JumpLabel), hp2) and
             GetNextInstruction(hp2, p_dist) and
-            MatchInstruction(p_dist, A_SETcc, []) and
-            (taicpu(p_dist).condition in [C_C, C_NC]) and
-            { Make sure the flags aren't used again }
-            SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), hp2) and
-            GetNextInstruction(hp2, hp1_dist) and
-            IsJumpToLabel(taicpu(hp1_dist)) and
-            { Make sure the carry flag doesn't appear in the jump conditions }
-            not (taicpu(hp1).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and
-            not (taicpu(hp1_dist).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and
-            { This works if hp1_dist or both are regular JMP instructions }
-            condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
+            MatchInstruction(p_dist, A_Jcc, A_SETcc, []) and
+            (taicpu(p_dist).condition in [C_C, C_NC]) then
             begin
-              taicpu(p).allocate_oper(2);
-              taicpu(p).ops := 2;
+              case taicpu(p_dist).opcode of
+                A_Jcc:
+                  { clc + jc = False; clc + jnc = True; stc + jc = True; stc + jnc = False }
+                  if ((taicpu(p).opcode = A_STC) xor (taicpu(p_dist).condition = C_NC)) then
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C -> JMP/Jcc (StcClcJ(c)2Jmp)', p);
 
-              { clc + setc = 0; clc + setnc = 1; stc + setc = 1; stc + setnc = 0 }
-              taicpu(p).loadconst(0, TCGInt((taicpu(p).opcode = A_STC) xor (taicpu(p_dist).condition = C_NC)));
-              taicpu(p).loadoper(1, taicpu(p_dist).oper[0]^);
-              taicpu(p).opcode := A_MOV;
-              taicpu(p).opsize := S_B;
+                      JumpLabel.decrefs;
+                      taicpu(hp1).loadsymbol(0, taicpu(p_dist).oper[0]^.ref^.symbol, 0);
 
-              if (taicpu(p_dist).oper[0]^.typ = top_reg) then
-                AllocRegBetween(taicpu(p_dist).oper[0]^.reg, p, hp1, UsedRegs);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
+                      Exit;
+                    end
+                  else if GetNextInstruction(p_dist, hp1_dist) and
+                    (hp1_dist.typ = ait_label) then
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C; .Lbl -> JMP/Jcc .Lbl (StcClcJ(~c)Lbl2Jmp)', p);
 
-              DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP; ... SET(N)C; JMP -> MOV; JMP (StcClc2Mov)', p);
+                      JumpLabel.decrefs;
+                      taicpu(hp1).loadsymbol(0, tai_label(hp1_dist).labsym, 0);
 
-              JumpLabel.decrefs;
-              taicpu(hp1).loadsymbol(0, taicpu(hp1_dist).oper[0]^.ref^.symbol, 0);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
+                      Exit;
+                    end;
 
-              { If a flag allocation is found, try to move it to after the MOV so "mov $0,%reg" gets optimised to "xor %reg,%reg" }
-              if SetAndTest(FindRegAlloc(NR_DEFAULTFLAGS, p), hp1) and
-                (tai_regalloc(hp1).ratype = ra_alloc) then
-                begin
-                  Asml.Remove(hp1);
-                  Asml.InsertAfter(hp1, p);
-                end;
+                A_SETcc:
+                  if { Make sure the flags aren't used again }
+                    SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), hp2) and
+                    GetNextInstruction(hp2, hp1_dist) and
+                    IsJumpToLabel(taicpu(hp1_dist)) and
+                    { Make sure the carry flag doesn't appear in the jump conditions }
+                    not (taicpu(hp1).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and
+                    not (taicpu(hp1_dist).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and
+                    { This works if hp1_dist or both are regular JMP instructions }
+                    condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
+                    begin
+                      taicpu(p).allocate_oper(2);
+                      taicpu(p).ops := 2;
 
-              Result := True;
-              Exit;
+                      { clc + setc = 0; clc + setnc = 1; stc + setc = 1; stc + setnc = 0 }
+                      taicpu(p).loadconst(0, TCGInt((taicpu(p).opcode = A_STC) xor (taicpu(p_dist).condition = C_NC)));
+                      taicpu(p).loadoper(1, taicpu(p_dist).oper[0]^);
+                      taicpu(p).opcode := A_MOV;
+                      taicpu(p).opsize := S_B;
+
+                      if (taicpu(p_dist).oper[0]^.typ = top_reg) then
+                        AllocRegBetween(taicpu(p_dist).oper[0]^.reg, p, hp1, UsedRegs);
+
+                      DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP; ... SET(N)C; JMP -> MOV; JMP (StcClcSet(c)2Mov)', p);
+
+                      JumpLabel.decrefs;
+                      taicpu(hp1).loadsymbol(0, taicpu(hp1_dist).oper[0]^.ref^.symbol, 0);
+
+                      { If a flag allocation is found, try to move it to after the MOV so "mov $0,%reg" gets optimised to "xor %reg,%reg" }
+                      if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp1) and
+                        (tai_regalloc(hp1).ratype = ra_alloc) then
+                        begin
+                          Asml.Remove(hp1);
+                          Asml.InsertAfter(hp1, p);
+                        end;
+
+                      Result := True;
+                      Exit;
+                    end;
+                else
+                  ;
+              end;
             end;
         end;
 
     end;
 
+  function TX86AsmOptimizer.TryJccStcClcOpt(var p, hp1: tai): Boolean;
+    var
+      hp2, hp3: tai;
+      TempBool: Boolean;
+    begin
+      Result := False;
+      {
+        Rewrites a conditional jump over a single STC/CLC into a form that
+        DoJumpOptimizations can simplify.
+
+        p   - the conditional jump; the entry GetNextInstruction returns
+              after hp1 must be the label that p targets
+        hp1 - the instruction following p; the pattern only matches when
+              its opcode is A_STC or A_CLC
+
+        Result is True only when DoJumpOptimizations reports a change.  If
+        hp1 was rewritten but DoJumpOptimizations returned False,
+        aoc_ForceNewIteration is added to OptsToCheck instead and Result
+        stays False.
+
+        Look for:
+          j(c)   .L1
+          stc/clc
+        .L1:
+          jc/jnc .L2
+          (Flags deallocated)
+
+        Change to:
+          j(c)   .L1
+          jmp    .L2
+        .L1:
+          jc/jnc .L2
+
+        Then call DoJumpOptimizations to convert to:
+          j(nc)  .L2
+        .L1: (may become a dead label)
+          jc/jnc .L2
+      }
+      if GetNextInstruction(hp1, hp2) and
+        (hp2.typ = ait_label) and
+        (tai_label(hp2).labsym = TAsmLabel(taicpu(p).oper[0]^.ref^.symbol)) and
+        GetNextInstruction(hp2, hp3) and
+        MatchInstruction(hp3, A_Jcc, []) and
+        (
+          (
+            (taicpu(hp3).condition = C_C) and
+            (taicpu(hp1).opcode = A_STC)
+          ) or (
+            (taicpu(hp3).condition = C_NC) and
+            (taicpu(hp1).opcode = A_CLC)
+          )
+        ) and
+        { Make sure the flags aren't used again: a deallocation of the flags
+          register must be found starting from the entry after hp3 }
+        Assigned(FindRegDealloc(NR_DEFAULTFLAGS, tai(hp3.Next))) then
+        begin
+          tai_label(hp2).labsym.DecRefs; { NOTE(review): p still targets this
+            label at this point - confirm that DoJumpOptimizations does not
+            decrement the reference count a second time }
+
+          taicpu(hp1).allocate_oper(1);
+          taicpu(hp1).ops := 1;
+          taicpu(hp1).loadsymbol(0, TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol), 0);
+          taicpu(hp1).opcode := A_JMP;
+          taicpu(hp1).is_jmp := True;
+
+          TempBool := True; { Prevent compiler warnings }
+          if DoJumpOptimizations(p, TempBool) then
+            Result := True
+          else
+            Include(OptsToCheck, aoc_ForceNewIteration);
+        end;
+    end;
 
 
   function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;