Browse Source

* x86: CMOVcc/Jcc pairs are now changed to MOV/Jcc when the destination register is not used on the path where the jump is not taken

J. Gareth "Curious Kit" Moreton 1 year ago
parent
commit
a7fe49f38f
3 changed files with 62 additions and 0 deletions
  1. 2 0
      compiler/i386/aoptcpu.pas
  2. 58 0
      compiler/x86/aoptx86.pas
  3. 2 0
      compiler/x86_64/aoptcpu.pas

+ 2 - 0
compiler/i386/aoptcpu.pas

@@ -294,6 +294,8 @@ unit aoptcpu;
               case taicpu(p).opcode Of
               case taicpu(p).opcode Of
                 A_ADD:
                 A_ADD:
                   Result:=OptPass2ADD(p);
                   Result:=OptPass2ADD(p);
+                A_CMOVcc:
+                  Result:=OptPass2CMOVcc(p);
                 A_CMP:
                 A_CMP:
                   Result:=OptPass2CMP(p);
                   Result:=OptPass2CMP(p);
                 A_TEST:
                 A_TEST:

+ 58 - 0
compiler/x86/aoptx86.pas

@@ -190,6 +190,7 @@ unit aoptx86;
         function OptPass1STCCLC(var p: tai): Boolean;
         function OptPass1STCCLC(var p: tai): Boolean;
 
 
         function OptPass2STCCLC(var p: tai): Boolean;
         function OptPass2STCCLC(var p: tai): Boolean;
+        function OptPass2CMOVcc(var p: tai): Boolean;
         function OptPass2Movx(var p : tai): Boolean;
         function OptPass2Movx(var p : tai): Boolean;
         function OptPass2MOV(var p : tai) : boolean;
         function OptPass2MOV(var p : tai) : boolean;
         function OptPass2Imul(var p : tai) : boolean;
         function OptPass2Imul(var p : tai) : boolean;
@@ -9687,6 +9688,63 @@ unit aoptx86;
     end;
     end;
 
 
 
 
+  { Pass 2 peephole optimisation for a CMOVcc instruction at p.
+
+    Turns the CMOVcc into an unconditional MOV when it is followed (possibly
+    after further MOV/CMOVcc instructions) by a Jcc whose condition satisfies
+    condition_in against the CMOVcc's condition, and the destination register
+    is either not used after that jump or, at optimisation level 3, is loaded
+    with a new value by the next instruction that uses it.
+
+    p is only modified in place (opcode and condition); it is never replaced
+    or removed.  Returns True if the instruction was changed, False otherwise. }
+  function TX86AsmOptimizer.OptPass2CMOVcc(var p: tai): Boolean;
+    var
+      hp1, hp2: tai;
+    begin
+      Result := False;
+      { Sometimes, the CMOV optimisations in OptPass2Jcc are a bit overzealous
+        and produce a slightly inefficient result on branching-type blocks,
+        notably when setting a function result then jumping to the function
+        epilogue.
+
+        In this case, change:
+
+        cmov(c) %reg1,%reg2
+        j(c) @lbl
+        (%reg2 deallocated)
+
+        To:
+
+        mov %reg1,%reg2
+        j(c) @lbl
+
+        Note, we can't use GetNextInstructionUsingReg to find the conditional
+        jump because if it's not present, we may end up with a jump that's
+        completely unrelated.
+      }
+
+      { Step over any run of MOV/CMOVcc instructions that follows p; hp1 ends
+        up on the first instruction that is neither, or is left as whatever
+        GetNextInstruction stored when it failed }
+      hp1 := p;
+      while GetNextInstruction(hp1, hp1) and
+        MatchInstruction(hp1, A_MOV, A_CMOVcc, []) do { loop };
+
+      { The loop also terminates when GetNextInstruction fails, in which case
+        hp1 is presumably nil (confirm against the GetNextInstruction
+        implementation), so guard before dereferencing it }
+      if Assigned(hp1) and
+        (hp1.typ = ait_instruction) and
+        (taicpu(hp1).opcode = A_Jcc) and
+        condition_in(taicpu(hp1).condition, taicpu(p).condition) then
+        begin
+          { Work on a copy of the register usage information, advanced from
+            p up to the conditional jump }
+          TransferUsedRegs(TmpUsedRegs);
+          UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
+          if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) or
+            (
+              { See if we can find a more distant instruction that overwrites
+                the destination register }
+              (cs_opt_level3 in current_settings.optimizerswitches) and
+              GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[1]^.reg) and
+              RegLoadedWithNewValue(taicpu(p).oper[1]^.reg, hp2)
+            ) then
+            begin
+              DebugMsg(SPeepholeOptimization + 'CMOVcc/Jcc -> MOV/Jcc since register is not used if not branching', p);
+              { Keep the operands; only drop the condition }
+              taicpu(p).opcode := A_MOV;
+              taicpu(p).condition := C_None;
+
+              { Rely on the post peephole stage to put the MOV before the
+                CMP/TEST instruction that appears prior }
+
+              Result := True;
+              Exit;
+            end;
+        end;
+    end;
+
+
   function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
   function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
 
 
      function IsXCHGAcceptable: Boolean; inline;
      function IsXCHGAcceptable: Boolean; inline;

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -229,6 +229,8 @@ uses
                   Result:=OptPass2SUB(p);
                   Result:=OptPass2SUB(p);
                 A_ADD:
                 A_ADD:
                   Result:=OptPass2ADD(p);
                   Result:=OptPass2ADD(p);
+                A_CMOVcc:
+                  Result:=OptPass2CMOVcc(p);
                 A_SETcc:
                 A_SETcc:
                   result:=OptPass2SETcc(p);
                   result:=OptPass2SETcc(p);
                 A_CMP:
                 A_CMP: