Browse Source

* x86: Added new OptPass1CMOVcc peephole optimisation routine to dust up min/max code

J. Gareth "Curious Kit" Moreton 1 year ago
parent
commit
11b341cc97
4 changed files with 63 additions and 1 deletions
  1. 2 0
      compiler/i386/aoptcpu.pas
  2. 58 0
      compiler/x86/aoptx86.pas
  3. 1 1
      compiler/x86/nx86inl.pas
  4. 2 0
      compiler/x86_64/aoptcpu.pas

+ 2 - 0
compiler/i386/aoptcpu.pas

@@ -161,6 +161,8 @@ unit aoptcpu;
                   Result:=OptPass1ADD(p);
                 A_AND:
                   Result:=OptPass1And(p);
+                A_CMOVcc:
+                  Result:=OptPass1CMOVcc(p);
                 A_IMUL:
                   Result:=OptPass1Imul(p);
                 A_CMP:

+ 58 - 0
compiler/x86/aoptx86.pas

@@ -167,6 +167,7 @@ unit aoptx86;
         function OptPass1Test(var p: tai): boolean;
         function OptPass1Add(var p: tai): boolean;
         function OptPass1AND(var p : tai) : boolean;
+        function OptPass1CMOVcc(var p: tai): Boolean;
         function OptPass1_V_MOVAP(var p : tai) : boolean;
         function OptPass1VOP(var p : tai) : boolean;
         function OptPass1MOV(var p : tai) : boolean;
@@ -2299,6 +2300,57 @@ unit aoptx86;
       end;
 
 
+    function TX86AsmOptimizer.OptPass1CMOVcc(var p: tai): Boolean;
+      var
+        hp1: tai;
+        operswap: poper;
+      begin
+        Result := False;
+
+        { Optimise:
+            cmov(c)  %reg1,%reg2
+            mov      %reg2,%reg1
+            (%reg2 dealloc.)
+
+          To:
+            cmov(~c) %reg2,%reg1
+        }
+        if (taicpu(p).oper[0]^.typ = top_reg) then
+          while GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) and
+            MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
+            MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) do
+            begin
+              TransferUsedRegs(TmpUsedRegs);
+              UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
+              if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
+                begin
+                  DebugMsg(SPeepholeOptimization + 'CMOV(c) %reg1,%reg2; MOV %reg2,%reg1 -> CMOV(~c) %reg2,%reg1 (CMovMov2CMov)', p);
+
+                  { Save time by swapping the pointers (they're both registers, so
+                    we don't need to worry about reference counts) }
+                  operswap := taicpu(p).oper[0];
+                  taicpu(p).oper[0] := taicpu(p).oper[1];
+                  taicpu(p).oper[1] := operswap;
+
+                  taicpu(p).condition := inverse_cond(taicpu(p).condition);
+
+                  RemoveInstruction(hp1);
+
+                  { It's still a CMOV, so we can look further ahead }
+                  Include(OptsToCheck, aoc_ForceNewIteration);
+
+                  { But first, let's see if this will get optimised again
+                    (probably won't happen, but best to be sure) }
+                  Continue;
+                end;
+
+              Break;
+            end;
+
+      end;
+
+
     function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
       var
         hp1,hp2 : tai;
@@ -9693,7 +9745,13 @@ unit aoptx86;
       hp1, hp2: tai;
       FoundComparison: Boolean;
     begin
+      { Run the pass 1 optimisations as well, since they may have some effect
+        after the CMOV blocks are created in OptPass2Jcc }
       Result := False;
+{      Result := OptPass1CMOVcc(p);
+      if Result then
+        Exit;}
+
       { Sometimes, the CMOV optimisations in OptPass2Jcc are a bit overzealous
         and make a slightly inefficent result on branching-type blocks, notably
         when setting a function result then jumping to the function epilogue.

+ 1 - 1
compiler/x86/nx86inl.pas

@@ -1591,7 +1591,7 @@ implementation
 {$endif i8086}
       var
 {$ifndef i8086}
-        memop,
+        memop : integer;
         gotmem : boolean;
         op: TAsmOp;
 {$endif i8086}

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -94,6 +94,8 @@ uses
                   Result:=OptPass1ADD(p);
                 A_AND:
                   Result:=OptPass1AND(p);
+                A_CMOVcc:
+                  Result:=OptPass1CMOVcc(p);
                 A_IMUL:
                   Result:=OptPass1Imul(p);
                 A_MOV: