Browse Source

* manually merged merge request 69 by J. Gareth "Kit" Moreton:
x86: CMP/MOV refactoring and expansion
This merge request refactors the SwapMovCmp routine (now TrySwapMovCmp), and the calls to it,
to be more self-contained: the preliminary checks that ensure moving the MOV instruction is
actually a sound idea are now built in. It also makes the routine more general-purpose so that
it can handle instructions that are not MOV operations. This generalisation is primarily for
future expansion, but it also cleans up the code for the x86 peephole optimizer.

florian 3 years ago
parent
commit
ea6529ff63
1 changed files with 97 additions and 55 deletions
  1. 97 55
      compiler/x86/aoptx86.pas

+ 97 - 55
compiler/x86/aoptx86.pas

@@ -193,7 +193,7 @@ unit aoptx86;
         procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
 
         function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
-        procedure SwapMovCmp(var p, hp1: tai);
+        function TrySwapMovCmp(var p, hp1: tai): Boolean;
 
         { Processor-dependent reference optimisation }
         class procedure OptimizeRefs(var p: taicpu); static;
@@ -772,6 +772,16 @@ unit aoptx86;
               Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
             R_SUBFLAGDIRECTION:
               Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
+            R_SUBW,R_SUBD,R_SUBQ:
+              { Everything except the direction bits }
+              Result:=
+                ([Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
+                Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
+                Ch_W0CarryFlag,Ch_W0ParityFlag,Ch_W0AuxiliaryFlag,Ch_W0ZeroFlag,Ch_W0SignFlag,Ch_W0OverflowFlag,
+                Ch_W1CarryFlag,Ch_W1ParityFlag,Ch_W1AuxiliaryFlag,Ch_W1ZeroFlag,Ch_W1SignFlag,Ch_W1OverflowFlag,
+                Ch_WUCarryFlag,Ch_WUParityFlag,Ch_WUAuxiliaryFlag,Ch_WUZeroFlag,Ch_WUSignFlag,Ch_WUOverflowFlag,
+                Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag
+                ]*insprop[taicpu(p1).opcode].Ch)<>[];
             else
               ;
           end;
@@ -4171,33 +4181,8 @@ unit aoptx86;
         Result := False;
 
         if GetNextInstruction(p, hp1) and
-          MatchInstruction(hp1,A_MOV,[]) and
-          (
-            (taicpu(p).oper[0]^.typ <> top_reg) or
-            not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
-          ) and
-          (
-            (taicpu(p).oper[1]^.typ <> top_reg) or
-            not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
-          ) and
-          (
-            { Make sure the register written to doesn't appear in the
-              test instruction (in a reference, say) }
-            (taicpu(hp1).oper[1]^.typ <> top_reg) or
-            not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
-          ) then
+          TrySwapMovCmp(p, hp1) then
           begin
-            { If we have something like:
-                test %reg1,%reg1
-                mov  0,%reg2
-
-              And no registers are shared (the two %reg1's can be different, as
-              long as neither of them are also %reg2), move the MOV command to
-              before the comparison as this means it can be optimised without
-              worrying about the FLAGS register. (This combination is generated
-              by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
-            }
-            SwapMovCmp(p, hp1);
             Result := True;
             Exit;
           end;
@@ -5730,32 +5715,8 @@ unit aoptx86;
                end;
            end;
 
-         if MatchInstruction(hp1,A_MOV,[]) and
-           (
-             (taicpu(p).oper[0]^.typ <> top_reg) or
-             not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
-           ) and
-           (
-             (taicpu(p).oper[1]^.typ <> top_reg) or
-             not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
-           ) and
-           (
-             { Make sure the register written to doesn't appear in the
-               cmp instruction (in a reference, say) }
-             (taicpu(hp1).oper[1]^.typ <> top_reg) or
-             not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
-           ) then
+         if TrySwapMovCmp(p, hp1) then
            begin
-             { If we have something like:
-                 cmp ###,%reg1
-                 mov 0,%reg2
-
-               And no registers are shared, move the MOV command to before the
-               comparison as this means it can be optimised without worrying
-               about the FLAGS register. (This combination is generated by
-               "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
-             }
-             SwapMovCmp(p, hp1);
              Result := True;
              Exit;
            end;
@@ -6524,11 +6485,86 @@ unit aoptx86;
     end;
 
 
-  procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
+  function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
     var
       hp2: tai;
       X: Integer;
+    const
+      WriteOp: array[0..3] of set of TInsChange = (
+        [Ch_Wop1, Ch_RWop1, Ch_Mop1],
+        [Ch_Wop2, Ch_RWop2, Ch_Mop2],
+        [Ch_Wop3, Ch_RWop3, Ch_Mop3],
+        [Ch_Wop4, Ch_RWop4, Ch_Mop4]);
+
+      RegWriteFlags: array[0..7] of set of TInsChange = (
+        { The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
+        [Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
+        [Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
+        [Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
+        [Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
+        [Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
+        [Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
+        [Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
+        [Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
+
     begin
+      { If we have something like:
+          cmp ###,%reg1
+          mov 0,%reg2
+
+        And no modified registers are shared, move the instruction to before
+        the comparison as this means it can be optimised without worrying
+        about the FLAGS register. (CMP/MOV is generated by
+        "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
+
+        As long as the second instruction doesn't use the flags or one of the
+        registers used by CMP or TEST (also check any references that use the
+        registers), then it can be moved prior to the comparison.
+      }
+
+      Result := False;
+      if (hp1.typ <> ait_instruction) or
+        taicpu(hp1).is_jmp or
+        RegInInstruction(NR_DEFAULTFLAGS, hp1) then
+        Exit;
+
+      { NOP is a pipeline fence, likely marking the beginning of the function
+        epilogue, so drop out.  Similarly, drop out if POP or RET are
+        encountered }
+      if MatchInstruction(hp1, A_NOP, A_POP, []) then
+        Exit;
+
+      if (taicpu(hp1).opcode = A_MOVSS) and
+        (taicpu(hp1).ops = 0) then
+        { Wrong MOVSS }
+        Exit;
+
+      { Check for writes to specific registers first }
+      { EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
+      for X := 0 to 7 do
+        if (RegWriteFlags[X] * InsProp[taicpu(hp1).opcode].Ch <> [])
+          and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), p) then
+          Exit;
+
+      for X := 0 to taicpu(hp1).ops - 1 do
+        begin
+          { Check to see if this operand writes to something }
+          if ((WriteOp[X] * InsProp[taicpu(hp1).opcode].Ch) <> []) and
+            { And matches something in the CMP/TEST instruction }
+            (
+              MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[0]^) or
+              MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[1]^) or
+              (
+                { If it's a register, make sure the register written to doesn't
+                  appear in the cmp instruction as part of a reference }
+                (taicpu(hp1).oper[X]^.typ = top_reg) and
+                RegInInstruction(taicpu(hp1).oper[X]^.reg, p)
+              )
+            ) then
+            Exit;
+        end;
+
+      { The instruction can be safely moved }
       asml.Remove(hp1);
 
       { Try to insert after the last instructions where the FLAGS register is not yet in use }
@@ -6537,9 +6573,9 @@ unit aoptx86;
       else
         asml.InsertAfter(hp1, hp2);
 
-      DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
+      DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
 
-      for X := 0 to 1 do
+      for X := 0 to taicpu(hp1).ops - 1 do
         case taicpu(hp1).oper[X]^.typ of
           top_reg:
             AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
@@ -6553,6 +6589,12 @@ unit aoptx86;
           else
             ;
         end;
+
+      if taicpu(hp1).opcode = A_LEA then
+        { The flags will be overwritten by the CMP/TEST instruction }
+        ConvertLEA(taicpu(hp1));
+
+      Result := True;
     end;