Forráskód Böngészése

* patch by J. Gareth Moreton: x86 CMP/TEST/Jcc optimisations, resolves #36624

git-svn-id: trunk@44029 -
florian 5 éve
szülő
commit
a807e18591
1 módosított fájl, 175 hozzáadás és 33 törlés
  1. 175 33
      compiler/x86/aoptx86.pas

+ 175 - 33
compiler/x86/aoptx86.pas

@@ -3303,43 +3303,185 @@ unit aoptx86;
      function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
        var
          v: TCGInt;
-         hp1, hp2, hp3, hp4: tai;
+         hp1, hp2: tai;
        begin
          Result:=false;
-         { cmp register,$8000                neg register
-           je target                 -->     jo target
-
-           .... only if register is deallocated before jump.}
-         case Taicpu(p).opsize of
-           S_B: v:=$80;
-           S_W: v:=$8000;
-           S_L: v:=qword($80000000);
-           { actually, this will never happen: cmp with 64 bit constants is not possible }
-           S_Q : v:=Int64($8000000000000000);
-           else
-             internalerror(2013112905);
-         end;
-         if MatchOpType(taicpu(p),Top_const,top_reg) and
-            (taicpu(p).oper[0]^.val=v) and
-            GetNextInstruction(p, hp1) and
-            MatchInstruction(hp1,A_Jcc,[]) and
-            (Taicpu(hp1).condition in [C_E,C_NE]) then
+
+         if taicpu(p).oper[0]^.typ = top_const then
            begin
-             TransferUsedRegs(TmpUsedRegs);
-             UpdateUsedRegs(TmpUsedRegs,tai(p.next));
-             if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
+             { Though GetNextInstruction can be factored out, it is an expensive
+               call, so delay calling it until we have first checked cheaper
+               conditions that are independent of it. }
+
+             if (taicpu(p).oper[0]^.val = 0) and
+               (taicpu(p).oper[1]^.typ = top_reg) and
+               GetNextInstruction(p, hp1) and
+               MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
                begin
-                 DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
-                 Taicpu(p).opcode:=A_NEG;
-                 Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
-                 Taicpu(p).clearop(1);
-                 Taicpu(p).ops:=1;
-                 if Taicpu(hp1).condition=C_E then
-                   Taicpu(hp1).condition:=C_O
-                 else
-                   Taicpu(hp1).condition:=C_NO;
-                 Result:=true;
-                 exit;
+                 hp2 := p;
+                 { When dealing with "cmp $0,%reg", only ZF and SF contain
+                   anything meaningful once it's converted to "test %reg,%reg";
+                   additionally, some jumps will always (or never) branch, so
+                   evaluate every jump immediately following the
+                   comparison, optimising the conditions if possible.
+                   Similarly with SETcc... those that are always set to 0 or 1
+                   are changed to MOV instructions }
+                 while GetNextInstruction(hp2, hp1) and
+                   MatchInstruction(hp1,A_Jcc,A_SETcc,[]) do
+                   begin
+                     case taicpu(hp1).condition of
+                       C_B, C_C, C_NAE, C_O:
+                         { For B/NAE:
+                             Will never branch since an unsigned integer can never be below zero
+                           For C/O:
+                             Subtracting 0 never produces a borrow (CF) or a signed overflow (OF)
+                         }
+                         begin
+                           if taicpu(hp1).opcode = A_Jcc then
+                             begin
+                               DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
+                               TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
+                               AsmL.Remove(hp1);
+                               hp1.Free;
+                               { Since hp1 was deleted, hp2 must not be updated }
+                               Continue;
+                             end
+                           else
+                             begin
+                               DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
+                               { Convert "set(c) %reg" instruction to "movb 0,%reg" }
+                               taicpu(hp1).opcode := A_MOV;
+                               taicpu(hp1).condition := C_None;
+                               taicpu(hp1).opsize := S_B;
+                               taicpu(hp1).allocate_oper(2);
+                               taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
+                               taicpu(hp1).loadconst(0, 0);
+                             end;
+                         end;
+                       C_BE, C_NA:
+                         begin
+                           { Will only branch if equal to zero }
+                           DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
+                           taicpu(hp1).condition := C_E;
+                         end;
+                       C_A, C_NBE:
+                         begin
+                           { Will only branch if not equal to zero }
+                           DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
+                           taicpu(hp1).condition := C_NE;
+                         end;
+                       C_AE, C_NB, C_NC, C_NO:
+                         begin
+                           { Will always branch }
+                           DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
+                           if taicpu(hp1).opcode = A_Jcc then
+                             begin
+                               MakeUnconditional(taicpu(hp1));
+                               { Any jumps/set that follow will now be dead code }
+                               RemoveDeadCodeAfterJump(taicpu(hp1));
+                               Break;
+                             end
+                           else
+                             begin
+                               { Convert "set(c) %reg" instruction to "movb 1,%reg" }
+                               taicpu(hp1).opcode := A_MOV;
+                               taicpu(hp1).condition := C_None;
+                               taicpu(hp1).opsize := S_B;
+                               taicpu(hp1).allocate_oper(2);
+                               taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
+                               taicpu(hp1).loadconst(0, 1);
+                             end;
+                         end;
+                       C_None:
+                         InternalError(2020012201);
+                       C_P, C_PE, C_NP, C_PO:
+                         { We can't handle parity checks and they should never be generated
+                           after a general-purpose CMP (it's used in some floating-point
+                           comparisons that don't use CMP) }
+                         InternalError(2020012202);
+                       else
+                         { Zero/Equality, Sign, their complements and all of the
+                           signed comparisons do not need to be converted };
+                     end;
+                     hp2 := hp1;
+                   end;
+
+                 { Convert the instruction to a TEST }
+
+                 taicpu(p).opcode := A_TEST;
+                 taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
+                 Result := True;
+                 Exit;
+               end
+             else if (taicpu(p).oper[0]^.val = 1) and
+               GetNextInstruction(p, hp1) and
+               MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
+               (taicpu(hp1).condition in [C_L, C_NGE]) then
+               begin
+                 { Convert;       To:
+                     cmp $1,r/m     cmp $0,r/m
+                     jl  @lbl       jle @lbl
+                 }
+                 DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
+                 taicpu(p).oper[0]^.val := 0;
+                 taicpu(hp1).condition := C_LE;
+
+                 { If the instruction is now "cmp $0,%reg", convert it to a
+                   TEST (and effectively do the work of the "cmp $0,%reg" in
+                   the block above)
+
+                   If it's a reference, we can get away with not setting
+                   Result to True because we haven't evaluated the jump
+                   in this pass yet.
+                 }
+                 if (taicpu(p).oper[1]^.typ = top_reg) then
+                   begin
+                     taicpu(p).opcode := A_TEST;
+                     taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
+                     Result := True;
+                   end;
+
+                 Exit;
+               end
+             else if (taicpu(p).oper[1]^.typ = top_reg) then
+               begin
+                 { cmp $8000,register                neg register
+                   je target                 -->     jo target
+
+                   (likewise sete/setne --> seto/setno)
+
+                   .... only if register is deallocated before jump,
+                   because NEG overwrites the register while CMP does not.}
+                 case Taicpu(p).opsize of
+                   S_B: v:=$80;
+                   S_W: v:=$8000;
+                   S_L: v:=qword($80000000);
+                   { S_Q: the constant $8000000000000000 cannot be encoded, as cmp only accepts sign-extended 32-bit immediates }
+                   S_Q:
+                     Exit;
+                   else
+                     internalerror(2013112905);
+                 end;
+
+                 if (taicpu(p).oper[0]^.val=v) and
+                    GetNextInstruction(p, hp1) and
+                    MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
+                    (Taicpu(hp1).condition in [C_E,C_NE]) then
+                   begin
+                     TransferUsedRegs(TmpUsedRegs);
+                     UpdateUsedRegs(TmpUsedRegs,tai(p.next));
+                     if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
+                       begin
+                         DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
+                         Taicpu(p).opcode:=A_NEG;
+                         Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
+                         Taicpu(p).clearop(1);
+                         Taicpu(p).ops:=1;
+                         if Taicpu(hp1).condition=C_E then
+                           Taicpu(hp1).condition:=C_O
+                         else
+                           Taicpu(hp1).condition:=C_NO;
+                         Result:=true;
+                         exit;
+                       end;
+                   end;
                end;
            end;
      end;