Browse Source

* x86: New CMP optimisation that mimics some of the CMOV optimisations and removes unnecessary conditions.

J. Gareth "Curious Kit" Moreton 3 years ago
parent
commit
b108608b29
1 changed files with 131 additions and 100 deletions
  1. 131 100
      compiler/x86/aoptx86.pas

+ 131 - 100
compiler/x86/aoptx86.pas

@@ -7518,109 +7518,140 @@ unit aoptx86;
              GetNextInstruction(p_jump, p_jump);
            end;
 
-         {
-           Try to optimise the following:
-             cmp       $x,###  ($x and $y can be registers or constants)
-             je        @lbl1   (only reference)
- 	     cmp       $y,###  (### are identical)
-           @Lbl:
-             sete      %reg1
-
-           Change to:
-             cmp       $x,###
-             sete      %reg2   (allocate new %reg2)
-             cmp       $y,###
-             sete      %reg1
-             orb       %reg2,%reg1
-             (dealloc %reg2)
-
-           This adds an instruction (so don't perform under -Os), but it removes
-           a conditional branch.
-         }
-         if not (cs_opt_size in current_settings.optimizerswitches) and
-           (
+         if (
+             { Don't call GetNextInstruction again if we already have it }
              (hp1 = p_jump) or
              GetNextInstruction(p, hp1)
            ) and
            MatchInstruction(hp1, A_Jcc, []) and
            IsJumpToLabel(taicpu(hp1)) and
-           (taicpu(hp1).condition in [C_E, C_Z]) and
-           GetNextInstruction(hp1, hp2) and
-           MatchInstruction(hp2, A_CMP, A_TEST, [taicpu(p).opsize]) and
-           MatchOperand(taicpu(p).oper[1]^, taicpu(hp2).oper[1]^) and
-           { The first operand of CMP instructions can only be a register or
-             immediate anyway, so no need to check }
-           GetNextInstruction(hp2, p_label) and
-           (
-             (p_label.typ = ait_label) or
-             (
-               { Sometimes there's a zero-distance jump before the label, so deal with it here
-                 to potentially cut down on the iterations of Pass 1 }
-               MatchInstruction(p_label, A_Jcc, []) and
-               IsJumpToLabel(taicpu(p_label)) and
-               { Use p_dist to hold the jump briefly }
-               SetAndTest(p_label, p_dist) and
-               GetNextInstruction(p_dist, p_label) and
-               (p_label.typ = ait_label) and
-               (tai_label(p_label).labsym.getrefs >= 2) and
-               (JumpTargetOp(taicpu(p_dist))^.ref^.symbol = tai_label(p_label).labsym) and
-               { We might as well collapse the jump now }
-               CollapseZeroDistJump(p_dist, tai_label(p_label).labsym)
-             )
-           ) and
-           (tai_label(p_label).labsym.getrefs = 1) and
-           (JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and
-           GetNextInstruction(p_label, p_dist) and
-           MatchInstruction(p_dist, A_SETcc, []) and
-           (taicpu(p_dist).condition in [C_E, C_Z]) and
-           (taicpu(p_dist).oper[0]^.typ = top_reg) and
-           { Get the instruction after the SETcc instruction so we can
-             allocate a new register over the entire range }
-           GetNextInstruction(p_dist, hp1_dist) then
+           (taicpu(hp1).condition in [C_E, C_Z, C_NE, C_NZ]) and
+           GetNextInstruction(hp1, hp2) then
            begin
-             TransferUsedRegs(TmpUsedRegs);
-             UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
-             UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
-             UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next));
-//             UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next));
-
-             { RegUsedAfterInstruction modifies TmpUsedRegs }
-             if not RegUsedAfterInstruction(NR_DEFAULTFLAGS, p_dist, TmpUsedRegs) then
+             {
+                 cmp       x, y    (or "cmp y, x")
+                 je        @lbl
+                 mov       x, y
+               @lbl:
+                 (x and y can be constants, registers or references)
+
+               Change to:
+                 mov       x, y    (x and y will always be equal in the end)
+               @lbl:               (may become a dead label)
+
+
+               Also:
+                 cmp       x, y    (or "cmp y, x")
+                 jne       @lbl
+                 mov       x, y
+               @lbl:
+                 (x and y can be constants, registers or references)
+
+               Change to:
+                 Absolutely nothing! (Except @lbl if it's still live)
+             }
+             if MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
+               (
+                 (
+                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp2).oper[0]^) and
+                   MatchOperand(taicpu(p).oper[1]^, taicpu(hp2).oper[1]^)
+                 ) or (
+                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp2).oper[1]^) and
+                   MatchOperand(taicpu(p).oper[1]^, taicpu(hp2).oper[0]^)
+                 )
+               ) and
+               GetNextInstruction(hp2, hp1_label) and
+               SkipAligns(hp1_label, hp1_label) and
+               (hp1_label.typ = ait_label) and
+               (tai_label(hp1_label).labsym = taicpu(hp1).oper[0]^.ref^.symbol) then
                begin
-                 { Register can appear in p if it's not used afterwards, so only
-                   allocate between hp1 and hp1_dist }
-                 NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, p_dist);
-                 if NewReg <> NR_NO then
+                 tai_label(hp1_label).labsym.DecRefs;
+                 if (taicpu(hp1).condition in [C_NE, C_NZ]) then
                    begin
-                     DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p);
-
-                     { Change the jump instruction into a SETcc instruction }
-                     taicpu(hp1).opcode := A_SETcc;
-                     taicpu(hp1).opsize := S_B;
-                     taicpu(hp1).loadreg(0, NewReg);
-
-                     { This is now a dead label }
-                     tai_label(p_label).labsym.decrefs;
-
-                     { Prefer adding before the next instruction so the FLAGS
-                       register is deallocated first  }
-                     hp2 := taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg);
-                     taicpu(hp2).fileinfo := taicpu(p_dist).fileinfo;
-
-                     AsmL.InsertBefore(
-                       hp2,
-                       hp1_dist
-                     );
-
-                     { Make sure the new register is in use over the new instruction
-                       (long-winded, but things work best when the FLAGS register
-                       is not allocated here) }
-                     AllocRegBetween(NewReg, p_dist, hp2, TmpUsedRegs);
-
-                     Result := True;
-                     { Don't exit yet, as p wasn't changed and hp1, while
-                       modified, is still intact and might be optimised by the
-                       SETcc optimisation below }
+                     DebugMsg(SPeepholeOptimization + 'CMP/JNE/MOV/@Lbl -> NOP, since the MOV is only executed if the operands are equal (CmpJneMov2Nop)', p);
+                     RemoveInstruction(hp2);
+                     hp2 := hp1_label; { So RemoveCurrentp below can be set to something valid }
+                   end
+                 else
+                   DebugMsg(SPeepholeOptimization + 'CMP/JE/MOV/@Lbl -> MOV, since the MOV is only executed if the operands aren''t equal (CmpJeMov2Mov)', p);
+
+                 RemoveInstruction(hp1);
+                 RemoveCurrentp(p, hp2);
+                 Result := True;
+                 Exit;
+               end;
+
+             {
+               Try to optimise the following:
+                 cmp       $x,###  ($x and $y can be registers or constants)
+                 je        @lbl1   (only reference)
+                 cmp       $y,###  (### are identical)
+               @Lbl:
+                 sete      %reg1
+
+               Change to:
+                 cmp       $x,###
+                 sete      %reg2   (allocate new %reg2)
+                 cmp       $y,###
+                 sete      %reg1
+                 orb       %reg2,%reg1
+                 (dealloc %reg2)
+
+               This adds an instruction (so don't perform under -Os), but it removes
+               a conditional branch.
+             }
+             if not (cs_opt_size in current_settings.optimizerswitches) and
+               MatchInstruction(hp2, A_CMP, A_TEST, [taicpu(p).opsize]) and
+               MatchOperand(taicpu(p).oper[1]^, taicpu(hp2).oper[1]^) and
+               { The first operand of CMP instructions can only be a register or
+                 immediate anyway, so no need to check }
+               GetNextInstruction(hp2, p_label) and
+               (p_label.typ = ait_label) and
+               (tai_label(p_label).labsym.getrefs = 1) and
+               (JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and
+               GetNextInstruction(p_label, p_dist) and
+               MatchInstruction(p_dist, A_SETcc, []) and
+               (taicpu(p_dist).condition in [C_E, C_Z]) and
+               (taicpu(p_dist).oper[0]^.typ = top_reg) then
+               begin
+                 TransferUsedRegs(TmpUsedRegs);
+                 UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
+                 UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
+                 UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next));
+                 UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next));
+
+                 if not RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
+                   { Get the instruction after the SETcc instruction so we can
+                     allocate a new register over the entire range }
+                   GetNextInstruction(p_dist, hp1_dist) then
+                   begin
+                     { Register can appear in p if it's not used afterwards, so only
+                       allocate between hp1 and hp1_dist }
+                     NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, hp1_dist);
+                     if NewReg <> NR_NO then
+                       begin
+                         DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p);
+
+                         { Change the jump instruction into a SETcc instruction }
+                         taicpu(hp1).opcode := A_SETcc;
+                         taicpu(hp1).opsize := S_B;
+                         taicpu(hp1).loadreg(0, NewReg);
+
+                         { This is now a dead label }
+                         tai_label(p_label).labsym.decrefs;
+
+                         { Prefer adding before the next instruction so the FLAGS
+                           register is deallocated first  }
+                         AsmL.InsertBefore(
+                           taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg),
+                           hp1_dist
+                         );
+
+                         Result := True;
+                         { Don't exit yet, as p wasn't changed and hp1, while
+                           modified, is still intact and might be optimised by the
+                           SETcc optimisation below }
+                       end;
                    end;
                end;
            end;
@@ -8733,8 +8764,8 @@ unit aoptx86;
       { The instruction can be safely moved }
       asml.Remove(hp1);
 
-      { Try to insert before the FLAGS register is allocated, so "mov $0,%reg"
-        can be optimised into "xor %reg,%reg" later }
+      { Try to insert after the last instruction where the FLAGS register is not
+        yet in use, so "mov $0,%reg" can be optimised into "xor %reg,%reg" later }
       if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
         asml.InsertBefore(hp1, hp2)
 
@@ -8750,9 +8781,9 @@ unit aoptx86;
         asml.InsertAfter(hp1, hp2)
       else
         { Note, if p.Previous is nil (even if it should logically never be the
-          case), FindRegAllocBackward immediately exits with False and so we
-          safely land here (we can't just pass p because FindRegAllocBackward
-          immediately exits on an instruction). [Kit] }
+        case), FindRegAllocBackward immediately exits with False and so we
+        safely land here (we can't just pass p because FindRegAllocBackward
+        immediately exits on an instruction). [Kit] }
         asml.InsertBefore(hp1, p);
 
       DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);