Bläddra i källkod

* patch by J. Gareth Moreton: AND/CMP optimisation, resolves #39287

florian 4 år sedan
förälder
incheckning
306fae299e
1 ändrade filer med 157 tillägg och 113 borttagningar
  1. 157 113
      compiler/x86/aoptx86.pas

+ 157 - 113
compiler/x86/aoptx86.pas

@@ -8618,39 +8618,77 @@ unit aoptx86;
           begin
             if (taicpu(p).oper[0]^.typ = top_const) then
               begin
-                if (taicpu(hp1).opcode = A_AND) and
-                  MatchOpType(taicpu(hp1),top_const,top_reg) and
-                  (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
-                  { the second register must contain the first one, so compare their subreg types }
-                  (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
-                  (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
-                  { change
-                      and const1, reg
-                      and const2, reg
-                    to
-                      and (const1 and const2), reg
-                  }
-                  begin
-                    taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
-                    DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
-                    RemoveCurrentP(p, hp1);
-                    Result:=true;
-                    exit;
-                  end
-                else if (taicpu(hp1).opcode = A_MOVZX) and
-                  MatchOpType(taicpu(hp1),top_reg,top_reg) and
-                  SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
-                  (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
-                   (((taicpu(p).opsize=S_W) and
-                     (taicpu(hp1).opsize=S_BW)) or
-                    ((taicpu(p).opsize=S_L) and
-                     (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}]))
+                case taicpu(hp1).opcode of
+                  A_AND:
+                    if MatchOpType(taicpu(hp1),top_const,top_reg) and
+                      (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
+                      { the second register must contain the first one, so compare their subreg types }
+                      (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
+                      (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
+                      { change
+                          and const1, reg
+                          and const2, reg
+                        to
+                          and (const1 and const2), reg
+                      }
+                      begin
+                        taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
+                        DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
+                        RemoveCurrentP(p, hp1);
+                        Result:=true;
+                        exit;
+                      end;
+
+                  A_CMP:
+                    if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
+                      MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
+                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
+                      { Just check that the condition on the next instruction is compatible }
+                      GetNextInstruction(hp1, hp2) and
+                      (hp2.typ = ait_instruction) and
+                      (taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
+                      then
+                        { change
+                            and  2^n, reg
+                            cmp  2^n, reg
+                            j(c) / set(c) / cmov(c)   (c is equal or not equal)
+                          to
+                            and  2^n, reg
+                            test reg, reg
+                            j(~c) / set(~c) / cmov(~c)
+                        }
+                      begin
+                        { Keep TEST instruction in, rather than remove it, because
+                          it may trigger other optimisations such as MovAndTest2Test }
+                        taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
+                        taicpu(hp1).opcode := A_TEST;
+                        DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
+                        taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
+                        Result := True;
+                        Exit;
+                      end;
+
+                  A_MOVZX:
+                    if MatchOpType(taicpu(hp1),top_reg,top_reg) and
+                      SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
+                      (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
+                      (
+                        (
+                          (taicpu(p).opsize=S_W) and
+                          (taicpu(hp1).opsize=S_BW)
+                        ) or
+                        (
+                          (taicpu(p).opsize=S_L) and
+                          (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
+                        )
 {$ifdef x86_64}
-                      or
-                     ((taicpu(p).opsize=S_Q) and
-                      (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL]))
+                        or
+                        (
+                          (taicpu(p).opsize=S_Q) and
+                          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
+                        )
 {$endif x86_64}
-                    ) then
+                      ) then
                       begin
                         if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                             ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
@@ -8673,108 +8711,114 @@ unit aoptx86;
                             { See if there are other optimisations possible }
                             Continue;
                           end;
-                      end
-                else if (taicpu(hp1).opcode = A_SHL) and
-                  MatchOpType(taicpu(hp1),top_const,top_reg) and
-                  (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
-                  begin
+                      end;
+
+                  A_SHL:
+                    if MatchOpType(taicpu(hp1),top_const,top_reg) and
+                      (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
+                      begin
 {$ifopt R+}
 {$define RANGE_WAS_ON}
 {$R-}
 {$endif}
-                    { get length of potential and mask }
-                    MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
+                        { get length of potential and mask }
+                        MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
 
-                    { really a mask? }
+                        { really a mask? }
 {$ifdef RANGE_WAS_ON}
 {$R+}
 {$endif}
-                    if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
-                      { unmasked part shifted out? }
-                      ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
-                      begin
-                        DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
-                        RemoveCurrentP(p, hp1);
-                        Result:=true;
-                        exit;
+                        if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
+                          { unmasked part shifted out? }
+                          ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
+                            RemoveCurrentP(p, hp1);
+                            Result:=true;
+                            exit;
+                          end;
                       end;
-                  end
-                else if (taicpu(hp1).opcode = A_SHR) and
-                  MatchOpType(taicpu(hp1),top_const,top_reg) and
-                  (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
-                  (taicpu(hp1).oper[0]^.val <= 63) then
-                  begin
-                    { Does SHR combined with the AND cover all the bits?
 
-                      e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
+                  A_SHR:
+                    if MatchOpType(taicpu(hp1),top_const,top_reg) and
+                      (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
+                      (taicpu(hp1).oper[0]^.val <= 63) then
+                      begin
+                        { Does SHR combined with the AND cover all the bits?
 
-                    MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
+                          e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
 
-                    if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
-                      ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
-                      ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
-                      begin
-                        DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
-                        RemoveCurrentP(p, hp1);
-                        Result := True;
-                        Exit;
+                        MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
+
+                        if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
+                          ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
+                          ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
+                            RemoveCurrentP(p, hp1);
+                            Result := True;
+                            Exit;
+                          end;
                       end;
-                  end
-                else if ((taicpu(hp1).opcode = A_MOVSX){$ifdef x86_64} or (taicpu(hp1).opcode = A_MOVSXD){$endif x86_64}) and
-                  (taicpu(hp1).oper[0]^.typ = top_reg) and
-                  SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
-                    begin
-                      if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
-                        (
+
+                  A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
+                    if (taicpu(hp1).oper[0]^.typ = top_reg) and
+                      SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
+                      begin
+                        if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
                           (
-                            (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
-                            ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
-                          ) or (
-                            (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
-                            ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
+                            (
+                              (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
+                              ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
+                            ) or (
+                              (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
+                              ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
 {$ifdef x86_64}
-                          ) or (
-                            (taicpu(hp1).opsize = S_LQ) and
-                            ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
+                            ) or (
+                              (taicpu(hp1).opsize = S_LQ) and
+                              ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
 {$endif x86_64}
-                          )
-                        ) then
-                        begin
-                          if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
-                            begin
-                              DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
-                              RemoveInstruction(hp1);
-                              { See if there are other optimisations possible }
-                              Continue;
-                            end;
+                            )
+                          ) then
+                          begin
+                            if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
+                              begin
+                                DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
+                                RemoveInstruction(hp1);
+                                { See if there are other optimisations possible }
+                                Continue;
+                              end;
 
-                          { The super-registers are the same though.
+                            { The super-registers are the same though.
 
-                            Note that this change by itself doesn't improve
-                            code speed, but it opens up other optimisations. }
+                              Note that this change by itself doesn't improve
+                              code speed, but it opens up other optimisations. }
 {$ifdef x86_64}
-                          { Convert 64-bit register to 32-bit }
-                          case taicpu(hp1).opsize of
-                            S_BQ:
-                              begin
-                                taicpu(hp1).opsize := S_BL;
-                                taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
-                              end;
-                            S_WQ:
-                              begin
-                                taicpu(hp1).opsize := S_WL;
-                                taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
-                              end
-                            else
-                              ;
-                          end;
+                            { Convert 64-bit register to 32-bit }
+                            case taicpu(hp1).opsize of
+                              S_BQ:
+                                begin
+                                  taicpu(hp1).opsize := S_BL;
+                                  taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
+                                end;
+                              S_WQ:
+                                begin
+                                  taicpu(hp1).opsize := S_WL;
+                                  taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
+                                end
+                              else
+                                ;
+                            end;
 {$endif x86_64}
-                          DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
-                          taicpu(hp1).opcode := A_MOVZX;
-                          { See if there are other optimisations possible }
-                          Continue;
-                        end;
-                    end;
+                            DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
+                            taicpu(hp1).opcode := A_MOVZX;
+                            { See if there are other optimisations possible }
+                            Continue;
+                          end;
+                      end;
+                  else
+                    ;
+                end;
               end;
 
             if (taicpu(hp1).is_jmp) and