|
@@ -8618,39 +8618,77 @@ unit aoptx86;
|
|
|
begin
|
|
|
if (taicpu(p).oper[0]^.typ = top_const) then
|
|
|
begin
|
|
|
- if (taicpu(hp1).opcode = A_AND) and
|
|
|
- MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
- (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
- { the second register must contain the first one, so compare their subreg types }
|
|
|
- (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
- (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
|
|
|
- { change
|
|
|
- and const1, reg
|
|
|
- and const2, reg
|
|
|
- to
|
|
|
- and (const1 and const2), reg
|
|
|
- }
|
|
|
- begin
|
|
|
- taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
|
|
|
- DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
|
|
|
- RemoveCurrentP(p, hp1);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
- end
|
|
|
- else if (taicpu(hp1).opcode = A_MOVZX) and
|
|
|
- MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
|
- SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
|
|
|
- (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
- (((taicpu(p).opsize=S_W) and
|
|
|
- (taicpu(hp1).opsize=S_BW)) or
|
|
|
- ((taicpu(p).opsize=S_L) and
|
|
|
- (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}]))
|
|
|
+ case taicpu(hp1).opcode of
|
|
|
+ A_AND:
|
|
|
+ if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
+ (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
+ { the second register must contain the first one, so compare their subreg types }
|
|
|
+ (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
+ (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
|
|
|
+ { change
|
|
|
+ and const1, reg
|
|
|
+ and const2, reg
|
|
|
+ to
|
|
|
+ and (const1 and const2), reg
|
|
|
+ }
|
|
|
+ begin
|
|
|
+ taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
+ Result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+
|
|
|
+ A_CMP:
|
|
|
+ if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
|
|
|
+ { Just check that the condition on the next instruction is compatible }
|
|
|
+ GetNextInstruction(hp1, hp2) and
|
|
|
+ (hp2.typ = ait_instruction) and
|
|
|
+ (taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
|
|
|
+ then
|
|
|
+ { change
|
|
|
+ and 2^n, reg
|
|
|
+ cmp 2^n, reg
|
|
|
+ j(c) / set(c) / cmov(c) (c is equal or not equal)
|
|
|
+ to
|
|
|
+ and 2^n, reg
|
|
|
+ test reg, reg
|
|
|
+ j(~c) / set(~c) / cmov(~c)
|
|
|
+ }
|
|
|
+ begin
|
|
|
+ { Keep TEST instruction in, rather than remove it, because
|
|
|
+ it may trigger other optimisations such as MovAndTest2Test }
|
|
|
+ taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
|
|
|
+ taicpu(hp1).opcode := A_TEST;
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
|
|
|
+ taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+
|
|
|
+ A_MOVZX:
|
|
|
+ if MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
|
+ SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
|
|
|
+ (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
|
|
|
+ (
|
|
|
+ (
|
|
|
+ (taicpu(p).opsize=S_W) and
|
|
|
+ (taicpu(hp1).opsize=S_BW)
|
|
|
+ ) or
|
|
|
+ (
|
|
|
+ (taicpu(p).opsize=S_L) and
|
|
|
+ (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
|
|
|
+ )
|
|
|
{$ifdef x86_64}
|
|
|
- or
|
|
|
- ((taicpu(p).opsize=S_Q) and
|
|
|
- (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL]))
|
|
|
+ or
|
|
|
+ (
|
|
|
+ (taicpu(p).opsize=S_Q) and
|
|
|
+ (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
|
|
|
+ )
|
|
|
{$endif x86_64}
|
|
|
- ) then
|
|
|
+ ) then
|
|
|
begin
|
|
|
if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
|
|
((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
|
|
@@ -8673,108 +8711,114 @@ unit aoptx86;
|
|
|
{ See if there are other optimisations possible }
|
|
|
Continue;
|
|
|
end;
|
|
|
- end
|
|
|
- else if (taicpu(hp1).opcode = A_SHL) and
|
|
|
- MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
- (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
|
|
- begin
|
|
|
+ end;
|
|
|
+
|
|
|
+ A_SHL:
|
|
|
+ if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
+ (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
|
|
|
+ begin
|
|
|
{$ifopt R+}
|
|
|
{$define RANGE_WAS_ON}
|
|
|
{$R-}
|
|
|
{$endif}
|
|
|
- { get length of potential and mask }
|
|
|
- MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
|
|
|
+ { get length of potential and mask }
|
|
|
+ MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
|
|
|
|
|
|
- { really a mask? }
|
|
|
+ { really a mask? }
|
|
|
{$ifdef RANGE_WAS_ON}
|
|
|
{$R+}
|
|
|
{$endif}
|
|
|
- if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
|
|
|
- { unmasked part shifted out? }
|
|
|
- ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
|
|
|
- RemoveCurrentP(p, hp1);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
+ if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
|
|
|
+ { unmasked part shifted out? }
|
|
|
+ ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
+ Result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
end;
|
|
|
- end
|
|
|
- else if (taicpu(hp1).opcode = A_SHR) and
|
|
|
- MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
- (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
|
|
- (taicpu(hp1).oper[0]^.val <= 63) then
|
|
|
- begin
|
|
|
- { Does SHR combined with the AND cover all the bits?
|
|
|
|
|
|
- e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
|
|
|
+ A_SHR:
|
|
|
+ if MatchOpType(taicpu(hp1),top_const,top_reg) and
|
|
|
+ (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
|
|
+ (taicpu(hp1).oper[0]^.val <= 63) then
|
|
|
+ begin
|
|
|
+ { Does SHR combined with the AND cover all the bits?
|
|
|
|
|
|
- MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
|
|
|
+ e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
|
|
|
|
|
|
- if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
|
|
|
- ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
|
|
|
- ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
|
|
|
- RemoveCurrentP(p, hp1);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
+ MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
|
|
|
+
|
|
|
+ if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
|
|
|
+ ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
|
|
|
+ ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
end;
|
|
|
- end
|
|
|
- else if ((taicpu(hp1).opcode = A_MOVSX){$ifdef x86_64} or (taicpu(hp1).opcode = A_MOVSXD){$endif x86_64}) and
|
|
|
- (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
- SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
|
|
|
- begin
|
|
|
- if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
|
|
|
- (
|
|
|
+
|
|
|
+ A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
+ SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
|
|
|
+ begin
|
|
|
+ if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
|
|
|
(
|
|
|
- (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
|
|
- ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
|
|
|
- ) or (
|
|
|
- (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
|
|
|
- ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
|
|
|
+ (
|
|
|
+ (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
|
|
|
+ ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
|
|
|
+ ) or (
|
|
|
+ (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
|
|
|
+ ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
|
|
|
{$ifdef x86_64}
|
|
|
- ) or (
|
|
|
- (taicpu(hp1).opsize = S_LQ) and
|
|
|
- ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
|
|
|
+ ) or (
|
|
|
+ (taicpu(hp1).opsize = S_LQ) and
|
|
|
+ ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
|
|
|
{$endif x86_64}
|
|
|
- )
|
|
|
- ) then
|
|
|
- begin
|
|
|
- if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
|
|
|
- RemoveInstruction(hp1);
|
|
|
- { See if there are other optimisations possible }
|
|
|
- Continue;
|
|
|
- end;
|
|
|
+ )
|
|
|
+ ) then
|
|
|
+ begin
|
|
|
+ if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
|
|
|
+ RemoveInstruction(hp1);
|
|
|
+ { See if there are other optimisations possible }
|
|
|
+ Continue;
|
|
|
+ end;
|
|
|
|
|
|
- { The super-registers are the same though.
|
|
|
+ { The super-registers are the same though.
|
|
|
|
|
|
- Note that this change by itself doesn't improve
|
|
|
- code speed, but it opens up other optimisations. }
|
|
|
+ Note that this change by itself doesn't improve
|
|
|
+ code speed, but it opens up other optimisations. }
|
|
|
{$ifdef x86_64}
|
|
|
- { Convert 64-bit register to 32-bit }
|
|
|
- case taicpu(hp1).opsize of
|
|
|
- S_BQ:
|
|
|
- begin
|
|
|
- taicpu(hp1).opsize := S_BL;
|
|
|
- taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
|
- end;
|
|
|
- S_WQ:
|
|
|
- begin
|
|
|
- taicpu(hp1).opsize := S_WL;
|
|
|
- taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
|
- end
|
|
|
- else
|
|
|
- ;
|
|
|
- end;
|
|
|
+ { Convert 64-bit register to 32-bit }
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
+ S_BQ:
|
|
|
+ begin
|
|
|
+ taicpu(hp1).opsize := S_BL;
|
|
|
+ taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
|
+ end;
|
|
|
+ S_WQ:
|
|
|
+ begin
|
|
|
+ taicpu(hp1).opsize := S_WL;
|
|
|
+ taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
{$endif x86_64}
|
|
|
- DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
|
|
|
- taicpu(hp1).opcode := A_MOVZX;
|
|
|
- { See if there are other optimisations possible }
|
|
|
- Continue;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
|
|
|
+ taicpu(hp1).opcode := A_MOVZX;
|
|
|
+ { See if there are other optimisations possible }
|
|
|
+ Continue;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
end;
|
|
|
|
|
|
if (taicpu(hp1).is_jmp) and
|