|
@@ -140,6 +140,7 @@ unit aoptx86;
|
|
function OptPass1VPXor(var p: tai): boolean;
|
|
function OptPass1VPXor(var p: tai): boolean;
|
|
function OptPass1Imul(var p : tai) : boolean;
|
|
function OptPass1Imul(var p : tai) : boolean;
|
|
|
|
|
|
|
|
+ function OptPass2Movx(var p : tai): Boolean;
|
|
function OptPass2MOV(var p : tai) : boolean;
|
|
function OptPass2MOV(var p : tai) : boolean;
|
|
function OptPass2Imul(var p : tai) : boolean;
|
|
function OptPass2Imul(var p : tai) : boolean;
|
|
function OptPass2Jmp(var p : tai) : boolean;
|
|
function OptPass2Jmp(var p : tai) : boolean;
|
|
@@ -149,8 +150,8 @@ unit aoptx86;
|
|
function OptPass2ADD(var p : tai): Boolean;
|
|
function OptPass2ADD(var p : tai): Boolean;
|
|
|
|
|
|
function PostPeepholeOptMov(var p : tai) : Boolean;
|
|
function PostPeepholeOptMov(var p : tai) : Boolean;
|
|
-{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
|
|
|
|
function PostPeepholeOptMovzx(var p : tai) : Boolean;
|
|
function PostPeepholeOptMovzx(var p : tai) : Boolean;
|
|
|
|
+{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
|
|
function PostPeepholeOptXor(var p : tai) : Boolean;
|
|
function PostPeepholeOptXor(var p : tai) : Boolean;
|
|
{$endif}
|
|
{$endif}
|
|
function PostPeepholeOptAnd(var p : tai) : boolean;
|
|
function PostPeepholeOptAnd(var p : tai) : boolean;
|
|
@@ -160,6 +161,7 @@ unit aoptx86;
|
|
function PostPeepholeOptCall(var p : tai) : Boolean;
|
|
function PostPeepholeOptCall(var p : tai) : Boolean;
|
|
function PostPeepholeOptLea(var p : tai) : Boolean;
|
|
function PostPeepholeOptLea(var p : tai) : Boolean;
|
|
function PostPeepholeOptPush(var p: tai): Boolean;
|
|
function PostPeepholeOptPush(var p: tai): Boolean;
|
|
|
|
+ function PostPeepholeOptShr(var p : tai) : boolean;
|
|
|
|
|
|
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
|
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
|
|
|
|
|
@@ -4935,6 +4937,529 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
+ function TX86AsmOptimizer.OptPass2Movx(var p : tai) : boolean;
|
|
|
|
+ const
|
|
|
|
+ LIST_STEP_SIZE = 4;
|
|
|
|
+ var
|
|
|
|
+ ThisReg: TRegister;
|
|
|
|
+ MinSize, MaxSize, TrySmaller, TargetSize: TOpSize;
|
|
|
|
+ TargetSubReg: TSubRegister;
|
|
|
|
+ hp1, hp2: tai;
|
|
|
|
+ RegInUse, p_removed: Boolean;
|
|
|
|
+
|
|
|
|
+ { Store list of found instructions so we don't have to call
|
|
|
|
+ GetNextInstructionUsingReg multiple times }
|
|
|
|
+ InstrList: array of taicpu;
|
|
|
|
+ InstrMax, Index: Integer;
|
|
|
|
+ UpperLimit, TrySmallerLimit: TCgInt;
|
|
|
|
+
|
|
|
|
+ { Data flow analysis }
|
|
|
|
+ TestValMin, TestValMax: TCgInt;
|
|
|
|
+ SmallerOverflow: Boolean;
|
|
|
|
+
|
|
|
|
+ begin
|
|
|
|
+ Result := False;
|
|
|
|
+ p_removed := False;
|
|
|
|
+
|
|
|
|
+ { This is anything but quick! }
|
|
|
|
+ if not(cs_opt_level2 in current_settings.optimizerswitches) then
|
|
|
|
+ Exit;
|
|
|
|
+
|
|
|
|
+ SetLength(InstrList, 0);
|
|
|
|
+ InstrMax := -1;
|
|
|
|
+ ThisReg := taicpu(p).oper[1]^.reg;
|
|
|
|
+ hp1 := p;
|
|
|
|
+
|
|
|
|
+ case taicpu(p).opsize of
|
|
|
|
+ S_BW, S_BL:
|
|
|
|
+ begin
|
|
|
|
+ UpperLimit := $FF;
|
|
|
|
+ MinSize := S_B;
|
|
|
|
+ if taicpu(p).opsize = S_BW then
|
|
|
|
+ MaxSize := S_W
|
|
|
|
+ else
|
|
|
|
+ MaxSize := S_L;
|
|
|
|
+ end;
|
|
|
|
+ S_WL:
|
|
|
|
+ begin
|
|
|
|
+ UpperLimit := $FFFF;
|
|
|
|
+ MinSize := S_W;
|
|
|
|
+ MaxSize := S_L;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112301);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ TestValMin := 0;
|
|
|
|
+ TestValMax := UpperLimit;
|
|
|
|
+ TrySmallerLimit := UpperLimit;
|
|
|
|
+ TrySmaller := S_NO;
|
|
|
|
+ SmallerOverflow := False;
|
|
|
|
+
|
|
|
|
+ while GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
|
|
|
|
+ (hp1.typ = ait_instruction) and
|
|
|
|
+ (
|
|
|
|
+ { Under -O1 and -O2, GetNextInstructionUsingReg may return an
|
|
|
|
+ instruction that doesn't actually contain ThisReg }
|
|
|
|
+ (cs_opt_level3 in current_settings.optimizerswitches) or
|
|
|
|
+ RegInInstruction(ThisReg, hp1)
|
|
|
|
+ ) do
|
|
|
|
+ begin
|
|
|
|
+ case taicpu(hp1).opcode of
|
|
|
|
+ A_INC,A_DEC:
|
|
|
|
+ begin
|
|
|
|
+ { Has to be an exact match on the register }
|
|
|
|
+ if not MatchOperand(taicpu(hp1).oper[0]^, ThisReg) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ if taicpu(hp1).opcode = A_INC then
|
|
|
|
+ begin
|
|
|
|
+ Inc(TestValMin);
|
|
|
|
+ Inc(TestValMax);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ Dec(TestValMin);
|
|
|
|
+ Dec(TestValMax);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { OR and XOR are not included because they can too easily fool
|
|
|
|
+ the data flow analysis (they can cause non-linear behaviour) }
|
|
|
|
+ A_ADD,A_SUB,A_AND,A_SHL,A_SHR:
|
|
|
|
+ begin
|
|
|
|
+ if
|
|
|
|
+ (taicpu(hp1).oper[1]^.typ <> top_reg) or
|
|
|
|
+ { Has to be an exact match on the register }
|
|
|
|
+ (taicpu(hp1).oper[1]^.reg <> ThisReg) or not
|
|
|
|
+ (
|
|
|
|
+ (
|
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_const) and
|
|
|
|
+ (
|
|
|
|
+ (
|
|
|
|
+ (taicpu(hp1).opcode = A_SHL) and
|
|
|
|
+ (
|
|
|
|
+ ((MinSize = S_B) and (taicpu(hp1).oper[0]^.val < 8)) or
|
|
|
|
+ ((MinSize = S_W) and (taicpu(hp1).oper[0]^.val < 16)) or
|
|
|
|
+ ((MinSize = S_L) and (taicpu(hp1).oper[0]^.val < 32))
|
|
|
|
+ )
|
|
|
|
+ ) or (
|
|
|
|
+ (taicpu(hp1).opcode <> A_SHL) and
|
|
|
|
+ (
|
|
|
|
+ ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ { Is it in the negative range? }
|
|
|
|
+ (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ ) or (
|
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) and
|
|
|
|
+ ((taicpu(hp1).opcode = A_ADD) or (taicpu(hp1).opcode = A_AND) or (taicpu(hp1).opcode = A_SUB))
|
|
|
|
+ )
|
|
|
|
+ ) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ case taicpu(hp1).opcode of
|
|
|
|
+ A_ADD:
|
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
|
+ begin
|
|
|
|
+ TestValMin := TestValMin * 2;
|
|
|
|
+ TestValMax := TestValMax * 2;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ TestValMin := TestValMin + taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax + taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ A_SUB:
|
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
|
+ begin
|
|
|
|
+ TestValMin := 0;
|
|
|
|
+ TestValMax := 0;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ A_AND:
|
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_const) then
|
|
|
|
+ begin
|
|
|
|
+ { we might be able to go smaller if AND appears first }
|
|
|
|
+ if InstrMax = -1 then
|
|
|
|
+ case MinSize of
|
|
|
|
+ S_B:
|
|
|
|
+ ;
|
|
|
|
+ S_W:
|
|
|
|
+ if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_B;
|
|
|
|
+ TrySmallerLimit := $FF;
|
|
|
|
+ end;
|
|
|
|
+ S_L:
|
|
|
|
+ if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_B;
|
|
|
|
+ TrySmallerLimit := $FF;
|
|
|
|
+ end
|
|
|
|
+ else if ((taicpu(hp1).oper[0]^.val and $FFFF) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ ((not(taicpu(hp1).oper[0]^.val) and $7FFF) = (not taicpu(hp1).oper[0]^.val)) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_W;
|
|
|
|
+ TrySmallerLimit := $FFFF;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112320);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ TestValMin := TestValMin and taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax and taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ A_SHL:
|
|
|
|
+ begin
|
|
|
|
+ TestValMin := TestValMin shl taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax shl taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ A_SHR:
|
|
|
|
+ begin
|
|
|
|
+ { we might be able to go smaller if SHR appears first }
|
|
|
|
+ if InstrMax = -1 then
|
|
|
|
+ case MinSize of
|
|
|
|
+ S_B:
|
|
|
|
+ ;
|
|
|
|
+ S_W:
|
|
|
|
+ if (taicpu(hp1).oper[0]^.val >= 8) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_B;
|
|
|
|
+ TrySmallerLimit := $FF;
|
|
|
|
+ end;
|
|
|
|
+ S_L:
|
|
|
|
+ if (taicpu(hp1).oper[0]^.val >= 24) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_B;
|
|
|
|
+ TrySmallerLimit := $FF;
|
|
|
|
+ end
|
|
|
|
+ else if (taicpu(hp1).oper[0]^.val >= 16) then
|
|
|
|
+ begin
|
|
|
|
+ TrySmaller := S_W;
|
|
|
|
+ TrySmallerLimit := $FFFF;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112321);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ TestValMin := TestValMin shr taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax shr taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112303);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+(*
|
|
|
|
+ A_IMUL:
|
|
|
|
+ case taicpu(hp1).ops of
|
|
|
|
+ 2:
|
|
|
|
+ begin
|
|
|
|
+ if not MatchOpType(hp1, top_reg, top_reg) or
|
|
|
|
+ { Has to be an exact match on the register }
|
|
|
|
+ (taicpu(hp1).oper[0]^.reg <> ThisReg) or
|
|
|
|
+ (taicpu(hp1).oper[1]^.reg <> ThisReg) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ TestValMin := TestValMin * TestValMin;
|
|
|
|
+ TestValMax := TestValMax * TestValMax;
|
|
|
|
+ end;
|
|
|
|
+ 3:
|
|
|
|
+ begin
|
|
|
|
+ if not MatchOpType(hp1, top_const, top_reg, top_reg) or
|
|
|
|
+ { Has to be an exact match on the register }
|
|
|
|
+ (taicpu(hp1).oper[1]^.reg <> ThisReg) or
|
|
|
|
+ (taicpu(hp1).oper[2]^.reg <> ThisReg) or
|
|
|
|
+ ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ { Is it in the negative range? }
|
|
|
|
+ (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ TestValMin := TestValMin * taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax * taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ Break;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ A_IDIV:
|
|
|
|
+ case taicpu(hp1).ops of
|
|
|
|
+ 3:
|
|
|
|
+ begin
|
|
|
|
+ if not MatchOpType(hp1, top_const, top_reg, top_reg) or
|
|
|
|
+ { Has to be an exact match on the register }
|
|
|
|
+ (taicpu(hp1).oper[1]^.reg <> ThisReg) or
|
|
|
|
+ (taicpu(hp1).oper[2]^.reg <> ThisReg) or
|
|
|
|
+ ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
|
|
|
|
+ { Is it in the negative range? }
|
|
|
|
+ (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ TestValMin := TestValMin div taicpu(hp1).oper[0]^.val;
|
|
|
|
+ TestValMax := TestValMax div taicpu(hp1).oper[0]^.val;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ Break;
|
|
|
|
+ end;
|
|
|
|
+*)
|
|
|
|
+ A_MOVZX:
|
|
|
|
+ begin
|
|
|
|
+ if not MatchOpType(taicpu(hp1), top_reg, top_reg) then
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ { The objective here is to try to find a combination that
|
|
|
|
+ removes one of the MOV/Z instructions. }
|
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
|
+ S_WL:
|
|
|
|
+ if (MinSize in [S_B, S_W]) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := S_L;
|
|
|
|
+ TargetSubReg := R_SUBD;
|
|
|
|
+ end
|
|
|
|
+ else if ((TrySmaller in [S_B, S_W]) and not SmallerOverflow) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := TrySmaller;
|
|
|
|
+ if TrySmaller = S_B then
|
|
|
|
+ TargetSubReg := R_SUBL
|
|
|
|
+ else
|
|
|
|
+ TargetSubReg := R_SUBW;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ S_BW:
|
|
|
|
+ if (MinSize in [S_B, S_W]) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := S_W;
|
|
|
|
+ TargetSubReg := R_SUBW;
|
|
|
|
+ end
|
|
|
|
+ else if ((TrySmaller = S_B) and not SmallerOverflow) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := S_B;
|
|
|
|
+ TargetSubReg := R_SUBL;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ S_BL:
|
|
|
|
+ if (MinSize in [S_B, S_W]) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := S_L;
|
|
|
|
+ TargetSubReg := R_SUBD;
|
|
|
|
+ end
|
|
|
|
+ else if ((TrySmaller = S_B) and not SmallerOverflow) then
|
|
|
|
+ begin
|
|
|
|
+ TargetSize := S_B;
|
|
|
|
+ TargetSubReg := R_SUBL;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ Break;
|
|
|
|
+
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112302);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { Update the register to its new size }
|
|
|
|
+ ThisReg := newreg(R_INTREGISTER, getsupreg(ThisReg), TargetSubReg);
|
|
|
|
+
|
|
|
|
+ if TargetSize = MinSize then
|
|
|
|
+ begin
|
|
|
|
+ { Convert the input MOVZX to a MOV }
|
|
|
|
+ if (taicpu(p).oper[0]^.typ = top_reg) and
|
|
|
|
+ SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
|
|
|
|
+ begin
|
|
|
|
+ { Or remove it completely! }
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1', p);
|
|
|
|
+ RemoveCurrentP(p);
|
|
|
|
+ p_removed := True;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1', p);
|
|
|
|
+ taicpu(p).opcode := A_MOV;
|
|
|
|
+ taicpu(p).oper[1]^.reg := ThisReg;
|
|
|
|
+ taicpu(p).opsize := TargetSize;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ Result := True;
|
|
|
|
+ end
|
|
|
|
+ else if TargetSize <> MaxSize then
|
|
|
|
+ begin
|
|
|
|
+
|
|
|
|
+ case MaxSize of
|
|
|
|
+ S_L:
|
|
|
|
+ if TargetSize = S_W then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'movzbl2movzbw', p);
|
|
|
|
+ taicpu(p).opsize := S_BW;
|
|
|
|
+ taicpu(p).oper[1]^.reg := ThisReg;
|
|
|
|
+ Result := True;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112341);
|
|
|
|
+
|
|
|
|
+ S_W:
|
|
|
|
+ if TargetSize = S_L then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'movzbw2movzbl', p);
|
|
|
|
+ taicpu(p).opsize := S_BL;
|
|
|
|
+ taicpu(p).oper[1]^.reg := ThisReg;
|
|
|
|
+ Result := True;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112342);
|
|
|
|
+ else
|
|
|
|
+ ;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ if (MaxSize = TargetSize) or
|
|
|
|
+ ((TargetSize = S_L) and (taicpu(hp1).opsize in [S_L, S_BL, S_WL])) or
|
|
|
|
+ ((TargetSize = S_W) and (taicpu(hp1).opsize in [S_W, S_BW])) then
|
|
|
|
+ begin
|
|
|
|
+ { Convert the output MOVZX to a MOV }
|
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
|
+ SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, ThisReg) then
|
|
|
|
+ begin
|
|
|
|
+ { Or remove it completely! }
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 2', hp1);
|
|
|
|
+
|
|
|
|
+ { Be careful; if p = hp1 and p was also removed, p
|
|
|
|
+ will become a dangling pointer }
|
|
|
|
+ if p = hp1 then
|
|
|
|
+ RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
|
|
|
|
+ else
|
|
|
|
+ RemoveInstruction(hp1);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ taicpu(hp1).opcode := A_MOV;
|
|
|
|
+ taicpu(hp1).oper[0]^.reg := ThisReg;
|
|
|
|
+ taicpu(hp1).opsize := TargetSize;
|
|
|
|
+
|
|
|
|
+ { Check to see if the active register is used afterwards;
|
|
|
|
+ if not, we can change it and make a saving. }
|
|
|
|
+ RegInUse := False;
|
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
|
+
|
|
|
|
+ { The target register may be marked as in use to cross
|
|
|
|
+ a jump to a distant label, so exclude it }
|
|
|
|
+ ExcludeRegFromUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs);
|
|
|
|
+
|
|
|
|
+ hp2 := p;
|
|
|
|
+ repeat
|
|
|
|
+
|
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.next));
|
|
|
|
+
|
|
|
|
+ { Explicitly check for the excluded register (don't include the first
|
|
|
|
+ instruction as it may be reading from here }
|
|
|
|
+ if ((p <> hp2) and (RegInInstruction(taicpu(hp1).oper[1]^.reg, hp2))) or
|
|
|
|
+ RegInUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs) then
|
|
|
|
+ begin
|
|
|
|
+ RegInUse := True;
|
|
|
|
+ Break;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if not GetNextInstruction(hp2, hp2) then
|
|
|
|
+ InternalError(2020112340);
|
|
|
|
+
|
|
|
|
+ until (hp2 = hp1);
|
|
|
|
+
|
|
|
|
+ if not RegInUse and not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Simplified register usage so ' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' = ' + debug_regname(taicpu(p).oper[1]^.reg), p);
|
|
|
|
+ ThisReg := taicpu(hp1).oper[1]^.reg;
|
|
|
|
+
|
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
|
+ AllocRegBetween(ThisReg, p, hp1, TmpUsedRegs);
|
|
|
|
+
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 3', hp1);
|
|
|
|
+ if p = hp1 then
|
|
|
|
+ RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
|
|
|
|
+ else
|
|
|
|
+ RemoveInstruction(hp1);
|
|
|
|
+
|
|
|
|
+ { Instruction will become "mov %reg,%reg" }
|
|
|
|
+ if not p_removed and (taicpu(p).opcode = A_MOV) and
|
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, ThisReg) then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 6', p);
|
|
|
|
+ RemoveCurrentP(p);
|
|
|
|
+ p_removed := True;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ taicpu(p).oper[1]^.reg := ThisReg;
|
|
|
|
+
|
|
|
|
+ Result := True;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Mov 2', hp1);
|
|
|
|
+
|
|
|
|
+ end;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112330);
|
|
|
|
+
|
|
|
|
+ { Now go through every instruction we found and change the
|
|
|
|
+ size. If TargetSize = MaxSize, then almost no changes are
|
|
|
|
+ needed and Result can remain False if it hasn't been set
|
|
|
|
+ yet. }
|
|
|
|
+
|
|
|
|
+ if (TargetSize <> MaxSize) and (InstrMax >= 0) then
|
|
|
|
+ begin
|
|
|
|
+ for Index := 0 to InstrMax do
|
|
|
|
+ begin
|
|
|
|
+
|
|
|
|
+ { If p_removed is true, then the original MOV/Z was removed
|
|
|
|
+ and removing the AND instruction may not be safe if it
|
|
|
|
+ appears first }
|
|
|
|
+ if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
|
|
|
|
+ InternalError(2020112310);
|
|
|
|
+
|
|
|
|
+ if InstrList[Index].oper[0]^.typ = top_reg then
|
|
|
|
+ InstrList[Index].oper[0]^.reg := ThisReg;
|
|
|
|
+
|
|
|
|
+ InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
|
|
|
|
+ InstrList[Index].opsize := TargetSize;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ Exit;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ else
|
|
|
|
+ { This includes ADC, SBB, IDIV and SAR }
|
|
|
|
+ Break;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if (TestValMin < 0) or (TestValMax < 0) or
|
|
|
|
+ (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
|
|
|
|
+ { Overflow }
|
|
|
|
+ Break
|
|
|
|
+ else if not SmallerOverflow and (TrySmaller <> S_NO) and
|
|
|
|
+ ((TestValMin > TrySmallerLimit) or (TestValMax > TrySmallerLimit)) then
|
|
|
|
+ SmallerOverflow := True;
|
|
|
|
+
|
|
|
|
+ { Contains highest index (so instruction count - 1) }
|
|
|
|
+ Inc(InstrMax);
|
|
|
|
+ if InstrMax > High(InstrList) then
|
|
|
|
+ SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
|
|
|
|
+
|
|
|
|
+ InstrList[InstrMax] := taicpu(hp1);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
|
|
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
|
|
var
|
|
var
|
|
hp1 : tai;
|
|
hp1 : tai;
|
|
@@ -6691,6 +7216,41 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
+ function TX86AsmOptimizer.PostPeepholeOptShr(var p : tai) : boolean;
|
|
|
|
+ var
|
|
|
|
+ hp1: tai;
|
|
|
|
+ begin
|
|
|
|
+ { Detect:
|
|
|
|
+ shr x, %ax (x > 0)
|
|
|
|
+ ...
|
|
|
|
+ movzwl %ax,%eax
|
|
|
|
+
|
|
|
|
+ Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ Result := False;
|
|
|
|
+ if MatchOpType(taicpu(p), top_const, top_reg) and
|
|
|
|
+ (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
|
|
|
|
+ (taicpu(p).oper[0]^.val > 0) and
|
|
|
|
+ GetNextInstructionUsingReg(p, hp1, NR_EAX) and
|
|
|
|
+ MatchInstruction(hp1, A_MOVZX, [S_WL]) and
|
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
|
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via ShrMovz2ShrCwtl)', hp1);
|
|
|
|
+ taicpu(hp1).opcode := A_CWDE;
|
|
|
|
+ taicpu(hp1).clearop(0);
|
|
|
|
+ taicpu(hp1).clearop(1);
|
|
|
|
+ taicpu(hp1).ops := 0;
|
|
|
|
+
|
|
|
|
+ { A change was made, but not with p, so move forward 1 }
|
|
|
|
+ p := tai(p.Next);
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
|
|
function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
|
|
begin
|
|
begin
|
|
Result:=false;
|
|
Result:=false;
|
|
@@ -6864,12 +7424,150 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
-{$ifdef x86_64}
|
|
|
|
function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
|
|
function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
|
|
|
|
+
|
|
|
|
+ function ConstInRange(const Val: TCGInt; const OpSize: TOpSize): Boolean;
|
|
|
|
+ begin
|
|
|
|
+ case OpSize of
|
|
|
|
+ S_B, S_BW, S_BL{$ifdef x86_64}, S_BQ{$endif x86_64}:
|
|
|
|
+ Result := (Val <= $FF) and (Val >= -128);
|
|
|
|
+ S_W, S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
|
|
|
|
+ Result := (Val <= $FFFF) and (Val >= -32768);
|
|
|
|
+ S_L{$ifdef x86_64}, S_LQ{$endif x86_64}:
|
|
|
|
+ Result := (Val <= $FFFFFFFF) and (Val >= -2147483648);
|
|
|
|
+ else
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
var
|
|
var
|
|
|
|
+ hp1, hp2 : tai;
|
|
|
|
+ SizeChange: Boolean;
|
|
PreMessage: string;
|
|
PreMessage: string;
|
|
begin
|
|
begin
|
|
Result := False;
|
|
Result := False;
|
|
|
|
+
|
|
|
|
+ if (taicpu(p).oper[0]^.typ = top_reg) and
|
|
|
|
+ SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
|
|
|
|
+ GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) then
|
|
|
|
+ begin
|
|
|
|
+ { Change (using movzbl %al,%eax as an example):
|
|
|
|
+
|
|
|
|
+ movzbl %al, %eax movzbl %al, %eax
|
|
|
|
+ cmpl x, %eax testl %eax,%eax
|
|
|
|
+
|
|
|
|
+ To:
|
|
|
|
+ cmpb x, %al testb %al, %al (Move one back to avoid a false dependency)
|
|
|
|
+ movzbl %al, %eax movzbl %al, %eax
|
|
|
|
+
|
|
|
|
+ Smaller instruction and minimises pipeline stall as the CPU
|
|
|
|
+ doesn't have to wait for the register to get zero-extended. [Kit]
|
|
|
|
+
|
|
|
|
+ Also allow if the smaller of the two registers is being checked,
|
|
|
|
+ as this still removes the false dependency.
|
|
|
|
+ }
|
|
|
|
+ if
|
|
|
|
+ (
|
|
|
|
+ (
|
|
|
|
+ (taicpu(hp1).opcode = A_CMP) and MatchOpType(taicpu(hp1), top_const, top_reg) and
|
|
|
|
+ ConstInRange(taicpu(hp1).oper[0]^.val, taicpu(p).opsize)
|
|
|
|
+ ) or (
|
|
|
|
+ { If MatchOperand returns True, they must both be registers }
|
|
|
|
+ (taicpu(hp1).opcode = A_TEST) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)
|
|
|
|
+ )
|
|
|
|
+ ) and
|
|
|
|
+ (reg2opsize(taicpu(hp1).oper[1]^.reg) <= reg2opsize(taicpu(p).oper[1]^.reg)) then
|
|
|
|
+ begin
|
|
|
|
+ PreMessage := debug_op2str(taicpu(hp1).opcode) + debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' -> ' + debug_op2str(taicpu(hp1).opcode);
|
|
|
|
+
|
|
|
|
+ asml.Remove(hp1);
|
|
|
|
+ asml.InsertBefore(hp1, p);
|
|
|
|
+
|
|
|
|
+ { Swap instructions in the case of cmp 0,%reg or test %reg,%reg }
|
|
|
|
+ if (taicpu(hp1).opcode = A_TEST) or (taicpu(hp1).oper[0]^.val = 0) then
|
|
|
|
+ begin
|
|
|
|
+ taicpu(hp1).opcode := A_TEST;
|
|
|
|
+ taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;
|
|
|
|
+
|
|
|
|
+ case taicpu(p).opsize of
|
|
|
|
+ S_BW, S_BL:
|
|
|
|
+ begin
|
|
|
|
+ SizeChange := taicpu(hp1).opsize <> S_B;
|
|
|
|
+ taicpu(hp1).changeopsize(S_B);
|
|
|
|
+ end;
|
|
|
|
+ S_WL:
|
|
|
|
+ begin
|
|
|
|
+ SizeChange := taicpu(hp1).opsize <> S_W;
|
|
|
|
+ taicpu(hp1).changeopsize(S_W);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020112701);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ UpdateUsedRegs(tai(p.Next));
|
|
|
|
+
|
|
|
|
+ { Check if the register is used aferwards - if not, we can
|
|
|
|
+ remove the movzx instruction completely }
|
|
|
|
+ if not RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, p, UsedRegs) then
|
|
|
|
+ begin
|
|
|
|
+ { Hp1 is a better position than p for debugging purposes }
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4a', hp1);
|
|
|
|
+ RemoveCurrentp(p, hp1);
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if SizeChange then
|
|
|
|
+ DebugMsg(SPeepholeOptimization + PreMessage +
|
|
|
|
+ debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (smaller and minimises pipeline stall - MovzxCmp2CmpMovzx)', hp1)
|
|
|
|
+ else
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovzxCmp2CmpMovzx', hp1);
|
|
|
|
+
|
|
|
|
+ Exit;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { Change (using movzwl %ax,%eax as an example):
|
|
|
|
+
|
|
|
|
+ movzwl %ax, %eax
|
|
|
|
+ movb %al, (dest) (Register is smaller than read register in movz)
|
|
|
|
+
|
|
|
|
+ To:
|
|
|
|
+ movb %al, (dest) (Move one back to avoid a false dependency)
|
|
|
|
+ movzwl %ax, %eax
|
|
|
|
+ }
|
|
|
|
+ if (taicpu(hp1).opcode = A_MOV) and
|
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
|
+ not RegInOp(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^) and
|
|
|
|
+ SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
|
|
|
|
+ (reg2opsize(taicpu(hp1).oper[0]^.reg) <= reg2opsize(taicpu(p).oper[0]^.reg)) then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovzxMov2MovMovzx', hp1);
|
|
|
|
+
|
|
|
|
+ hp2 := tai(hp1.Previous); { Effectively the old position of hp1 }
|
|
|
|
+ asml.Remove(hp1);
|
|
|
|
+ asml.InsertBefore(hp1, p);
|
|
|
|
+ if taicpu(hp1).oper[1]^.typ = top_reg then
|
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, hp2, UsedRegs);
|
|
|
|
+
|
|
|
|
+ { Check if the register is used aferwards - if not, we can
|
|
|
|
+ remove the movzx instruction completely }
|
|
|
|
+
|
|
|
|
+ if not RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, p, UsedRegs) then
|
|
|
|
+ begin
|
|
|
|
+ { Hp1 is a better position than p for debugging purposes }
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4b', hp1);
|
|
|
|
+ RemoveCurrentp(p, hp1);
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ Exit;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+{$ifdef x86_64}
|
|
{ Code size reduction by J. Gareth "Kit" Moreton }
|
|
{ Code size reduction by J. Gareth "Kit" Moreton }
|
|
{ Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
|
|
{ Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
|
|
if (taicpu(p).opsize in [S_BQ, S_WQ]) and
|
|
if (taicpu(p).opsize in [S_BQ, S_WQ]) and
|
|
@@ -6889,9 +7587,11 @@ unit aoptx86;
|
|
DebugMsg(SPeepholeOptimization + PreMessage +
|
|
DebugMsg(SPeepholeOptimization + PreMessage +
|
|
debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
|
|
debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
|
|
end;
|
|
end;
|
|
|
|
+{$endif}
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
+{$ifdef x86_64}
|
|
function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
|
|
function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
|
|
var
|
|
var
|
|
PreMessage, RegName: string;
|
|
PreMessage, RegName: string;
|