|
@@ -41,6 +41,7 @@ uses
|
|
|
cutils,
|
|
|
verbose,
|
|
|
cgbase, cgutils,
|
|
|
+ aoptobj,
|
|
|
aasmbase, aasmdata, aasmcpu;
|
|
|
|
|
|
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
|
|
@@ -49,11 +50,11 @@ begin
|
|
|
case hp1.opcode of
|
|
|
A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
|
|
|
isFoldableArithOp :=
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(hp1).oper[1]^.reg = reg) and
|
|
|
((taicpu(hp1).oper[0]^.typ = top_const) or
|
|
|
((taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
- (taicpu(hp1).oper[0]^.reg<>reg))) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(hp1).oper[1]^.reg = reg);
|
|
|
+ (taicpu(hp1).oper[0]^.reg<>reg)));
|
|
|
A_INC, A_DEC:
|
|
|
isFoldableArithOp :=
|
|
|
(taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
@@ -65,6 +66,8 @@ function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
|
|
var
|
|
|
next1: tai;
|
|
|
hp1, hp2: tai;
|
|
|
+ GetNextIntruction_p : boolean;
|
|
|
+ TmpUsedRegs : TAllUsedRegs;
|
|
|
begin
|
|
|
Result := False;
|
|
|
case p.typ of
|
|
@@ -92,7 +95,7 @@ begin
|
|
|
taicpu(hp1).oper[0]^.val);
|
|
|
asml.remove(p);
|
|
|
p.Free;
|
|
|
- p := hp1;
|
|
|
+ p:=hp1;
|
|
|
end;
|
|
|
(* else
|
|
|
{change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
|
|
@@ -108,8 +111,11 @@ begin
|
|
|
A_MOV:
|
|
|
{ removes superfluous And's after mov's }
|
|
|
begin
|
|
|
+ if not(cs_opt_level3 in current_settings.optimizerswitches) then
|
|
|
+ exit;
|
|
|
+ GetNextIntruction_p:=GetNextInstruction(p, hp1);
|
|
|
if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- GetNextInstruction(p, hp1) and
|
|
|
+ GetNextIntruction_p and
|
|
|
(tai(hp1).typ = ait_instruction) and
|
|
|
(taicpu(hp1).opcode = A_AND) and
|
|
|
(taicpu(hp1).oper[0]^.typ = top_const) and
|
|
@@ -122,7 +128,65 @@ begin
|
|
|
asml.remove(hp1);
|
|
|
hp1.free;
|
|
|
end;
|
|
|
- end;
|
|
|
+ end
|
|
|
+ else if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ GetNextIntruction_p and
|
|
|
+ (hp1.typ = ait_instruction) and
|
|
|
+ GetNextInstruction(hp1, hp2) and
|
|
|
+ (hp2.typ = ait_instruction) and
|
|
|
+ (taicpu(hp2).opcode = A_MOV) and
|
|
|
+ (taicpu(hp2).oper[0]^.typ = top_reg) and
|
|
|
+ OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
|
|
|
+ (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
|
|
|
+ ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
|
|
|
+ IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
|
|
|
+ ) then
|
|
|
+ { change mov reg/ref, reg2 }
|
|
|
+ { add/sub/or/... reg3/$const, reg2 }
|
|
|
+ { mov reg2, reg/ref }
|
|
|
+ { to add/sub/or/... reg3/$const, reg/ref }
|
|
|
+ begin
|
|
|
+ CopyUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
|
|
|
+ If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ { by example:
|
|
|
+ movswl %si,%eax movswl %si,%eax p
|
|
|
+ decl %eax addl %edx,%eax hp1
|
|
|
+ movw %ax,%si movw %ax,%si hp2
|
|
|
+ ->
|
|
|
+ movswl %si,%eax movswl %si,%eax p
|
|
|
+ decw %eax addw %edx,%eax hp1
|
|
|
+ movw %ax,%si movw %ax,%si hp2
|
|
|
+ }
|
|
|
+ taicpu(hp1).changeopsize(taicpu(hp2).opsize);
|
|
|
+ {
|
|
|
+ ->
|
|
|
+ movswl %si,%eax movswl %si,%eax p
|
|
|
+ decw %si addw %dx,%si hp1
|
|
|
+ movw %ax,%si movw %ax,%si hp2
|
|
|
+ }
|
|
|
+ { Retarget the arithmetic instruction at hp2's destination operand. }
|
|
|
+ case taicpu(hp1).ops of
|
|
|
+ 1:
|
|
|
+ { one-operand form (inc/dec): the only operand is the destination }
|
|
|
+ taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
|
|
|
+ 2:
|
|
|
+ begin
|
|
|
+ taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
|
|
|
+ { hp1 now carries hp2's operand size, but a register source kept }
|
|
|
+ { from the original instruction may still have the old width, e.g. }
|
|
|
+ { %rdx after the S_L/S_Q match accepted above; give it the same }
|
|
|
+ { sub-register size as the register hp2 stores. }
|
|
|
+ { NOTE(review): assumes changeopsize does not resize register }
|
|
|
+ { operands itself - confirm. }
|
|
|
+ { Shifts are excluded: their register source is the shift count, }
|
|
|
+ { whose width is independent of the operation size. }
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
+ (taicpu(hp1).opcode <> A_SHL) and
|
|
|
+ (taicpu(hp1).opcode <> A_SHR) and
|
|
|
+ (taicpu(hp1).opcode <> A_SAR) then
|
|
|
+ setsubreg(taicpu(hp1).oper[0]^.reg, getsubreg(taicpu(hp2).oper[0]^.reg));
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ internalerror(2008042701);
|
|
|
+ end;
|
|
|
+ {
|
|
|
+ ->
|
|
|
+ decw %si addw %dx,%si p
|
|
|
+ }
|
|
|
+ asml.remove(p);
|
|
|
+ asml.remove(hp2);
|
|
|
+ p.Free;
|
|
|
+ hp2.Free;
|
|
|
+ p := hp1;
|
|
|
+ end;
|
|
|
+ ReleaseUsedRegs(TmpUsedRegs);
|
|
|
+ end
|
|
|
end;
|
|
|
A_MOVSX,
|
|
|
A_MOVZX:
|
|
@@ -190,26 +254,28 @@ begin
|
|
|
(taicpu(hp1).oper[0]^.typ = top_const) and
|
|
|
(taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
|
|
|
- case taicpu(p).opsize of
|
|
|
- S_BL, S_BW, S_BQ:
|
|
|
- if (taicpu(hp1).oper[0]^.val = $ff) then
|
|
|
- begin
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.Free;
|
|
|
- end;
|
|
|
- S_WL, S_WQ:
|
|
|
- if (taicpu(hp1).oper[0]^.val = $ffff) then
|
|
|
- begin
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.Free;
|
|
|
- end;
|
|
|
- S_LQ:
|
|
|
- if (taicpu(hp1).oper[0]^.val = $ffffffff) then
|
|
|
- begin
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.Free;
|
|
|
+ begin
|
|
|
+ case taicpu(p).opsize of
|
|
|
+ S_BL, S_BW, S_BQ:
|
|
|
+ if (taicpu(hp1).oper[0]^.val = $ff) then
|
|
|
+ begin
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.Free;
|
|
|
+ end;
|
|
|
+ S_WL, S_WQ:
|
|
|
+ if (taicpu(hp1).oper[0]^.val = $ffff) then
|
|
|
+ begin
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.Free;
|
|
|
+ end;
|
|
|
+ S_LQ:
|
|
|
+ if (taicpu(hp1).oper[0]^.val = $ffffffff) then
|
|
|
+ begin
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.Free;
|
|
|
+ end;
|
|
|
end;
|
|
|
- end;
|
|
|
+ end;
|
|
|
{ changes some movzx constructs to faster synonyms (all examples
|
|
|
are given with eax/ax, but are also valid for other registers)}
|
|
|
if (taicpu(p).oper[1]^.typ = top_reg) then
|