|
@@ -39,15 +39,21 @@ Interface
|
|
|
{ uses the same constructor as TAopObj }
|
|
|
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
|
|
|
procedure PeepHoleOptPass2;override;
|
|
|
+ function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
|
|
|
function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
|
|
|
function LookForPostindexedPattern(p : taicpu) : boolean;
|
|
|
procedure DebugMsg(const s : string; p : tai);
|
|
|
+ private
|
|
|
+ function OptPass1Shift(var p: tai): boolean;
|
|
|
End;
|
|
|
|
|
|
Implementation
|
|
|
|
|
|
uses
|
|
|
- aasmbase;
|
|
|
+ aasmbase,
|
|
|
+ aoptutils,
|
|
|
+ cgutils,
|
|
|
+ verbose;
|
|
|
|
|
|
{$ifdef DEBUG_AOPTCPU}
|
|
|
procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
|
|
@@ -66,6 +72,22 @@ Implementation
|
|
|
end;
|
|
|
|
|
|
|
|
|
+ function RefsEqual(const r1, r2: treference): boolean;
|
|
|
+ begin
|
|
|
+ refsequal :=
|
|
|
+ (r1.offset = r2.offset) and
|
|
|
+ (r1.base = r2.base) and
|
|
|
+ (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
|
|
|
+ (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
|
|
|
+ (r1.relsymbol = r2.relsymbol) and
|
|
|
+ (r1.shiftimm = r2.shiftimm) and
|
|
|
+ (r1.addressmode = r2.addressmode) and
|
|
|
+ (r1.shiftmode = r2.shiftmode) and
|
|
|
+ (r1.volatility=[]) and
|
|
|
+ (r2.volatility=[]);
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
|
|
|
begin
|
|
|
result :=
|
|
@@ -84,6 +106,33 @@ Implementation
|
|
|
end;
|
|
|
|
|
|
|
|
|
+ function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
|
|
|
+ begin
|
|
|
+ result := (oper.typ = top_reg) and (oper.reg = reg);
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
|
|
|
+ begin
|
|
|
+ result := oper1.typ = oper2.typ;
|
|
|
+
|
|
|
+ if result then
|
|
|
+ case oper1.typ of
|
|
|
+ top_const:
|
|
|
+ Result:=oper1.val = oper2.val;
|
|
|
+ top_reg:
|
|
|
+ Result:=oper1.reg = oper2.reg;
|
|
|
+ top_conditioncode:
|
|
|
+ Result:=oper1.cc = oper2.cc;
|
|
|
+ top_realconst:
|
|
|
+ Result:=oper1.val_real = oper2.val_real;
|
|
|
+ top_ref:
|
|
|
+ Result:=RefsEqual(oper1.ref^, oper2.ref^);
|
|
|
+ else Result:=false;
|
|
|
+ end
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
|
|
|
Out Next: tai; reg: TRegister): Boolean;
|
|
|
begin
|
|
@@ -97,6 +146,54 @@ Implementation
|
|
|
is_calljmp(taicpu(Next).opcode);
|
|
|
end;
|
|
|
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
|
|
|
+ var
|
|
|
+ p: taicpu;
|
|
|
+ begin
|
|
|
+ p := taicpu(hp);
|
|
|
+ Result := false;
|
|
|
+ if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
|
|
|
+ exit;
|
|
|
+
|
|
|
+ case p.opcode of
|
|
|
+ { These operands do not write into a register at all }
|
|
|
+ A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
|
|
|
+ exit;
|
|
|
+ {Take care of post/preincremented store and loads, they will change their base register}
|
|
|
+ A_STR, A_LDR:
|
|
|
+ begin
|
|
|
+ Result := false;
|
|
|
+ { actually, this does not apply here because post-/preindexed does not mean that a register
|
|
|
+ is loaded with a new value, it is only modified
|
|
|
+ (taicpu(p).oper[1]^.typ=top_ref) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.base = reg);
|
|
|
+ }
|
|
|
+ { STR does not load into its first register }
|
|
|
+ if p.opcode = A_STR then
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
+
|
|
|
+ if Result then
|
|
|
+ exit;
|
|
|
+
|
|
|
+ case p.oper[0]^.typ of
|
|
|
+ top_reg:
|
|
|
+ Result := (p.oper[0]^.reg = reg);
|
|
|
+ top_ref:
|
|
|
+ Result :=
|
|
|
+ (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
|
|
|
+ (taicpu(p).oper[0]^.ref^.base = reg);
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
{
|
|
|
optimize
|
|
|
ldr/str regX,[reg1]
|
|
@@ -145,6 +242,111 @@ Implementation
|
|
|
end;
|
|
|
|
|
|
|
|
|
+ function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
|
|
|
+ var
|
|
|
+ hp1,hp2: tai;
|
|
|
+ I2, I: Integer;
|
|
|
+ shifterop: tshifterop;
|
|
|
+ begin
|
|
|
+ Result:=false;
|
|
|
+ { This folds shifterops into following instructions
|
|
|
+ <shiftop> r0, r1, #imm
|
|
|
+ <op> r2, r3, r0
|
|
|
+
|
|
|
+ to
|
|
|
+
|
|
|
+ <op> r2, r3, r1, <shiftop> #imm
|
|
|
+ }
|
|
|
+ { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
|
|
|
+ if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
|
|
|
+ MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
|
|
|
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
+ MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
|
|
|
+ A_EON, A_EOR, A_MOV, A_NEG, A_ORN, A_ORR,
|
|
|
+ A_SUB, A_TST], [PF_None]) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
|
|
|
+ (taicpu(hp1).ops >= 2) and
|
|
|
+ { Currently we can't fold into another shifterop }
|
|
|
+ (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
|
|
|
+ { SP does not work completely with shifted registers, as I didn't find the exact rules,
|
|
|
+ we do not operate on SP }
|
|
|
+ (taicpu(hp1).oper[0]^.reg<>NR_SP) and
|
|
|
+ (taicpu(hp1).oper[1]^.reg<>NR_SP) and
|
|
|
+ (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
|
|
|
+ { reg1 might not be modified inbetween }
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
|
|
|
+ (
|
|
|
+ { Only ONE of the two src operands is allowed to match }
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
|
|
|
+ ) and
|
|
|
+ { for SUB, the last operand must match, there is no RSB on AArch64 }
|
|
|
+ ((taicpu(hp1).opcode<>A_SUB) or
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).opcode in [A_TST, A_CMP, A_CMN, A_MOV] then
|
|
|
+ I2:=0
|
|
|
+ else
|
|
|
+ I2:=1;
|
|
|
+ for I:=I2 to taicpu(hp1).ops-1 do
|
|
|
+ if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
|
|
|
+ begin
|
|
|
+ { If the parameter matched on the second op from the RIGHT
|
|
|
+ we have to switch the parameters, this will not happen for CMP
|
|
|
+ where we're only evaluating the rightmost parameter
|
|
|
+ }
|
|
|
+ shifterop_reset(shifterop);
|
|
|
+ case taicpu(p).opcode of
|
|
|
+ A_LSL:
|
|
|
+ shifterop.shiftmode:=SM_LSL;
|
|
|
+ A_ROR:
|
|
|
+ shifterop.shiftmode:=SM_ROR;
|
|
|
+ A_LSR:
|
|
|
+ shifterop.shiftmode:=SM_LSR;
|
|
|
+ A_ASR:
|
|
|
+ shifterop.shiftmode:=SM_ASR;
|
|
|
+ else
|
|
|
+ InternalError(2019090401);
|
|
|
+ end;
|
|
|
+ shifterop.shiftimm:=taicpu(p).oper[2]^.val;
|
|
|
+
|
|
|
+ if I <> taicpu(hp1).ops-1 then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).ops = 3 then
|
|
|
+ hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
+ taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
|
|
|
+ taicpu(p).oper[1]^.reg, shifterop)
|
|
|
+ else
|
|
|
+ hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
+ taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
+ shifterop);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ if taicpu(hp1).ops = 3 then
|
|
|
+ hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
+ taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
|
|
|
+ taicpu(p).oper[1]^.reg,shifterop)
|
|
|
+ else
|
|
|
+ hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
+ taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
+ shifterop);
|
|
|
+
|
|
|
+ taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
|
|
|
+ asml.insertbefore(hp2, hp1);
|
|
|
+ GetNextInstruction(p, hp2);
|
|
|
+ asml.remove(p);
|
|
|
+ asml.remove(hp1);
|
|
|
+ p.free;
|
|
|
+ hp1.free;
|
|
|
+ p:=hp2;
|
|
|
+ DebugMsg('Peephole FoldShiftProcess done', p);
|
|
|
+ Result:=true;
|
|
|
+ break;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
|
|
begin
|
|
|
result := false;
|
|
@@ -159,6 +361,11 @@ Implementation
|
|
|
begin
|
|
|
Result:=LookForPostindexedPattern(taicpu(p));
|
|
|
end;
|
|
|
+ A_LSR,
|
|
|
+ A_ROR,
|
|
|
+ A_ASR,
|
|
|
+ A_LSL:
|
|
|
+ Result:=OptPass1Shift(p);
|
|
|
else
|
|
|
;
|
|
|
end;
|