|
@@ -37,6 +37,9 @@ Type
|
|
|
procedure PeepHoleOptPass2;override;
|
|
|
End;
|
|
|
|
|
|
+ TCpuPreRegallocScheduler = class(TAsmOptimizer)
|
|
|
+ function PeepHoleOptPass1Cpu(var p: tai): boolean;override;
|
|
|
+ end;
|
|
|
|
|
|
TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
|
|
|
{ uses the same constructor as TAopObj }
|
|
@@ -46,8 +49,9 @@ Type
|
|
|
Implementation
|
|
|
|
|
|
uses
|
|
|
+ cutils,
|
|
|
verbose,
|
|
|
- cgutils,
|
|
|
+ cgbase,cgutils,
|
|
|
aasmbase,aasmcpu;
|
|
|
|
|
|
function CanBeCond(p : tai) : boolean;
|
|
@@ -450,6 +454,83 @@ Implementation
|
|
|
end;
|
|
|
end;
|
|
|
|
|
|
+ const
|
|
|
+ { set of opcode which might or do write to memory }
|
|
|
+ { TODO : extend armins.dat to contain r/w info }
|
|
|
+ opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
|
|
|
+ A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
|
|
|
+
|
|
|
+ function TCpuPreRegallocScheduler.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
|
|
+ {
|
|
|
+ TODO:
|
|
|
+ - move in right pass
|
|
|
+ - changes of R15
|
|
|
+ }
|
|
|
+ var
|
|
|
+ hp1,hp2 : tai;
|
|
|
+ begin
|
|
|
+ result:=true;
|
|
|
+ p := BlockStart;
|
|
|
+ { UsedRegs := []; }
|
|
|
+ while (p <> BlockEnd) Do
|
|
|
+ begin
|
|
|
+ if (p.typ=ait_instruction) and
|
|
|
+ GetNextInstruction(p,hp1) and
|
|
|
+ (hp1.typ=ait_instruction) and
|
|
|
+ { for now we don't reschedule if the previous instruction changes potentially a memory location }
|
|
|
+ ( (not(taicpu(p).opcode in opcode_could_mem_write) and
|
|
|
+ not(RegModifiedByInstruction(NR_PC,p)) and
|
|
|
+ (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH])
|
|
|
+ ) or
|
|
|
+ ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
|
|
|
+ (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
|
|
|
+ ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
|
|
|
+ (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
|
|
|
+ (taicpu(hp1).oper[1]^.ref^.offset=0)
|
|
|
+ )
|
|
|
+ ) or
|
|
|
+ { try to prove that the memory accesses don't overlapp }
|
|
|
+ ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
|
|
|
+ (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
+ (taicpu(p).oppostfix=PF_None) and
|
|
|
+ (taicpu(hp1).oppostfix=PF_None) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.index=NR_NO) and
|
|
|
+ (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
|
|
|
+ { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
|
|
|
+ (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
|
|
|
+ )
|
|
|
+ )
|
|
|
+ ) and
|
|
|
+ GetNextInstruction(hp1,hp2) and
|
|
|
+ (hp2.typ=ait_instruction) and
|
|
|
+ { loaded register used by next instruction? }
|
|
|
+ (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
|
|
|
+ { loaded register not used by previous instruction? }
|
|
|
+ not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
|
|
|
+ { same condition? }
|
|
|
+ (taicpu(p).condition=taicpu(hp1).condition) and
|
|
|
+ { first instruction might not change the register used as base }
|
|
|
+ ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
|
|
|
+ not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
|
|
|
+ ) and
|
|
|
+ { first instruction might not change the register used as index }
|
|
|
+ ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
|
|
|
+ not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
|
|
|
+ ) then
|
|
|
+ begin
|
|
|
+ asml.Remove(p);
|
|
|
+ asml.Remove(hp1);
|
|
|
+{$ifdef DEBUG_PREREGSCHEDULER}
|
|
|
+ asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
|
|
|
+{$endif DEBUG_PREREGSCHEDULER}
|
|
|
+ asml.InsertBefore(hp1,hp2);
|
|
|
+ asml.InsertBefore(p,hp2);
|
|
|
+ end;
|
|
|
+ p := tai(p.next)
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
|
|
|
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
|
|
|
begin
|
|
@@ -458,4 +539,5 @@ Implementation
|
|
|
|
|
|
begin
|
|
|
casmoptimizer:=TCpuAsmOptimizer;
|
|
|
+ cpreregallocscheduler:=TCpuPreRegallocScheduler;
|
|
|
End.
|