12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984 |
- {
- Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
- Development Team
- This unit implements the common RiscV optimizer object
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit aoptcpurv;
- interface
- {$I fpcdefs.inc}
- {$ifdef EXTDEBUG}
- {$define DEBUG_AOPTCPU}
- {$endif EXTDEBUG}
- uses
- cpubase,
- globals, globtype,
- cgbase,
- aoptobj, aoptcpub, aopt,
- aasmtai, aasmcpu;
type
  { Peephole optimizer object implementing the common RiscV optimizations. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
    { --- register usage queries --- }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;

    { --- utilities --- }
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
    { unlinks and frees orig, then repositions orig on a neighbouring instruction }
    procedure RemoveInstr(var orig: tai; moveback: boolean=true);

    { --- pass 1 entry point --- }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

    { --- pass 1 handlers, selected by opcode in PeepHoleOptPass1Cpu --- }
    function OptPass1OP(var p: tai): boolean;
    function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
    function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;
    function OptPass1Fcmp(var p: tai): boolean;
    function OptPass1Add(var p: tai): boolean;
    function OptPass1Sub(var p: tai): boolean;
    function OptPass1Andi(var p: tai): boolean;
    function OptPass1SLTx(var p: tai): boolean;
    function OptPass1SLTI(var p: tai): boolean;
    function OptPass1SLTIU(var p: tai): boolean;
    function OptPass1SxxI(var p: tai): boolean;
  end;
- implementation
- uses
- cutils,
- verbose;
{ Returns true when instr is an instruction whose opcode is a member of op and,
  if AConditions is not empty, whose condition is a member of AConditions. }
function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
  begin
    { anything that is not an instruction can never match }
    if instr.typ<>ait_instruction then
      exit(false);
    if not (taicpu(instr).opcode in op) then
      exit(false);
    { an empty condition set means "any condition" }
    result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
  end;
{ Returns true when instr is an instruction with exactly the opcode op and,
  if AConditions is not empty, whose condition is a member of AConditions. }
function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
  begin
    { anything that is not an instruction can never match }
    if instr.typ<>ait_instruction then
      exit(false);
    if taicpu(instr).opcode<>op then
      exit(false);
    { an empty condition set means "any condition" }
    result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
  end;
{ Compares two operands: constants match on equal value, registers on equal
  register. Operands of different kinds, references and every other operand
  kind never match. }
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  begin
    if oper1.typ<>oper2.typ then
      result:=false
    else if oper1.typ=top_const then
      result:=(oper1.val=oper2.val)
    else if oper1.typ=top_reg then
      result:=(oper1.reg=oper2.reg)
    else
      { top_ref would need RefsEqual(oper1.ref^, oper2.ref^); not supported here }
      result:=false;
  end;
{ Returns true when oper is a register operand holding exactly reg. }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  begin
    if oper.typ=top_reg then
      result:=(oper.reg=reg)
    else
      result:=false;
  end;
{ Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
  defined; otherwise this is an empty inline procedure. }
procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
  begin
{$ifdef DEBUG_AOPTCPU}
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
  end;
{ Returns true when hp is an instruction that has reg as a register operand
  which is not write-only, or as the base register of a reference operand. }
function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  var
    instr: taicpu;
    opidx: longint;
  begin
    result:=false;
    if (not assigned(hp)) or (hp.typ<>ait_instruction) then
      exit;
    instr:=taicpu(hp);
    { stop at the first operand that reads the register }
    for opidx:=0 to instr.ops-1 do
      case instr.oper[opidx]^.typ of
        top_reg:
          if (instr.oper[opidx]^.reg=reg) and
             (instr.spilling_get_operation_type(opidx)<>operand_write) then
            exit(true);
        top_ref:
          if instr.oper[opidx]^.ref^.base=reg then
            exit(true);
        else
          ;
      end;
  end;
{ Returns true when hp is an instruction with more than one operand whose
  first operand is the register reg and is not a read-only operand. }
function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  var
    instr: taicpu;
  begin
    result:=false;
    if hp.typ<>ait_instruction then
      exit;
    instr:=taicpu(hp);
    if instr.ops<=1 then
      exit;
    if instr.oper[0]^.typ<>top_reg then
      exit;
    if instr.oper[0]^.reg<>reg then
      exit;
    result:=instr.spilling_get_operation_type(0)<>operand_read;
  end;
{ Returns true when p1 is an instruction that has Reg as a register operand
  which is written (operand_write or operand_readwrite).
  Anything that is not an instruction never modifies a register. }
function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  var
    i : Longint;
  begin
    result:=false;
    { guard the taicpu typecasts below: only instructions carry operands,
      same check as in InstructionLoadsFromReg }
    if not (assigned(p1) and (p1.typ=ait_instruction)) then
      exit;
    for i:=0 to taicpu(p1).ops-1 do
      case taicpu(p1).oper[i]^.typ of
        top_reg:
          if (taicpu(p1).oper[i]^.reg=Reg) and
             (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
            exit(true);
        else
          ;
      end;
  end;
{ Advances from Current and returns in Next the first entry at which the
  search stops; the result is that of the last GetNextInstruction call.
  Without cs_opt_level3 only a single step is taken. With it, the search
  continues until a non-instruction, an instruction in which reg occurs, or
  a call/jump is reached. }
function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  begin
    Next:=Current;
    while true do
      begin
        Result:=GetNextInstruction(Next,Next);
        if not Result then
          exit;
        { look further than one instruction only at optimization level 3 }
        if not (cs_opt_level3 in current_settings.optimizerswitches) then
          exit;
        if Next.typ<>ait_instruction then
          exit;
        if RegInInstruction(reg,Next) then
          exit;
        if is_calljmp(taicpu(Next).opcode) then
          exit;
      end;
  end;
{ Folds a following register copy into the instruction p:

    <Op>   %reg3,%reg2,%reg1
    addi   %reg4,%reg3,0
    dealloc  %reg3

  becomes

    <Op>   %reg4,%reg2,%reg1

  Returns true when the addi was removed. }
function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    if GetNextInstruction(p,hp1) and
       MatchInstruction(hp1,A_ADDI) and
       { addi may also carry a reference as third operand; only read .val
         when it really is a constant }
       (taicpu(hp1).ops=3) and
       (taicpu(hp1).oper[2]^.typ=top_const) and
       (taicpu(hp1).oper[2]^.val=0) and
       MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { the intermediate register must not be needed after the copy }
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
            DebugMsg('Peephole OpAddi02Op done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
{ Folds a following floating point register move into the instruction p:

    <FOp>   %reg3,%reg2,%reg1
    <mvop>  %reg4,%reg3,%reg3
    dealloc  %reg3

  becomes

    <FOp>   %reg4,%reg2,%reg1

  mvop is the move opcode matching the precision of p.
  Returns true when the move was removed. }
function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
  var
    mov : tai;
  begin
    result:=false;
    if not GetNextInstruction(p,mov) then
      exit;
    if not MatchInstruction(mov,mvop) then
      exit;
    { both sources of the move must be the result of p }
    if not MatchOperand(taicpu(p).oper[0]^,taicpu(mov).oper[1]^) then
      exit;
    if not MatchOperand(taicpu(p).oper[0]^,taicpu(mov).oper[2]^) then
      exit;

    TransferUsedRegs(TmpUsedRegs);
    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
    { the intermediate register must not be needed after the move }
    if RegUsedAfterInstruction(taicpu(mov).oper[1]^.reg,mov,TmpUsedRegs) then
      exit;

    taicpu(p).loadoper(0,taicpu(mov).oper[0]^);
    DebugMsg('Peephole FOpFsgnj02FOp done',p);
    RemoveInstruction(mov);
    result:=true;
  end;
{ Folds a following masking andi into the floating point compare p:

    <Fcmp>   %ireg3,%freg2,%freg1
    <andi>   %ireg4,%ireg3,const
    dealloc  %reg3

  becomes

    <Fcmp>   %ireg4,%freg2,%freg1

  Only done when bit 0 of const is set.
  Returns true when the andi was removed. }
function TRVCpuAsmOptimizer.OptPass1Fcmp(var p: tai) : boolean;
  var
    mask : tai;
  begin
    result:=false;
    if not GetNextInstruction(p,mask) then
      exit;
    if not MatchInstruction(mask,A_ANDI) then
      exit;
    if not MatchOperand(taicpu(p).oper[0]^,taicpu(mask).oper[1]^) then
      exit;
    { the mask has to keep bit 0 }
    if (taicpu(mask).oper[2]^.val and 1)<>1 then
      exit;

    TransferUsedRegs(TmpUsedRegs);
    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
    { the intermediate register must not be needed after the andi }
    if RegUsedAfterInstruction(taicpu(mask).oper[1]^.reg,mask,TmpUsedRegs) then
      exit;

    taicpu(p).loadoper(0,taicpu(mask).oper[0]^);
    DebugMsg('Peephole FcmpAndi2Fcmp done',p);
    RemoveInstruction(mask);
    result:=true;
  end;
{ Forwards the source of a floating point register move into the next
  instruction:

    <mvop>   %reg1,%reg2,%reg2
    <FOp>    %reg3,%reg1,%reg1
    dealloc  %reg1

  becomes

    <FOp>    %reg3,%reg2,%reg2

  mvop is the opcode of p (A_FSGNJ_S or A_FSGNJ_D); only operations of the
  same precision are considered. fsgnj is a plain move only when both of its
  source operands are the same register, so any other form is left alone.
  Returns true when p was removed. }
function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { fsgnj rd,rs1,rs2 with rs1<>rs2 combines the value of rs1 with the sign
      of rs2; forwarding rs1 would then change the result }
    if (taicpu(p).ops<>3) or
       (taicpu(p).oper[0]^.typ<>top_reg) or
       (taicpu(p).oper[1]^.typ<>top_reg) or
       not MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) then
      exit;

    if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
       (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
          A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
          A_FEQ_S])) or
        ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
          A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
          A_FEQ_D]))) and
       { the moved register has to be one of the sources of hp1 }
       (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
        ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
        ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        { replace every source operand that reads the move destination }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
        if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
          taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
        if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
          taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

        { the move source is now read by hp1 }
        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

        DebugMsg('Peephole FMVFOp2FOp performed', hp1);

        RemoveInstr(p);

        result:=true;
      end
  end;
{ Removes orig from the assembler list, frees it and repositions orig.

  moveback=true : orig becomes the instruction found by GetLastInstruction,
                  or the one found by GetNextInstruction when that fails.
  moveback=false: orig becomes the instruction found by GetNextInstruction.

  orig is nil when no such instruction is found. }
procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
  var
    n: tai;
  begin
    { make sure orig never receives an uninitialised pointer }
    n:=nil;
    if moveback then
      begin
        if not GetLastInstruction(orig,n) then
          GetNextInstruction(orig,n);
      end
    else
      GetNextInstruction(orig,n);

    AsmL.Remove(orig);
    orig.Free;

    orig:=n;
  end;
{ Peephole optimizations for addi (p is dispatched here for A_ADDI).
  The patterns are tried in the order below; if none applies the generic
  OptPass1OP is tried. Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
  var
    hp1: tai;
  begin
    result:=false;
    {
      Get rid of
        addi x, x, 0
    }
    if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val=0) and
       MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
      begin
        DebugMsg('Peephole Addi2Nop performed', p);

        RemoveInstr(p);

        result:=true;
      end
    {
      Changes
        addi x, y, #
        addi/addiw z, x, #
        dealloc x
      To
        addi z, y, #+#
        dealloc x
    }
    else if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
       MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
       (taicpu(hp1).ops=3) and
       MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
       (taicpu(hp1).oper[2]^.typ=top_const) and
       is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
       (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
        taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
        taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

        DebugMsg('Peephole AddiAddi2Addi performed', hp1);

        RemoveInstr(p);

        result:=true;
      end
    {
      Changes
        addi x, x, (ref)
        ld/sd y, 0(x)
        dealloc x
      To
        ld/sd y, 0(ref)(x)

      Not done for a store whose stored value is x itself: the store would
      then write the value x had before the addi.
    }
    else if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_ref) and
       MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
       MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                              A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
       (taicpu(hp1).ops=2) and
       (taicpu(hp1).oper[1]^.typ=top_ref) and
       (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
       (taicpu(hp1).oper[1]^.ref^.offset=0) and
       (not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg))) and
       (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
        taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

        DebugMsg('Peephole AddiMem2Mem performed', hp1);

        RemoveInstr(p);

        result:=true;
      end
    {
      Changes
        addi x, z, #w
        ld/sd y, 0(x)
        dealloc x
      To
        ld/sd y, #w(z)

      Not done for a store whose stored value is x itself: x would no longer
      be computed once the addi is gone.
    }
    else if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       //MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
       MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                              A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
       (taicpu(hp1).ops=2) and
       (taicpu(hp1).oper[1]^.typ=top_ref) and
       (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
       (taicpu(hp1).oper[1]^.ref^.offset=0) and
       (not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg))) and
       (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        { z is now read by hp1, so keep it allocated up to there (as the
          AddiAddi2Addi and Addi0Op2Op cases do) }
        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
        //taicpu(hp1).loadconst(1,taicpu(p).oper[2]^.ref^);
        taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
        taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

        DebugMsg('Peephole AddiMem2Mem performed', hp1);

        RemoveInstr(p);

        result:=true;
      end
    {
      Changes
        addi w, z, 0
        op x, y, w
        dealloc w
      To
        op x, y, z
    }
    else if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val=0) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
       ((MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_AND,A_OR,
           A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
           A_DIV,A_DIVU,A_REM,A_REMU,A_SLT,A_SLTU,A_SLTI,A_SLTIU
           {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
           A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
           A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
          ) and
         (taicpu(hp1).ops=3) and
         (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))) {or
          This is not possible yet as the deallocation after the jump could also mean that the register is in use at the
          jump target.

         (MatchInstruction(hp1, [A_Bxx]) and
          (taicpu(hp1).ops=3) and
          (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))) }
       ) and
       (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        { if MatchInstruction(hp1, [A_Bxx]) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
            taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
        if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
          taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

        DebugMsg('Peephole Addi0Op2Op performed', hp1);

        RemoveInstr(p);

        result:=true;
      end
    else
      result:=OptPass1OP(p);
  end;
{ Folds a subtraction that only feeds a compare-with-zero branch:

    sub x,y,z
    bgeu/beq X0,x,...
    dealloc x

  becomes

    beq y,z,...

  Falls back to OptPass1OP when the pattern does not apply.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
  var
    branch: tai;
    foldable: boolean;
  begin
    foldable:=
      (taicpu(p).ops=3) and
      GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
      MatchInstruction(branch,A_Bxx,[C_GEU,C_EQ]) and
      (taicpu(branch).ops=3) and
      MatchOperand(taicpu(branch).oper[0]^,NR_X0) and
      MatchOperand(taicpu(branch).oper[1]^,taicpu(p).oper[0]^) and
      { both inputs of the sub must still hold their values at the branch }
      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch)) and
      (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch));

    if not foldable then
      begin
        result:=OptPass1OP(p);
        exit;
      end;

    { compare the two inputs of the sub directly }
    taicpu(branch).loadreg(0,taicpu(p).oper[1]^.reg);
    taicpu(branch).loadreg(1,taicpu(p).oper[2]^.reg);
    taicpu(branch).condition:=C_EQ;

    DebugMsg('Peephole SubBxx2Beq performed', branch);

    RemoveInstr(p);

    result:=true;
  end;
{ Folds slt/sltu into a following branch that tests its result against X0:

    slt(u) x,X0,y                 slt(u) x,y,z
    beq/bne x, X0, ...            beq/bne x, X0, ...
    dealloc x                     dealloc x

  becomes

    bge(u)/blt(u) X0, y, ...      bge(u)/blt(u) y, z, ...

  Falls back to OptPass1OP when neither pattern applies.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;

  { Makes branch compare lhs with rhs directly: bne becomes blt(u), any
    other condition becomes bge(u); unsignedcmp selects the unsigned forms. }
  procedure RetargetBranch(branch: taicpu; lhs, rhs: TRegister; unsignedcmp: boolean);
    begin
      branch.loadreg(0,lhs);
      branch.loadreg(1,rhs);
      if branch.condition=C_NE then
        begin
          if unsignedcmp then
            branch.condition:=C_LTU
          else
            branch.condition:=C_LT;
        end
      else
        begin
          if unsignedcmp then
            branch.condition:=C_GEU
          else
            branch.condition:=C_GE;
        end;
    end;

  var
    branch: tai;
  begin
    result:=false;
    { first operand of the compare is X0 }
    if (taicpu(p).ops=3) and
       MatchOperand(taicpu(p).oper[1]^,NR_X0) and
       GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
       MatchInstruction(branch,A_Bxx,[C_NE,C_EQ]) and
       (taicpu(branch).ops=3) and
       MatchOperand(taicpu(branch).oper[0]^,taicpu(p).oper[0]^) and
       MatchOperand(taicpu(branch).oper[1]^,NR_X0) and
       (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
      begin
        RetargetBranch(taicpu(branch),NR_X0,taicpu(p).oper[2]^.reg,taicpu(p).opcode=A_SLTU);

        DebugMsg('Peephole SltuB2B performed', branch);

        RemoveInstr(p);

        result:=true;
      end
    { general form: both inputs must be unchanged up to the branch }
    else if (taicpu(p).ops=3) and
       GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
       MatchInstruction(branch,A_Bxx,[C_NE,C_EQ]) and
       (taicpu(branch).ops=3) and
       MatchOperand(taicpu(branch).oper[0]^,taicpu(p).oper[0]^) and
       MatchOperand(taicpu(branch).oper[1]^,NR_X0) and
       (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch)) and
       (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
      begin
        RetargetBranch(taicpu(branch),taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg,taicpu(p).opcode=A_SLTU);

        DebugMsg('Peephole SltuB2B performed', branch);

        RemoveInstr(p);

        result:=true;
      end
    else
      result:=OptPass1OP(p);
  end;
{ Peephole optimizations for "slti x,y,0".

  Currently performed:

    slti x,y,0
    andi z,x,#m     (bit 0 of #m set)
    dealloc x
  becomes
    slti z,y,0

  slti only produces 0 or 1, so the andi is redundant exactly when the mask
  keeps bit 0. The fold is only done when z is not touched between the two
  instructions and x is either z itself or dead after the andi.

  Falls back to OptPass1OP otherwise.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
  var
    hp1: tai;
  begin
    result:=false;
    if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val=0) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
      begin
        {
          Turn
            slti x,y,0
            beq/ne x,x0,...
            dealloc x
          Into
            bge/lt y,x0,...

          we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch
        if MatchInstruction(hp1,A_Bxx) and
          (taicpu(hp1).ops=3) and
          (taicpu(hp1).oper[0]^.typ=top_reg) and
          (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
          (taicpu(hp1).oper[1]^.typ=top_reg) and
          (taicpu(hp1).oper[1]^.reg=NR_X0) and
          (taicpu(hp1).condition in [C_NE,C_EQ]) and
          (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
          RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
          begin
            taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
            taicpu(hp1).loadreg(1,NR_X0);

            if taicpu(hp1).condition=C_NE then
              taicpu(hp1).condition:=C_LT
            else
              taicpu(hp1).condition:=C_GE;

            DebugMsg('Peephole Slti0B2B performed', hp1);

            RemoveInstr(p);

            result:=true;
            exit;
          end
        else }
        if MatchInstruction(hp1,A_ANDI) and
           (taicpu(hp1).ops=3) and
           (taicpu(hp1).oper[2]^.typ=top_const) and
           { the mask must keep bit 0, the only bit slti can set }
           ((taicpu(hp1).oper[2]^.val and 1)=1) and
           MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
           { z becomes the destination of p, so nothing in between may touch it }
           (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           { x is no longer written, so it must be z or dead afterwards }
           (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) or
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) then
          begin
            DebugMsg('Peephole SltiAndi2Slti performed', hp1);
            AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
            taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

            RemoveInstr(hp1);

            result:=true;
            exit;
          end;
      end;
    { in all other branches we exit before }
    result:=OptPass1OP(p);
  end;
{ Peephole optimizations for andi with a constant mask.

    andi x, y, #a
    andi z, x, #b
    dealloc x
  becomes
    andi z, y, #(a and b)

  and, unless compiling for RISCV32,

    andi  x, y, #a      (0 <= #a)
    addiw z, x, 0
    dealloc x
  becomes
    andi  z, y, #a

  The second fold relies on the andi result already being a sign-extended
  32 bit value, which only holds for a non-negative mask.

  Falls back to OptPass1OP otherwise.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
  var
    hp1: tai;
  begin
    result:=false;
    if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
      begin
        if MatchInstruction(hp1,A_ANDI) and
           (taicpu(hp1).ops=3) and
           MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
           (taicpu(hp1).oper[2]^.typ=top_const) and
           is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
           (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
           RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
          begin
            { y is now read by hp1, keep it allocated up to there }
            AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
            taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

            DebugMsg('Peephole AndiAndi2Andi performed', hp1);

            RemoveInstr(p);

            result:=true;
          end
{$ifndef RISCV32}
        else if MatchInstruction(hp1,A_ADDIW) and
           (taicpu(hp1).ops=3) and
           MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
           (taicpu(hp1).oper[2]^.typ=top_const) and
           (taicpu(hp1).oper[2]^.val=0) and
           is_imm12(taicpu(p).oper[2]^.val) and
           { with a negative mask the upper bits of y survive the andi and
             the sign extension done by addiw is not a no-op }
           (taicpu(p).oper[2]^.val>=0) and
           (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
           { z becomes the destination of p, so nothing in between may touch it }
           (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
          begin
            AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
            taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

            DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

            RemoveInstr(hp1);

            result:=true;
          end
{$endif RISCV32}
        else
          result:=OptPass1OP(p);
      end
    else
      result:=OptPass1OP(p);
  end;
{ Peephole optimizations for "sltiu x,y,1".

  Currently performed:

    sltiu x,y,1
    andi  z,x,#m     (bit 0 of #m set)
    dealloc x
  becomes
    sltiu z,y,1

  sltiu only produces 0 or 1, so the andi is redundant exactly when the mask
  keeps bit 0. The fold is only done when z is not touched between the two
  instructions and x is either z itself or dead after the andi.

  Falls back to OptPass1OP otherwise.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1SLTIU(var p: tai): boolean;
  var
    hp1: tai;
  begin
    result:=false;
    if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val=1) and
       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
      begin
        {
          Turn
            sltiu x,y,1
            beq/ne x,x0,...
            dealloc x
          Into
            bne y,x0,...

          we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch
        if MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
          (taicpu(hp1).ops=3) and
          MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
          MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
          (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
          RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
          begin
            taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
            taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

            DebugMsg('Peephole Sltiu0B2B performed', hp1);

            RemoveInstr(p);

            result:=true;
            exit;
          end
        else }
        if MatchInstruction(hp1,A_ANDI) and
           (taicpu(hp1).ops=3) and
           (taicpu(hp1).oper[2]^.typ=top_const) and
           { the mask must keep bit 0, the only bit sltiu can set }
           ((taicpu(hp1).oper[2]^.val and 1)=1) and
           MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
           { z becomes the destination of p, so nothing in between may touch it }
           (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
           { x is no longer written, so it must be z or dead afterwards }
           (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) or
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) then
          begin
            DebugMsg('Peephole SltiuAndi2Sltiu performed', hp1);
            AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
            taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

            RemoveInstr(hp1);

            result:=true;
            exit;
          end;
      end;
    { in all other branches we exit before }
    result:=OptPass1OP(p);
  end;
{ Simplifies immediate shifts by zero.

    s*li  x,x,0   is removed
    s*li  x,y,0   becomes addi  x,y,0  (enables further optimizations)
    s*liw x,y,0   becomes addiw x,y,0  (riscv64 only)

  The *W forms still sign-extend the lower 32 bits when shifting by zero, so
  they are neither a nop nor a plain addi; addiw x,y,0 keeps that behaviour.

  Every other shift is handed to OptPass1OP.
  Returns true when the instruction stream was changed. }
function TRVCpuAsmOptimizer.OptPass1SxxI(var p: tai): boolean;
  begin
    result:=false;
    if (taicpu(p).ops=3) and
       (taicpu(p).oper[2]^.typ=top_const) and
       (taicpu(p).oper[2]^.val=0) then
      begin
{$ifdef riscv64}
        if taicpu(p).opcode in [A_SRAIW,A_SRLIW,A_SLLIW] then
          begin
            DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
            taicpu(p).opcode:=A_ADDIW;
            result:=true;
            exit;
          end;
{$endif riscv64}
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
            RemoveInstr(p);
            result:=true;
          end
        else
          begin
            { this enables further optimizations }
            DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
            taicpu(p).opcode:=A_ADDI;
            result:=true;
          end;
      end
    else
      result:=OptPass1OP(p);
  end;
{ Entry point of the first peephole pass: selects the handler for p by its
  opcode. Entries that are not instructions and opcodes without a handler
  are left untouched. Returns the result of the selected handler, false
  otherwise. }
function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  begin
    result:=false;
    case p.typ of
      ait_instruction:
        begin
          case taicpu(p).opcode of
            A_ADDI:
              result:=OptPass1Add(p);
            A_SUB:
              result:=OptPass1Sub(p);
            A_ANDI:
              result:=OptPass1Andi(p);
            A_SLT,
            A_SLTU:
              result:=OptPass1SLTx(p);
            A_SLTIU:
              result:=OptPass1SLTIU(p);
            { instructions that only get the generic "fold following
              register copy" treatment }
            A_LA,
            A_LUI,
            A_LB,
            A_LBU,
            A_LH,
            A_LHU,
            A_LW,
{$ifdef riscv64}
            A_LWU,
            A_LD,
{$endif riscv64}
            A_ADD,
{$ifdef riscv64}
            A_ADDIW,
            A_SUBW,
{$endif riscv64}
            A_DIV,
            A_DIVU,
{$ifdef riscv64}
            A_DIVW,
            A_DIVUW,
{$endif riscv64}
            A_REM,
            A_REMU,
{$ifdef riscv64}
            A_REMW,
            A_REMUW,
            A_MULW,
{$endif riscv64}
            A_MUL,
            A_MULH,
            A_MULHSU,
            A_MULHU,
            A_ORI,
            A_XORI,
            A_AND,
            A_OR,
            A_XOR,
{$ifdef riscv64}
            A_SLLW,
            A_SRLW,
            A_SRAW,
            A_ROLW,
            A_RORW,
            A_RORIW,
{$endif riscv64}
            A_SLL,
            A_SRL,
            A_SRA,
            A_ROL,
            A_ROR,
            A_RORI,
            A_NEG,
            A_NOT:
              result:=OptPass1OP(p);
{$ifdef riscv64}
            A_SRAIW,
            A_SRLIW,
            A_SLLIW,
{$endif riscv64}
            A_SRAI,
            A_SRLI,
            A_SLLI:
              result:=OptPass1SxxI(p);
            A_SLTI:
              result:=OptPass1SLTI(p);
            { single precision operations: a following fsgnj.s move can be folded }
            A_FADD_S,
            A_FSUB_S,
            A_FMUL_S,
            A_FDIV_S,
            A_FSQRT_S,
            A_FNEG_S,
            A_FLW,
            A_FCVT_D_S,
            A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
            A_FMIN_S,A_FMAX_S:
              result:=OptPass1FOP(p,A_FSGNJ_S);
            { double precision operations: a following fsgnj.d move can be folded }
            A_FADD_D,
            A_FSUB_D,
            A_FMUL_D,
            A_FDIV_D,
            A_FSQRT_D,
            A_FNEG_D,
            A_FLD,
            A_FCVT_S_D,
            A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
            A_FMIN_D,A_FMAX_D:
              result:=OptPass1FOP(p,A_FSGNJ_D);
            A_FEQ_S,
            A_FLT_S,
            A_FLE_S,
            A_FEQ_D,
            A_FLT_D,
            A_FLE_D:
              result:=OptPass1Fcmp(p);
            A_FSGNJ_S,
            A_FSGNJ_D:
              result:=OptPass1FSGNJ(p,taicpu(p).opcode);
            else
              ;
          end;
        end;
      else
        ;
    end;
  end;
- end.
|