{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the common RiscV optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit aoptcpurv;

interface

{$I fpcdefs.inc}

{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$endif EXTDEBUG}

uses
  cpubase,
  globals, globtype,
  cgbase,
  aoptobj, aoptcpub, aopt,
  aasmtai, aasmcpu;

type
  { Peephole optimizer shared by the RISC-V targets.  The OptPass1* methods
    each handle one family of opcodes and are dispatched from
    PeepHoleOptPass1Cpu. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

    Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function OptPass1OP(var p: tai): boolean;
    function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
    function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;
    function OptPass1SLTx(var p: tai): boolean;
    function OptPass1SLTI(var p: tai): boolean;
    function OptPass1Andi(var p: tai): boolean;
    function OptPass1SLTIU(var p: tai): boolean;
    function OptPass1SxxI(var p: tai): boolean;

    function OptPass1Add(var p: tai): boolean;
    function OptPass1Sub(var p: tai): boolean;

    function OptPass1Fcmp(var p: tai): boolean;

    procedure RemoveInstr(var orig: tai; moveback: boolean=true);
  end;

implementation

  uses
    cutils,
    verbose;


  { True if instr is an instruction whose opcode is in the set op and, when
    AConditions is not empty, whose condition is in AConditions. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        (taicpu(instr).opcode in op) and
        ((AConditions=[]) or (taicpu(instr).condition in AConditions));
    end;


  { Same as above, for a single opcode. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        (taicpu(instr).opcode = op) and
        ((AConditions=[]) or (taicpu(instr).condition in AConditions));
    end;


  { True if both operands have the same type and are the same constant or
    the same register.  Any other operand type (references included) never
    matches. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := oper1.typ = oper2.typ;

      if result then
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          {top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);}
          else Result:=false;
        end
    end;


  { True if oper is the register reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      result := (oper.typ = top_reg) and (oper.reg = reg);
    end;


{$ifdef DEBUG_AOPTCPU}
  { Inserts s as an assembler comment in front of p. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  { No-op unless DEBUG_AOPTCPU is defined. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}


  { True if hp is an instruction that has reg as a register operand which is
    not write-only, or as the base register of a reference operand. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      result:=false;
      if not (assigned(hp) and (hp.typ=ait_instruction)) then
        exit;
      p:=taicpu(hp);

      i:=0;
      while(i<p.ops) do
        begin
          case p.oper[I]^.typ of
            top_reg:
              result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(i)<>operand_write);
            top_ref:
              result:=
                (p.oper[I]^.ref^.base=reg);
            else
              ;
          end;
          if result then exit; {Bailout if we found something}
          Inc(I);
        end;
    end;


  { True if hp is an instruction with more than one operand whose operand 0
    is reg and is not a read-only operand. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=
        (hp.typ=ait_instruction) and
        (taicpu(hp).ops>1) and
        (taicpu(hp).oper[0]^.typ=top_reg) and
        (taicpu(hp).oper[0]^.reg=reg) and
        (taicpu(hp).spilling_get_operation_type(0)<>operand_read);
    end;


  { True if p1 is an instruction with a register operand Reg that is written
    or read-written. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      { only instructions have operands; do not cast anything else to taicpu }
      if not(assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;


  { Steps forward from Current.  Without cs_opt_level3 this stops at the very
    next instruction; with it, the search continues until a non-instruction,
    an instruction for which RegInInstruction(reg) holds, or a call/jump is
    reached.  The result is that of the last GetNextInstruction call, so
    callers still have to check what kind of instruction Next is. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
      until not (Result) or
            not(cs_opt_level3 in current_settings.optimizerswitches) or
            (Next.typ<>ait_instruction) or
            RegInInstruction(reg,Next) or
            is_calljmp(taicpu(Next).opcode);
    end;


  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <Op>   %reg3,%reg2,%reg1
          addi   %reg4,%reg3,0
          dealloc  %reg3

          by
          <Op>   %reg4,%reg2,%reg1
        ?
      }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        { addi may carry a reference as operand 2 (see OptPass1Add), so make
          sure it is a constant before looking at val }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;


  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <FOp>   %reg3,%reg2,%reg1
          <mvop>  %reg4,%reg3,%reg3
          dealloc %reg3

        by
          <FOp>   %reg4,%reg2,%reg1
        ?
      }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,mvop) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole FOpFsgnj02FOp done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;


  function TRVCpuAsmOptimizer.OptPass1Fcmp(var p: tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <Fcmp>  %ireg3,%freg2,%freg1
          <andi>  %ireg4,%ireg3,const
          dealloc %reg3

        by
          <Fcmp>   %ireg4,%freg2,%freg1
        ?

        The andi is only redundant if the mask keeps bit 0.
      }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ANDI) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        ((taicpu(hp1).oper[2]^.val and 1)=1) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole FcmpAndi2Fcmp done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;


  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <mvop>  %reg1,%reg2,%reg2
          <FOp>   %reg3,%reg1,%reg1
          dealloc %reg2

        by
          <FOp>   %reg3,%reg2,%reg2
        ?
      }
      if
        { fsgnj is only a plain move if both of its sources are the same
          register; a real sign injection must not be forwarded }
        (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
              A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
              A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
              A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
              A_FEQ_D]))) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
        ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
        ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that reads the copy }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;


  { Removes orig from the assembler list and frees it.  With moveback set,
    orig is left pointing at the previous instruction (or at the next one if
    there is no previous instruction); otherwise it is left pointing at the
    next instruction. }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      n:=nil;
      if moveback then
        begin
          if not GetLastInstruction(orig,n) then
            GetNextInstruction(orig,n);
        end
      else
        { previously n stayed uninitialised on this path }
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;


  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi/addiw z, y, #+#
          dealloc x
        (the opcode of the second instruction is kept)
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_ref) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                 A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_ref) and
         (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (taicpu(hp1).oper[1]^.ref^.offset=0) and
         { a store that also stores x itself needs the value computed by the addi }
         not(MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         //MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                 A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_ref) and
         (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (taicpu(hp1).oper[1]^.ref^.offset=0) and
         { a store that also stores x itself needs the value computed by the addi }
         not(MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          //taicpu(hp1).loadconst(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         ((MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_AND,A_OR,
            A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
            A_DIV,A_DIVU,A_REM,A_REMU,A_SLT,A_SLTU,A_SLTI,A_SLTIU
            {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
            A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
            A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
            ) and
          (taicpu(hp1).ops=3) and
          (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))) {or
          This is not possible yet as the deallocation after the jump could also mean that the register is in use at the
          jump target.
          (MatchInstruction(hp1, [A_Bxx]) and
          (taicpu(hp1).ops=3) and
          (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))) }
         ) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { if MatchInstruction(hp1, [A_Bxx]) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
            taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;


  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Turn
          sub x,y,z
          bgeu/beq X0,x,...
          dealloc x
        Into
          beq y,z,...
        (both branches are taken exactly when x is zero, i.e. when y=z)
      }
      if (taicpu(p).ops=3) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
         MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(hp1).condition:=C_EQ;

          DebugMsg('Peephole SubBxx2Beq performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;


  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Turn
          sltu x,X0,y
          beq/bne x, X0, ...
          dealloc x
        Into
          bltu/geu X0, y, ...
      }
      if (taicpu(p).ops=3) and
         MatchOperand(taicpu(p).oper[1]^,NR_X0) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(0,NR_X0);
          taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

          { bne x,X0 branches when the comparison held, beq when it did not }
          if taicpu(p).opcode=A_SLTU then
            begin
              if taicpu(hp1).condition=C_NE then
                taicpu(hp1).condition:=C_LTU
              else
                taicpu(hp1).condition:=C_GEU;
            end
          else
            begin
              if taicpu(hp1).condition=C_NE then
                taicpu(hp1).condition:=C_LT
              else
                taicpu(hp1).condition:=C_GE;
            end;

          DebugMsg('Peephole SltuB2B 1 performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Turn
          sltu x,y,z
          beq/bne x, X0, ...
          dealloc x
        Into
          bltu/geu y, z, ...
      }
      else if (taicpu(p).ops=3) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

          if taicpu(p).opcode=A_SLTU then
            begin
              if taicpu(hp1).condition=C_NE then
                taicpu(hp1).condition:=C_LTU
              else
                taicpu(hp1).condition:=C_GEU;
            end
          else
            begin
              if taicpu(hp1).condition=C_NE then
                taicpu(hp1).condition:=C_LT
              else
                taicpu(hp1).condition:=C_GE;
            end;

          DebugMsg('Peephole SltuB2B 2 performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;


  function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Turn
          slti x,y,0
          beq/ne x,x0,...
          dealloc x
        Into
          bge/lt y,x0,...
        (disabled, see below)

        and turn
          slti x,y,0
          andi z,x,#    (# has bit 0 set)
        Into
          slti z,y,0
      }
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
{
          we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch
          if MatchInstruction(hp1,A_Bxx) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[0]^.typ=top_reg) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.typ=top_reg) and
            (taicpu(hp1).oper[1]^.reg=NR_X0) and
            (taicpu(hp1).condition in [C_NE,C_EQ]) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadreg(1,NR_X0);

              if taicpu(hp1).condition=C_NE then
                taicpu(hp1).condition:=C_LT
              else
                taicpu(hp1).condition:=C_GE;

              DebugMsg('Peephole Slti0B2B performed', hp1);

              RemoveInstr(p);

              result:=true;
              exit;
            end
          else } if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            { slti yields 0 or 1, so the andi is only redundant if its mask
              keeps bit 0; a mask like 2 would always produce 0 }
            (taicpu(hp1).oper[2]^.typ=top_const) and
            ((taicpu(hp1).oper[2]^.val and 1)=1) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
            { x is no longer written afterwards unless it is also the
              destination of the andi, so it must not be needed later }
            (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) or
             RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) and
            { NOTE(review): z is now written earlier than before; this only
              checks that z is not modified in between, not that it is
              unread in between - confirm whether that can occur }
            (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) then
            begin
              DebugMsg('Peephole SltiAndi2Slti performed', hp1);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              RemoveInstr(hp1);

              result:=true;
              exit;
            end;
        end;
      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;


  function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          {
            Changes
              andi x, y, #
              andi z, x, #
              dealloc x
            To
              andi z, y, # and #
          }
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

              DebugMsg('Peephole AndiAndi2Andi performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
          {
            Changes
              andi x, y, #ff or ...
              sb x, ...
              dealloc x
            To
              sb y, ...
          }
          else if MatchInstruction(hp1,A_SB) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
            ((taicpu(p).oper[2]^.val and $ff)=$ff) and
            { if x is also the address base of the store, the andi is still needed }
            not((taicpu(hp1).oper[1]^.typ=top_ref) and
                (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg)) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);

              DebugMsg('Peephole AndiSb2Sb performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
{$ifndef RISCV32}
          {
            Changes
              andi x, y, #     (# >= 0)
              addiw z, x, 0
              dealloc x
            To
              andi z, y, #
          }
          else if MatchInstruction(hp1,A_ADDIW) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=0) and
            is_imm12(taicpu(p).oper[2]^.val) and
            { only a non-negative mask guarantees that the upper bits are
              already zero, which makes the sign extension a no-op }
            (taicpu(p).oper[2]^.val>=0) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

              RemoveInstr(hp1);

              result:=true;
            end
{$endif RISCV32}
          else
            result:=OptPass1OP(p);
        end
      else
        result:=OptPass1OP(p);
    end;


  function TRVCpuAsmOptimizer.OptPass1SLTIU(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Turn
          sltiu x,y,1
          beq/ne x,x0,...
          dealloc x
        Into
          bne/beq y,x0,...
        (disabled, see below)

        and turn
          sltiu x,y,1
          andi z,x,#    (# has bit 0 set)
        Into
          sltiu z,y,1
      }
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=1) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
         begin
{
           we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch
           if MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
             (taicpu(hp1).ops=3) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
             (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
             RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
             begin
               taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
               taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

               DebugMsg('Peephole Sltiu0B2B performed', hp1);

               RemoveInstr(p);

               result:=true;
               exit;
             end
           else } if MatchInstruction(hp1,A_ANDI) and
             (taicpu(hp1).ops=3) and
             { sltiu yields 0 or 1, so the andi is only redundant if its
               mask keeps bit 0 }
             (taicpu(hp1).oper[2]^.typ=top_const) and
             ((taicpu(hp1).oper[2]^.val and 1)=1) and
             MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
             { x is no longer written afterwards unless it is also the
               destination of the andi, so it must not be needed later }
             (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) or
              RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) and
             { NOTE(review): z is now written earlier than before; this only
               checks that z is not modified in between, not that it is
               unread in between - confirm whether that can occur }
             (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) then
             begin
               DebugMsg('Peephole SltiuAndi2Sltiu performed', hp1);
               AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
               taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

               RemoveInstr(hp1);

               result:=true;
               exit;
             end;
         end;
      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;


  { Simplifies immediate shifts by zero:
      slli/srli/srai    x,x,0  is removed
      slli/srli/srai    x,y,0  becomes addi  x,y,0
      slliw/srliw/sraiw x,y,0  becomes addiw x,y,0
    The word variants sign extend the lower 32 bits of their result, so they
    are neither a nop nor a plain move; addiw with 0 keeps that behaviour. }
  function TRVCpuAsmOptimizer.OptPass1SxxI(var p: tai): boolean;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) then
        begin
{$ifdef riscv64}
          if taicpu(p).opcode in [A_SLLIW,A_SRLIW,A_SRAIW] then
            begin
              { this enables further optimizations }
              DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
              taicpu(p).opcode:=A_ADDIW;
              result:=true;
            end
          else
{$endif riscv64}
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
              RemoveInstr(p);
              result:=true;
            end
          else
            begin
              { this enables further optimizations }
              DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
              taicpu(p).opcode:=A_ADDI;
              result:=true;
            end;
        end
      else
        result:=OptPass1OP(p);
    end;


  { Entry point of the first peephole pass: dispatches on the opcode of p to
    the matching OptPass1* routine.  Returns true if something was changed. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                result:=OptPass1Sub(p);
              A_ANDI:
                result:=OptPass1Andi(p);
              A_SLT,
              A_SLTU:
                result:=OptPass1SLTx(p);
              A_SLTIU:
                result:=OptPass1SLTIU(p);
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
{$ifdef riscv64}
              A_LWU,
              A_LD,
{$endif riscv64}
              A_ADD,
{$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
{$endif riscv64}
              A_DIV,
              A_DIVU,
{$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
{$endif riscv64}
              A_REM,
              A_REMU,
{$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
{$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
{$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
              A_ROLW,
              A_RORW,
              A_RORIW,
{$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_ROL,
              A_ROR,
              A_RORI,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
{$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW,
{$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                result:=OptPass1SxxI(p);
              A_SLTI:
                result:=OptPass1SLTI(p);
              { instructions producing a single precision result; a copy of
                the result is a fsgnj.s.  fcvt.s.d converts to single. }
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_S_D,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              { instructions producing a double precision result; a copy of
                the result is a fsgnj.d.  fcvt.d.s converts to double. }
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_D_S,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FEQ_S,
              A_FLT_S,
              A_FLE_S,
              A_FEQ_D,
              A_FLT_D,
              A_FLE_D:
                result:=OptPass1Fcmp(p);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;

end.