{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the common RiscV optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}

unit aoptcpurv;

interface

{$I fpcdefs.inc}

{ NOTE(review): with this define active, every peephole that fires inserts a
  comment into the assembler list (see DebugMsg). Confirm this is intended
  for release builds. }
{$define DEBUG_AOPTCPU}

uses
  cpubase,
  globals, globtype,
  cgbase,
  aoptobj, aoptcpub, aopt,
  aasmtai, aasmcpu;

type

  { Peephole optimizer shared by the RiscV targets. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
    { true if hp is an instruction that reads reg, either as a register
      operand that is not write-only or as the base of a memory reference }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    { true if hp is an instruction with more than one operand whose operand 0
      is reg and is not a pure read }
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    { true if p1 is an instruction that writes (or reads and writes) Reg }
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

    { advances from Current to the next entry at which the search has to stop;
      see the implementation for the exact stop conditions }
    Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function OptPass1OP(var p: tai): boolean;
  end;

implementation

  uses
    cutils;

  { Returns true if instr is an instruction whose opcode is in the set op and,
    when AConditions is not empty, whose condition is in AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        (taicpu(instr).opcode in op) and
        ((AConditions=[]) or (taicpu(instr).condition in AConditions));
    end;


  { Same as above for a single opcode. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        (taicpu(instr).opcode = op) and
        ((AConditions=[]) or (taicpu(instr).condition in AConditions));
    end;


  { Returns true if both operands have the same type and the same value.
    Only constants and registers are compared; every other operand type
    (including references) never matches. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := oper1.typ = oper2.typ;

      if result then
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          {top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);}
          else Result:=false;
        end
    end;


  { Returns true if oper is a register operand holding reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      result := (oper.typ = top_reg) and (oper.reg = reg);
    end;


  { Returns true if instr is one of the store opcodes handled by the
    addi+load/store peepholes and its operand 0 (the value being stored)
    is reg. Such a store still needs the result of the preceding addi as
    data, so the addi must not be folded away. }
  function IsStoreOfReg(const instr: taicpu; reg: TRegister): boolean;
    begin
      result :=
        (instr.opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
        (instr.ops>0) and
        (instr.oper[0]^.typ=top_reg) and
        (instr.oper[0]^.reg=reg);
    end;


{$ifdef DEBUG_AOPTCPU}
  { Inserts s as an assembler comment in front of p. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}


  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      result:=false;
      if not (assigned(hp) and (hp.typ=ait_instruction)) then
        exit;
      p:=taicpu(hp);

      i:=0;
      while(i<p.ops) do
        begin
          case p.oper[I]^.typ of
            top_reg:
              { a register operand counts unless it is write-only }
              result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(i)<>operand_write);
            top_ref:
              { only the base register of a reference is examined }
              result:=
                (p.oper[I]^.ref^.base=reg);
            else
              ;
          end;
          if result then exit; {Bailout if we found something}
          Inc(I);
        end;
    end;


  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=
        (hp.typ=ait_instruction) and
        (taicpu(hp).ops>1) and
        (taicpu(hp).oper[0]^.typ=top_reg) and
        (taicpu(hp).oper[0]^.reg=reg) and
        (taicpu(hp).spilling_get_operation_type(0)<>operand_read);
    end;


  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      { p1 is cast to taicpu below, so anything that is not an instruction
        must be rejected first (same guard as InstructionLoadsFromReg) }
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;


  { Repeatedly calls GetNextInstruction starting at Current and stops as soon
    as one of the following holds:
      - GetNextInstruction failed (the result is then false),
      - cs_opt_level3 is not enabled (so only the directly following entry
        is considered),
      - the entry found is not an instruction,
      - the instruction found references reg,
      - the instruction found is a call or jump.
    Next receives the entry at which the search stopped; callers still have
    to check what kind of entry it is. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
      until not (Result) or
            not(cs_opt_level3 in current_settings.optimizerswitches) or
            (Next.typ<>ait_instruction) or
            RegInInstruction(reg,Next) or
            is_calljmp(taicpu(Next).opcode);
    end;


  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <Op>   %reg3,%reg2,%reg1
          addi   %reg4,%reg3,0
          dealloc  %reg3

          by
          <Op>   %reg4,%reg2,%reg1
      }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        { the third operand of an addi can also be a reference, so make sure
          it really is a constant before looking at its value }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              { let <Op> write directly into the destination of the addi }
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;


  { First-pass peephole optimizations. Every pattern below folds the
    instruction p into a later instruction hp1 that consumes p's result,
    removes p and returns true; p is then repositioned by RemoveInstr. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;

    { Removes and frees orig. Afterwards orig points to the previous
      instruction if moveback is set and one exists, otherwise to the
      following instruction. }
    procedure RemoveInstr(var orig: tai; moveback: boolean = true);
      var
        n: tai;
      begin
        n:=nil;
        { without moveback, or if there is no previous instruction, continue
          with the following one so that n is always assigned }
        if not (moveback and GetLastInstruction(orig,n)) then
          GetNextInstruction(orig,n);

        AsmL.Remove(orig);
        orig.Free;

        orig:=n;
      end;

    var
      hp1: tai;
    begin
      result:=false;

      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                begin
                  {
                    Changes
                      addi x, y, #
                      addi/addiw z, x, #
                      dealloc x
                    To
                      addi z, y, #+#
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     { the combined immediate must still be encodable }
                     is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

                      DebugMsg('Peephole AddiAddi2Addi performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, x, (ref)
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, (ref)(x)

                    Not done for a store whose value register is x: it
                    needs the result of the addi as data.
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_ref) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                            A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
                     (taicpu(hp1).ops=2) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     (not IsStoreOfReg(taicpu(hp1), taicpu(p).oper[0]^.reg)) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      { take over the complete reference of the addi, then
                        restore the base register }
                      taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, z, #w
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, #w(z)

                    Unlike the variant above, x and z may be different
                    registers here. Again not done for a store whose value
                    register is x.
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                            A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
                     (taicpu(hp1).ops=2) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     (not IsStoreOfReg(taicpu(hp1), taicpu(p).oper[0]^.reg)) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu/beq X0,x,...
                      dealloc x
                    Into
                      beq y,z,...

                    (0 >= x unsigned holds exactly when x = 0, i.e. y = z)
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
                     MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBxx2Beq performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLT,
              A_SLTU:
                begin
                  {
                    Turn
                      slt/sltu x,y,z
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bge/blt y, z, ...     (slt)
                      bgeu/bltu y, z, ...   (sltu)

                    y may be X0; in that case it does not have to be checked
                    for modifications between the two instructions.
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (MatchOperand(taicpu(p).oper[1]^,NR_X0) or
                      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1))) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      { bne x,X0 branches when the comparison was true,
                        beq x,X0 when it was false }
                      if taicpu(p).opcode=A_SLTU then
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LTU
                          else
                            taicpu(hp1).condition:=C_GEU;
                        end
                      else
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LT
                          else
                            taicpu(hp1).condition:=C_GE;
                        end;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bne/eq y,x0,...

                    (y < 1 unsigned holds exactly when y = 0, so the
                    condition is inverted)
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=1) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SRLI,
              A_SLLI:
                result:=OptPass1OP(p);
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bge/lt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      { operand 1 of the branch is already X0 and stays }
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);

                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;

end.