aoptcpurv.pas 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. uses
  25. cpubase,
  26. globals, globtype,
  27. cgbase,
  28. aoptobj, aoptcpub, aopt,
  29. aasmtai, aasmcpu;
  type
    { Peephole optimizer object shared by the RISC-V targets.
      It refines the register queries of TAsmOptimizer and implements the
      pass 1 peephole patterns. }
    TRVCpuAsmOptimizer = class(TAsmOptimizer)
      { --- register usage queries --- }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
      { steps forward from Current until an instruction is found that
        terminates the search (see the implementation for the stop rules) }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { --- pass 1 peephole optimizations --- }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function OptPass1OP(var p: tai): boolean;
      function OptPass1FOP(var p: tai; mvop: tasmop): boolean;
      function OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
      function OptPass1SLTx(var p: tai): boolean;
      function OptPass1SLTI(var p: tai): boolean;
      function OptPass1Andi(var p: tai): boolean;
      function OptPass1SLTIU(var p: tai): boolean;
      function OptPass1SxxI(var p: tai): boolean;
      function OptPass1Add(var p: tai): boolean;
      function OptPass1Sub(var p: tai): boolean;

      { unlinks and frees orig, then repositions orig on a neighbouring instruction }
      procedure RemoveInstr(var orig: tai; moveback: boolean = true);
    end;
  51. implementation
  52. uses
  53. cutils,
  54. verbose;
  { Returns true if instr is a CPU instruction whose opcode is a member of op
    and, when AConditions is not empty, whose condition is a member of
    AConditions. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if not (taicpu(instr).opcode in op) then
        exit;
      { an empty condition set means "any condition" }
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true if instr is a CPU instruction with exactly the opcode op and,
    when AConditions is not empty, a condition that is a member of
    AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { an empty condition set means "any condition" }
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands. Only constants (by value) and registers (by
    register) can match; operands of different kinds and all other operand
    kinds, references included, never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        { a RefsEqual based comparison for top_ref is disabled,
          so references fall through to "no match" }
        else
          ;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Outputs a debug message into the assembler file: with DEBUG_AOPTCPU
    defined, s is inserted as an assembler comment in front of p; otherwise
    the routine is an empty inline procedure. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true if the instruction hp reads reg: either as a register
    operand that is not write-only, or as the base register of a reference
    operand. Returns false when hp is nil or not an instruction. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for opidx:=0 to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              result:=(instr.oper[opidx]^.reg=reg) and
                (instr.spilling_get_operation_type(opidx)<>operand_write);
            top_ref:
              result:=instr.oper[opidx]^.ref^.base=reg;
            else
              ;
          end;
          { stop at the first operand that reads reg }
          if result then
            exit;
        end;
    end;
  { Returns true if hp is an instruction with more than one operand whose
    first operand is the register reg and is not a read-only operand, i.e.
    the instruction writes reg through operand 0. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      if instr.ops<=1 then
        exit;
      if instr.oper[0]^.typ<>top_reg then
        exit;
      result:=(instr.oper[0]^.reg=reg) and
        (instr.spilling_get_operation_type(0)<>operand_read);
    end;
  { Returns true if the instruction p1 has a register operand equal to Reg
    that is written (write or read-write operand).
    Returns false when p1 is nil or not an instruction; this guard mirrors
    InstructionLoadsFromReg and avoids an unchecked cast to taicpu. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and
              (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;
  { Advances from Current with GetNextInstruction and stores the entry the
    search stopped at in Next. The search stops as soon as
      - GetNextInstruction fails,
      - optimization level 3 is not enabled (so only one step is made),
      - the entry found is not an instruction,
      - the instruction found references reg, or
      - the instruction found is a call or jump.
    The function result is the result of the last GetNextInstruction call,
    so callers still have to check what kind of entry Next is. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not (cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Folds a register copy that directly follows p into p itself.

    replace
      <Op>   %reg3,%reg2,%reg1
      addi   %reg4,%reg3,0
      dealloc %reg3
    by
      <Op>   %reg4,%reg2,%reg1

    Returns true if the addi was removed. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        (taicpu(hp1).ops=3) and
        { the third operand of addi can also be a reference (e.g. a %lo()
          relocation); val is only meaningful for a constant operand }
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must not be needed after the copy }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Folds a floating point register move (mvop with both sources equal) that
    directly follows p into p itself.

    replace
      <FOp>   %reg3,%reg2,%reg1
      <mvop>  %reg4,%reg3,%reg3
      dealloc %reg3
    by
      <FOp>   %reg4,%reg2,%reg1

    Returns true if the move was removed. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      movinstr : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,movinstr) then
        exit;
      if not MatchInstruction(movinstr,mvop) then
        exit;
      { both sources of the move have to be the result of p }
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(movinstr).oper[1]^) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(movinstr).oper[2]^) then
        exit;
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      { the intermediate register must not be needed after the move }
      if RegUsedAfterInstruction(taicpu(movinstr).oper[1]^.reg,movinstr,TmpUsedRegs) then
        exit;
      taicpu(p).loadoper(0,taicpu(movinstr).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(movinstr);
      result:=true;
    end;
  { Forwards the source of a floating point register move into the
    instruction that directly follows it.

    replace
      <mvop>  %reg1,%reg2,%reg2
      <FOp>   %reg3,%reg1,%reg1
      dealloc %reg1
    by
      <FOp>   %reg3,%reg2,%reg2

    mvop is the opcode of p (A_FSGNJ_S or A_FSGNJ_D) and selects the set of
    follow-up instructions of the matching precision.
    Returns true if the move was removed. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        { fsgnj is only a plain register move if both sources are the same
          register; with different sources the sign is taken from the second
          one and the result is not a copy of the first }
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        { the copy has to be used as one of the sources of hp1 }
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
         ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source of hp1 that reads the copy by the original }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and lets orig refer to a
    neighbouring instruction instead.

    moveback=true : orig becomes the previous instruction; if there is none,
                    the next instruction is used.
    moveback=false: orig becomes the next instruction. (Previously the
                    replacement was left uninitialised in this case.) }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      { the neighbour has to be looked up while orig is still linked }
      if not (moveback and GetLastInstruction(orig,n)) then
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Peephole optimizations for an addi instruction p (this is the handler
    PeepHoleOptPass1Cpu uses for A_ADDI). The patterns are tried in order;
    if none applies, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi/addiw z, y, #+#
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        { the combined immediate still has to fit into 12 bits }
        is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)

        Not done for a store whose stored value is x itself: the store
        would then write x without the addition applied.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_ref) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        (not (taicpu(hp1).opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) or
         not MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)

        Not done for a store whose stored value is x itself: after removing
        the addi, x would no longer hold z+#w when it is stored.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        (not (taicpu(hp1).opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) or
         not MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z

        The same fold for conditional branches (A_Bxx) is not possible yet,
        as the deallocation after the jump could also mean that the register
        is in use at the jump target.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_SLT,A_AND,A_OR,
          A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
          A_DIV,A_DIVU,A_REM,A_REMU,A_SLTI,A_SLTIU
          {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
          A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
          A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
          ) and
        (taicpu(hp1).ops=3) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { w can be used as either or both sources of hp1 }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for a sub instruction p.

    Turn
      sub x,y,z
      bgeu/beq X0,x,...
      dealloc x
    Into
      beq y,z,...

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed. }
  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      hp1: tai;
      foldable: boolean;
    begin
      foldable:=
        (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1));

      if not foldable then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

      { compare the operands of the sub directly instead of its result }
      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
      taicpu(hp1).condition:=C_EQ;

      DebugMsg('Peephole SubBxx2Beq performed', hp1);

      RemoveInstr(p);

      result:=true;
    end;
  { Peephole optimizations for slt/sltu instructions p: a following
    beq/bne of the result against X0 is replaced by a branch that performs
    the comparison itself.

    Turn
      slt(u) x,X0,y               or      slt(u) x,y,z
      beq/bne x, X0, ...                  beq/bne x, X0, ...
      dealloc x                           dealloc x
    Into
      bge(u)/blt(u) X0, y, ...            bge(u)/blt(u) y, z, ...

    If neither pattern applies, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      hp1: tai;

    { bne (result set) becomes "less than", beq (result clear) becomes
      "greater or equal"; unsigned variants are used for sltu }
    procedure FoldConditionIntoBranch;
      var
        wasne: boolean;
      begin
        wasne:=taicpu(hp1).condition=C_NE;
        if taicpu(p).opcode=A_SLTU then
          begin
            if wasne then
              taicpu(hp1).condition:=C_LTU
            else
              taicpu(hp1).condition:=C_GEU;
          end
        else
          begin
            if wasne then
              taicpu(hp1).condition:=C_LT
            else
              taicpu(hp1).condition:=C_GE;
          end;
      end;

    begin
      result:=false;
      { first source is X0 }
      if (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,NR_X0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(0,NR_X0);
          taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

          FoldConditionIntoBranch;

          DebugMsg('Peephole SltuB2B performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      { general case: both sources are taken over by the branch }
      else if (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

          FoldConditionIntoBranch;

          DebugMsg('Peephole SltuB2B performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for an slti instruction p.

    Turn
      slti x,y,0
      andi z,x,#m        with bit 0 of #m set
      dealloc x
    Into
      slti z,y,0

    slti only produces 0 or 1, so the andi is redundant exactly when its
    mask keeps bit 0; a mask with bit 0 clear would always yield 0 and must
    not be folded. Since x is no longer written afterwards, its life has to
    end at the andi.

    Folding "slti x,y,0; beq/bne x,X0" into "bge/blt y,X0" is not done yet,
    as it is not known whether x is still used after taking the branch.

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed.

    NOTE(review): z is only checked for modifications between the two
    instructions, not for reads - confirm whether a read of z in between is
    possible at -O3. }
  function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_ANDI) and
        (taicpu(hp1).ops=3) and
        { val is only meaningful for a constant operand }
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val>0) and
        odd(taicpu(hp1).oper[2]^.val) and
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          DebugMsg('Peephole SltiAndi2Slti performed', hp1);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

          RemoveInstr(hp1);

          result:=true;
          exit;
        end;

      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Peephole optimizations for an andi instruction p.

    Changes
      andi x, y, #a
      andi z, x, #b
      dealloc x
    To
      andi z, y, #a and #b

    and, on riscv64 only,
      andi  x, y, #a       with #a >= 0
      addiw z, x, 0
      dealloc x
    To
      andi  z, y, #a

    The second fold requires a non-negative mask: only then the andi result
    has its upper bits clear, so the sign extension of the low 32 bits done
    by addiw does not change the value. With a negative mask the upper bits
    of y pass through and the addiw is not redundant.

    If no pattern applies, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed. }
  function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

              DebugMsg('Peephole AndiAndi2Andi performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
  {$ifdef riscv64}
          else if MatchInstruction(hp1,A_ADDIW) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=0) and
            { a negative mask keeps the upper bits of y, see above }
            (taicpu(p).oper[2]^.val>=0) and
            is_imm12(taicpu(p).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            { p writes z from now on, so z must not change before hp1 }
            (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

              RemoveInstr(hp1);

              result:=true;
            end
  {$endif riscv64}
          else
            result:=OptPass1OP(p);
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for an sltiu instruction p.

    Turn
      sltiu x,y,1
      andi  z,x,#m       with bit 0 of #m set
      dealloc x
    Into
      sltiu z,y,1

    sltiu only produces 0 or 1, so the andi is redundant exactly when its
    mask keeps bit 0; a mask with bit 0 clear would always yield 0 and must
    not be folded. Since x is no longer written afterwards, its life has to
    end at the andi.

    Folding "sltiu x,y,1; beq/bne x,X0" into a branch on y with the inverse
    condition is not done yet, as it is not known whether x is still used
    after taking the branch.

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true if the instruction stream was changed.

    NOTE(review): z is only checked for modifications between the two
    instructions, not for reads - confirm whether a read of z in between is
    possible at -O3. }
  function TRVCpuAsmOptimizer.OptPass1SLTIU(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=1) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_ANDI) and
        (taicpu(hp1).ops=3) and
        { val is only meaningful for a constant operand }
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val>0) and
        odd(taicpu(hp1).oper[2]^.val) and
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          DebugMsg('Peephole SltiuAndi2Sltiu performed', hp1);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

          RemoveInstr(hp1);

          result:=true;
          exit;
        end;

      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Peephole optimizations for immediate shifts p
    (slli/srli/srai and, on riscv64, slliw/srliw/sraiw).

    A shift by 0 bits is simplified:
      slli/srli/srai    x,x,0  -> removed
      slli/srli/srai    x,y,0  -> addi  x,y,0
      slliw/srliw/sraiw x,y,0  -> addiw x,y,0
    The word variants still sign-extend the low 32 bits of their source even
    for a shift amount of 0, so they are neither a no-op nor a plain copy;
    addiw with a zero immediate performs the same operation.

    All other shifts are passed on to the generic OptPass1OP.
    Returns true if the instruction stream was changed. }
  function TRVCpuAsmOptimizer.OptPass1SxxI(var p: tai): boolean;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) then
        begin
  {$ifdef riscv64}
          if taicpu(p).opcode in [A_SLLIW,A_SRLIW,A_SRAIW] then
            begin
              DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
              taicpu(p).opcode:=A_ADDIW;
              result:=true;
            end
          else
  {$endif riscv64}
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
              RemoveInstr(p);
              result:=true;
            end
          else
            begin
              { this enables further optimizations }
              DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
              taicpu(p).opcode:=A_ADDI;
              result:=true;
            end;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Entry point of the pass 1 peephole optimizer: dispatches the instruction
    p to the handler for its opcode. Entries that are not instructions and
    opcodes without a handler are left alone.
    Returns true if the instruction stream was changed.

    The floating point operations are handed to OptPass1FOP together with
    the move opcode matching the precision of their RESULT, as this is the
    move that can copy the result: fcvt.d.s produces a double (fsgnj.d),
    fcvt.s.d produces a single (fsgnj.s). }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                result:=OptPass1Sub(p);
              A_ANDI:
                result:=OptPass1Andi(p);
              A_SLT,
              A_SLTU:
                result:=OptPass1SLTx(p);
              A_SLTIU:
                result:=OptPass1SLTIU(p);
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
  {$ifdef riscv64}
              A_LWU,
              A_LD,
  {$endif riscv64}
              A_ADD,
  {$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
  {$endif riscv64}
              A_DIV,
              A_DIVU,
  {$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
  {$endif riscv64}
              A_REM,
              A_REMU,
  {$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
  {$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
  {$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
  {$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
  {$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW,
  {$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                result:=OptPass1SxxI(p);
              A_SLTI:
                result:=OptPass1SLTI(p);
              { operations with a single precision result }
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_S_D,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              { operations with a double precision result }
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_D_S,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  817. end.