aoptcpurv.pas 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  28. type
  { Common RiscV peephole optimizer object. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
    { --- register usage queries (overrides of the generic optimizer) --- }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

    { steps forward from Current; with -O3 it keeps going until an instruction
      that references reg, a call/jump or a non-instruction is reached }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    { --- pass 1 peephole optimizations --- }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function OptPass1OP(var p: tai): boolean;
    function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
    function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;
    function OptPass1SLTx(var p: tai): boolean;
    function OptPass1SLTI(var p: tai): boolean;
    function OptPass1Andi(var p: tai): boolean;
    function OptPass1Add(var p: tai): boolean;
    function OptPass1Sub(var p: tai): boolean;

    { unlinks and frees orig, then repositions orig on a neighbouring instruction }
    procedure RemoveInstr(var orig: tai; moveback: boolean=true);
  end;
  47. implementation
  48. uses
  49. cutils,
  50. verbose;
  { Returns true if instr is an instruction whose opcode is a member of op and,
    when AConditions is not empty, whose condition is a member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        if taicpu(instr).opcode in op then
          result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true if instr is an instruction with exactly the opcode op and,
    when AConditions is not empty, whose condition is a member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        if taicpu(instr).opcode=op then
          result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands: they match when they have the same type and are
    either equal constants or the same register. Operands of any other type
    (including references) never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ=oper2.typ then
        begin
          case oper1.typ of
            top_const:
              result:=(oper1.val=oper2.val);
            top_reg:
              result:=(oper1.reg=oper2.reg);
            // top_ref: references are not compared yet
            //   (would be: result:=RefsEqual(oper1.ref^, oper2.ref^))
            else
              result:=false;
          end;
        end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Outputs a debug message into the assembler file: when DEBUG_AOPTCPU is
    defined, s is inserted as a comment in front of p; otherwise this is an
    empty (inlined) routine. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if the instruction hp reads reg: either through a register
    operand that is not write-only, or as the base register of a reference
    operand. Anything that is not an instruction yields false. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for idx:=0 to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              result:=(instr.oper[idx]^.reg=reg) and
                (instr.spilling_get_operation_type(idx)<>operand_write);
            top_ref:
              result:=(instr.oper[idx]^.ref^.base=reg);
            else
              ;
          end;
          { stop at the first operand that reads reg }
          if result then
            break;
        end;
    end;
  { Returns true if hp is an instruction with more than one operand whose
    first operand is the register reg and is not a pure read operand. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      if instr.ops<=1 then
        exit;
      if instr.oper[0]^.typ<>top_reg then
        exit;
      result:=(instr.oper[0]^.reg=reg) and
        (instr.spilling_get_operation_type(0)<>operand_read);
    end;
  { Returns true if the instruction p1 writes Reg, i.e. Reg appears as a
    register operand whose operation type is write or read-write.

    p1 is only interpreted as a taicpu when it really is an instruction; for
    nil or any other kind of tai the answer is false (this mirrors the checks
    done by InstructionLoadsFromReg and RegLoadedWithNewValue). }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      { never cast a non-instruction to taicpu }
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        if (taicpu(p1).oper[i]^.typ=top_reg) and
           (taicpu(p1).oper[i]^.reg=Reg) and
           (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
          exit(true);
    end;
  { Advances from Current to a following instruction and returns it in Next;
    the result is that of the last GetNextInstruction call.

    Without cs_opt_level3 only a single step is taken. With it, the search
    continues until GetNextInstruction fails, a non-instruction is found, an
    instruction referencing reg is found, or a call/jump is reached. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not Result then
          break;
        if not (cs_opt_level3 in current_settings.optimizerswitches) then
          break;
        if Next.typ<>ait_instruction then
          break;
        if RegInInstruction(reg,Next) then
          break;
      until is_calljmp(taicpu(Next).opcode);
    end;
  { Generic "operation followed by register move" peephole.

    Replaces
        <Op>   %reg3,%reg2,%reg1
        addi   %reg4,%reg3,0
        dealloc  %reg3
    by
        <Op>   %reg4,%reg2,%reg1

    p is the <Op> instruction. Returns true if the move was folded into p
    (the addi is removed in that case).

    The addi is only treated as a move when it has three operands and its
    third operand really is the constant 0: addi may also carry a reference
    as third operand, whose val field must not be interpreted. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must be dead after the move }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Floating point "operation followed by register move" peephole.

    Replaces
        <FOp>   %reg3,%reg2,%reg1
        <mvop>  %reg4,%reg3,%reg3
        dealloc  %reg3
    by
        <FOp>   %reg4,%reg2,%reg1

    p is the <FOp> instruction, mvop the opcode acting as register move.
    Returns true if the move was folded into p. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      mv : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,mv) then
        exit;
      if not MatchInstruction(mv,mvop) then
        exit;
      { both sources of the move must be the result of p }
      if not (MatchOperand(taicpu(p).oper[0]^,taicpu(mv).oper[1]^) and
              MatchOperand(taicpu(p).oper[0]^,taicpu(mv).oper[2]^)) then
        exit;
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(mv).oper[1]^.reg,mv,TmpUsedRegs) then
        exit;
      taicpu(p).loadoper(0,taicpu(mv).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(mv);
      result:=true;
    end;
  { Floating point "register move followed by operation" peephole.

    Replaces
        <mvop>  %reg1,%reg2,%reg2
        <FOp>   %reg3,%reg1,%reg1
        dealloc  %reg1
    by
        <FOp>   %reg3,%reg2,%reg2

    p is the fsgnj instruction, mvop its opcode (A_FSGNJ_S or A_FSGNJ_D).
    Returns true if p was removed and its source forwarded into <FOp>.

    fsgnj rd,rs1,rs2 is only a plain register move when rs1 = rs2 (otherwise
    it combines the magnitude of rs1 with the sign of rs2), so the forwarding
    is only done when both source operands of p are the same register. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        { p must really be a move }
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        (taicpu(hp1).ops>=2) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
         ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that is the moved register }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          { the source of the move is now live up to hp1 }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and repositions orig on a
    neighbouring instruction.

    moveback=true : orig becomes the previous instruction; if there is none,
                    the following instruction is used instead.
    moveback=false: orig becomes the following instruction.

    Previously the replacement was left unassigned when moveback was false,
    so orig ended up holding an undefined pointer. }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      { n must be determined while orig is still linked into the list }
      if not (moveback and GetLastInstruction(orig,n)) then
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Peephole optimizations for addi (p is the addi instruction).

    Tried in this order:
      1. addi x,x,0                            -> removed
      2. addi x,y,#a ; addi/addiw z,x,#b       -> addi/addiw z,y,#a+b
      3. addi x,x,(ref) ; ld/sd y,0(x)         -> ld/sd y,(ref)(x)
      4. addi x,z,#w ; ld/sd y,0(x)            -> ld/sd y,#w(z)
      5. addi w,z,0 ; op x,y,w                 -> op x,y,z
    If none applies, OptPass1OP is tried. In cases 2-5 the register written
    by p must end its life at the second instruction.

    Returns true if any change was made. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;

    { Returns true if hp is a load/store of the form "op y,0(x)", x being the
      destination of p, into which the address computation done by p can be
      folded.

      A store whose stored value is x itself (e.g. "sd x,0(x)") is rejected:
      after folding, p is removed and x would no longer hold the value that
      has to be stored. }
    function CanFoldIntoMemOp(hp: tai): boolean;
      begin
        result:=
          MatchInstruction(hp, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
          (taicpu(hp).ops=2) and
          (taicpu(hp).oper[1]^.typ=top_ref) and
          (taicpu(hp).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
          (taicpu(hp).oper[1]^.ref^.offset=0) and
          not((taicpu(hp).opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
              MatchOperand(taicpu(hp).oper[0]^,taicpu(p).oper[0]^.reg)) and
          (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp)) and
          RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp));
      end;

    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi z, y, #+#
          dealloc x
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_ref) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        CanFoldIntoMemOp(hp1) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        CanFoldIntoMemOp(hp1) then
        begin
          { z is now used by hp1, so it has to stay allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z

        Conditional branches are deliberately not handled here: the
        deallocation after the jump could also mean that the register is in
        use at the jump target.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_SLT,A_AND,A_OR,
          A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
          A_DIV,A_DIVU,A_REM,A_REMU
          {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
          A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
          A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
          ) and
        (taicpu(hp1).ops=3) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for sub (p is the sub instruction).

    Turns
        sub x,y,z
        bgeu/beq X0,x,...
        dealloc x
    into
        beq y,z,...

    (an unsigned "0 >= x" as well as "0 = x" both mean x = 0, i.e. y = z).
    If the pattern does not match, OptPass1OP is tried instead.

    Returns true if any change was made. }
  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      branch: tai;
      foldable: boolean;
    begin
      foldable:=
        (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
        MatchInstruction(branch,A_Bxx,[C_GEU,C_EQ]) and
        (taicpu(branch).ops=3) and
        MatchOperand(taicpu(branch).oper[0]^,NR_X0) and
        MatchOperand(taicpu(branch).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch));

      if not foldable then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

      { compare the two sub operands directly }
      taicpu(branch).loadreg(0,taicpu(p).oper[1]^.reg);
      taicpu(branch).loadreg(1,taicpu(p).oper[2]^.reg);
      taicpu(branch).condition:=C_EQ;

      DebugMsg('Peephole SubBxx2Beq performed', branch);

      RemoveInstr(p);

      result:=true;
    end;
  { Peephole optimizations for slt/sltu (p is the slt/sltu instruction).

    Turns
        slt(u) x,X0,y                 slt(u) x,y,z
        beq/bne x, X0, ...     or     beq/bne x, X0, ...
        dealloc x                     dealloc x
    into
        bge(u)/blt(u) X0, y, ...      bge(u)/blt(u) y, z, ...

    If neither pattern matches, OptPass1OP is tried instead.

    Returns true if any change was made. }
  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      hp1: tai;

    { Rewrites the branch hp1 into a direct comparison of lhs and rhs and
      removes p: "bne" (flag set) becomes "less than", "beq" (flag clear)
      becomes "greater or equal"; unsigned for sltu, signed otherwise. }
    procedure FoldIntoBranch(lhs, rhs: TRegister);
      var
        flagSet: boolean;
      begin
        taicpu(hp1).loadreg(0,lhs);
        taicpu(hp1).loadreg(1,rhs);

        flagSet:=(taicpu(hp1).condition=C_NE);
        if taicpu(p).opcode=A_SLTU then
          begin
            if flagSet then
              taicpu(hp1).condition:=C_LTU
            else
              taicpu(hp1).condition:=C_GEU;
          end
        else
          begin
            if flagSet then
              taicpu(hp1).condition:=C_LT
            else
              taicpu(hp1).condition:=C_GE;
          end;

        DebugMsg('Peephole SltuB2B performed', hp1);

        RemoveInstr(p);
      end;

    begin
      result:=false;
      { first source operand is X0 }
      if (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,NR_X0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          FoldIntoBranch(NR_X0,taicpu(p).oper[2]^.reg);
          result:=true;
        end
      { general case: both source operands must survive up to the branch }
      else if (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          FoldIntoBranch(taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for slti (p is the slti instruction).

    Turns
        slti x,y,0
        beq/bne x,x0,...
        dealloc x
    into
        bge/blt y,x0,...

    Returns true if the change was made. }
  function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
    var
      branch: tai;
    begin
      result:=false;

      { only "slti x,y,0" is of interest }
      if (taicpu(p).ops<>3) or
         (taicpu(p).oper[2]^.typ<>top_const) or
         (taicpu(p).oper[2]^.val<>0) then
        exit;

      if not GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) then
        exit;

      { the user of x must be "beq/bne x,x0,..." }
      if not (MatchInstruction(branch,A_Bxx,[C_NE,C_EQ]) and
              (taicpu(branch).ops=3) and
              MatchOperand(taicpu(branch).oper[0]^,taicpu(p).oper[0]^.reg) and
              MatchOperand(taicpu(branch).oper[1]^,NR_X0)) then
        exit;

      if RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch) or
         not RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
        exit;

      taicpu(branch).loadreg(0,taicpu(p).oper[1]^.reg);
      taicpu(branch).loadreg(1,NR_X0);

      { x<>0 means y<0, x=0 means y>=0 }
      if taicpu(branch).condition=C_NE then
        taicpu(branch).condition:=C_LT
      else
        taicpu(branch).condition:=C_GE;

      DebugMsg('Peephole Slti0B2B performed', branch);

      RemoveInstr(p);

      result:=true;
    end;
  { Peephole optimizations for andi (p is the andi instruction).

      1. andi x,y,#a ; andi z,x,#b ; dealloc x   -> andi z,y,#(a and b)
      2. (not on RISCV32)
         andi x,y,#a ; addiw z,x,0 ; dealloc x   -> andi z,y,#a

    Case 2 is only valid when
      - #a is not negative: only then bits 31..63 of x are known to be zero,
        so that the sign extension done by addiw is a no-op;
      - z is neither read nor written between the two instructions, because
        the andi (which stays at its place) now writes z early. With -O3 the
        two instructions are not necessarily adjacent.

    If nothing applies, OptPass1OP is tried. Returns true if any change was
    made. }
  function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              { y is now used by hp1, so it has to stay allocated up to there }
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

              taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

              DebugMsg('Peephole AndiAndi2Andi performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
{$ifndef RISCV32}
          else if MatchInstruction(hp1,A_ADDIW) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[0]^.typ=top_reg) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=0) and
            is_imm12(taicpu(p).oper[2]^.val) and
            { a negative mask keeps the upper bits, addiw is no no-op then }
            (taicpu(p).oper[2]^.val>=0) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            { p is going to write z, so nothing in between may touch z }
            (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

              RemoveInstr(hp1);

              result:=true;
            end
{$endif RISCV32}
          else
            result:=OptPass1OP(p);
        end
      else
        result:=OptPass1OP(p);
    end;
  { Entry point of the pass 1 peephole optimizer: dispatches on the opcode of
    the instruction p to the matching OptPass1* routine. Anything that is not
    an instruction, and instructions without a handler, are left alone.

    Returns true if any change was made (p may have been repositioned).

    Notes
      - The 32 bit immediate shifts of riscv64 (slliw/srliw/sraiw) sign extend
        their 32 bit result, so with a shift count of 0 they are neither a
        no-op nor a plain register move: they are canonicalised to
        "addiw x,y,0" instead of being removed or turned into addi.
      - The floating point operations are paired with the register move of
        the precision of their *result*: fcvt.d.s produces a double and is
        therefore handled together with fsgnj.d, fcvt.s.d produces a single
        and is handled together with fsgnj.s. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                result:=OptPass1Sub(p);
              A_ANDI:
                result:=OptPass1Andi(p);
              A_SLT,
              A_SLTU:
                result:=OptPass1SLTx(p);
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...

                    (x is 1 exactly when y is 0, hence the inverted condition)
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=1) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                    MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
{$ifdef riscv64}
              A_LWU,
              A_LD,
{$endif riscv64}
              A_ADD,
{$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
{$endif riscv64}
              A_DIV,
              A_DIVU,
{$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
{$endif riscv64}
              A_REM,
              A_REMU,
{$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
{$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
{$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
{$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
{$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW:
                begin
                  if taicpu(p).oper[2]^.val=0 then
                    begin
                      { a zero 32 bit shift still sign extends: that is
                        exactly addiw x,y,0, which also enables further
                        optimizations }
                      DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
                      taicpu(p).opcode:=A_ADDIW;
                      result:=true;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
{$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                begin
                  if taicpu(p).oper[2]^.val<>0 then
                    result:=OptPass1OP(p)
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                    begin
                      DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
                      RemoveInstr(p);
                      result:=true;
                    end
                  else
                    begin
                      { this enables further optimizations }
                      DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
                      taicpu(p).opcode:=A_ADDI;
                      result:=true;
                    end;
                end;
              A_SLTI:
                result:=OptPass1SLTI(p);
              { operations with a single precision result }
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_S_D,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              { operations with a double precision result }
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_D_S,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  766. end.