aoptcpurv.pas 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. { $define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  28. type
  { Common RiscV assembler-level (peephole) optimizer. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
    { --- register usage queries overriding the generic optimizer --- }
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

    { walks forward from Current; see the implementation for the stop conditions }
    Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    { unlinks and frees an instruction, repositioning the passed cursor }
    procedure RemoveInstr(var orig: tai; moveback: boolean=true);

    { --- pass 1 peephole optimizations --- }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function OptPass1OP(var p: tai): boolean;
    function OptPass1Add(var p: tai): boolean;
    function OptPass1Sub(var p: tai): boolean;
    function OptPass1SLTx(var p: tai): boolean;
    function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
    function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;
  end;
  45. implementation
  46. uses
  47. cutils,
  48. verbose;
  { Returns true if instr is an instruction whose opcode is contained in op.
    When AConditions is not empty, the instruction's condition must also be a
    member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if not (taicpu(instr).opcode in op) then
        exit;
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true if instr is an instruction with exactly the opcode op.
    When AConditions is not empty, the instruction's condition must also be a
    member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands. They match when they have the same operand type and
    are either equal constants or equal registers; every other operand type
    (references included) is reported as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        exit(false);
      case oper1.typ of
        top_reg:
          result:=(oper1.reg=oper2.reg);
        top_const:
          result:=(oper1.val=oper2.val);
        else
          result:=false;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Inserts s as a comment before p in the assembler list when DEBUG_AOPTCPU
    is defined; otherwise this is an empty (inlined) procedure. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if hp is an instruction that reads reg: either through a
    register operand that is not a pure write, or as the base register of a
    memory reference. Returns false for nil and for non-instructions. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      insn: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;

      insn:=taicpu(hp);
      for opidx:=0 to insn.ops-1 do
        case insn.oper[opidx]^.typ of
          top_reg:
            if (insn.oper[opidx]^.reg=reg) and
               (insn.spilling_get_operation_type(opidx)<>operand_write) then
              exit(true);
          top_ref:
            if insn.oper[opidx]^.ref^.base=reg then
              exit(true);
          else
            ;
        end;
    end;
  { Returns true if hp is an instruction with more than one operand whose
    first operand is the register reg and is not a pure read operand. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      if taicpu(hp).ops<=1 then
        exit;
      if taicpu(hp).oper[0]^.typ<>top_reg then
        exit;
      if taicpu(hp).oper[0]^.reg<>reg then
        exit;
      result:=taicpu(hp).spilling_get_operation_type(0)<>operand_read;
    end;
  { Returns true if p1 is an instruction with a register operand equal to Reg
    that is written or read-modified-written by the instruction.

    p1 is only inspected when it is assigned and really is an instruction:
    anything else cannot modify a register and must not be cast to taicpu.
    This mirrors the guard in InstructionLoadsFromReg. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and
              (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;
  { Advances Next from Current with GetNextInstruction. Scanning continues only
    while all of the following hold: an instruction was found, cs_opt_level3
    is enabled, the found item is an instruction, it does not involve reg and
    it is not a call/jump. The result is the result of the last
    GetNextInstruction call. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    var
      keepscanning: boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { evaluated left to right with short-circuiting, so Next is only
          dereferenced after a successful GetNextInstruction }
        keepscanning:=
          Result and
          (cs_opt_level3 in current_settings.optimizerswitches) and
          (Next.typ=ait_instruction) and
          (not RegInInstruction(reg,Next)) and
          (not is_calljmp(taicpu(Next).opcode));
      until not keepscanning;
    end;
  { Folds a register copy that directly follows an operation into the
    operation itself:

        <Op>    %reg3,%reg2,%reg1
        addi    %reg4,%reg3,0
        dealloc %reg3
      becomes
        <Op>    %reg4,%reg2,%reg1

    Returns true if the copy was removed.

    The third operand of the addi is only interpreted as a constant after its
    type has been checked: an addi may also carry a reference operand (see the
    AddiMem2Mem case in OptPass1Add), in which case val must not be read. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must be dead after the copy }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Folds a floating point register move (mvop with both sources equal) that
    directly follows a floating point operation into that operation:

        <FOp>   %reg3,%reg2,%reg1
        <mvop>  %reg4,%reg3,%reg3
        dealloc %reg3
      becomes
        <FOp>   %reg4,%reg2,%reg1

    Returns true if the move was removed. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;

      if not GetNextInstruction(p,hp1) then
        exit;
      if not MatchInstruction(hp1,mvop) then
        exit;
      { both sources of the move have to be the result of p }
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs) then
        exit;

      taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(hp1);
      result:=true;
    end;
  { Forwards the source of a floating point move into the directly following
    floating point operation:

        <mvop>  %reg1,%reg2,%reg2
        <FOp>   %reg3,%reg1,%reg1
        dealloc %reg1
      becomes
        <FOp>   %reg3,%reg2,%reg2

    mvop is the opcode of p (A_FSGNJ_S or A_FSGNJ_D) and selects the set of
    single/double precision follow-up operations that are accepted.
    Returns true if the move was removed.

    fsgnj is only a plain move when both of its source registers are the same
    (the result takes the value of the first and the sign of the second
    source), so this is verified before the first source is forwarded. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        { p must really be a move: fsgnj rd,rs,rs }
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
          ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
          ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
          ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that reads the move result }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and repositions orig.

    moveback=true : orig becomes the previous instruction or, if there is
                    none, the next instruction.
    moveback=false: orig becomes the next instruction.

    The replacement is always assigned before orig is freed. Previously it was
    left uninitialised when moveback was false, so orig ended up as a garbage
    pointer. }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      n:=nil;
      if moveback then
        begin
          if not GetLastInstruction(orig,n) then
            GetNextInstruction(orig,n);
        end
      else
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Peephole optimizations for an addi instruction p. Tries, in this order:
      - removal of "addi x,x,0"
      - merging two consecutive addi/addiw immediates
      - folding an addi with a reference operand into a following load/store
      - folding an addi with a constant into a following load/store
      - forwarding the source of "addi w,z,0" into the next user of w
    and finally falls back to OptPass1OP. Returns true if the code was changed.

    The two load/store folds are not applied to a store whose stored value is
    the addi result itself (e.g. "addi x,z,8; sw x,0(x)"): removing the addi
    would make the store write a stale value. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi z, y, #+#
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_ref) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        { a store of x itself still needs the value computed by the addi }
        (not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
              MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg))) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        { a store of x itself still needs the value computed by the addi }
        (not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
              MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg))) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        ((MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_SLT,A_AND,A_OR,
            A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
            A_DIV,A_DIVU,A_REM,A_REMU
            {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
            A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
            A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
            ) and
          (taicpu(hp1).ops=3) and
          (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))
          { Doing the same for A_Bxx is not possible yet as the deallocation
            after the jump could also mean that the register is in use at the
            jump target. }
        ) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimization for a sub instruction p:

        sub      x,y,z
        bgeu/beq X0,x,...
        dealloc  x
      becomes
        beq      y,z,...

    Falls back to OptPass1OP when the pattern does not match. Returns true if
    the code was changed. }
  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      hp1: tai;
      foldable: boolean;
    begin
      result:=false;

      foldable:=
        (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1));

      if not foldable then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

      { compare the two sub sources directly instead of the difference with 0 }
      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
      taicpu(hp1).condition:=C_EQ;

      DebugMsg('Peephole SubBxx2Beq performed', hp1);

      RemoveInstr(p);

      result:=true;
    end;
  { Peephole optimization for slt/sltu instructions p followed by a branch on
    the comparison result:

        slt(u)  x,X0,y                  slt(u)  x,y,z
        beq/bne x,X0,...                beq/bne x,X0,...
        dealloc x                       dealloc x
      becomes                         becomes
        bge(u)/blt(u) X0,y,...          bge(u)/blt(u) y,z,...

    bne is turned into blt(u), beq into bge(u). Falls back to OptPass1OP when
    neither pattern matches. Returns true if the code was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      hp1: tai;
      lhs: tregister;
      matched,
      branchifset: boolean;
    begin
      result:=false;
      matched:=false;
      lhs:=NR_X0;

      { first form: the left hand side of the comparison is X0 }
      if (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,NR_X0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          matched:=true;
          lhs:=NR_X0;
        end
      { second form: arbitrary registers on both sides }
      else if (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          matched:=true;
          lhs:=taicpu(p).oper[1]^.reg;
        end;

      if not matched then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

      taicpu(hp1).loadreg(0,lhs);
      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

      { the branch was taken on "x<>0", i.e. when the comparison held }
      branchifset:=(taicpu(hp1).condition=C_NE);
      if taicpu(p).opcode=A_SLTU then
        begin
          if branchifset then
            taicpu(hp1).condition:=C_LTU
          else
            taicpu(hp1).condition:=C_GEU;
        end
      else
        begin
          if branchifset then
            taicpu(hp1).condition:=C_LT
          else
            taicpu(hp1).condition:=C_GE;
        end;

      DebugMsg('Peephole SltuB2B performed', hp1);

      RemoveInstr(p);

      result:=true;
    end;
  { Entry point of the first peephole pass: dispatches on the opcode of p to
    the specialised OptPass1* helpers or to the patterns handled inline below.
    Returns true if the code was changed.

    Corrections compared to the previous implementation:
      - "andi x,y,#; addiw z,x,0" is only folded for non-negative immediates.
        With a negative (sign-extended) mask the upper bits of y survive the
        andi, so the 32 bit sign extension done by addiw is not a no-op.
      - the same fold moves the write of z up to p, so z must not be used
        between p and the addiw.
      - a 32 bit immediate shift by 0 (slliw/srliw/sraiw) still sign-extends
        the lower 32 bits; it is turned into addiw instead of being removed
        or turned into addi. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                result:=OptPass1Sub(p);
              A_ANDI:
                begin
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
                    begin
                      {
                        Changes
                          andi x, y, #
                          andi z, x, #
                          dealloc x
                        To
                          andi z, y, # and #
                      }
                      if MatchInstruction(hp1,A_ANDI) and
                        (taicpu(hp1).ops=3) and
                        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                        (taicpu(hp1).oper[2]^.typ=top_const) and
                        is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

                          DebugMsg('Peephole AndiAndi2Andi performed', hp1);

                          RemoveInstr(p);

                          result:=true;
                        end
{$ifndef RISCV32}
                      {
                        Changes
                          andi x, y, #      (# >= 0)
                          addiw z, x, 0
                          dealloc x
                        To
                          andi z, y, #
                      }
                      else if MatchInstruction(hp1,A_ADDIW) and
                        (taicpu(hp1).ops=3) and
                        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                        (taicpu(hp1).oper[2]^.typ=top_const) and
                        (taicpu(hp1).oper[2]^.val=0) and
                        { only a non-negative mask guarantees that the result
                          already equals its own 32 bit sign extension }
                        (taicpu(p).oper[2]^.val>=0) and
                        is_imm12(taicpu(p).oper[2]^.val) and
                        (taicpu(hp1).oper[0]^.typ=top_reg) and
                        { z is now written at p already }
                        (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                          taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

                          DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

                          RemoveInstr(hp1);

                          result:=true;
                        end
{$endif RISCV32}
                      else
                        result:=OptPass1OP(p);
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_SLT,
              A_SLTU:
                result:=OptPass1SLTx(p);
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=1) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                    MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      { x is 1 exactly when y is 0, hence the inverted condition }
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
{$ifdef riscv64}
              A_LWU,
              A_LD,
{$endif riscv64}
              A_ADD,
{$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
{$endif riscv64}
              A_DIV,
              A_DIVU,
{$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
{$endif riscv64}
              A_REM,
              A_REMU,
{$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
{$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
{$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
{$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
{$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW:
                begin
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=0) then
                    begin
                      { a 32 bit shift by 0 is a sign extension of the lower
                        32 bits, which is exactly addiw x,y,0 }
                      DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
                      taicpu(p).opcode:=A_ADDIW;
                      result:=true;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
{$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                begin
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=0) then
                    begin
                      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        begin
                          DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
                          RemoveInstr(p);
                          result:=true;
                        end
                      else
                        begin
                          { this enables further optimizations }
                          DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
                          taicpu(p).opcode:=A_ADDI;
                          result:=true;
                        end;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bge/lt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=0) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    (hp1.typ=ait_instruction) and
                    (taicpu(hp1).opcode=A_Bxx) and
                    (taicpu(hp1).ops=3) and
                    (taicpu(hp1).oper[0]^.typ=top_reg) and
                    (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    (taicpu(hp1).oper[1]^.typ=top_reg) and
                    (taicpu(hp1).oper[1]^.reg=NR_X0) and
                    (taicpu(hp1).condition in [C_NE,C_EQ]) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,NR_X0);

                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_D_S,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_S_D,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  752. end.