aoptcpurv.pas 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. uses
  25. cpubase,
  26. globals, globtype,
  27. cgbase,
  28. aoptobj, aoptcpub, aopt,
  29. aasmtai, aasmcpu;
  type
    { Peephole optimizer object shared by the RiscV targets. The OptPass1*
      methods each handle one family of opcodes and are dispatched from
      PeepHoleOptPass1Cpu. }
    TRVCpuAsmOptimizer = class(TAsmOptimizer)
      { register usage queries overriding the generic optimizer }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

      { instruction list helpers }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;
      procedure RemoveInstr(var orig: tai; moveback: boolean=true);

      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { pass 1 entry point }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

      { pass 1 handlers, one per opcode family }
      function OptPass1OP(var p: tai): boolean;
      function OptPass1FOP(var p: tai; mvop: tasmop): boolean;
      function OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
      function OptPass1SLTx(var p: tai): boolean;
      function OptPass1SLTI(var p: tai): boolean;
      function OptPass1Andi(var p: tai): boolean;
      function OptPass1SLTIU(var p: tai): boolean;
      function OptPass1SxxI(var p: tai): boolean;
      function OptPass1Add(var p: tai): boolean;
      function OptPass1Sub(var p: tai): boolean;
      function OptPass1Fcmp(var p: tai): boolean;
    end;
  52. implementation
  53. uses
  54. cutils,
  55. verbose;
  { Returns true when instr is an instruction whose opcode is one of ops and,
    if AConditions is not empty, whose condition is a member of AConditions. }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const AConditions: TAsmConds = []): boolean;
    var
      idx : longint;
    begin
      result:=false;
      { only real instructions carry an opcode }
      if instr.typ<>ait_instruction then
        exit;
      { an empty condition set means "any condition" }
      if (AConditions<>[]) and not(taicpu(instr).condition in AConditions) then
        exit;
      for idx:=low(ops) to high(ops) do
        if ops[idx]=taicpu(instr).opcode then
          exit(true);
    end;
  { Single-opcode variant: true when instr is an instruction with opcode op
    and, if AConditions is not empty, a condition contained in AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { an empty condition set accepts every condition }
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands: they match when both are the same register or both
    are the same constant. Every other operand kind, references included, is
    reported as not matching (the RefsEqual based comparison is disabled). }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_const:
          result:=oper1.val=oper2.val;
        else
          ;
      end;
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  98. {$ifdef DEBUG_AOPTCPU}
  99. procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  100. begin
  101. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  102. end;
  103. {$else DEBUG_AOPTCPU}
  104. procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  105. begin
  106. end;
  107. {$endif DEBUG_AOPTCPU}
  { Reports whether the instruction hp reads reg: either as a register operand
    that is not a pure write, or as the base register of a reference operand.
    Returns false when hp is nil or not an instruction. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for opidx:=0 to instr.ops-1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            if (instr.oper[opidx]^.reg=reg) and
              (instr.spilling_get_operation_type(opidx)<>operand_write) then
              exit(true);
          top_ref:
            { only the base register of a reference is inspected }
            if instr.oper[opidx]^.ref^.base=reg then
              exit(true);
          else
            ;
        end;
    end;
  { True when hp is an instruction with more than one operand whose first
    operand is reg and is not a pure read of it. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      if taicpu(hp).ops<=1 then
        exit;
      if taicpu(hp).oper[0]^.typ<>top_reg then
        exit;
      result:=
        (taicpu(hp).oper[0]^.reg=reg) and
        (taicpu(hp).spilling_get_operation_type(0)<>operand_read);
    end;
  { True when one of the register operands of p1 is Reg and that operand is
    written or read-modified-written. p1 is cast to taicpu without a type
    check, so the caller has to pass an instruction. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      opidx : longint;
      instr : taicpu;
    begin
      result:=false;
      instr:=taicpu(p1);
      for opidx:=0 to instr.ops-1 do
        if instr.oper[opidx]^.typ=top_reg then
          begin
            if (instr.oper[opidx]^.reg=Reg) and
              (instr.spilling_get_operation_type(opidx) in [operand_write,operand_readwrite]) then
              begin
                result:=true;
                exit;
              end;
          end;
    end;
  { Advances from Current with GetNextInstruction and stops at the first
    entry that ends the search: no further instruction, level 3 optimizations
    not enabled (so only the direct successor is returned), a non-instruction
    entry, an instruction in which reg occurs, or a call/jump.
    The result is that of the last GetNextInstruction call; Next receives the
    entry the search stopped at. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not(cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Generic handler: folds a following register copy into the instruction p.

    replace
      <Op>   %reg3,%reg2,%reg1
      addi   %reg4,%reg3,0
      dealloc  %reg3
    by
      <Op>   %reg4,%reg2,%reg1

    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        (taicpu(hp1).ops=3) and
        { an addi may also carry a reference as third operand (see
          OptPass1Add); val is only meaningful for a constant operand }
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must not be needed after the copy }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Floating point counterpart of OptPass1OP; mvop is the move opcode
    matching the precision of p.

    replace
      <FOp>   %reg3,%reg2,%reg1
      <mvop>  %reg4,%reg3,%reg3
      dealloc  %reg3
    by
      <FOp>   %reg4,%reg2,%reg1

    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,hp1) then
        exit;
      if not MatchInstruction(hp1,mvop) then
        exit;
      { both sources of the move have to be the result of p }
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
        exit;
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs) then
        exit;
      taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(hp1);
      result:=true;
    end;
  { Drops a masking andi that follows a floating point compare.

    replace
      <Fcmp>  %ireg3,%freg2,%freg1
      <andi>  %ireg4,%ireg3,const     (bit 0 of const set)
      dealloc  %reg3
    by
      <Fcmp>  %ireg4,%freg2,%freg1

    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Fcmp(var p: tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,hp1) then
        exit;
      if not MatchInstruction(hp1,A_ANDI) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        exit;
      { the mask has to keep bit 0 }
      if (taicpu(hp1).oper[2]^.val and 1)<>1 then
        exit;
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs) then
        exit;
      taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
      DebugMsg('Peephole FcmpAndi2Fcmp done',p);
      RemoveInstruction(hp1);
      result:=true;
    end;
  { Forwards the source of a floating point register move into the
    instruction that consumes the copy; mvop is the opcode of p.

    replace
      <mvop>  %reg1,%reg2,%reg2
      <FOp>   %reg3,%reg1,%reg1
      dealloc  %reg2
    by
      <FOp>   %reg3,%reg2,%reg2

    The rewrite is only done when p really is a move, i.e. both of its source
    operands are the same register: with different sources fsgnj combines
    two values and its result cannot be replaced by the first source.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(p).oper[1]^.reg) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
         ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that reads the copy }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and lets orig point to a
    neighbouring instruction instead.

    moveback=true : orig becomes the instruction found by GetLastInstruction
                    or, if that fails, the one found by GetNextInstruction.
    moveback=false: orig becomes the instruction found by GetNextInstruction.

    If neither lookup succeeds orig receives whatever the last lookup stored
    in its output parameter. }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      n:=nil;
      { the forward lookup also has to run when moveback is false, otherwise
        n would be handed back without ever having been assigned }
      if not (moveback and GetLastInstruction(orig,n)) then
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Peephole optimizations starting at an addi instruction. The patterns are
    tried in order; when none applies the generic OptPass1OP is used.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi/addiw z, y, #+#
          dealloc x
        (the opcode of the second instruction is kept)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)

        Not done for a store that writes x itself (sd x, 0(x)): the stored
        value would no longer be computed once the addi is gone.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_ref) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)

        Not done for a store that writes x itself, see above.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        not (MatchInstruction(hp1, [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z

        Conditional branches (A_Bxx) reading w are not handled yet as the
        deallocation after the jump could also mean that the register is in
        use at the jump target.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_AND,A_OR,
          A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
          A_DIV,A_DIVU,A_REM,A_REMU,A_SLT,A_SLTU,A_SLTI,A_SLTIU
          {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
          A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
          A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
          ) and
        (taicpu(hp1).ops=3) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Folds a subtraction that only feeds a compare-with-zero branch.

    Turn
      sub x,y,z
      bgeu/beq X0,x,...
      dealloc x
    Into
      beq y,z,...

    Falls back to OptPass1OP when the pattern does not apply.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      hp1: tai;
      foldable: boolean;
    begin
      foldable:=
        (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1));

      if not foldable then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

      { compare the two sources of the subtraction directly }
      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
      taicpu(hp1).condition:=C_EQ;

      DebugMsg('Peephole SubBxx2Beq performed', hp1);

      RemoveInstr(p);

      result:=true;
    end;
  { Folds slt/sltu into a following beq/bne against X0.

    Turn
      slt(u) x,X0,y                  slt(u) x,y,z
      beq/bne x, X0, ...      or     beq/bne x, X0, ...
      dealloc x                      dealloc x
    Into
      bge(u)/blt(u) X0, y, ...       bge(u)/blt(u) y, z, ...

    Falls back to OptPass1OP when neither pattern applies.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      hp1: tai;

    { Makes the branch hp1 compare lhs with rhs, selects the condition that
      corresponds to the removed set-less-than (bne -> less than, beq ->
      greater or equal; unsigned for sltu) and removes p. }
    procedure FoldIntoBranch(lhs,rhs: TRegister; const msg: string);
      var
        branch_if_set: boolean;
      begin
        taicpu(hp1).loadreg(0,lhs);
        taicpu(hp1).loadreg(1,rhs);

        branch_if_set:=taicpu(hp1).condition=C_NE;
        if taicpu(p).opcode=A_SLTU then
          begin
            if branch_if_set then
              taicpu(hp1).condition:=C_LTU
            else
              taicpu(hp1).condition:=C_GEU;
          end
        else
          begin
            if branch_if_set then
              taicpu(hp1).condition:=C_LT
            else
              taicpu(hp1).condition:=C_GE;
          end;

        DebugMsg(msg, hp1);

        RemoveInstr(p);
      end;

    begin
      result:=false;
      { first source is X0 }
      if (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,NR_X0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          FoldIntoBranch(NR_X0,taicpu(p).oper[2]^.reg,'Peephole SltuB2B 1 performed');
          result:=true;
        end
      { general case: both sources have to survive up to the branch }
      else if (taicpu(p).ops=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          FoldIntoBranch(taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg,'Peephole SltuB2B 2 performed');
          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimizations starting at slti x,y,0.

    Active:
      slti x,y,0
      andi z,x,#      (constant with bit 0 set)
      dealloc x
    becomes
      slti z,y,0

    slti only produces 0 or 1, so the andi is redundant exactly when its mask
    keeps bit 0; a mask without bit 0 would always yield 0 and must not be
    folded. x has to end its life at the andi because it is no longer written
    afterwards.

    Falls back to OptPass1OP otherwise. Returns true when the list was
    changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          {
            Disabled: turning
              slti x,y,0
              beq/ne x,x0,...
              dealloc x
            into
              bge/lt y,x0,...
            we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch

            if MatchInstruction(hp1,A_Bxx) and
              (taicpu(hp1).ops=3) and
              (taicpu(hp1).oper[0]^.typ=top_reg) and
              (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
              (taicpu(hp1).oper[1]^.typ=top_reg) and
              (taicpu(hp1).oper[1]^.reg=NR_X0) and
              (taicpu(hp1).condition in [C_NE,C_EQ]) and
              (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
              RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
              begin
                taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                taicpu(hp1).loadreg(1,NR_X0);

                if taicpu(hp1).condition=C_NE then
                  taicpu(hp1).condition:=C_LT
                else
                  taicpu(hp1).condition:=C_GE;

                DebugMsg('Peephole Slti0B2B performed', hp1);

                RemoveInstr(p);

                result:=true;
                exit;
              end
            else
          }
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            ((taicpu(hp1).oper[2]^.val and 1)=1) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
            (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              DebugMsg('Peephole SltiAndi2Slti performed', hp1);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              RemoveInstr(hp1);

              result:=true;
              exit;
            end;
        end;

      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Peephole optimizations starting at andi with a constant mask. The next
    instruction using the result decides which pattern applies; when none
    does, the generic OptPass1OP is used.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          {
            Changes
              andi x, y, #
              andi z, x, #
              dealloc x
            To
              andi z, y, # and #
          }
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

              DebugMsg('Peephole AndiAndi2Andi performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
          {
            Changes
              andi x, y, #ff or ...
              sb x, ...
              dealloc x
            To
              sb y, ...
            (the mask keeps all eight low bits, which is all sb stores)
          }
          else if MatchInstruction(hp1,A_SB) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
            (taicpu(p).oper[2]^.val and $ff=$ff) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);

              DebugMsg('Peephole AndiSb2Sb performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
{$ifndef RISCV32}
          {
            Changes
              andi x, y, #      (non-negative mask)
              addiw z, x, 0
              dealloc x
            To
              andi z, y, #

            Only a non-negative mask clears the upper bits so that the sign
            extension done by addiw cannot change the value; a negative mask
            is sign extended itself and keeps the upper bits of y.
          }
          else if MatchInstruction(hp1,A_ADDIW) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=0) and
            (taicpu(p).oper[2]^.val>=0) and
            is_imm12(taicpu(p).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

              RemoveInstr(hp1);

              result:=true;
            end
{$endif RISCV32}
          else
            result:=OptPass1OP(p);
        end
      else
        result:=OptPass1OP(p);
    end;
  { Peephole optimizations starting at sltiu x,y,1.

    Active:
      sltiu x,y,1
      andi z,x,#      (constant with bit 0 set)
      dealloc x
    becomes
      sltiu z,y,1

    sltiu only produces 0 or 1, so the andi is redundant exactly when its
    mask keeps bit 0; a mask without bit 0 would always yield 0 and must not
    be folded. x has to end its life at the andi because it is no longer
    written afterwards.

    Falls back to OptPass1OP otherwise. Returns true when the list was
    changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTIU(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=1) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          {
            Disabled: turning
              sltiu x,y,1
              beq/ne x,x0,...
              dealloc x
            into
              bne/eq y,x0,...
            we cannot do this optimization yet as we don't know if taicpu(p).oper[0]^.reg isn't used after taking the branch

            if MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
              (taicpu(hp1).ops=3) and
              MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
              MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
              (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
              RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
              begin
                taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                DebugMsg('Peephole Sltiu0B2B performed', hp1);

                RemoveInstr(p);

                result:=true;
                exit;
              end
            else
          }
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            ((taicpu(hp1).oper[2]^.val and 1)=1) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
            (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              DebugMsg('Peephole SltiuAndi2Sltiu performed', hp1);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              RemoveInstr(hp1);

              result:=true;
              exit;
            end;
        end;

      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Simplifies immediate shifts with a shift count of 0.

      s*li  x,x,0   is removed
      s*li  x,y,0   becomes  addi  x,y,0  (this enables further optimizations)
      s*liw x,y,0   becomes  addiw x,y,0  (riscv64 only)

    The word variants still sign extend the low 32 bits with a shift count of
    0, so they are neither removed nor turned into a plain addi.
    Every other shift is passed on to OptPass1OP.
    Returns true when the list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SxxI(var p: tai): boolean;
    begin
      result:=false;
      if (taicpu(p).ops<>3) or
        (taicpu(p).oper[2]^.typ<>top_const) or
        (taicpu(p).oper[2]^.val<>0) then
        result:=OptPass1OP(p)
{$ifdef riscv64}
      else if taicpu(p).opcode in [A_SRAIW,A_SRLIW,A_SLLIW] then
        begin
          DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);

          taicpu(p).opcode:=A_ADDIW;

          result:=true;
        end
{$endif riscv64}
      else if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole S*LI x,x,0 to nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      else
        begin
          { this enables further optimizations }
          DebugMsg('Peephole S*LI x,y,0 to addi performed', p);

          taicpu(p).opcode:=A_ADDI;

          result:=true;
        end;
    end;
  { Pass 1 entry point: dispatches the instruction p to the handler
    responsible for its opcode. Entries that are not instructions and opcodes
    without a handler are left alone.
    Returns true when a handler changed the list. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;

      case taicpu(p).opcode of
        A_ADDI:
          result:=OptPass1Add(p);
        A_SUB:
          result:=OptPass1Sub(p);
        A_ANDI:
          result:=OptPass1Andi(p);
        A_SLT,
        A_SLTU:
          result:=OptPass1SLTx(p);
        A_SLTIU:
          result:=OptPass1SLTIU(p);
        { loads and integer operations that only get the generic treatment }
        A_LA,
        A_LUI,
        A_LB,
        A_LBU,
        A_LH,
        A_LHU,
        A_LW,
{$ifdef riscv64}
        A_LWU,
        A_LD,
{$endif riscv64}
        A_ADD,
{$ifdef riscv64}
        A_ADDIW,
        A_SUBW,
{$endif riscv64}
        A_DIV,
        A_DIVU,
{$ifdef riscv64}
        A_DIVW,
        A_DIVUW,
{$endif riscv64}
        A_REM,
        A_REMU,
{$ifdef riscv64}
        A_REMW,
        A_REMUW,
        A_MULW,
{$endif riscv64}
        A_MUL,
        A_MULH,
        A_MULHSU,
        A_MULHU,
        A_ORI,
        A_XORI,
        A_AND,
        A_OR,
        A_XOR,
{$ifdef riscv64}
        A_SLLW,
        A_SRLW,
        A_SRAW,
        A_ROLW,
        A_RORW,
        A_RORIW,
{$endif riscv64}
        A_SLL,
        A_SRL,
        A_SRA,
        A_ROL,
        A_ROR,
        A_RORI,
        A_NEG,
        A_NOT:
          result:=OptPass1OP(p);
        { immediate shifts }
{$ifdef riscv64}
        A_SRAIW,
        A_SRLIW,
        A_SLLIW,
{$endif riscv64}
        A_SRAI,
        A_SRLI,
        A_SLLI:
          result:=OptPass1SxxI(p);
        A_SLTI:
          result:=OptPass1SLTI(p);
        { single precision operations }
        A_FADD_S,
        A_FSUB_S,
        A_FMUL_S,
        A_FDIV_S,
        A_FSQRT_S,
        A_FNEG_S,
        A_FLW,
        A_FCVT_D_S,
        A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
        A_FMIN_S,A_FMAX_S:
          result:=OptPass1FOP(p,A_FSGNJ_S);
        { double precision operations }
        A_FADD_D,
        A_FSUB_D,
        A_FMUL_D,
        A_FDIV_D,
        A_FSQRT_D,
        A_FNEG_D,
        A_FLD,
        A_FCVT_S_D,
        A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
        A_FMIN_D,A_FMAX_D:
          result:=OptPass1FOP(p,A_FSGNJ_D);
        { floating point compares }
        A_FEQ_S,
        A_FLT_S,
        A_FLE_S,
        A_FEQ_D,
        A_FLT_D,
        A_FLE_D:
          result:=OptPass1Fcmp(p);
        A_FSGNJ_S,
        A_FSGNJ_D:
          result:=OptPass1FSGNJ(p,taicpu(p).opcode);
        else
          ;
      end;
    end;
  890. end.