{ aoptcpurv.pas }
{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the common RiscV optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. { $define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  type
    { Peephole optimizer code shared by the RISC-V targets.  The class only
      declares routines; their behaviour is documented at the implementations. }
    TRVCpuAsmOptimizer = class(TAsmOptimizer)
      { --- register usage queries (overrides of the generic optimizer) --- }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

      { --- instruction stream navigation --- }
      Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;

      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { --- pass 1 peephole optimizations --- }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function OptPass1OP(var p: tai): boolean;
      function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
      function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;
      function OptPass1Add(var p: tai): boolean;

      { removes orig from the assembler list and repositions orig }
      procedure RemoveInstr(var orig: tai; moveback: boolean=true);
    end;
  43. implementation
  44. uses
  45. cutils,
  46. verbose;
  { Returns true if instr is an instruction whose opcode is a member of op.
    When AConditions is not empty, the instruction's condition must also be a
    member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      { only real instructions carry an opcode }
      if instr.typ<>ait_instruction then
        exit;
      if not (taicpu(instr).opcode in op) then
        exit;
      { an empty condition set means "any condition" }
      if AConditions=[] then
        result:=true
      else
        result:=taicpu(instr).condition in AConditions;
    end;
  { Returns true if instr is an instruction with exactly the opcode op.
    When AConditions is not empty, the instruction's condition must also be a
    member of AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      { only real instructions carry an opcode }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { an empty condition set means "any condition" }
      if AConditions=[] then
        result:=true
      else
        result:=taicpu(instr).condition in AConditions;
    end;
  { Compares two operands.  Constants match on equal value, registers on
    equal register.  Every other operand kind, references included, is
    reported as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      { operands of different kinds can never match }
      if oper1.typ<>oper2.typ then
        exit(false);
      if oper1.typ=top_const then
        result:=(oper1.val=oper2.val)
      else if oper1.typ=top_reg then
        result:=(oper1.reg=oper2.reg)
      else
        { references are not compared (no RefsEqual call here) }
        result:=false;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; compiles to an empty inline routine otherwise. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if hp is an instruction that reads reg: either as a register
    operand that is not write-only, or as the base register of a reference
    operand.  Returns false for nil or non-instruction entries. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for opidx:=0 to instr.ops-1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            { a register operand counts unless it is only written }
            if (instr.oper[opidx]^.reg=reg) and
              (instr.spilling_get_operation_type(opidx)<>operand_write) then
              exit(true);
          top_ref:
            { the base register of a memory reference is always read }
            if instr.oper[opidx]^.ref^.base=reg then
              exit(true);
          else
            ;
        end;
    end;
  { Returns true if hp is an instruction with more than one operand whose
    first operand is the register reg and is not a pure read operand. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      if taicpu(hp).ops<=1 then
        exit;
      if taicpu(hp).oper[0]^.typ<>top_reg then
        exit;
      if taicpu(hp).oper[0]^.reg<>reg then
        exit;
      result:=taicpu(hp).spilling_get_operation_type(0)<>operand_read;
    end;
  { Returns true if p1 is an instruction that has Reg as a register operand
    which is written or read-modified-written.

    Entries that are nil or not instructions never modify a register; they
    are rejected up front so that p1 is not blindly cast to taicpu (the
    sibling queries InstructionLoadsFromReg and RegLoadedWithNewValue apply
    the same kind of guard). }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and
              (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;
  { Advances from Current and stores the entry it stops at in Next.  The scan
    stops at the first following entry when no further entry exists, when
    cs_opt_level3 is not enabled, when the entry is not an instruction, when
    it involves reg (RegInInstruction), or when it is a call/jump.
    The result is the result of the last GetNextInstruction call. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          { without level 3 optimizations only the direct successor is inspected }
          if not (cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Folds a register copy that directly follows an operation into the
    operation itself.  Returns true if the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <Op>   %reg3,%reg2,%reg1
          addi   %reg4,%reg3,0
          dealloc %reg3

        by
          <Op>   %reg4,%reg2,%reg1
        ?
      }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        { addi may carry a reference as third operand (see OptPass1Add), so
          the operand must be verified to be a constant before .val is read }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must be dead after the copy }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Folds a floating point register move (mvop with both sources equal to the
    result of p) that directly follows p into p.  Returns true if the
    instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      movinstr : tai;
    begin
      result:=false;
      { replace
          <FOp>   %reg3,%reg2,%reg1
          <mvop>  %reg4,%reg3,%reg3
          dealloc %reg3

        by
          <FOp>   %reg4,%reg2,%reg1
        ?
      }
      if not GetNextInstruction(p,movinstr) then
        exit;
      if not MatchInstruction(movinstr,mvop) then
        exit;
      { both source operands of the move must be the result of p }
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(movinstr).oper[1]^) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(movinstr).oper[2]^) then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      { the intermediate register must be dead after the move }
      if RegUsedAfterInstruction(taicpu(movinstr).oper[1]^.reg,movinstr,TmpUsedRegs) then
        exit;

      taicpu(p).loadoper(0,taicpu(movinstr).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(movinstr);
      result:=true;
    end;
  { Forwards the source of a floating point register move (fsgnj with two
    identical source operands) into the directly following floating point
    operation and removes the move.  Returns true if the instruction list
    was changed.

    fsgnj rd,rs1,rs2 only copies rs1 when rs1=rs2; with different sources it
    combines the magnitude of rs1 with the sign of rs2, so such an
    instruction must not be treated as a move. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          <mvop>  %reg1,%reg2,%reg2
          <FOp>   %reg3,%reg1,%reg1
          dealloc %reg1

        by
          <FOp>   %reg3,%reg2,%reg2
        ?
      }
      if (taicpu(p).ops=3) and
        { p must be a plain move: both sources are the same register }
        (taicpu(p).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
         ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that names the move target }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and repositions orig.

    moveback=true  : orig becomes the previous instruction; if there is none,
                     the next instruction is used instead.
    moveback=false : orig becomes the next instruction.  Previously the local
                     was left unassigned in this case and an undefined
                     pointer was written to orig.
                     NOTE(review): "continue with the successor" is assumed
                     to be the intended meaning; no caller in this unit
                     passes false.

    orig is nil afterwards if no suitable neighbour exists. }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      n:=nil;
      if moveback then
        begin
          if not GetLastInstruction(orig,n) then
            if not GetNextInstruction(orig,n) then
              n:=nil;
        end
      else if not GetNextInstruction(orig,n) then
        n:=nil;

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Peephole optimizations for an addi instruction p.  Tries, in this order:
    removing "addi x,x,0", merging two add-immediates, folding the addi into
    the address of a following load/store (reference or constant form),
    forwarding a plain register copy into a following operation, and finally
    the generic OptPass1OP.  Returns true if the instruction list was
    changed. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;

    { True if instr is a store whose stored value (operand 0) is reg.  Folding
      an addi into the address of such a store would remove the definition of
      the value being stored, e.g. "addi x,z,8; sd x,0(x)". }
    function StoresValueOf(instr: taicpu; reg: TRegister): boolean;
      begin
        result:=
          (instr.opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
          (instr.oper[0]^.typ=top_reg) and
          (instr.oper[0]^.reg=reg);
      end;

    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi z, y, #+#
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        { the combined immediate must still fit into 12 bits }
        is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_ref) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        { the addi result must not also be the value that gets stored }
        (not StoresValueOf(taicpu(hp1),taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                               A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_ref) and
        (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
        (taicpu(hp1).oper[1]^.ref^.offset=0) and
        { the addi result must not also be the value that gets stored }
        (not StoresValueOf(taicpu(hp1),taicpu(p).oper[0]^.reg)) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z

        Conditional branches are intentionally not handled here: the
        deallocation after the jump could also mean that the register is in
        use at the jump target.
      }
      else if (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        (MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_SLT,A_AND,A_OR,
           A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
           A_DIV,A_DIVU,A_REM,A_REMU
           {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
           A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
           A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
           ) and
         (taicpu(hp1).ops=3) and
         (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))
        ) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source operand of hp1 that names the copy target }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimizer entry point: dispatches on the opcode of p and
    applies the matching pattern.  Returns true if the instruction list was
    changed; entries that are not instructions are left untouched. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1, hp2: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu/beq X0,x,...
                      dealloc x
                    Into
                      beq y,z,...
                    (0 >= x unsigned holds exactly when x = 0, i.e. y = z)
                  }
                  if (taicpu(p).ops=3) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
                    MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBxx2Beq performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_ANDI:
                begin
                  {
                    Changes
                      andi x, y, #
                      andi z, x, #
                      dealloc x
                    To
                      andi z, y, # and #
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
                    begin
                      if MatchInstruction(hp1,A_ANDI) and
                        (taicpu(hp1).ops=3) and
                        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                        (taicpu(hp1).oper[2]^.typ=top_const) and
                        is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

                          DebugMsg('Peephole AndiAndi2Andi performed', hp1);

                          RemoveInstr(p);

                          result:=true;
                        end
{$ifndef RISCV32}
                      {
                        Changes
                          andi x, y, #    (# >= 0)
                          addiw z, x, 0
                          dealloc x
                        To
                          andi z, y, #
                      }
                      else if MatchInstruction(hp1,A_ADDIW) and
                        (taicpu(hp1).ops=3) and
                        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                        (taicpu(hp1).oper[2]^.typ=top_const) and
                        (taicpu(hp1).oper[2]^.val=0) and
                        is_imm12(taicpu(p).oper[2]^.val) and
                        { only a non-negative mask clears bits 63..11, which
                          makes the sign extension done by addiw redundant; a
                          negative (sign-extended) mask keeps the upper bits }
                        (taicpu(p).oper[2]^.val>=0) and
                        { the write to z is moved up to p, so no instruction
                          may sit between p and hp1 that could read or write z }
                        GetNextInstruction(p,hp2) and (hp2=hp1) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                          taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

                          DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

                          RemoveInstr(hp1);

                          result:=true;
                        end
{$endif RISCV32}
                      else
                        result:=OptPass1OP(p);
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_SLT,
              A_SLTU:
                begin
                  {
                    Turn
                      slt(u) x,X0,y
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bge(u)/blt(u) X0, y, ...
                  }
                  if (taicpu(p).ops=3) and
                    MatchOperand(taicpu(p).oper[1]^,NR_X0) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                    MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                    (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,NR_X0);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      { bne x,X0 branches when the comparison held, beq when it did not }
                      if taicpu(p).opcode=A_SLTU then
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LTU
                          else
                            taicpu(hp1).condition:=C_GEU;
                        end
                      else
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LT
                          else
                            taicpu(hp1).condition:=C_GE;
                        end;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Turn
                      slt(u) x,y,z
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bge(u)/blt(u) y, z, ...
                  }
                  else if (taicpu(p).ops=3) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                    MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      if taicpu(p).opcode=A_SLTU then
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LTU
                          else
                            taicpu(hp1).condition:=C_GEU;
                        end
                      else
                        begin
                          if taicpu(hp1).condition=C_NE then
                            taicpu(hp1).condition:=C_LT
                          else
                            taicpu(hp1).condition:=C_GE;
                        end;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/bne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...
                    (sltiu x,y,1 sets x exactly when y = 0, so the branch
                     condition is inverted)
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=1) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                    (taicpu(hp1).ops=3) and
                    MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                    MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
{$ifdef riscv64}
              A_LWU,
              A_LD,
{$endif riscv64}
              A_ADD,
{$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
{$endif riscv64}
              A_DIV,
              A_DIVU,
{$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
{$endif riscv64}
              A_REM,
              A_REMU,
{$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
{$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
{$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
{$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
{$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW,
{$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                begin
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=0) then
                    begin
{$ifdef riscv64}
                      if taicpu(p).opcode in [A_SRAIW,A_SRLIW,A_SLLIW] then
                        begin
                          { a word shift by 0 still sign-extends bits 31..0
                            into the full register, so it is neither a nop nor
                            a plain copy: it is equivalent to addiw x,y,0 }
                          DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
                          taicpu(p).opcode:=A_ADDIW;
                          result:=true;
                        end
                      else
{$endif riscv64}
                      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        begin
                          DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
                          RemoveInstr(p);
                          result:=true;
                        end
                      else
                        begin
                          { this enables further optimizations }
                          DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
                          taicpu(p).opcode:=A_ADDI;
                          result:=true;
                        end;
                    end
                  else
                    result:=OptPass1OP(p);
                end;
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bge/lt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                    (taicpu(p).oper[2]^.typ=top_const) and
                    (taicpu(p).oper[2]^.val=0) and
                    GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                    (hp1.typ=ait_instruction) and
                    (taicpu(hp1).opcode=A_Bxx) and
                    (taicpu(hp1).ops=3) and
                    (taicpu(hp1).oper[0]^.typ=top_reg) and
                    (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    (taicpu(hp1).oper[1]^.typ=top_reg) and
                    (taicpu(hp1).oper[1]^.reg=NR_X0) and
                    (taicpu(hp1).condition in [C_NE,C_EQ]) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                    RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,NR_X0);

                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_D_S,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_S_D,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  740. end.