aoptcpurv.pas 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. uses
  25. cpubase,
  26. globals, globtype,
  27. cgbase,
  28. aoptobj, aoptcpub, aopt,
  29. aasmtai, aasmcpu;
  type
    { Common RiscV peephole optimizer object; extends the generic
      TAsmOptimizer with RiscV specific register queries and pass-1 rewrites. }
    TRVCpuAsmOptimizer = class(TAsmOptimizer)
      { --- register usage queries --- }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

      { steps forward from Current to the next entry of interest for reg }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { --- pass 1 peephole optimizations --- }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function OptPass1OP(var p: tai): boolean;
      function OptPass1FOP(var p: tai;mvop: tasmop): boolean;
      function OptPass1FSGNJ(var p: tai;mvop: tasmop): boolean;

      function OptPass1SLTx(var p: tai): boolean;
      function OptPass1SLTI(var p: tai): boolean;
      function OptPass1Andi(var p: tai): boolean;
      function OptPass1SLTIU(var p: tai): boolean;

      function OptPass1SxxI(var p: tai): boolean;
      function OptPass1Add(var p: tai): boolean;
      function OptPass1Sub(var p: tai): boolean;

      function OptPass1Fcmp(var p: tai): boolean;

      { unlinks and frees orig from the assembler list and repositions orig }
      procedure RemoveInstr(var orig: tai; moveback: boolean=true);
    end;
  52. implementation
  53. uses
  54. cutils,
  55. verbose;
  { Returns true when instr is an instruction whose opcode is a member of op.
    If AConditions is not empty, the condition of the instruction must be a
    member of AConditions as well. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      if instr.typ <> ait_instruction then
        exit(false);
      if not (taicpu(instr).opcode in op) then
        exit(false);
      result := (AConditions = []) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true when instr is an instruction with exactly the opcode op.
    If AConditions is not empty, the condition of the instruction must be a
    member of AConditions as well. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (AConditions = []) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands. Only constants (same value) and registers (same
    register) can match; operands of differing kinds and every other operand
    kind - references included, their comparison is disabled - never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := false;
      if oper1.typ <> oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result := oper1.val = oper2.val;
        top_reg:
          result := oper1.reg = oper2.reg;
        else
          { result stays false }
          ;
      end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { When DEBUG_AOPTCPU is defined, inserts s as a comment in front of p in the
    assembler list; otherwise this is an empty (inline) procedure. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true when hp is an instruction that reads reg: either through a
    register operand that is not a pure write, or as the base register of a
    reference operand. Returns false for nil or for non-instructions. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for opidx:=0 to instr.ops-1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            if (instr.oper[opidx]^.reg=reg) and
               (instr.spilling_get_operation_type(opidx)<>operand_write) then
              exit(true);
          top_ref:
            { only the base register of a reference is inspected }
            if instr.oper[opidx]^.ref^.base=reg then
              exit(true);
          else
            ;
        end;
    end;
  { Returns true when hp is an instruction with more than one operand whose
    first operand is the register reg and is not a pure read. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      if taicpu(hp).ops<=1 then
        exit;
      if taicpu(hp).oper[0]^.typ<>top_reg then
        exit;
      if taicpu(hp).oper[0]^.reg<>reg then
        exit;
      result:=taicpu(hp).spilling_get_operation_type(0)<>operand_read;
    end;
  { Returns true when one of the register operands of p1 is Reg and that
    operand is written or read-modified-written by the instruction.
    p1 is cast to taicpu without a type check, exactly like before. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      instr: taicpu;
      opidx: Longint;
    begin
      result:=false;
      instr:=taicpu(p1);
      for opidx:=0 to instr.ops-1 do
        if (instr.oper[opidx]^.typ=top_reg) and
           (instr.oper[opidx]^.reg=Reg) and
           (instr.spilling_get_operation_type(opidx) in [operand_write,operand_readwrite]) then
          begin
            result:=true;
            exit;
          end;
    end;
  { Advances from Current with GetNextInstruction and stores the stop position
    in Next. The walk ends at the first entry for which one of the following
    holds: there is no further entry (result is false then), level 3
    optimizations are not enabled (so only a single step is taken), the entry
    is not an instruction, the entry references reg, or the entry is a
    call/jump. The result is the result of the last GetNextInstruction call. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not(cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Folds a register copy that directly follows an operation into the
    operation itself:

        <Op>    %reg3,%reg2,%reg1
        addi    %reg4,%reg3,0
        dealloc %reg3
      becomes
        <Op>    %reg4,%reg2,%reg1

    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         MatchInstruction(hp1,A_ADDI) and
         (taicpu(hp1).ops=3) and
         { an addi may also carry a reference as third operand (see
           OptPass1Add); val is only meaningful for a constant operand }
         (taicpu(hp1).oper[2]^.typ=top_const) and
         (taicpu(hp1).oper[2]^.val=0) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register must not be needed after the copy }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { Folds a floating point register move that directly follows a floating
    point operation into the operation:

        <FOp>   %reg3,%reg2,%reg1
        <mvop>  %reg4,%reg3,%reg3
        dealloc %reg3
      becomes
        <FOp>   %reg4,%reg2,%reg1

    mvop is the move opcode to look for. Returns true when the list changed. }
  function TRVCpuAsmOptimizer.OptPass1FOP(var p: tai;mvop: tasmop) : boolean;
    var
      mv : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,mv) then
        exit;
      if not MatchInstruction(mv,mvop) then
        exit;
      { both sources of the move have to be the result of p }
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(mv).oper[1]^) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(mv).oper[2]^) then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(mv).oper[1]^.reg,mv,TmpUsedRegs) then
        exit;

      taicpu(p).loadoper(0,taicpu(mv).oper[0]^);
      DebugMsg('Peephole FOpFsgnj02FOp done',p);
      RemoveInstruction(mv);
      result:=true;
    end;
  { Drops a masking andi that directly follows a floating point compare when
    the mask keeps bit 0:

        <Fcmp>  %ireg3,%freg2,%freg1
        <andi>  %ireg4,%ireg3,const
        dealloc %reg3
      becomes
        <Fcmp>  %ireg4,%freg2,%freg1

    Returns true when the list changed. }
  function TRVCpuAsmOptimizer.OptPass1Fcmp(var p: tai) : boolean;
    var
      mask : tai;
    begin
      result:=false;
      if not GetNextInstruction(p,mask) then
        exit;
      if not MatchInstruction(mask,A_ANDI) then
        exit;
      if not MatchOperand(taicpu(p).oper[0]^,taicpu(mask).oper[1]^) then
        exit;
      { the lowest bit has to survive the mask }
      if (taicpu(mask).oper[2]^.val and 1)<>1 then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if RegUsedAfterInstruction(taicpu(mask).oper[1]^.reg,mask,TmpUsedRegs) then
        exit;

      taicpu(p).loadoper(0,taicpu(mask).oper[0]^);
      DebugMsg('Peephole FcmpAndi2Fcmp done',p);
      RemoveInstruction(mask);
      result:=true;
    end;
  { Forwards the source of a floating point register move into the operation
    that directly follows it:

        <mvop>  %reg1,%reg2,%reg2
        <FOp>   %reg3,%reg1,%reg1
        dealloc %reg1
      becomes
        <FOp>   %reg3,%reg2,%reg2

    mvop is the opcode of p (A_FSGNJ_S or A_FSGNJ_D) and selects the set of
    follow-up operations of the same precision that are accepted.
    Returns true when the list changed. }
  function TRVCpuAsmOptimizer.OptPass1FSGNJ(var p: tai; mvop: tasmop): boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { p is only a plain move when both of its sources are the same
        register; with different sources it injects the sign of the second
        source and its result must not be replaced by the first source }
      if (taicpu(p).ops=3) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
        GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
        (((mvop=A_FSGNJ_S) and (taicpu(hp1).opcode in [A_FADD_S,A_FSUB_S,A_FMUL_S,A_FDIV_S,A_FSQRT_S,
            A_FNEG_S,A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,A_FMIN_S,A_FMAX_S,A_FCVT_D_S,
            A_FEQ_S])) or
         ((mvop=A_FSGNJ_D) and (taicpu(hp1).opcode in [A_FADD_D,A_FSUB_D,A_FMUL_D,A_FDIV_D,A_FSQRT_D,
            A_FNEG_D,A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,A_FMIN_D,A_FMAX_D,A_FCVT_S_D,
            A_FEQ_D]))) and
        { the moved register has to be one of the sources of hp1 }
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^)) or
         ((taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^))) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace every source of hp1 that reads the moved register }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          if (taicpu(hp1).ops>=4) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[3]^) then
            taicpu(hp1).loadreg(3,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole FMVFOp2FOp performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
    end;
  { Removes orig from the assembler list, frees it and lets orig point to a
    neighbouring entry instead.

    moveback=true  (default): orig becomes the entry found by
                   GetLastInstruction or, if there is none, the one found by
                   GetNextInstruction.
    moveback=false: orig becomes the entry found by GetNextInstruction.
                   Previously the replacement was left uninitialised in this
                   case, so orig ended up holding a random pointer.

    If no neighbour is found at all, orig is whatever the lookup routines
    stored in their output parameter (presumably nil - not verified here). }
  procedure TRVCpuAsmOptimizer.RemoveInstr(var orig: tai; moveback: boolean = true);
    var
      n: tai;
    begin
      n:=nil;
      { the neighbour has to be looked up while orig is still linked }
      if moveback then
        begin
          if not GetLastInstruction(orig,n) then
            GetNextInstruction(orig,n);
        end
      else
        GetNextInstruction(orig,n);

      AsmL.Remove(orig);
      orig.Free;

      orig:=n;
    end;
  { Pass 1 peephole optimizations for ADDI. The patterns below are tried in
    order; if none applies, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Add(var p: tai): boolean;
    var
      hp1: tai;

    { Returns true when hp is a store instruction whose stored value
      (operand 0) is reg. Folding an address computation into such a store
      is not allowed when reg is the computed address itself, because the
      store would then write a value that is no longer calculated. }
    function IsStoreOfReg(const hp: tai; const reg: TRegister): boolean;
      begin
        result:=
          MatchInstruction(hp,[A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
          MatchOperand(taicpu(hp).oper[0]^,reg);
      end;

    begin
      result:=false;
      {
        Get rid of
          addi x, x, 0
      }
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole Addi2Nop performed', p);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, y, #
          addi/addiw z, x, #
          dealloc x
        To
          addi/addiw z, y, #+#
          dealloc x
        (the second instruction keeps its opcode)
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         { the combined immediate still has to be encodable }
         is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

          DebugMsg('Peephole AddiAddi2Addi performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, x, (ref)
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, (ref)(x)
        Not done for a store that writes x itself: it would store x without
        the added part.
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_ref) and
         MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_ref) and
         (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (taicpu(hp1).oper[1]^.ref^.offset=0) and
         (not IsStoreOfReg(hp1,taicpu(p).oper[0]^.reg)) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi x, z, #w
          ld/sd y, 0(x)
          dealloc x
        To
          ld/sd y, #w(z)
        Not done for a store that writes x itself: x would never be
        calculated then.
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_ref) and
         (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (taicpu(hp1).oper[1]^.ref^.offset=0) and
         (not IsStoreOfReg(hp1,taicpu(p).oper[0]^.reg)) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { NOTE(review): only the offset of the reference is checked to be
            zero; a symbol attached to the reference is not looked at }
          taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
          taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

          DebugMsg('Peephole AddiMem2Mem performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      {
        Changes
          addi w, z, 0
          op x, y, w
          dealloc w
        To
          op x, y, z

        Conditional branches (Bxx) are not handled yet: the deallocation
        after the jump could also mean that the register is in use at the
        jump target.
      }
      else if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
         (MatchInstruction(hp1, [A_SUB,A_ADD,A_SLL,A_SRL,A_AND,A_OR,
            A_ADDI,A_ANDI,A_ORI,A_SRAI,A_SRLI,A_SLLI,A_XORI,A_MUL,
            A_DIV,A_DIVU,A_REM,A_REMU,A_SLT,A_SLTU,A_SLTI,A_SLTIU
            {$ifdef riscv64},A_ADDIW,A_SLLIW,A_SRLIW,A_SRAIW,
            A_ADDW,A_SLLW,A_SRLW,A_SUBW,A_SRAW,
            A_DIVUW,A_DIVW,A_REMW,A_REMUW{$endif}]
            ) and
          (taicpu(hp1).ops=3) and
          (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) or
           MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^))
         ) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
        begin
          { replace w in both source positions where it is used }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);

          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

          DebugMsg('Peephole Addi0Op2Op performed', hp1);

          RemoveInstr(p);

          result:=true;
        end
      else
        result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimization for SUB.

    Turns
      sub x,y,z
      bgeu/beq X0,x,...
      dealloc x
    Into
      beq y,z,...

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Sub(var p: tai): boolean;
    var
      branch: tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
         GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
         MatchInstruction(branch,A_Bxx,[C_GEU,C_EQ]) and
         (taicpu(branch).ops=3) and
         MatchOperand(taicpu(branch).oper[0]^,NR_X0) and
         MatchOperand(taicpu(branch).oper[1]^,taicpu(p).oper[0]^) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch)) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
        begin
          { compare the two sources of the subtraction directly }
          taicpu(branch).loadreg(0,taicpu(p).oper[1]^.reg);
          taicpu(branch).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(branch).condition:=C_EQ;

          DebugMsg('Peephole SubBxx2Beq performed', branch);

          RemoveInstr(p);

          result:=true;
          exit;
        end;

      result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimizations for SLT/SLTU: a set-less-than whose result
    is only tested against X0 by a following beq/bne is folded into the
    branch. If no pattern applies, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTx(var p: tai): boolean;
    var
      branch: tai;

    { Lets the branch compare lhs with the second source of p, picks the
      matching condition (bne -> lt/ltu, otherwise ge/geu; the unsigned
      variants for sltu), emits msg and removes p. }
    procedure FoldIntoBranch(lhs: TRegister; const msg: string);
      begin
        taicpu(branch).loadreg(0,lhs);
        taicpu(branch).loadreg(1,taicpu(p).oper[2]^.reg);

        if taicpu(p).opcode<>A_SLTU then
          begin
            if taicpu(branch).condition<>C_NE then
              taicpu(branch).condition:=C_GE
            else
              taicpu(branch).condition:=C_LT;
          end
        else
          begin
            if taicpu(branch).condition<>C_NE then
              taicpu(branch).condition:=C_GEU
            else
              taicpu(branch).condition:=C_LTU;
          end;

        DebugMsg(msg, branch);

        RemoveInstr(p);
      end;

    begin
      result:=false;
      {
        Turn
          sltu x,X0,y
          beq/bne x, X0, ...
          dealloc x
        Into
          bltu/geu X0, y, ...
      }
      if (taicpu(p).ops=3) and
         MatchOperand(taicpu(p).oper[1]^,NR_X0) and
         GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
         MatchInstruction(branch,A_Bxx,[C_NE,C_EQ]) and
         (taicpu(branch).ops=3) and
         MatchOperand(taicpu(branch).oper[0]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(branch).oper[1]^,NR_X0) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
        begin
          FoldIntoBranch(NR_X0,'Peephole SltuB2B 1 performed');
          result:=true;
          exit;
        end;

      {
        Turn
          sltu x,y,z
          beq/bne x, X0, ...
          dealloc x
        Into
          bltu/geu y, z, ...
      }
      if (taicpu(p).ops=3) and
         GetNextInstructionUsingReg(p, branch, taicpu(p).oper[0]^.reg) and
         MatchInstruction(branch,A_Bxx,[C_NE,C_EQ]) and
         (taicpu(branch).ops=3) and
         MatchOperand(taicpu(branch).oper[0]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(branch).oper[1]^,NR_X0) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,branch)) and
         (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,branch)) and
         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(branch)) then
        begin
          FoldIntoBranch(taicpu(p).oper[1]^.reg,'Peephole SltuB2B 2 performed');
          result:=true;
          exit;
        end;

      result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimization for SLTI.

    Changes
      slti x,y,0
      andi z,x,#   (mask with bit 0 set)
      dealloc x
    To
      slti z,y,0

    slti only ever produces 0 or 1, so the andi is redundant exactly when
    its mask keeps bit 0. x must either be z itself or be dead after the
    andi, and z must not be touched between the two instructions because its
    definition is moved up to the slti.

    The related rewrite of "slti x,y,0; beq/bne x,x0" into "bge/blt y,x0" is
    not done yet, as it is not known whether x is still used after taking
    the branch.

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTI(var p: tai): boolean;
    var
      hp1: tai;

    { Returns true when an instruction strictly between first and last
      references reg in any of its operands. }
    function RegTouchedBetween(reg: TRegister; first, last: tai): boolean;
      var
        cur: tai;
      begin
        result:=false;
        cur:=first;
        while GetNextInstruction(cur,cur) and (cur<>last) do
          if RegInInstruction(reg,cur) then
            exit(true);
      end;

    begin
      result:=false;
      { NOTE(review): the restriction to an immediate of 0 stems from the
        disabled branch rewrite; the andi fold does not depend on it }
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if MatchInstruction(hp1,A_ANDI) and
             (taicpu(hp1).ops=3) and
             (taicpu(hp1).oper[2]^.typ=top_const) and
             { an even mask would always yield 0, so bit 0 has to be kept }
             ((taicpu(hp1).oper[2]^.val and 1)=1) and
             MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
             (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
             (not RegTouchedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
             { x is no longer written afterwards, so nobody may need it }
             (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) or
              RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) then
            begin
              DebugMsg('Peephole SltiAndi2Slti performed', hp1);

              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);

              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              RemoveInstr(hp1);

              result:=true;

              exit;
            end;
        end;
      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimizations for ANDI. The result register x of p is
    followed to its next use and, if that use is another andi, a byte store
    or (not on RISCV32) an "addiw z,x,0", both instructions are merged.
    If no pattern applies, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1Andi(var p: tai): boolean;
    var
      hp1: tai;

{$ifndef RISCV32}
    { Returns true when an instruction strictly between first and last
      references reg in any of its operands. }
    function RegTouchedBetween(reg: TRegister; first, last: tai): boolean;
      var
        cur: tai;
      begin
        result:=false;
        cur:=first;
        while GetNextInstruction(cur,cur) and (cur<>last) do
          if RegInInstruction(reg,cur) then
            exit(true);
      end;
{$endif RISCV32}

    begin
      result:=false;
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          {
            Changes
              andi x, y, #
              andi z, x, #
              dealloc x
            To
              andi z, y, # and #
          }
          if MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            is_imm12(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);

              DebugMsg('Peephole AndiAndi2Andi performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
          {
            Changes
              andi x, y, #ff or ...
              sb x, ...
              dealloc x
            To
              sb y, ...
            (the store only writes the low byte, which the mask keeps)
          }
          else if MatchInstruction(hp1,A_SB) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
            (taicpu(p).oper[2]^.val and $ff=$ff) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);

              DebugMsg('Peephole AndiSb2Sb performed', hp1);

              RemoveInstr(p);

              result:=true;
            end
{$ifndef RISCV32}
          {
            Changes
              andi x, y, #   (non-negative mask)
              addiw z, x, 0
              dealloc x
            To
              andi z, y, #
            With a non-negative 12 bit mask the result has bit 31 and above
            cleared, so the sign extension done by addiw changes nothing.
            A negative mask keeps the upper bits of y and must not be folded.
            z is defined earlier afterwards, so it must not be touched
            between the two instructions.
          }
          else if MatchInstruction(hp1,A_ADDIW) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=0) and
            is_imm12(taicpu(p).oper[2]^.val) and
            (taicpu(p).oper[2]^.val>=0) and
            (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
            (not RegTouchedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
            RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
            begin
              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              DebugMsg('Peephole AndiAddwi02Andi performed', hp1);

              RemoveInstr(hp1);

              result:=true;
            end
{$endif RISCV32}
          else
            result:=OptPass1OP(p);
        end
      else
        result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimization for SLTIU.

    Changes
      sltiu x,y,1
      andi z,x,#   (mask with bit 0 set)
      dealloc x
    To
      sltiu z,y,1

    sltiu only ever produces 0 or 1, so the andi is redundant exactly when
    its mask keeps bit 0. x must either be z itself or be dead after the
    andi, and z must not be touched between the two instructions because its
    definition is moved up to the sltiu.

    The related rewrite of "sltiu x,y,1; beq/bne x,x0" into a branch on y
    with the inverse condition is not done yet, as it is not known whether x
    is still used after taking the branch.

    If the pattern does not apply, the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SLTIU(var p: tai): boolean;
    var
      hp1: tai;

    { Returns true when an instruction strictly between first and last
      references reg in any of its operands. }
    function RegTouchedBetween(reg: TRegister; first, last: tai): boolean;
      var
        cur: tai;
      begin
        result:=false;
        cur:=first;
        while GetNextInstruction(cur,cur) and (cur<>last) do
          if RegInInstruction(reg,cur) then
            exit(true);
      end;

    begin
      result:=false;
      { NOTE(review): the restriction to an immediate of 1 stems from the
        disabled branch rewrite; the andi fold does not depend on it }
      if (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=1) and
         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if MatchInstruction(hp1,A_ANDI) and
             (taicpu(hp1).ops=3) and
             (taicpu(hp1).oper[2]^.typ=top_const) and
             { an even mask would always yield 0, so bit 0 has to be kept }
             ((taicpu(hp1).oper[2]^.val and 1)=1) and
             MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
             (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
             (not RegTouchedBetween(taicpu(hp1).oper[0]^.reg, p,hp1)) and
             { x is no longer written afterwards, so nobody may need it }
             (MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) or
              RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1))) then
            begin
              DebugMsg('Peephole SltiuAndi2Sltiu performed', hp1);

              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);

              taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg);

              RemoveInstr(hp1);

              result:=true;

              exit;
            end;
        end;
      { in all other branches we exit before }
      result:=OptPass1OP(p);
    end;
  { Pass 1 peephole optimization for the immediate shifts (SLLI/SRLI/SRAI and,
    on riscv64, their 32 bit W forms) with a shift count of 0:

      s*li  x,x,0   is removed
      s*li  x,y,0   becomes  addi  x,y,0   (this enables further optimizations)
      s*liw x,y,0   becomes  addiw x,y,0   (riscv64 only)

    The W forms sign-extend the lower 32 bits of their result even for a
    shift count of 0, so they are neither a nop nor a plain register copy;
    addiw with an immediate of 0 does exactly the same sign extension.

    For any other shift count the generic OptPass1OP is tried.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.OptPass1SxxI(var p: tai): boolean;
    begin
      result:=false;
      if taicpu(p).oper[2]^.val<>0 then
        begin
          result:=OptPass1OP(p);
          exit;
        end;

{$ifdef riscv64}
      if taicpu(p).opcode in [A_SLLIW,A_SRLIW,A_SRAIW] then
        begin
          DebugMsg('Peephole S*LIW x,y,0 to addiw performed', p);
          taicpu(p).opcode:=A_ADDIW;
          result:=true;
          exit;
        end;
{$endif riscv64}

      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole S*LI x,x,0 to nop performed', p);
          RemoveInstr(p);
          result:=true;
        end
      else
        begin
          { this enables further optimizations }
          DebugMsg('Peephole S*LI x,y,0 to addi performed', p);
          taicpu(p).opcode:=A_ADDI;
          result:=true;
        end;
    end;
  { Entry point of the cpu specific pass 1 peephole optimizer: dispatches p
    by opcode to the matching OptPass1* routine. Entries that are not
    instructions and opcodes without a handler are left alone.
    Returns true when the instruction list was changed. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                result:=OptPass1Add(p);
              A_SUB:
                result:=OptPass1Sub(p);
              A_ANDI:
                result:=OptPass1Andi(p);
              A_SLT,
              A_SLTU:
                result:=OptPass1SLTx(p);
              A_SLTIU:
                result:=OptPass1SLTIU(p);
              { loads and integer operations that only get the generic
                "operation followed by register copy" treatment }
              A_LA,
              A_LUI,
              A_LB,
              A_LBU,
              A_LH,
              A_LHU,
              A_LW,
{$ifdef riscv64}
              A_LWU,
              A_LD,
{$endif riscv64}
              A_ADD,
{$ifdef riscv64}
              A_ADDIW,
              A_SUBW,
{$endif riscv64}
              A_DIV,
              A_DIVU,
{$ifdef riscv64}
              A_DIVW,
              A_DIVUW,
{$endif riscv64}
              A_REM,
              A_REMU,
{$ifdef riscv64}
              A_REMW,
              A_REMUW,
              A_MULW,
{$endif riscv64}
              A_MUL,
              A_MULH,
              A_MULHSU,
              A_MULHU,
              A_ORI,
              A_XORI,
              A_AND,
              A_OR,
              A_XOR,
{$ifdef riscv64}
              A_SLLW,
              A_SRLW,
              A_SRAW,
              A_ROLW,
              A_RORW,
              A_RORIW,
{$endif riscv64}
              A_SLL,
              A_SRL,
              A_SRA,
              A_ROL,
              A_ROR,
              A_RORI,
              A_NEG,
              A_NOT:
                result:=OptPass1OP(p);
{$ifdef riscv64}
              A_SRAIW,
              A_SRLIW,
              A_SLLIW,
{$endif riscv64}
              A_SRAI,
              A_SRLI,
              A_SLLI:
                result:=OptPass1SxxI(p);
              A_SLTI:
                result:=OptPass1SLTI(p);
              { operations with a single precision result: a following move
                of the result is a single precision move. This includes the
                conversion from double to single (fcvt.s.d). }
              A_FADD_S,
              A_FSUB_S,
              A_FMUL_S,
              A_FDIV_S,
              A_FSQRT_S,
              A_FNEG_S,
              A_FLW,
              A_FCVT_S_D,
              A_FMADD_S,A_FMSUB_S,A_FNMSUB_S,A_FNMADD_S,
              A_FMIN_S,A_FMAX_S:
                result:=OptPass1FOP(p,A_FSGNJ_S);
              { operations with a double precision result: a following move
                of the result is a double precision move. This includes the
                conversion from single to double (fcvt.d.s). }
              A_FADD_D,
              A_FSUB_D,
              A_FMUL_D,
              A_FDIV_D,
              A_FSQRT_D,
              A_FNEG_D,
              A_FLD,
              A_FCVT_D_S,
              A_FMADD_D,A_FMSUB_D,A_FNMSUB_D,A_FNMADD_D,
              A_FMIN_D,A_FMAX_D:
                result:=OptPass1FOP(p,A_FSGNJ_D);
              A_FEQ_S,
              A_FLT_S,
              A_FLE_S,
              A_FEQ_D,
              A_FLT_D,
              A_FLE_D:
                result:=OptPass1Fcmp(p);
              A_FSGNJ_S,
              A_FSGNJ_D:
                result:=OptPass1FSGNJ(p,taicpu(p).opcode);
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  880. end.