aoptcpurv.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the common RiscV optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpurv;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  28. type
  { Common RiscV assembler (peephole) optimizer object, derived from TAsmOptimizer. }
  TRVCpuAsmOptimizer = class(TAsmOptimizer)
  public
    { true if instruction hp reads reg: either as a register operand that is
      not write-only, or as the base register of a memory reference }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    { true if hp is an instruction with more than one operand whose first
      operand is reg and is not a pure read operand }
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    { true if p1 has Reg as a register operand that is written or read-written }
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    { walks forward from Current and returns in Next the entry at which the
      search stopped; see the implementation for the exact stop conditions }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    { cpu specific first peephole pass, dispatching on the opcode of p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { folds a following "addi dst,res,0" into the instruction p producing res }
    function OptPass1OP(var p: tai): boolean;
  end;
  39. implementation
  40. uses
  41. cutils;
  { Returns true when instr is an instruction whose opcode is a member of the
    set op and, if AConditions is not empty, whose condition is a member of
    AConditions. An empty AConditions accepts any condition. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit(false);
      if not (taicpu(instr).opcode in op) then
        exit(false);
      result := (AConditions = []) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true when instr is an instruction with exactly the opcode op and,
    if AConditions is not empty, whose condition is a member of AConditions.
    An empty AConditions accepts any condition. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (AConditions = []) or (taicpu(instr).condition in AConditions);
    end;
  { Compares two operands. They match when both have the same operand type and
    - for constants - the same value, or
    - for registers - the same register.
    Every other operand type (including references) is reported as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := false;
      if oper1.typ <> oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result := oper1.val = oper2.val;
        top_reg:
          result := oper1.reg = oper2.reg;
        { reference comparison is disabled:
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^); }
        else
          ;
      end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: inserts the text s as an assembler comment directly before p. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note := tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug build: intentionally does nothing. }
  procedure TRVCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true when hp is an instruction that reads reg.
    A register operand counts as a read when it equals reg and its operation
    type is anything but operand_write; a reference operand counts as a read
    when reg is its base register. Returns false when hp is nil or is not an
    instruction. }
  function TRVCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      insn: taicpu;
      idx: longint;
    begin
      result := false;
      if (not assigned(hp)) or (hp.typ <> ait_instruction) then
        exit;
      insn := taicpu(hp);
      for idx := 0 to insn.ops - 1 do
        begin
          case insn.oper[idx]^.typ of
            top_reg:
              result := (insn.oper[idx]^.reg = reg) and
                        (insn.spilling_get_operation_type(idx) <> operand_write);
            top_ref:
              result := insn.oper[idx]^.ref^.base = reg;
            else
              ;
          end;
          { stop at the first operand that reads reg }
          if result then
            exit;
        end;
    end;
  { Returns true when hp is an instruction with more than one operand whose
    first operand is the register reg and whose first operand's operation type
    is not operand_read, i.e. reg receives a value from hp. }
  function TRVCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      insn: taicpu;
    begin
      result := false;
      if hp.typ <> ait_instruction then
        exit;
      insn := taicpu(hp);
      if insn.ops <= 1 then
        exit;
      if insn.oper[0]^.typ <> top_reg then
        exit;
      result := (insn.oper[0]^.reg = reg) and
                (insn.spilling_get_operation_type(0) <> operand_read);
    end;
  { Returns true when p1 is an instruction that has Reg as a register operand
    whose operation type is operand_write or operand_readwrite.
    Returns false when p1 is nil or is not an instruction: in that case it
    must not be treated as a taicpu, since it has no operands to inspect. }
  function TRVCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      i : Longint;
    begin
      result:=false;
      { same guard as InstructionLoadsFromReg: only real instructions may be
        cast to taicpu }
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      for i:=0 to taicpu(p1).ops-1 do
        case taicpu(p1).oper[i]^.typ of
          top_reg:
            if (taicpu(p1).oper[i]^.reg=Reg) and
               (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
              exit(true);
          else
            ;
        end;
    end;
  { Steps forward from Current with GetNextInstruction and leaves in Next the
    entry at which the walk stopped. The walk stops as soon as
      - GetNextInstruction fails, or
      - cs_opt_level3 is not enabled (so at most one step is taken), or
      - the entry reached is not an instruction, or
      - the instruction reached uses reg (RegInInstruction), or
      - the instruction reached is a call or jump (is_calljmp).
    The result is the result of the last GetNextInstruction call; callers
    still have to check what kind of entry Next is. }
  function TRVCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next := Current;
      while true do
        begin
          Result := GetNextInstruction(Next, Next);
          if not Result then
            break;
          if not (cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ <> ait_instruction then
            break;
          if RegInInstruction(reg, Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Tries to fold a register copy that directly follows p into p itself.

      replace
        <Op>   %reg3,%reg2,%reg1
        addi   %reg4,%reg3,0
        dealloc %reg3
      by
        <Op>   %reg4,%reg2,%reg1

    The fold is only done when %reg3 is not used after the addi.
    Returns true when the addi was removed and p was changed. }
  function TRVCpuAsmOptimizer.OptPass1OP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_ADDI) and
        { the third operand of an addi may also be a reference, so make sure
          it really is a constant before looking at its value }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg,hp1,TmpUsedRegs)) then
            begin
              { let p write directly into the destination of the addi }
              taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
              DebugMsg('Peephole OpAddi02Op done',p);
              RemoveInstruction(hp1);
              result:=true;
            end;
        end;
    end;
  { CPU specific part of the first peephole pass.

    Looks at the instruction p and, depending on its opcode, tries to merge it
    with the next instruction found by GetNextInstructionUsingReg for p's
    destination register. When a pattern matches, that later instruction is
    rewritten, p is removed (p then refers to a neighbouring instruction, see
    RemoveInstr) and true is returned. Otherwise nothing is changed and false
    is returned. }
  function TRVCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;

    { Removes orig from the list and frees it. Afterwards orig refers to the
      previous instruction if moveback is set and there is one, otherwise to
      the next instruction as delivered by GetNextInstruction. }
    procedure RemoveInstr(var orig: tai; moveback: boolean = true);
      var
        n: tai;
      begin
        n:=nil;
        { also covers moveback=false, which previously left n uninitialised }
        if not (moveback and GetLastInstruction(orig,n)) then
          GetNextInstruction(orig,n);

        AsmL.Remove(orig);
        orig.Free;

        orig:=n;
      end;

    { True when hp is a store instruction whose stored value (operand 0) is
      reg. Such a store still needs the value computed into reg, so the
      instruction computing reg must not be folded into its address. }
    function StoresReg(hp: tai; reg: TRegister): boolean;
      begin
        result:=
          (taicpu(hp).opcode in [A_SB,A_SH,A_SW{$ifdef riscv64},A_SD{$endif}]) and
          MatchOperand(taicpu(hp).oper[0]^,reg);
      end;

    { Replaces the bne/beq condition of branch, which tested the result of
      the slt/sltu instruction setinstr against X0, by the condition that
      compares the slt/sltu inputs directly:
        sltu: bne -> ltu, beq -> geu
        slt:  bne -> lt,  beq -> ge }
    procedure SetCompareCondition(branch, setinstr: taicpu);
      begin
        if setinstr.opcode=A_SLTU then
          begin
            if branch.condition=C_NE then
              branch.condition:=C_LTU
            else
              branch.condition:=C_GEU;
          end
        else
          begin
            if branch.condition=C_NE then
              branch.condition:=C_LT
            else
              branch.condition:=C_GE;
          end;
      end;

    var
      hp1: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                begin
                  {
                    Changes
                      addi x, y, #
                      addi/addiw z, x, #
                      dealloc x
                    To
                      addi/addiw z, y, #+#
                    (the second instruction keeps its opcode)
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,[A_ADDI{$ifdef riscv64},A_ADDIW{$endif}]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

                      DebugMsg('Peephole AddiAddi2Addi performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, x, (ref)
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, (ref)(x)
                    Not done for a store of x itself, because the stored
                    value would then no longer include the addition.
                  }
                  { NOTE(review): only the offset of the load/store reference
                    is checked to be 0; any other field of that reference is
                    replaced by loadref - confirm it can carry nothing else }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_ref) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                            A_SB,A_SH,A_SW{$ifdef riscv64},A_LD,A_LWU,A_SD{$endif}]) and
                     (taicpu(hp1).ops=2) and
                     (not StoresReg(hp1,taicpu(p).oper[0]^.reg)) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, z, #w
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, #w(z)
                    x and z may be different registers here.
                    Not done for a store of x itself, because x would no
                    longer be computed.
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,
                                            A_SB,A_SH,A_SW{$ifdef riscv64},A_LWU,A_LD,A_SD{$endif}]) and
                     (taicpu(hp1).ops=2) and
                     (not StoresReg(hp1,taicpu(p).oper[0]^.reg)) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu/beq X0,x,...
                      dealloc x
                    Into
                      beq y,z,...
                    (0 >= x unsigned holds exactly when x = 0, i.e. y = z)
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
                     MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBxx2Beq performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLT,
              A_SLTU:
                begin
                  {
                    Turn
                      slt/sltu x,X0,y
                      bne/beq x, X0, ...
                      dealloc x
                    Into
                      blt(u)/bge(u) X0, y, ...
                  }
                  if (taicpu(p).ops=3) and
                     MatchOperand(taicpu(p).oper[1]^,NR_X0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,NR_X0);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      SetCompareCondition(taicpu(hp1),taicpu(p));

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Turn
                      slt/sltu x,y,z
                      bne/beq x, X0, ...
                      dealloc x
                    Into
                      blt(u)/bge(u) y, z, ...
                  }
                  else if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      SetCompareCondition(taicpu(hp1),taicpu(p));

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/bne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...
                    (x is 1 exactly when y = 0, so the condition is inverted)
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=1) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SRLI,
              A_SLLI:
                result:=OptPass1OP(p);
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/bne x,x0,...
                      dealloc x
                    Into
                      bge/blt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,NR_X0);

                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  459. end.