aoptcpu.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the RiscV64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  type
    { CPU-specific assembler optimizer of this unit (the unit header describes
      it as the RiscV64 optimizer object); extends TAsmOptimizer. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { True when instruction hp uses reg as a non-write register operand or
        as the base register of a reference operand. }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { True when hp is an instruction with more than one operand whose
        operand 0 is reg and is not a pure read operand. }
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;

      { Steps forward from Current and returns in Next the entry at which the
        search stopped; the result is that of the last GetNextInstruction call. }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { CPU-specific peephole patterns of pass 1; returns true when p was
        changed. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    end;
  37. implementation
  38. uses
  39. cutils;
  {$ifndef DEBUG_AOPTCPU}
  { Release variant: debug output is disabled, so this is an empty stub that
    is marked inline. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string; p: tai); inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { Debug variant: places an assembler comment holding a copy of s directly
    in front of p in the current assembler list. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string; p: tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Reports whether instruction hp reads register reg.

    An operand counts as a read when it is
      - a register operand equal to reg whose operation type is not a pure
        write, or
      - a reference operand whose base register is reg.
    Returns false when hp is nil or is not an instruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { scan the operands in order and stop at the first one that reads reg }
      for opidx:=0 to instr.ops-1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            if (instr.oper[opidx]^.reg=reg) and
               (instr.spilling_get_operation_type(opidx)<>operand_write) then
              exit(true);
          top_ref:
            if instr.oper[opidx]^.ref^.base=reg then
              exit(true);
          else
            { other operand kinds never count as a read of reg }
            ;
        end;
    end;
  { Reports whether hp overwrites reg.

    This holds when hp is an instruction with more than one operand, its
    operand 0 is the register reg, and that operand is not a pure read
    operand. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      if instr.ops<=1 then
        exit;
      if (instr.oper[0]^.typ<>top_reg) or (instr.oper[0]^.reg<>reg) then
        exit;
      result:=instr.spilling_get_operation_type(0)<>operand_read;
    end;
  { Walks forward from Current with GetNextInstruction and leaves the stopping
    point in Next.

    At least one step is always taken. Further steps are taken only while all
    of the following hold: the last step succeeded, cs_opt_level3 is enabled,
    Next is an instruction, reg does not occur in Next, and Next is not a
    call/jump. The result is the outcome of the last GetNextInstruction
    call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=GetNextInstruction(Next,Next);
      while Result and
            (cs_opt_level3 in current_settings.optimizerswitches) and
            (Next.typ=ait_instruction) and
            (not RegInInstruction(reg,Next)) and
            (not is_calljmp(taicpu(Next).opcode)) do
        Result:=GetNextInstruction(Next,Next);
    end;
  { CPU-specific peephole patterns of optimization pass 1.

    p is the entry to inspect. When a pattern matches, the instruction at p is
    folded into a later instruction (hp1), p is removed from the assembler
    list and freed, p is set to the entry from which to continue, and the
    result is true. When nothing matches, p is left alone and the result is
    false.

    Every pattern requires that the register defined by p ends its life at
    hp1 and that the source registers of p are not modified between p and
    hp1. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                begin
                  {
                    Changes
                      addi x, y, #
                      addi z, x, #
                      dealloc x
                    To
                      addi z, y, #+#
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode=A_ADDI) and
                     (taicpu(hp1).ops=3) and
                     (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[1]^.reg) and
                     { the second addi must carry a constant as well: its
                       operand 2 is read through .val below }
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     { the combined immediate has to fit a 12 bit immediate }
                     is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

                      DebugMsg('Peephole AddiAddi2Addi performed', hp1);

                      { continue with the entry following the removed addi }
                      GetNextInstruction(p,hp1);
                      AsmL.Remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end
                  {
                    Changes
                      addi x, x, (ref)
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, ref(x)
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_ref) and
                     (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_LWU,A_LD,
                                             A_SB,A_SH,A_SW,A_SD]) and
                     (taicpu(hp1).ops=2) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     { a store whose stored value is x itself must keep the
                       addi: after folding it would write the un-adjusted x }
                     ((not (taicpu(hp1).opcode in [A_SB,A_SH,A_SW,A_SD])) or
                      (taicpu(hp1).oper[0]^.typ<>top_reg) or
                      (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg)) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      { take over the reference of the addi, then restore x as
                        the base register of the memory access }
                      taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[0]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      GetNextInstruction(p,hp1);
                      AsmL.Remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end;
                end;
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu X0,x,...
                      dealloc x
                    Into
                      beq y,z,...

                    "0 >= x" as unsigned comparison only holds for x = 0,
                    i.e. for y = z. The debug message keeps its historical
                    name.
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode=A_Bxx) and
                     (taicpu(hp1).ops=3) and
                     (taicpu(hp1).oper[0]^.typ=top_reg) and
                     (taicpu(hp1).oper[0]^.reg=NR_X0) and
                     (taicpu(hp1).oper[1]^.typ=top_reg) and
                     (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).condition=C_GEU) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBgeu2Bne performed', hp1);

                      GetNextInstruction(p,hp1);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end;
                end;
              A_SLTU:
                begin
                  {
                    Turn
                      sltu x,X0,y
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bgeu/bltu X0, y, ...

                    (bne becomes bltu, beq becomes bgeu)
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[1]^.typ=top_reg) and
                     (taicpu(p).oper[1]^.reg=NR_X0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode=A_Bxx) and
                     (taicpu(hp1).ops=3) and
                     (taicpu(hp1).oper[0]^.typ=top_reg) and
                     (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.typ=top_reg) and
                     (taicpu(hp1).oper[1]^.reg=NR_X0) and
                     (taicpu(hp1).condition in [C_NE,C_EQ]) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,NR_X0);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LTU
                      else
                        taicpu(hp1).condition:=C_GEU;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      { continue from what GetLastInstruction yields and fall
                        back to GetNextInstruction when it fails }
                      if not GetLastInstruction(p,hp1) then
                        GetNextInstruction(p,hp1);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end;
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/bne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...

                    x is 1 exactly when y = 0, so the branch condition is
                    inverted (beq becomes bne, bne becomes beq).
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=1) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode=A_Bxx) and
                     (taicpu(hp1).ops=3) and
                     (taicpu(hp1).oper[0]^.typ=top_reg) and
                     (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.typ=top_reg) and
                     (taicpu(hp1).oper[1]^.reg=NR_X0) and
                     (taicpu(hp1).condition in [C_NE,C_EQ]) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      if not GetLastInstruction(p,hp1) then
                        GetNextInstruction(p,hp1);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end;
                end;
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/bne x,x0,...
                      dealloc x
                    Into
                      bge/blt y,x0,...

                    x is 1 exactly when y < 0 (signed), so bne becomes blt
                    and beq becomes bge.
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     (hp1.typ=ait_instruction) and
                     (taicpu(hp1).opcode=A_Bxx) and
                     (taicpu(hp1).ops=3) and
                     (taicpu(hp1).oper[0]^.typ=top_reg) and
                     (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.typ=top_reg) and
                     (taicpu(hp1).oper[1]^.reg=NR_X0) and
                     (taicpu(hp1).condition in [C_NE,C_EQ]) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      if taicpu(hp1).condition=C_NE then
                        begin
                          taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                          taicpu(hp1).loadreg(1,NR_X0);
                          taicpu(hp1).condition:=C_LT;
                        end
                      else
                        begin
                          taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                          taicpu(hp1).loadreg(1,NR_X0);
                          taicpu(hp1).condition:=C_GE;
                        end;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      if not GetLastInstruction(p,hp1) then
                        GetNextInstruction(p,hp1);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;

                      result:=true;
                    end;
                end;
            end;
          end;
      end;
    end;
  { Unit initialization: assigns this unit's optimizer class to
    casmoptimizer. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
  end.