aoptcpu.pas 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the RiscV64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  28. type
  { CPU specific assembler optimizer; extends TAsmOptimizer with the
    peephole helpers implemented in this unit }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { true if hp is an instruction that reads reg, either directly or as
      the base register of a memory reference }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    { true if hp is an instruction whose first operand is reg and is not
      a pure read }
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    { walks forward from Current and returns the instruction it stopped at
      in Next }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    { CPU specific first peephole pass }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  end;
  37. implementation
  38. uses
  39. cutils;
  { Returns true when instr is an instruction whose opcode is a member of
    op and, unless AConditions is empty, whose condition is a member of
    AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      { only real instructions carry an opcode/condition }
      if instr.typ<>ait_instruction then
        exit;
      if not (taicpu(instr).opcode in op) then
        exit;
      { an empty condition set means "any condition" }
      result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Single-opcode variant: returns true when instr is an instruction with
    opcode op and, unless AConditions is empty, a condition contained in
    AConditions. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        if taicpu(instr).opcode=op then
          { an empty condition set accepts every condition }
          result:=(AConditions=[]) or (taicpu(instr).condition in AConditions);
    end;
  { Returns true when both operands have the same type and the same
    payload. Only constant and register operands are compared; every other
    operand type (including references) yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ=oper2.typ then
        case oper1.typ of
          top_reg:
            result:=oper1.reg=oper2.reg;
          top_const:
            result:=oper1.val=oper2.val;
          {top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);}
          else
            ;
        end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      result:=false;
      if oper.typ=top_reg then
        result:=oper.reg=reg;
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined. Without that define the body is empty and the routine is
    marked inline. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  {$ifndef DEBUG_AOPTCPU}
    inline;
  {$endif not DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true when hp is an instruction that reads reg: either a
    register operand equal to reg that is not write-only, or a memory
    reference whose base register is reg. Returns false for nil or for
    anything that is not an instruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      insn: taicpu;
      idx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;

      insn:=taicpu(hp);
      for idx:=0 to insn.ops-1 do
        begin
          case insn.oper[idx]^.typ of
            top_reg:
              result:=(insn.oper[idx]^.reg=reg) and
                      (insn.spilling_get_operation_type(idx)<>operand_write);
            top_ref:
              result:=insn.oper[idx]^.ref^.base=reg;
            else
              ;
          end;
          { the first operand that reads reg settles the answer }
          if result then
            break;
        end;
    end;
  { Returns true when hp is an instruction with more than one operand
    whose first operand is the register reg and whose operation type for
    that operand is anything but a pure read. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      insn: taicpu;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;

      insn:=taicpu(hp);
      if (insn.ops>1) and
         (insn.oper[0]^.typ=top_reg) and
         (insn.oper[0]^.reg=reg) then
        result:=insn.spilling_get_operation_type(0)<>operand_read;
    end;
  { Advances from Current with GetNextInstruction and leaves the entry it
    stopped at in Next. The walk stops as soon as one of the following
    holds:
      - GetNextInstruction fails (the function then returns false),
      - cs_opt_level3 is not enabled (so only one step is taken),
      - Next is not an instruction,
      - Next references reg,
      - Next is a call or jump.
    In every case but the first the function returns true, so callers
    still have to inspect Next themselves. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not (cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { CPU specific peephole pass 1.

    Looks at the instruction p and, for ADDI, SUB, SLTU, SLTIU and SLTI,
    tries to fold it into the next instruction that uses its destination
    register (as found by GetNextInstructionUsingReg). When a pattern
    matches, the following instruction is rewritten, p is removed from the
    list and p is repositioned by RemoveInstr.

    Returns true when a rewrite was performed, false otherwise. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;

    { Removes orig from the assembler list and frees it. Afterwards orig
      refers to the previous instruction when moveback is true and one
      exists, and to the following instruction otherwise. }
    procedure RemoveInstr(var orig: tai; moveback: boolean = true);
      var
        n: tai;
      begin
        { always obtain a replacement position: the former code left n
          uninitialised when moveback was false }
        if not (moveback and GetLastInstruction(orig,n)) then
          GetNextInstruction(orig,n);

        AsmL.Remove(orig);
        orig.Free;

        orig:=n;
      end;

    var
      hp1: tai;
    begin
      result:=false;

      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                begin
                  {
                    Changes
                      addi x, y, #
                      addi/addiw z, x, #
                      dealloc x
                    To
                      addi/addiw z, y, #+#
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,[A_ADDI,A_ADDIW]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                     { the immediate of the second instruction must be a
                       plain constant too, its val is read below }
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

                      DebugMsg('Peephole AddiAddi2Addi performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, x, (ref)
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, 0(ref)(x)
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_ref) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_LWU,A_LD,
                                            A_SB,A_SH,A_SW,A_SD]) and
                     (taicpu(hp1).ops=2) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     { a store of x itself would write the value x had
                       before the addi once the addi is removed }
                     not (MatchInstruction(hp1,[A_SB,A_SH,A_SW,A_SD]) and
                          MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg)) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu/beq X0,x,...
                      dealloc x
                    Into
                      beq y,z,...
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
                     MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      { 0>=x (unsigned) and 0=x both mean y=z }
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBxx2Beq performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTU:
                begin
                  {
                    Turn
                      sltu x,X0,y
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bltu/geu X0, y, ...
                  }
                  if (taicpu(p).ops=3) and
                     MatchOperand(taicpu(p).oper[1]^,NR_X0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,NR_X0);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      { x<>0 means 0<y, x=0 means 0>=y (both unsigned) }
                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LTU
                      else
                        taicpu(hp1).condition:=C_GEU;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bne/eq y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=1) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      { x is set exactly when y=0, so testing y directly
                        needs the opposite condition }
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bge/lt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,NR_X0);

                      { x<>0 means y<0, x=0 means y>=0 (signed) }
                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              else
                ;
            end;
          end;
        else
          ;
      end;
    end;
  { unit initialization: assigns TCpuAsmOptimizer to casmoptimizer }
  initialization
    casmoptimizer:=TCpuAsmOptimizer;
  end.