aoptcpu.pas 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the RiscV64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. interface
  20. {$I fpcdefs.inc}
  21. {$define DEBUG_AOPTCPU}
  22. uses
  23. cpubase,
  24. globals, globtype,
  25. cgbase,
  26. aoptobj, aoptcpub, aopt,
  27. aasmtai, aasmcpu;
  type
    { CPU-specific assembler optimizer for this target; extends the generic
      TAsmOptimizer with register-usage queries and a pass-1 peephole hook. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { True when instruction hp reads reg, either as a non-write register
        operand or as the base register of a memory reference. }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
      { True when hp is an instruction with more than one operand whose first
        operand is reg and is not a pure read. }
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      { Walks forward from Current and returns in Next the instruction the
        search stopped at; see the implementation for the stop conditions. }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;
      { Emits s as a comment in front of p in the assembler list (a no-op when
        DEBUG_AOPTCPU is not defined). }
      procedure DebugMsg(const s: string; p: tai);
      { Pass-1 peephole optimizations for the instruction at p. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    end;
  37. implementation
  38. uses
  39. cutils;
  { Returns true when instr is an instruction whose opcode is a member of op
    and, if AConditions is non-empty, whose condition is a member of
    AConditions. An empty AConditions accepts any condition. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const AConditions: TAsmConds = []): boolean;
    var
      cpuinstr: taicpu;
    begin
      result:=false;
      { Anything that is not an instruction can never match. }
      if instr.typ<>ait_instruction then
        exit;
      cpuinstr:=taicpu(instr);
      if not (cpuinstr.opcode in op) then
        exit;
      result:=(AConditions=[]) or (cpuinstr.condition in AConditions);
    end;
  { Single-opcode variant: returns true when instr is an instruction with
    opcode op and, if AConditions is non-empty, with a condition contained in
    AConditions. An empty AConditions accepts any condition. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const AConditions: TAsmConds = []): boolean;
    var
      cpuinstr: taicpu;
    begin
      result:=false;
      { Anything that is not an instruction can never match. }
      if instr.typ<>ait_instruction then
        exit;
      cpuinstr:=taicpu(instr);
      if cpuinstr.opcode<>op then
        exit;
      result:=(AConditions=[]) or (cpuinstr.condition in AConditions);
    end;
  { Compares two operands. They match when both are constants with the same
    value or both are registers naming the same register. Every other operand
    kind - including memory references, which are not compared here - is
    reported as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        result:=false
      else
        case oper1.typ of
          top_reg:
            result:=(oper1.reg=oper2.reg);
          top_const:
            result:=(oper1.val=oper2.val);
          else
            result:=false;
        end;
    end;
  { Returns true when oper is a register operand that names exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: inserts a comment node carrying a copy of s into the
    assembler list, directly in front of p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note,p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug build: intentionally does nothing. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Reports whether instruction hp reads reg.

    An operand counts as a read of reg when it is
      - a register operand equal to reg whose operation type is not
        operand_write, or
      - a memory reference whose base register is reg.
    Returns false when hp is nil or is not an instruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for opidx:=0 to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              result:=(instr.oper[opidx]^.reg=reg) and
                      (instr.spilling_get_operation_type(opidx)<>operand_write);
            top_ref:
              result:=(instr.oper[opidx]^.ref^.base=reg);
            else
              { other operand kinds never read a register here }
              ;
          end;
          { Stop at the first operand that reads reg. }
          if result then
            exit;
        end;
    end;
  { Reports whether hp overwrites reg: hp must be an instruction with more than
    one operand whose first operand is the register reg and whose operation
    type for that operand is anything other than operand_read. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      if instr.ops<=1 then
        exit;
      if instr.oper[0]^.typ<>top_reg then
        exit;
      result:=(instr.oper[0]^.reg=reg) and
              (instr.spilling_get_operation_type(0)<>operand_read);
    end;
  { Advances from Current with GetNextInstruction and stores the stopping
    point in Next. The walk stops as soon as one of the following holds:
      - GetNextInstruction fails (the function then returns false),
      - cs_opt_level3 is not among the active optimizer switches, so only the
        immediately following item is considered,
      - the item found is not an instruction,
      - the instruction mentions reg (RegInInstruction),
      - the instruction's opcode is a call or jump (is_calljmp).
    The result is the value of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not (cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Pass-1 peephole optimizations.

    Looks at the instruction at p together with the next instruction that
    uses p's destination register (hp1, found via GetNextInstructionUsingReg)
    and, when one of the patterns below matches, folds p into hp1 and removes
    p from the assembler list.

    p      : on entry the item to examine; when an optimization fires, p is
             removed and freed and the variable is repositioned by RemoveInstr.
    result : true when the instruction stream was changed, false otherwise. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;

    { Removes orig from the assembler list, frees it, and re-points orig at a
      neighbouring instruction: the previous one when moveback is set and a
      previous instruction exists, otherwise the next one. }
    procedure RemoveInstr(var orig: tai; moveback: boolean = true);
      var
        n: tai;
      begin
        { n has to be assigned on every path because it is written back to
          orig below; with moveback=false we therefore go straight to the
          following instruction instead of leaving n undefined. }
        if not (moveback and GetLastInstruction(orig,n)) then
          GetNextInstruction(orig,n);

        AsmL.Remove(orig);
        orig.Free;

        orig:=n;
      end;

    var
      hp1: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_ADDI:
                begin
                  {
                    Changes
                      addi x, y, #
                      addi/addiw z, x, #
                      dealloc x
                    To
                      addi z, y, #+#
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,[A_ADDI,A_ADDIW]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                     { the second immediate must be a constant as well before
                       its .val may be read }
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     is_imm12(taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadconst(2, taicpu(p).oper[2]^.val+taicpu(hp1).oper[2]^.val);

                      DebugMsg('Peephole AddiAddi2Addi performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end
                  {
                    Changes
                      addi x, x, (ref)
                      ld/sd y, 0(x)
                      dealloc x
                    To
                      ld/sd y, 0(ref)(x)
                  }
                  else if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_ref) and
                     MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_LWU,A_LD,
                                            A_SB,A_SH,A_SW,A_SD]) and
                     (taicpu(hp1).ops=2) and
                     (taicpu(hp1).oper[1]^.typ=top_ref) and
                     (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                     (taicpu(hp1).oper[1]^.ref^.offset=0) and
                     { a store of x itself needs the adjusted value of x, which
                       no longer exists once the addi has been folded away }
                     (not (MatchInstruction(hp1,[A_SB,A_SH,A_SW,A_SD]) and
                           MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg))) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadref(1,taicpu(p).oper[2]^.ref^);
                      taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;

                      DebugMsg('Peephole AddiMem2Mem performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SUB:
                begin
                  {
                    Turn
                      sub x,y,z
                      bgeu/beq X0,x,...
                      dealloc x
                    Into
                      beq y,z,...

                    (0 >= x unsigned holds exactly when x = 0, i.e. y = z, so
                    both branch forms become an equality test.)
                  }
                  if (taicpu(p).ops=3) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_GEU,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,NR_X0) and
                     MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
                      taicpu(hp1).condition:=C_EQ;

                      DebugMsg('Peephole SubBxx2Beq performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTU:
                begin
                  {
                    Turn
                      sltu x,X0,y
                      beq/bne x, X0, ...
                      dealloc x
                    Into
                      bgeu/bltu X0, y, ...
                  }
                  if (taicpu(p).ops=3) and
                     MatchOperand(taicpu(p).oper[1]^,NR_X0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,NR_X0);
                      taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);

                      { x <> 0 means 0 < y (unsigned); x = 0 means 0 >= y }
                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LTU
                      else
                        taicpu(hp1).condition:=C_GEU;

                      DebugMsg('Peephole SltuB2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTIU:
                begin
                  {
                    Turn
                      sltiu x,y,1
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bne/beq y,x0,...

                    (x is 1 exactly when y = 0, so the branch condition is
                    inverted when testing y directly.)
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=1) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);

                      DebugMsg('Peephole Sltiu0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
              A_SLTI:
                begin
                  {
                    Turn
                      slti x,y,0
                      beq/ne x,x0,...
                      dealloc x
                    Into
                      bge/lt y,x0,...
                  }
                  if (taicpu(p).ops=3) and
                     (taicpu(p).oper[2]^.typ=top_const) and
                     (taicpu(p).oper[2]^.val=0) and
                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1,A_Bxx,[C_NE,C_EQ]) and
                     (taicpu(hp1).ops=3) and
                     MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                     MatchOperand(taicpu(hp1).oper[1]^,NR_X0) and
                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
                     RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                    begin
                      taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
                      taicpu(hp1).loadreg(1,NR_X0);

                      { x <> 0 means y < 0; x = 0 means y >= 0 }
                      if taicpu(hp1).condition=C_NE then
                        taicpu(hp1).condition:=C_LT
                      else
                        taicpu(hp1).condition:=C_GE;

                      DebugMsg('Peephole Slti0B2B performed', hp1);

                      RemoveInstr(p);

                      result:=true;
                    end;
                end;
            end;
          end;
      end;
    end;
  initialization
    { Store this unit's optimizer class in casmoptimizer when the unit is
      initialized. }
    casmoptimizer:=TCpuAsmOptimizer;
  end.