aoptcpu.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses
  22. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai;
  23. Type
  24. TCpuAsmOptimizer = class(TAsmOptimizer)
  25. function GetNextInstructionUsingReg(Current: tai;
  26. var Next: tai; reg: TRegister): Boolean;
  27. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  28. var AllUsedRegs: TAllUsedRegs): Boolean;
  29. function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  30. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  31. End;
  32. Implementation
  33. uses
  34. globals,aasmbase,aasmcpu,cpuinfo,verbose;
  35. function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
  36. begin
  37. result :=
  38. (instr.typ = ait_instruction) and
  39. (taicpu(instr).opcode = op);
  40. end;
  41. function MatchOperand(const oper: TOper; reg: TRegister): boolean;
  42. begin
  43. result:=(oper.typ=top_reg) and (oper.reg=reg);
  44. end;
  45. function IsSameReg(this,next: taicpu): boolean;
  46. begin
  47. result:=(next.oper[0]^.typ=top_reg) and
  48. (next.oper[1]^.typ=top_reg) and
  49. (next.oper[0]^.reg=next.oper[1]^.reg) and
  50. (next.oper[0]^.reg=this.oper[0]^.reg);
  51. end;
  52. function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  53. var
  54. p: taicpu;
  55. begin
  56. p:=taicpu(hp);
  57. result:=false;
  58. if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
  59. exit;
  60. case p.opcode of
  61. { These instructions do not write into a register at all }
  62. A_NOP,
  63. A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
  64. A_BA,A_BC,
  65. A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
  66. exit;
  67. end;
  68. result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and
  69. (p.oper[0]^.reg=reg);
  70. end;
  71. function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  72. var
  73. p: taicpu;
  74. i: longint;
  75. begin
  76. result:=false;
  77. if not (assigned(hp) and (hp.typ=ait_instruction)) then
  78. exit;
  79. p:=taicpu(hp);
  80. i:=1;
  81. while(i<p.ops) do
  82. begin
  83. case p.oper[I]^.typ of
  84. top_reg:
  85. result:=(p.oper[I]^.reg=reg) and (I<2);
  86. top_ref:
  87. result:=
  88. (p.oper[I]^.ref^.base=reg) or
  89. (p.oper[I]^.ref^.index=reg);
  90. end;
  91. if result then exit; {Bailout if we found something}
  92. Inc(I);
  93. end;
  94. end;
  95. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  96. var Next: tai; reg: TRegister): Boolean;
  97. begin
  98. Next:=Current;
  99. repeat
  100. Result:=GetNextInstruction(Next,Next);
  101. until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
  102. (is_calljmp(taicpu(Next).opcode));
  103. if Result and is_calljmp(taicpu(next).opcode) then
  104. begin
  105. result:=false;
  106. next:=nil;
  107. end;
  108. end;
  109. function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
  110. var AllUsedRegs: TAllUsedRegs): Boolean;
  111. begin
  112. AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
  113. RegUsedAfterInstruction :=
  114. AllUsedRegs[getregtype(reg)].IsUsed(reg) and
  115. not(regLoadedWithNewValue(reg,p)) and
  116. (
  117. not(GetNextInstruction(p,p)) or
  118. instructionLoadsFromReg(reg,p) or
  119. not(regLoadedWithNewValue(reg,p))
  120. );
  121. end;
  122. function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  123. var
  124. next,hp1: tai;
  125. alloc,dealloc: tai_regalloc;
  126. begin
  127. { Fold
  128. op $reg1,...
  129. opcode $reg2,$reg1
  130. dealloc $reg1
  131. into
  132. op $reg2,...
  133. opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
  134. }
  135. result:=false;
  136. if (taicpu(p).ops>1) and
  137. GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  138. MatchInstruction(next,opcode) and
  139. MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
  140. { the destination register of mov cannot be used between p and next }
  141. (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
  142. begin
  143. dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
  144. if assigned(dealloc) then
  145. begin
  146. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  147. and remove it if possible }
  148. GetLastInstruction(p,hp1);
  149. asml.Remove(dealloc);
  150. alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  151. if assigned(alloc) then
  152. begin
  153. asml.Remove(alloc);
  154. alloc.free;
  155. dealloc.free;
  156. end
  157. else
  158. asml.InsertAfter(dealloc,p);
  159. { try to move the allocation of the target register }
  160. GetLastInstruction(next,hp1);
  161. alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next));
  162. if assigned(alloc) then
  163. begin
  164. asml.Remove(alloc);
  165. asml.InsertBefore(alloc,p);
  166. { adjust used regs }
  167. IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs);
  168. end;
  169. { finally get rid of the mov }
  170. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  171. asml.remove(next);
  172. next.free;
  173. end;
  174. end;
  175. end;
  176. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  177. var
  178. next,next2: tai;
  179. TmpUsedRegs: TAllUsedRegs;
  180. begin
  181. result:=false;
  182. case p.typ of
  183. ait_instruction:
  184. begin
  185. case taicpu(p).opcode of
  186. A_SLL:
  187. begin
  188. { if this is a sign extension... }
  189. if (taicpu(p).oper[2]^.typ=top_const) and
  190. GetNextInstruction(p,next) and
  191. MatchInstruction(next,A_SRA) and
  192. IsSameReg(taicpu(p),taicpu(next)) and
  193. (taicpu(next).oper[2]^.typ=top_const) and
  194. (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
  195. (taicpu(next).oper[2]^.val=16) and
  196. { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
  197. GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
  198. MatchInstruction(next2,A_SH) and
  199. (taicpu(next2).oper[0]^.typ=top_reg) and
  200. (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  201. { the initial register may not be reused }
  202. (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
  203. begin
  204. CopyUsedRegs(TmpUsedRegs);
  205. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  206. UpdateUsedRegs(TmpUsedRegs, tai(next.next));
  207. if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then
  208. begin
  209. taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
  210. asml.remove(p);
  211. asml.remove(next);
  212. p.free;
  213. next.free;
  214. p:=next2;
  215. end;
  216. ReleaseUsedRegs(TmpUsedRegs);
  217. end
  218. else
  219. TryRemoveMov(p,A_MOVE);
  220. end;
  221. A_SRL:
  222. begin
  223. { Remove 'andi' in sequences
  224. srl Rx,Ry,16
  225. andi Rx,Rx,65535
  226. srl Rx,Ry,24
  227. andi Rx,Rx,255
  228. since 'srl' clears all relevant upper bits }
  229. if (taicpu(p).oper[2]^.typ=top_const) and
  230. GetNextInstruction(p,next) and
  231. MatchInstruction(next,A_ANDI) and
  232. IsSameReg(taicpu(p),taicpu(next)) and
  233. (taicpu(next).oper[2]^.typ=top_const) and
  234. ((
  235. (taicpu(p).oper[2]^.val>=16) and
  236. (taicpu(next).oper[2]^.val=65535)
  237. ) or (
  238. (taicpu(p).oper[2]^.val>=24) and
  239. (taicpu(next).oper[2]^.val=255)
  240. )) then
  241. begin
  242. asml.remove(next);
  243. next.free;
  244. end
  245. else
  246. TryRemoveMov(p,A_MOVE);
  247. end;
  248. A_ANDI:
  249. begin
  250. { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
  251. if (taicpu(p).oper[2]^.typ=top_const) and
  252. GetNextInstruction(p,next) and
  253. MatchInstruction(next,A_SLL) and
  254. GetNextInstruction(next,next2) and
  255. MatchInstruction(next2,A_SRA) and
  256. IsSameReg(taicpu(p),taicpu(next)) and
  257. IsSameReg(taicpu(p),taicpu(next2)) and
  258. (taicpu(next).oper[2]^.typ=top_const) and
  259. (taicpu(next2).oper[2]^.typ=top_const) and
  260. (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
  261. ((
  262. (taicpu(p).oper[2]^.val<=$7fff) and
  263. (taicpu(next).oper[2]^.val=16)
  264. ) or (
  265. (taicpu(p).oper[2]^.val<=$7f) and
  266. (taicpu(next).oper[2]^.val=24)
  267. )) then
  268. begin
  269. asml.remove(next);
  270. asml.remove(next2);
  271. next.free;
  272. next2.free;
  273. end
  274. { Remove zero extension if register is used only for byte/word memory store }
  275. else if (taicpu(p).oper[2]^.typ=top_const) and
  276. GetNextInstruction(p,next) and
  277. ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
  278. ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and
  279. (taicpu(next).oper[0]^.typ=top_reg) and
  280. (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
  281. begin
  282. CopyUsedRegs(TmpUsedRegs);
  283. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  284. if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then
  285. begin
  286. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  287. asml.remove(p);
  288. p.free;
  289. p:=next;
  290. end;
  291. ReleaseUsedRegs(TmpUsedRegs);
  292. end
  293. else
  294. TryRemoveMov(p,A_MOVE);
  295. end;
  296. A_ADD,A_ADDU,
  297. A_ADDI,A_ADDIU,
  298. A_SUB,A_SUBU,
  299. A_SRA,A_SRAV,
  300. A_SRLV,
  301. A_SLLV,
  302. A_AND,A_OR,A_XOR,A_ORI,A_XORI:
  303. TryRemoveMov(p,A_MOVE);
  304. A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
  305. A_ABS_s, A_NEG_s, A_SQRT_s,
  306. A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
  307. TryRemoveMov(p,A_MOV_s);
  308. A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
  309. A_ABS_d, A_NEG_d, A_SQRT_d,
  310. A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
  311. TryRemoveMov(p,A_MOV_d);
  312. end;
  313. end;
  314. end;
  315. end;
  316. begin
  317. casmoptimizer:=TCpuAsmOptimizer;
  318. end.