2
0

aoptcpu.pas 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the Xtensa optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses
  23. globtype, globals,
  24. cutils,
  25. cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
  Type
    { CPU-specific assembler optimizer, derived from TAsmOptimizer.
      It overrides the register-usage queries and the first peephole pass
      and adds a few helpers used by the peephole code in this unit. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Tells the generic optimizer whether jump optimizations may be done }
      function CanDoJumpOpts: Boolean; override;

      { uses the same constructor as TAopObj }

      { True if instruction hp writes a new value into reg }
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;

      { True if instruction hp reads reg, directly or as part of a reference }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { Walks forward from Current and returns the instruction it stopped at
        in Next; see the implementation for the stop conditions }
      function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;

      { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is
        defined, does nothing otherwise }
      procedure DebugMsg(const s : string; p : tai);

      { CPU-specific part of peephole optimizer pass 1 }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    private
      { Folds "p; mov dst,src" into p writing dst directly when possible;
        optimizer is only used for the debug message }
      function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
    End;
  38. Implementation
  39. uses
  40. aasmbase,
  41. aoptutils,
  42. cgutils,
  43. verbose;
  { Debugging aid for the peephole optimizer.
    With DEBUG_AOPTCPU defined, the text s is inserted into the assembler
    list as a comment directly in front of p.  Without it the procedure has
    an empty body and is declared inline. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  {$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true when p is an instruction whose condition field is C_None;
    anything that is not an instruction yields false. }
  function CanBeCond(p : tai) : boolean;
    begin
      result:=false;
      if p.typ=ait_instruction then
        result:=taicpu(p).condition=C_None;
    end;
  { Jump optimizations are always allowed for this target. }
  function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
    begin
      CanDoJumpOpts:=true;
    end;
  { Compares two references field by field.
    The result is true only if neither reference carries any volatility
    flag and offset, base, index, scalefactor, symbol, refaddr and
    relsymbol are all identical. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { a reference with volatility flags never compares equal }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      if (r1.offset<>r2.offset) or (r1.base<>r2.base) then
        exit;
      if (r1.index<>r2.index) or (r1.scalefactor<>r2.scalefactor) then
        exit;
      if (r1.symbol<>r2.symbol) or (r1.refaddr<>r2.refaddr) then
        exit;
      Result:=r1.relsymbol=r2.relsymbol;
    end;
  { Checks whether instr is an instruction with opcode op.
    An empty postfix set accepts every postfix; otherwise the postfix of
    the instruction has to be a member of the set. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { True if both operands have the same type and the same contents.
    Constants are compared by value, registers by register and references
    with RefsEqual; every other operand type counts as different. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          Result:=oper1.val=oper2.val;
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_ref:
          Result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          ;
      end;
    end;
  { Steps forward from Current with GetNextInstruction and leaves the entry
    it stopped at in Next.  The walk ends as soon as one of these holds,
    tested in this order:
      - GetNextInstruction fails (the function then returns false),
      - cs_opt_level3 is not enabled, so only one step is taken,
      - Next is not an instruction,
      - reg occurs in Next,
      - Next is a call or jump.
    In all cases but the first the result is true, so the caller still has
    to verify that Next is the kind of instruction it is looking for. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not Result then
          break;
        if not(cs_opt_level3 in current_settings.optimizerswitches) then
          break;
        if Next.typ<>ait_instruction then
          break;
        if RegInInstruction(reg,Next) then
          break;
      until is_calljmp(taicpu(Next).opcode);
    end;
  { Returns true if hp is an instruction with at least one operand whose
    operand 0 is the register reg.  For A_B and the A_S* opcodes listed in
    the case statement the answer is always false, since operand 0 of those
    is not treated as a destination. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      Result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      if instr.ops<1 then
        exit;
      { opcodes that never count as loading operand 0 }
      case instr.opcode of
        A_B,
        A_SSI,A_SSIU,A_SSX,A_SSXU,
        A_S16I,A_S32C1I,A_S32E,A_S32I,A_S32RI,A_S8I:
          exit;
        else
          ;
      end;
      if instr.oper[0]^.typ=top_reg then
        Result:=instr.oper[0]^.reg=reg;
    end;
  { Returns true if instruction hp uses reg as a source: either as a plain
    register operand or as base/index register of a reference operand.
    Operand 0 is only inspected when spilling_get_operation_type reports it
    as read or read/write; otherwise the scan starts at operand 1. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      first,
      idx: longint;
    begin
      Result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { decide whether operand 0 takes part in the scan }
      if instr.spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
        first:=0
      else
        first:=1;
      for idx:=first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              Result:=instr.oper[idx]^.reg=reg;
            top_ref:
              Result:=(instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
            else
              ;
          end;
          { stop at the first hit }
          if Result then
            exit;
        end;
    end;
  { Tries to fold the sequence

        op  reg0, ...        (p)
        mov reg2, reg0       (movp)

    into "op reg2, ..." and to delete the mov.  p has to be an instruction
    whose operand 0 is a register.  optimizer is only used in the text of
    the debug message.  Returns true if the mov was removed; movp has been
    freed in that case and must not be used any longer. }
  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
      srcreg,
      dstreg : tregister;
    begin
      Result:=false;
      if not MatchInstruction(movp, A_MOV, [PF_None,PF_N]) then
        exit;
      { We can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;
      { the mov has to read the register written by p }
      srcreg:=taicpu(p).oper[0]^.reg;
      if not MatchOperand(taicpu(movp).oper[1]^, srcreg) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      dstreg:=taicpu(movp).oper[0]^.reg;
      if RegUsedBetween(dstreg,p,movp) then
        exit;
      { Take care to only do this for instructions which REALLY load to the
        first register.  Otherwise
          s* reg0, [reg1]
          mov reg2, reg0
        would be optimized to
          s* reg2, [reg1]
      }
      if not RegLoadedWithNewValue(srcreg, p) then
        exit;
      { the old register must be released after the mov, else it is still live }
      dealloc:=FindRegDeAlloc(srcreg,tai(movp.Next));
      if not assigned(dealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { srcreg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(dealloc);
      alloc:=FindRegAllocBackward(srcreg,tai(p.previous));
      if assigned(alloc) then
        begin
          asml.Remove(alloc);
          alloc.free;
          dealloc.free;
        end
      else
        asml.InsertAfter(dealloc,p);

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,hp1);
      alloc:=FindRegAlloc(dstreg,tai(hp1.Next));
      if assigned(alloc) then
        begin
          asml.Remove(alloc);
          asml.InsertBefore(alloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstreg,UsedRegs);
        end;

      { finally let p write the target directly and get rid of the mov }
      taicpu(p).loadreg(0,dstreg);
      asml.remove(movp);
      movp.free;
    end;
  { CPU-specific peephole pass 1.
    For A_ADD, A_ADDI, A_L32I, A_SRLI and A_SUB it looks for the instruction
    found by GetNextInstructionUsingReg for the result register and tries to
    merge a following mov into p (RemoveSuperfluousMove).  Returns true if
    the assembler list was changed. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      nextinstr: tai;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_ADD,
        A_ADDI,
        A_L32I,
        A_SRLI,
        A_SUB:
          Result:=GetNextInstructionUsingReg(p, nextinstr, taicpu(p).oper[0]^.reg) and
            RemoveSuperfluousMove(p, nextinstr, 'DataMov2Data');
        else
          ;
      end;
    end;
  { Unit initialization: register this class as the assembler optimizer
    class to be used. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
  end.