aoptcpu.pas 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the Xtensa optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. globtype, globals,
  26. cutils,
  27. cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
  type
    { CPU specific assembler optimizer: overrides hooks of TAsmOptimizer and
      declares the helpers used by the first peephole pass of this unit. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Reports whether jump optimizations may be performed (always true in
        this unit's implementation). }
      function CanDoJumpOpts: Boolean; override;

      { uses the same constructor as TAopObj }

      { True if hp is an instruction whose operand 0 is the register reg and
        whose opcode is not on the exclusion list in the implementation. }
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;

      { True if one of the read operands of instruction hp is reg, or is a
        reference using reg as base or index. }
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { Walks forward from Current and returns in Next the instruction at
        which the search stopped; see the implementation for stop conditions. }
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;

      { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is
        defined; does nothing otherwise. }
      procedure DebugMsg(const s: string; p: tai);

      { First peephole pass; returns true if p was changed. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    private
      { Tries to fold "p; mov dst, src" into p writing dst directly. }
      function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
    end;
  40. Implementation
  41. uses
  42. aasmbase,
  43. aoptutils,
  44. cgutils,
  45. verbose;
  { Emits the text s as an assembler comment in front of p when the unit is
    built with DEBUG_AOPTCPU; otherwise the routine is an empty inline stub. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string; p: tai);
  {$ifndef DEBUG_AOPTCPU}
    inline;
  {$endif DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true when p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      Result := False;
      { anything that is not an instruction is rejected outright }
      if p.typ <> ait_instruction then
        Exit;
      Result := taicpu(p).condition = C_None;
    end;
  { Unconditionally reports that jump optimizations may be performed. }
  function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
    begin
      CanDoJumpOpts := True;
    end;
  { Compares two references field by field (offset, base, index, scale
    factor, symbol, refaddr, relsymbol).  References whose volatility set is
    not empty never compare equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := False;

      { a reference with any volatility flag never matches }
      if (r1.volatility <> []) or (r2.volatility <> []) then
        Exit;

      { registers and scaling }
      if (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        Exit;

      { symbolic parts }
      if (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) or
         (r1.refaddr <> r2.refaddr) then
        Exit;

      Result := r1.offset = r2.offset;
    end;
  { Returns true when instr is an instruction with opcode op and, if postfix
    is a non-empty set, its oppostfix is a member of that set.  An empty
    postfix set accepts every postfix. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      if instr.typ <> ait_instruction then
        Exit;
      if taicpu(instr).opcode <> op then
        Exit;
      if postfix = [] then
        Result := True
      else
        Result := taicpu(instr).oppostfix in postfix;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        Result := oper.reg = reg
      else
        Result := False;
    end;
  { Returns true when both operands have the same kind and the same payload:
    equal value for constants, equal register for registers, RefsEqual for
    references.  Every other operand kind yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result := False;
      { operands of different kinds can never match }
      if oper1.typ <> oper2.typ then
        Exit;
      case oper1.typ of
        top_const:
          Result := oper1.val = oper2.val;
        top_reg:
          Result := oper1.reg = oper2.reg;
        top_ref:
          Result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          { remaining operand kinds are not compared: keep false }
          ;
      end;
    end;
  { Steps forward from Current with GetNextInstruction and leaves the entry
    at which the walk stopped in Next.  The walk stops as soon as
      - GetNextInstruction fails (the result is then false),
      - cs_opt_level3 is not enabled (only one step is ever taken),
      - the entry found is not an instruction,
      - the entry found mentions reg (RegInInstruction), or
      - the entry found is a call or jump (is_calljmp).
    The result is the result of the last GetNextInstruction call, so callers
    still have to inspect Next themselves. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    var
      lookAhead: Boolean;
    begin
      lookAhead := cs_opt_level3 in current_settings.optimizerswitches;
      Next := Current;
      repeat
        Result := GetNextInstruction(Next, Next);
        { order matters: Next must only be inspected after a successful step }
        if (not Result) or (not lookAhead) or (Next.typ <> ait_instruction) then
          Break;
      until RegInInstruction(reg, Next) or
            is_calljmp(taicpu(Next).opcode);
    end;
  { Returns true when hp is an instruction with at least one operand, whose
    opcode is not one of the opcodes listed below, and whose operand 0 is the
    register reg.  In every other case the result is false. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      insn: taicpu;
    begin
      Result := False;
      if (hp = nil) or (hp.typ <> ait_instruction) then
        Exit;
      insn := taicpu(hp);
      if insn.ops <= 0 then
        Exit;
      case insn.opcode of
        A_B,
        A_SSI,A_SSIU,A_SSX,A_SSXU,
        A_S16I,A_S32C1I,A_S32E,A_S32I,A_S32RI,A_S8I:
          { for these opcodes operand 0 is never treated as newly loaded }
          ;
        else
          Result := (insn.oper[0]^.typ = top_reg) and
                    (insn.oper[0]^.reg = reg);
      end;
    end;
  { Returns true when hp is an instruction and one of its scanned operands is
    the register reg, or a reference that uses reg as base or index.  Operand
    0 is only scanned when spilling_get_operation_type(0) reports it as read
    or read/write; operands 1..ops-1 are always scanned. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      insn: taicpu;
      firstOp,
      opIdx: longint;
    begin
      Result := False;
      if (not Assigned(hp)) or (hp.typ <> ait_instruction) then
        Exit;
      insn := taicpu(hp);

      { Start on oper[0]? }
      if insn.spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
        firstOp := 0
      else
        firstOp := 1;

      for opIdx := firstOp to insn.ops - 1 do
        begin
          case insn.oper[opIdx]^.typ of
            top_reg:
              Result := insn.oper[opIdx]^.reg = reg;
            top_ref:
              Result := (insn.oper[opIdx]^.ref^.base = reg) or
                        (insn.oper[opIdx]^.ref^.index = reg);
            else
              ;
          end;
          { stop at the first operand that reads reg }
          if Result then
            Exit;
        end;
    end;
  { Folds the sequence
        p     ..., writes reg0
        movp  mov reg2, reg0
    into p writing reg2 directly and deletes movp.  The fold only happens when
    movp is a two operand MOV (postfix PF_None or PF_N) whose source is the
    register in operand 0 of p, reg2 is not used between p and movp, p really
    loads operand 0 (RegLoadedWithNewValue) and a deallocation of reg0 is
    found after movp.  optimizer is only used in the debug message.
    Returns true when the mov was removed. }
  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      regAlloc,
      regDealloc : tai_regalloc;
      prevInsn : tai;
      srcReg,
      dstReg : TRegister;
    begin
      Result:=false;

      if not MatchInstruction(movp, A_MOV, [PF_None,PF_N]) then
        exit;
      { We can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;

      srcReg:=taicpu(p).oper[0]^.reg;
      if not MatchOperand(taicpu(movp).oper[1]^, srcReg) then
        exit;

      { the destination register of the mov might not be used between p and movp }
      dstReg:=taicpu(movp).oper[0]^.reg;
      if RegUsedBetween(dstReg,p,movp) then
        exit;

      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          s*  reg0, [reg1]
          mov reg2, reg0
        will be optimized to
          s*  reg2, [reg1]
      }
      if not RegLoadedWithNewValue(srcReg, p) then
        exit;

      regDealloc:=FindRegDeAlloc(srcReg,tai(movp.Next));
      if not assigned(regDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { srcReg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regDealloc);
      regAlloc:=FindRegAllocBackward(srcReg,tai(p.previous));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but right after p }
        asml.InsertAfter(regDealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prevInsn);
      regAlloc:=FindRegAlloc(dstReg,tai(prevInsn.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          asml.InsertBefore(regAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstReg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstReg);
      asml.remove(movp);
      movp.free;
    end;
  { First peephole pass.  For ADD, ADDI, L32I, SRLI and SUB instructions it
    looks up the next instruction found by GetNextInstructionUsingReg for the
    register in operand 0 and tries to fold a following mov into p
    (RemoveSuperfluousMove).  Returns true when that fold was performed. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      follower: tai;
    begin
      Result := False;
      { only instructions are candidates }
      if p.typ <> ait_instruction then
        Exit;
      case taicpu(p).opcode of
        A_ADD,
        A_ADDI,
        A_L32I,
        A_SRLI,
        A_SUB:
          { the second call is only made when the first one succeeded }
          Result := GetNextInstructionUsingReg(p, follower, taicpu(p).oper[0]^.reg) and
                    RemoveSuperfluousMove(p, follower, 'DataMov2Data');
        else
          ;
      end;
    end;
  initialization
    { register this class as the assembler optimizer to use }
    casmoptimizer := TCpuAsmOptimizer;
  end.