aoptarm.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object shared by ARM and AArch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { while ARM and AArch64 do not look very similar at first glance,
  27. several optimizations can be shared between both }
  TARMAsmOptimizer = class(TAsmOptimizer)
    { Debugging aid: when DEBUG_AOPTCPU is defined, s is inserted as an
      assembler comment in front of p; otherwise the routine is an empty
      inline stub. }
    procedure DebugMsg(const s : string; p : tai);

    { Tries to fold the register move movp into the preceding instruction p
      by letting p write directly into the destination of the move.
      optimizer is only used for the debug message. Returns true if movp
      was removed. }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;

    { Searches forward from Current and hands the instruction at which the
      search stopped back in Next; the result is false if no further
      instruction could be fetched. }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;

    { Peephole optimizations for sequences starting with p (uxtb followed
      by strb/uxth/uxtb/and/mov). Returns true if something was changed;
      p may be replaced by its successor. }
    function OptPass1UXTB(var p: tai): Boolean;
  end;
  { Instruction matchers: true if instr is an instruction whose opcode,
    condition and postfix satisfy the given filters. An empty set acts as a
    wildcard for that filter. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
{$ifdef AARCH64}
  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
{$endif AARCH64}
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;

  { True if both references agree in all compared fields and neither of
    them is marked volatile. }
  function RefsEqual(const r1, r2: treference): boolean;

  { Operand matchers: compare an operand with a register, or two operands
    with each other. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  43. Implementation
  44. uses
  45. cutils,verbose,globtype,globals,
  46. systems,
  47. cpuinfo,
  48. cgobj,procinfo,
  49. aasmbase,aasmdata;
  { Emits a peephole debug message.

    With DEBUG_AOPTCPU defined, s is inserted into the assembler list as a
    comment directly in front of p. Without it the routine has an empty body
    and is declared inline. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
{$ifndef DEBUG_AOPTCPU}
    inline;
{$endif DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Checks whether instr is an instruction matching the given filters.

    op      - set of accepted opcodes; [] accepts any opcode
    cond    - set of accepted condition codes; [] accepts any condition
    postfix - set of accepted opcode postfixes; [] accepts any postfix

    Returns false for anything that is not an ait_instruction. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      insn: taicpu;
    begin
      Result := false;
      if instr.typ <> ait_instruction then
        exit;
      insn := taicpu(instr);

      { the ordinal is checked before the set membership test, so opcodes
        with an ordinal of 256 or more never match a non-empty set }
      if (op <> []) and
         not ((ord(insn.opcode) < 256) and (insn.opcode in op)) then
        exit;

      if (cond <> []) and not (insn.condition in cond) then
        exit;

      Result := (postfix = []) or (insn.oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly the opcode op.

    cond    - set of accepted condition codes; [] accepts any condition
    postfix - set of accepted opcode postfixes; [] accepts any postfix

    Returns false for anything that is not an ait_instruction. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      insn: taicpu;
    begin
      Result := false;
      if instr.typ <> ait_instruction then
        exit;
      insn := taicpu(instr);

      if insn.opcode <> op then
        exit;

      if (cond <> []) and not (insn.condition in cond) then
        exit;

      Result := (postfix = []) or (insn.oppostfix in postfix);
    end;
  76. {$ifdef AARCH64}
  { Checks whether instr is an instruction matching the given filters
    (variant without a condition filter).

    op      - set of accepted opcodes; [] accepts any opcode
    postfix - set of accepted opcode postfixes; [] accepts any postfix

    Returns false for anything that is not an ait_instruction. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
    var
      insn: taicpu;
    begin
      Result := false;
      if instr.typ <> ait_instruction then
        exit;
      insn := taicpu(instr);

      if (op <> []) and not (insn.opcode in op) then
        exit;

      Result := (postfix = []) or (insn.oppostfix in postfix);
    end;
  84. {$endif AARCH64}
  { Checks whether instr is an instruction with exactly the opcode op
    (variant without a condition filter).

    postfix - set of accepted opcode postfixes; [] accepts any postfix

    Returns false for anything that is not an ait_instruction. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      insn: taicpu;
    begin
      Result := false;
      if instr.typ <> ait_instruction then
        exit;
      insn := taicpu(instr);

      if insn.opcode <> op then
        exit;

      Result := (postfix = []) or (insn.oppostfix in postfix);
    end;
  { True if oper is a register operand holding exactly the register reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ <> top_reg then
        Result := false
      else
        Result := oper.reg = reg;
    end;
  { Compares two memory references field by field.

    Returns true only if neither reference is marked volatile and offset,
    base, index, scale factor, symbols, reference address kind, shift and
    addressing mode (plus the index sign when compiling for ARM) are all
    equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := false;

      { a reference carrying any volatility flag is never reported as equal }
      if (r1.volatility <> []) or (r2.volatility <> []) then
        exit;

      Result :=
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.offset = r2.offset) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr) and
{$ifdef ARM}
        (r1.signindex = r2.signindex) and
{$endif ARM}
        (r1.addressmode = r2.addressmode) and
        (r1.shiftmode = r2.shiftmode) and
        (r1.shiftimm = r2.shiftimm);
    end;
  { True if both operands have the same operand type and the same payload.

    Constants, registers, condition codes and real constants are compared
    by value, references via RefsEqual. Any other operand type is reported
    as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result := false;
      if oper1.typ <> oper2.typ then
        exit;

      case oper1.typ of
        top_reg:
          Result := oper1.reg = oper2.reg;
        top_const:
          Result := oper1.val = oper2.val;
        top_ref:
          Result := RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result := oper1.cc = oper2.cc;
        top_realconst:
          Result := oper1.val_real = oper2.val_real;
        else
          Result := false;
      end;
    end;
  { Walks forward from Current and returns the entry at which the walk
    stopped in Next.

    The walk stops as soon as one of the following holds:
      - no further instruction can be fetched (result is false),
      - optimization level 3 is not enabled (so only the directly following
        instruction is returned, whether it uses reg or not),
      - the fetched entry is not an instruction,
      - the instruction contains reg,
      - the instruction is a call or jump,
      - (ARM only) the instruction modifies PC.

    Callers therefore still have to verify that Next really is the
    instruction they are looking for. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    var
      KeepScanning: Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { evaluated left to right with short-circuiting, so Next is only
          dereferenced after a successful fetch and only treated as an
          instruction after its type has been checked }
        KeepScanning:=
          Result and
          (cs_opt_level3 in current_settings.optimizerswitches) and
          (Next.typ=ait_instruction) and
          not(RegInInstruction(reg,Next)) and
          not(is_calljmp(taicpu(Next).opcode))
{$ifdef ARM}
          and not(RegModifiedByInstruction(NR_PC,Next))
{$endif ARM}
          ;
      until not KeepScanning;
    end;
  { Folds a register move into the instruction that produced its source.

    For a sequence
        <op>  regA, ...        (p)
        mov   regB, regA       (movp)
        dealloc regA
    p is changed to write regB directly and movp is removed.

    p         - instruction writing its result to operand 0
    movp      - candidate mov following p
    optimizer - name of the calling optimization, used in the debug message

    Returns true if movp was removed (and freed). }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      RegAlloc,
      RegDealloc : tai_regalloc;
      PrevInstr : tai;
      SrcReg,
      DstReg : TRegister;
    begin
      Result:=false;

      if not (
        MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
        { a mov with a shifter operand cannot be folded }
        (taicpu(movp).ops=2) and
        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
        { don't mess with moves to the frame pointer }
        (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
        { the destination register of the mov must not be used between p and movp }
        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
{$ifdef ARM}
        { cb[n]z are thumb instructions which require specific registers, with no wide forms }
        (taicpu(p).opcode<>A_CBZ) and
        (taicpu(p).opcode<>A_CBNZ) and
        { MUL and MLA do not allow oper[0] and oper[1] to be the same register
          on CPUs older than ARMv6 }
        not (
          (taicpu(p).opcode in [A_MLA, A_MUL]) and
          (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
          (current_settings.cputype < cpu_armv6)
        ) and
{$endif ARM}
        { Only instructions which REALLY load a new value into their first
          register qualify. Otherwise
            str reg0, [reg1]
            mov reg2, reg0
          would be turned into
            str reg2, [reg1]
        }
        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p)
      ) then
        exit;

      { both operands are known to be registers from here on }
      SrcReg:=taicpu(p).oper[0]^.reg;
      DstReg:=taicpu(movp).oper[0]^.reg;

      { the intermediate register has to die right after the mov }
      RegDealloc:=FindRegDeAlloc(SrcReg,tai(movp.Next));
      if not assigned(RegDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { SrcReg is not used anymore: try to find its allocation and drop the
        alloc/dealloc pair; if the allocation is not found, keep the
        deallocation but move it directly behind p }
      asml.Remove(RegDealloc);
      RegAlloc:=FindRegAllocBackward(SrcReg,tai(p.previous));
      if not assigned(RegAlloc) then
        asml.InsertAfter(RegDealloc,p)
      else
        begin
          asml.Remove(RegAlloc);
          RegAlloc.free;
          RegDealloc.free;
        end;

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,PrevInstr);
      RegAlloc:=FindRegAlloc(DstReg,tai(PrevInstr.Next));
      if assigned(RegAlloc) then
        begin
          asml.Remove(RegAlloc);
          asml.InsertBefore(RegAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(DstReg,UsedRegs);
        end;

      { let p write the final destination directly }
      taicpu(p).loadreg(0,DstReg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]

        Preindexing must be removed there, since the same register is used
        as the base and as the target. Such a case is not allowed for ARM
        CPUs and produces a crash. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (DstReg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      { finally get rid of the mov }
      asml.remove(movp);
      movp.free;
    end;
  { Pass 1 peephole optimizations for an UXTB instruction.

    p - the instruction to optimize. When an optimization removes p itself,
        p is set to the instruction that followed it.

    Returns true if one of the patterns below was applied.

    Every pattern explicitly verifies that the follow-up instruction really
    consumes the result of the uxtb: below optimization level 3
    GetNextInstructionUsingReg simply returns the next instruction without
    checking that it uses the register. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
          uxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]
      }
      if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        { the value being stored must be reg2, otherwise an unrelated strb
          would get its source register replaced }
        MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb must not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          { the uxtb is no longer needed; continue with its successor }
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxth reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          { reg3 is now written by p already, so it has to be allocated from there on }
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxtb reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          { reg3 is now written by p already, so it has to be allocated from there on }
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          and reg3,reg2,#0x*FF
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        { the mask must keep all of the low eight bits }
        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          { turn the and into the uxtb and drop the original uxtb }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      { otherwise try to fold a following mov of the result into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
           RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
        Result:=true;
    end;
  342. end.