aoptarm.pas 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object that is shared by the ARM and AArch64 targets
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { While ARM and AArch64 do not look very similar at first glance,
  27. several optimizations can be shared between both. }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
{ Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
  defined; otherwise the implementation is an empty inline stub. }
  29. procedure DebugMsg(const s : string; p : tai);
{ Tries to fold a "mov dst,src" (movp) into the preceding instruction p that
  loads src, so that p writes dst directly; returns true if movp was removed.
  optimizer is only used as a label in the debug message. }
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
{ Tries to forward the source register of the mov p into the data processing
  instruction hp1; on success p is freed and replaced by hp1. }
  31. function RedundantMovProcess(var p: tai; hp1: tai): boolean;
{ Walks forward from Current and returns in Next the instruction at which
  the search stopped; see the implementation for the stop conditions. }
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
{ Peephole handlers for the extension instructions; each returns true when
  it changed the instruction list. }
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  37. End;
{ MatchInstruction overloads: true if instr is an instruction whose opcode,
  condition and postfix satisfy the given filters. For the set typed
  parameters an empty set accepts every value. }
  38. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  39. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  40. {$ifdef AARCH64}
  41. function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
  42. {$endif AARCH64}
  43. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
{ True if both references agree in all compared fields and neither of them
  is volatile. }
  44. function RefsEqual(const r1, r2: treference): boolean;
{ True if oper is a register operand holding exactly reg. }
  45. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
{ True if both operands have the same type and the same payload; operand
  types not handled by the implementation never compare equal. }
  46. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  47. Implementation
  48. uses
  49. cutils,verbose,globtype,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,procinfo,
  53. aasmbase,aasmdata;
  54. {$ifdef DEBUG_AOPTCPU}
  55. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  56. begin
  57. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  58. end;
  59. {$else DEBUG_AOPTCPU}
  60. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  61. begin
  62. end;
  63. {$endif DEBUG_AOPTCPU}
  64. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  65. begin
  66. result :=
  67. (instr.typ = ait_instruction) and
  68. ((op = []) or ((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op))) and
  69. ((cond = []) or (taicpu(instr).condition in cond)) and
  70. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  71. end;
  72. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  73. begin
  74. result :=
  75. (instr.typ = ait_instruction) and
  76. (taicpu(instr).opcode = op) and
  77. ((cond = []) or (taicpu(instr).condition in cond)) and
  78. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  79. end;
  80. {$ifdef AARCH64}
  81. function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
  82. begin
  83. result :=
  84. (instr.typ = ait_instruction) and
  85. ((op = []) or (taicpu(instr).opcode in op)) and
  86. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  87. end;
  88. {$endif AARCH64}
  89. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  90. begin
  91. result :=
  92. (instr.typ = ait_instruction) and
  93. (taicpu(instr).opcode = op) and
  94. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  95. end;
  96. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  97. begin
  98. result := (oper.typ = top_reg) and (oper.reg = reg);
  99. end;
  100. function RefsEqual(const r1, r2: treference): boolean;
  101. begin
  102. refsequal :=
  103. (r1.offset = r2.offset) and
  104. (r1.base = r2.base) and
  105. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  106. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  107. (r1.relsymbol = r2.relsymbol) and
  108. {$ifdef ARM}
  109. (r1.signindex = r2.signindex) and
  110. {$endif ARM}
  111. (r1.shiftimm = r2.shiftimm) and
  112. (r1.addressmode = r2.addressmode) and
  113. (r1.shiftmode = r2.shiftmode) and
  114. (r1.volatility=[]) and
  115. (r2.volatility=[]);
  116. end;
  117. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  118. begin
  119. result := oper1.typ = oper2.typ;
  120. if result then
  121. case oper1.typ of
  122. top_const:
  123. Result:=oper1.val = oper2.val;
  124. top_reg:
  125. Result:=oper1.reg = oper2.reg;
  126. top_conditioncode:
  127. Result:=oper1.cc = oper2.cc;
  128. top_realconst:
  129. Result:=oper1.val_real = oper2.val_real;
  130. top_ref:
  131. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  132. else Result:=false;
  133. end
  134. end;
  135. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  136. Out Next: tai; reg: TRegister): Boolean;
  137. begin
  138. Next:=Current;
  139. repeat
  140. Result:=GetNextInstruction(Next,Next);
  141. until not (Result) or
  142. not(cs_opt_level3 in current_settings.optimizerswitches) or
  143. (Next.typ<>ait_instruction) or
  144. RegInInstruction(reg,Next) or
  145. is_calljmp(taicpu(Next).opcode)
  146. {$ifdef ARM}
  147. or RegModifiedByInstruction(NR_PC,Next);
  148. {$endif ARM}
  149. end;
  150. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  151. var
  152. alloc,
  153. dealloc : tai_regalloc;
  154. hp1 : tai;
  155. begin
  156. Result:=false;
  157. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  158. { We can't optimize if there is a shiftop }
  159. (taicpu(movp).ops=2) and
  160. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  161. { don't mess with moves to fp }
  162. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  163. { the destination register of the mov might not be used beween p and movp }
  164. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  165. {$ifdef ARM}
  166. { PC should be changed only by moves }
  167. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  168. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  169. (taicpu(p).opcode<>A_CBZ) and
  170. (taicpu(p).opcode<>A_CBNZ) and
  171. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  172. not (
  173. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  174. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  175. (current_settings.cputype < cpu_armv6)
  176. ) and
  177. {$endif ARM}
  178. { Take care to only do this for instructions which REALLY load to the first register.
  179. Otherwise
  180. str reg0, [reg1]
  181. mov reg2, reg0
  182. will be optimized to
  183. str reg2, [reg1]
  184. }
  185. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  186. begin
  187. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  188. if assigned(dealloc) then
  189. begin
  190. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  191. result:=true;
  192. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  193. and remove it if possible }
  194. asml.Remove(dealloc);
  195. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  196. if assigned(alloc) then
  197. begin
  198. asml.Remove(alloc);
  199. alloc.free;
  200. dealloc.free;
  201. end
  202. else
  203. asml.InsertAfter(dealloc,p);
  204. { try to move the allocation of the target register }
  205. GetLastInstruction(movp,hp1);
  206. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  207. if assigned(alloc) then
  208. begin
  209. asml.Remove(alloc);
  210. asml.InsertBefore(alloc,p);
  211. { adjust used regs }
  212. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  213. end;
  214. { finally get rid of the mov }
  215. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  216. { Remove preindexing and postindexing for LDR in some cases.
  217. For example:
  218. ldr reg2,[reg1, xxx]!
  219. mov reg1,reg2
  220. must be translated to:
  221. ldr reg1,[reg1, xxx]
  222. Preindexing must be removed there, since the same register is used as the base and as the target.
  223. Such case is not allowed for ARM CPU and produces crash. }
  224. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  225. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  226. then
  227. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  228. asml.remove(movp);
  229. movp.free;
  230. end;
  231. end;
  232. end;
  233. function TARMAsmOptimizer.RedundantMovProcess(var p: tai;hp1: tai):boolean;
  234. var
  235. I: Integer;
  236. begin
  237. Result:=false;
  238. {
  239. change
  240. mov r1, r0
  241. add r1, r1, #1
  242. to
  243. add r1, r0, #1
  244. Todo: Make it work for mov+cmp too
  245. CAUTION! If this one is successful p might not be a mov instruction anymore!
  246. }
  247. if (taicpu(p).ops = 2) and
  248. (taicpu(p).oper[1]^.typ = top_reg) and
  249. (taicpu(p).oppostfix = PF_NONE) and
  250. MatchInstruction(hp1, [A_ADD, A_ADC,
  251. {$ifdef ARM}
  252. A_RSB, A_RSC,
  253. {$endif ARM}
  254. A_SUB, A_SBC,
  255. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  256. [taicpu(p).condition], []) and
  257. { MOV and MVN might only have 2 ops }
  258. (taicpu(hp1).ops >= 2) and
  259. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  260. (taicpu(hp1).oper[1]^.typ = top_reg) and
  261. (
  262. (taicpu(hp1).ops = 2) or
  263. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  264. ) and
  265. {$ifdef AARCH64}
  266. (taicpu(p).oper[1]^.reg<>NR_SP) and
  267. {$endif AARCH64}
  268. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  269. begin
  270. { When we get here we still don't know if the registers match }
  271. for I:=1 to 2 do
  272. {
  273. If the first loop was successful p will be replaced with hp1.
  274. The checks will still be ok, because all required information
  275. will also be in hp1 then.
  276. }
  277. if (taicpu(hp1).ops > I) and
  278. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  279. {$ifdef ARM}
  280. { prevent certain combinations on thumb(2), this is only a safe approximation }
  281. and (not(GenerateThumbCode or GenerateThumb2Code) or
  282. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  283. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  284. {$endif ARM}
  285. then
  286. begin
  287. DebugMsg('Peephole RedundantMovProcess done', hp1);
  288. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  289. if p<>hp1 then
  290. begin
  291. asml.remove(p);
  292. p.free;
  293. p:=hp1;
  294. Result:=true;
  295. end;
  296. end;
  297. end;
  298. end;
  299. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  300. var
  301. hp1, hp2: tai;
  302. begin
  303. Result:=false;
  304. {
  305. change
  306. uxtb reg2,reg1
  307. strb reg2,[...]
  308. dealloc reg2
  309. to
  310. strb reg1,[...]
  311. }
  312. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  313. (taicpu(p).ops=2) and
  314. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  315. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  316. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  317. { the reference in strb might not use reg2 }
  318. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  319. { reg1 might not be modified inbetween }
  320. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  321. begin
  322. DebugMsg('Peephole UxtbStrb2Strb done', p);
  323. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  324. GetNextInstruction(p,hp2);
  325. asml.remove(p);
  326. p.free;
  327. p:=hp2;
  328. result:=true;
  329. end
  330. {
  331. change
  332. uxtb reg2,reg1
  333. uxth reg3,reg2
  334. dealloc reg2
  335. to
  336. uxtb reg3,reg1
  337. }
  338. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  339. (taicpu(p).ops=2) and
  340. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  341. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  342. (taicpu(hp1).ops = 2) and
  343. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  344. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  345. { reg1 might not be modified inbetween }
  346. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  347. begin
  348. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  349. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  350. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  351. asml.remove(hp1);
  352. hp1.free;
  353. result:=true;
  354. end
  355. {
  356. change
  357. uxtb reg2,reg1
  358. uxtb reg3,reg2
  359. dealloc reg2
  360. to
  361. uxtb reg3,reg1
  362. }
  363. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  364. (taicpu(p).ops=2) and
  365. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  366. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  367. (taicpu(hp1).ops = 2) and
  368. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  369. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  370. { reg1 might not be modified inbetween }
  371. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  372. begin
  373. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  374. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  375. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  376. asml.remove(hp1);
  377. hp1.free;
  378. result:=true;
  379. end
  380. {
  381. change
  382. uxtb reg2,reg1
  383. and reg3,reg2,#0x*FF
  384. dealloc reg2
  385. to
  386. uxtb reg3,reg1
  387. }
  388. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  389. (taicpu(p).ops=2) and
  390. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  391. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  392. (taicpu(hp1).ops=3) and
  393. (taicpu(hp1).oper[2]^.typ=top_const) and
  394. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  395. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  396. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  397. { reg1 might not be modified inbetween }
  398. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  399. begin
  400. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  401. taicpu(hp1).opcode:=A_UXTB;
  402. taicpu(hp1).ops:=2;
  403. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  404. GetNextInstruction(p,hp2);
  405. asml.remove(p);
  406. p.free;
  407. p:=hp2;
  408. result:=true;
  409. end
  410. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  411. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  412. Result:=true;
  413. end;
  414. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  415. var
  416. hp1: tai;
  417. begin
  418. Result:=false;
  419. {
  420. change
  421. uxth reg2,reg1
  422. strh reg2,[...]
  423. dealloc reg2
  424. to
  425. strh reg1,[...]
  426. }
  427. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  428. (taicpu(p).ops=2) and
  429. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  430. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  431. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  432. { the reference in strb might not use reg2 }
  433. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  434. { reg1 might not be modified inbetween }
  435. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  436. begin
  437. DebugMsg('Peephole UXTHStrh2Strh done', p);
  438. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  439. GetNextInstruction(p, hp1);
  440. asml.remove(p);
  441. p.free;
  442. p:=hp1;
  443. result:=true;
  444. end
  445. {
  446. change
  447. uxth reg2,reg1
  448. uxth reg3,reg2
  449. dealloc reg2
  450. to
  451. uxth reg3,reg1
  452. }
  453. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  454. (taicpu(p).ops=2) and
  455. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  456. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  457. (taicpu(hp1).ops=2) and
  458. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  459. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  460. { reg1 might not be modified inbetween }
  461. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  462. begin
  463. DebugMsg('Peephole UxthUxth2Uxth done', p);
  464. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  465. taicpu(hp1).opcode:=A_UXTH;
  466. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  467. GetNextInstruction(p, hp1);
  468. asml.remove(p);
  469. p.free;
  470. p:=hp1;
  471. result:=true;
  472. end
  473. {
  474. change
  475. uxth reg2,reg1
  476. and reg3,reg2,#65535
  477. dealloc reg2
  478. to
  479. uxth reg3,reg1
  480. }
  481. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  482. (taicpu(p).ops=2) and
  483. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  484. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  485. (taicpu(hp1).ops=3) and
  486. (taicpu(hp1).oper[2]^.typ=top_const) and
  487. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  488. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  489. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  490. { reg1 might not be modified inbetween }
  491. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  492. begin
  493. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  494. taicpu(hp1).opcode:=A_UXTH;
  495. taicpu(hp1).ops:=2;
  496. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  497. GetNextInstruction(p, hp1);
  498. asml.remove(p);
  499. p.free;
  500. p:=hp1;
  501. result:=true;
  502. end
  503. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  504. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  505. Result:=true;
  506. end;
  507. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  508. var
  509. hp1, hp2: tai;
  510. begin
  511. Result:=false;
  512. {
  513. change
  514. sxtb reg2,reg1
  515. strb reg2,[...]
  516. dealloc reg2
  517. to
  518. strb reg1,[...]
  519. }
  520. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  521. (taicpu(p).ops=2) and
  522. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  523. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  524. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  525. { the reference in strb might not use reg2 }
  526. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  527. { reg1 might not be modified inbetween }
  528. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  529. begin
  530. DebugMsg('Peephole SxtbStrb2Strb done', p);
  531. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  532. GetNextInstruction(p,hp2);
  533. asml.remove(p);
  534. p.free;
  535. p:=hp2;
  536. result:=true;
  537. end
  538. {
  539. change
  540. sxtb reg2,reg1
  541. sxth reg3,reg2
  542. dealloc reg2
  543. to
  544. sxtb reg3,reg1
  545. }
  546. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  547. (taicpu(p).ops=2) and
  548. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  549. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  550. (taicpu(hp1).ops = 2) and
  551. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  552. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  553. { reg1 might not be modified inbetween }
  554. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  555. begin
  556. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  557. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  558. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  559. asml.remove(hp1);
  560. hp1.free;
  561. result:=true;
  562. end
  563. {
  564. change
  565. sxtb reg2,reg1
  566. sxtb reg3,reg2
  567. dealloc reg2
  568. to
  569. uxtb reg3,reg1
  570. }
  571. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  572. (taicpu(p).ops=2) and
  573. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  574. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  575. (taicpu(hp1).ops = 2) and
  576. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  577. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  578. { reg1 might not be modified inbetween }
  579. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  580. begin
  581. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  582. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  583. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  584. asml.remove(hp1);
  585. hp1.free;
  586. result:=true;
  587. end
  588. {
  589. change
  590. sxtb reg2,reg1
  591. and reg3,reg2,#0x*FF
  592. dealloc reg2
  593. to
  594. uxtb reg3,reg1
  595. }
  596. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  597. (taicpu(p).ops=2) and
  598. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  599. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  600. (taicpu(hp1).ops=3) and
  601. (taicpu(hp1).oper[2]^.typ=top_const) and
  602. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  603. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  604. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  605. { reg1 might not be modified inbetween }
  606. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  607. begin
  608. DebugMsg('Peephole SxtbAndImm2Sxtb done', p);
  609. taicpu(hp1).opcode:=A_SXTB;
  610. taicpu(hp1).ops:=2;
  611. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  612. GetNextInstruction(p,hp2);
  613. asml.remove(p);
  614. p.free;
  615. p:=hp2;
  616. result:=true;
  617. end
  618. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  619. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
  620. Result:=true;
  621. end;
  622. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  623. var
  624. hp1: tai;
  625. begin
  626. Result:=false;
  627. {
  628. change
  629. sxth reg2,reg1
  630. strh reg2,[...]
  631. dealloc reg2
  632. to
  633. strh reg1,[...]
  634. }
  635. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  636. (taicpu(p).ops=2) and
  637. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  638. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  639. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  640. { the reference in strb might not use reg2 }
  641. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  642. { reg1 might not be modified inbetween }
  643. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  644. begin
  645. DebugMsg('Peephole SXTHStrh2Strh done', p);
  646. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  647. GetNextInstruction(p, hp1);
  648. asml.remove(p);
  649. p.free;
  650. p:=hp1;
  651. result:=true;
  652. end
  653. {
  654. change
  655. sxth reg2,reg1
  656. sxth reg3,reg2
  657. dealloc reg2
  658. to
  659. sxth reg3,reg1
  660. }
  661. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  662. (taicpu(p).ops=2) and
  663. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  664. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  665. (taicpu(hp1).ops=2) and
  666. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  667. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  668. { reg1 might not be modified inbetween }
  669. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  670. begin
  671. DebugMsg('Peephole SxthSxth2Sxth done', p);
  672. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  673. taicpu(hp1).opcode:=A_SXTH;
  674. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  675. GetNextInstruction(p, hp1);
  676. asml.remove(p);
  677. p.free;
  678. p:=hp1;
  679. result:=true;
  680. end
  681. {
  682. change
  683. sxth reg2,reg1
  684. and reg3,reg2,#65535
  685. dealloc reg2
  686. to
  687. sxth reg3,reg1
  688. }
  689. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  690. (taicpu(p).ops=2) and
  691. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  692. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  693. (taicpu(hp1).ops=3) and
  694. (taicpu(hp1).oper[2]^.typ=top_const) and
  695. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  696. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  697. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  698. { reg1 might not be modified inbetween }
  699. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  700. begin
  701. DebugMsg('Peephole SxthAndImm2Sxth done', p);
  702. taicpu(hp1).opcode:=A_SXTH;
  703. taicpu(hp1).ops:=2;
  704. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  705. GetNextInstruction(p, hp1);
  706. asml.remove(p);
  707. p.free;
  708. p:=hp1;
  709. result:=true;
  710. end
  711. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  712. RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
  713. Result:=true;
  714. end;
  715. end.