aoptarm.pas 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object shared by the ARM and AArch64 targets
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { While ARM and AArch64 do not look very similar at first glance,
  27. several optimizations can be shared between both. }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
  { With DEBUG_AOPTCPU defined, inserts s as an assembler comment in front of p;
    otherwise the implementation is empty. }
  29. procedure DebugMsg(const s : string; p : tai);
  { If movp is a plain two-operand mov that copies the register written by p,
    lets p write the mov's destination directly and removes movp.
    Returns true when the mov was removed. }
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  { Folds the source register of the mov p into the register operands of the
    following data-processing instruction hp1. On success p is freed and
    replaced by hp1, so p may no longer be a mov afterwards. }
  31. function RedundantMovProcess(var p: tai; hp1: tai): boolean;
  { Steps forward from Current. Without cs_opt_level3 it stops at the very next
    instruction; with it, it continues until an instruction references reg,
    is a call/jump, (ARM only) modifies PC, or a non-instruction is reached. }
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  { Pass-1 peephole handlers; each returns true when it changed the code. }
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  37. function OptPass1And(var p: tai): Boolean;
  38. End;
  { MatchInstruction: true when instr is an instruction (ait_instruction) whose
    opcode, condition and postfix satisfy the given filters. For the set-typed
    filters an empty set accepts every value. }
  39. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  40. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  41. {$ifdef AARCH64}
  42. function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
  43. {$endif AARCH64}
  44. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  { RefsEqual: field-by-field comparison of two references; references with a
    non-empty volatility set never compare equal. }
  45. function RefsEqual(const r1, r2: treference): boolean;
  { MatchOperand: true when oper is a register operand holding reg, or when both
    operands have the same type and the same payload, respectively. }
  46. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  47. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  48. Implementation
  49. uses
  50. cutils,verbose,globtype,globals,
  51. systems,
  52. cpuinfo,
  53. cgobj,procinfo,
  54. aasmbase,aasmdata;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: records the message s as an assembler comment that is
    inserted into the instruction list directly in front of p. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      commentNode: tai_comment;
    begin
      commentNode:=tai_comment.Create(strpnew(s));
      asml.insertbefore(commentNode, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: intentionally empty, so calls cost nothing. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Checks whether instr is an instruction accepted by all three filters.
    op, cond and postfix are sets; an empty set accepts any value.
    An opcode whose ordinal is 256 or higher never matches a non-empty op set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      result:=true;
    end;
  { Checks whether instr is an instruction with exactly the opcode op whose
    condition and postfix are accepted by the given sets (empty set = any). }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      { the typ test must come first: only instructions may be cast to taicpu }
      if (instr.typ<>ait_instruction) or (taicpu(instr).opcode<>op) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      result:=true;
    end;
  {$ifdef AARCH64}
  { AArch64-only overload: checks whether instr is an instruction whose opcode
    and postfix are accepted by the given sets (empty set = any). The condition
    is not examined. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and not(taicpu(instr).opcode in op) then
        exit;
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      result:=true;
    end;
  {$endif AARCH64}
  { Checks whether instr is an instruction with exactly the opcode op whose
    postfix is accepted by the given set (empty set = any). The condition is
    not examined. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      { the typ test must come first: only instructions may be cast to taicpu }
      if (instr.typ<>ait_instruction) or (taicpu(instr).opcode<>op) then
        exit;
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      result:=true;
    end;
  { True when oper is a register operand and that register is reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Compares two references field by field. Returns true only when every
    compared field is equal and neither reference carries volatility flags. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { a reference with volatility flags never compares equal }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
  {$ifdef ARM}
      if r1.signindex<>r2.signindex then
        exit;
  {$endif ARM}
      Result:=
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { True when both operands have the same type and the same payload.
    Constants, registers, condition codes, real constants and references are
    compared; operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=(oper1.reg=oper2.reg);
        top_const:
          Result:=(oper1.val=oper2.val);
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result:=(oper1.cc=oper2.cc);
        top_realconst:
          Result:=(oper1.val_real=oper2.val_real);
        else
          Result:=false;
      end;
    end;
  { Searches forward from Current and returns the stop position in Next.
    The function result is the result of the last GetNextInstruction call.
    The walk ends as soon as one of these holds:
      - there is no further instruction,
      - cs_opt_level3 is not enabled (so only the direct successor is returned),
      - Next is not an instruction,
      - Next references reg,
      - Next is a call or jump,
      - (ARM only) Next modifies PC. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=GetNextInstruction(Next,Next);
      while Result and
            (cs_opt_level3 in current_settings.optimizerswitches) and
            (Next.typ=ait_instruction) and
            not(RegInInstruction(reg,Next)) and
            not(is_calljmp(taicpu(Next).opcode))
  {$ifdef ARM}
            and not(RegModifiedByInstruction(NR_PC,Next))
  {$endif ARM}
            do
        Result:=GetNextInstruction(Next,Next);
    end;
  { Tries to fold
        <op> regA, ...
        mov  regB, regA
        dealloc regA
    into
        <op> regB, ...
    p is the producing instruction and movp the candidate mov; optimizer is only
    used in the debug message. Returns true when movp was removed (and freed). }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      allocMark,
      deallocMark : tai_regalloc;
      prevIns : tai;
    begin
      Result:=false;
      if not(
           MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
           { a mov with more than two operands carries a shifter operand: not foldable }
           (taicpu(movp).ops=2) and
           MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
           { leave moves into the frame pointer alone }
           (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
           { the mov's destination must not be used between p and movp }
           not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  {$ifdef ARM}
           { PC should be changed only by moves }
           (taicpu(movp).oper[0]^.reg<>NR_PC) and
           { cb[n]z are thumb instructions which require specific registers, with no wide forms }
           (taicpu(p).opcode<>A_CBZ) and
           (taicpu(p).opcode<>A_CBNZ) and
           { MUL and MLA do not allow oper[0] and oper[1] to be the same register }
           not (
             (taicpu(p).opcode in [A_MLA, A_MUL]) and
             (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
             (current_settings.cputype < cpu_armv6)
           ) and
  {$endif ARM}
           { Only instructions which REALLY load their first register qualify.
             Otherwise
               str reg0, [reg1]
               mov reg2, reg0
             would be turned into
               str reg2, [reg1]
           }
           RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p)
         ) then
        exit;

      { the intermediate register must be released after the mov }
      deallocMark:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
      if not assigned(deallocMark) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { the intermediate register is no longer needed: drop its allocation
        together with the deallocation if the allocation can be found,
        otherwise release it right after p }
      asml.Remove(deallocMark);
      allocMark:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
      if not assigned(allocMark) then
        asml.InsertAfter(deallocMark,p)
      else
        begin
          asml.Remove(allocMark);
          allocMark.free;
          deallocMark.free;
        end;

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,prevIns);
      allocMark:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(prevIns.Next));
      if assigned(allocMark) then
        begin
          asml.Remove(allocMark);
          asml.InsertBefore(allocMark,p);
          { adjust used regs }
          IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
        end;

      { let p write the mov's destination directly }
      taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]
        The same register may not be both base (with writeback) and target. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      { finally get rid of the mov }
      asml.remove(movp);
      movp.free;
    end;
  { Folds a register-to-register mov into the data-processing instruction
    that follows it:

        mov r1, r0
        add r1, r1, #1
      becomes
        add r1, r0, #1

    p   - the mov instruction (two operands, register source, no postfix)
    hp1 - the following instruction that overwrites the mov's destination

    Returns true when p was removed. CAUTION: on success p is freed and set to
    hp1, so afterwards p might not be a mov instruction anymore.

    Todo: make it work for mov+cmp too. }
  function TARMAsmOptimizer.RedundantMovProcess(var p: tai;hp1: tai):boolean;
    var
      I: Integer;
    begin
      Result:=false;
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oppostfix = PF_NONE) and
         MatchInstruction(hp1, [A_ADD, A_ADC,
  {$ifdef ARM}
                                A_RSB, A_RSC,
  {$endif ARM}
                                A_SUB, A_SBC,
                                A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
                          [taicpu(p).condition], []) and
         { MOV and MVN might only have 2 ops }
         (taicpu(hp1).ops >= 2) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
         (taicpu(hp1).oper[1]^.typ = top_reg) and
         (
           (taicpu(hp1).ops = 2) or
           (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
         ) and
  {$ifdef AARCH64}
         (taicpu(p).oper[1]^.reg<>NR_SP) and
  {$endif AARCH64}
         not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          { When we get here we still don't know if the registers match }
          for I:=1 to 2 do
            {
              If the first loop was successful p will be replaced with hp1.
              The checks will still be ok, because all required information
              will also be in hp1 then.

              The operand of hp1 is the one whose type is tested: oper[2] may be
              a constant or shifter operand, and reading its reg field without a
              type check could alias a register number by accident and then
              corrupt that operand. oper[0] of p is known to be a register from
              the check above.
            }
            if (taicpu(hp1).ops > I) and
               MatchOperand(taicpu(hp1).oper[I]^, taicpu(p).oper[0]^.reg)
  {$ifdef ARM}
               { prevent certain combinations on thumb(2), this is only a safe approximation }
               and (not(GenerateThumbCode or GenerateThumb2Code) or
                 ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
                  (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  {$endif ARM}
               then
              begin
                DebugMsg('Peephole RedundantMovProcess done', hp1);
                taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
                if p<>hp1 then
                  begin
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    Result:=true;
                  end;
              end;
        end;
    end;
  { Pass-1 peephole patterns that start at a uxtb instruction p.
    Returns true when a pattern was applied; p may then point to a different
    instruction (the old p is freed by the patterns that remove it). }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
        uxtb reg2,reg1
        strb reg2,[...]
        dealloc reg2
        to
        strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
         { the stored register must really be reg2: without cs_opt_level3 hp1
           is just the next instruction and need not reference reg2 at all;
           compare super registers so differing sub-register sizes still match }
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
         assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
         { the reference in strb might not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        uxth reg3,reg2
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         { reg3 is now written at the position of p, so nothing between p and
           hp1 may read or write it }
         not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        uxtb reg3,reg2
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         { reg3 is now written at the position of p, so nothing between p and
           hp1 may read or write it }
         not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        and  reg3,reg2,#0x*FF
        dealloc reg2
        to
        uxtb reg3,reg1
        (reg2 only holds bits 0..7, so any mask with all of them set is a no-op)
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
              RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
        Result:=true;
    end;
  { Pass-1 peephole patterns that start at a uxth instruction p.
    Returns true when a pattern was applied; p may then point to a different
    instruction (the old p is freed by the patterns that remove it). }
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      {
        change
        uxth reg2,reg1
        strh reg2,[...]
        dealloc reg2
        to
        strh reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
         { the stored register must really be reg2: without cs_opt_level3 hp1
           is just the next instruction and need not reference reg2 at all;
           compare super registers so differing sub-register sizes still match }
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { the reference in strh might not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UXTHStrh2Strh done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        uxth reg2,reg1
        uxth reg3,reg2
        dealloc reg2
        to
        uxth reg3,reg1
      }
      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops=2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxthUxth2Uxth done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        uxth reg2,reg1
        and  reg3,reg2,#65535
        dealloc reg2
        to
        uxth reg3,reg1
        (reg2 only holds bits 0..15, so any mask with all of them set is a no-op)
      }
      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxthAndImm2Uxth done', p);
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
              RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
        Result:=true;
    end;
  { Pass-1 peephole patterns that start at a sxtb instruction p.
    Returns true when a pattern was applied; p may then point to a different
    instruction (the old p is freed by the patterns that remove it). }
  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
        sxtb reg2,reg1
        strb reg2,[...]
        dealloc reg2
        to
        strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
         { the stored register must really be reg2: without cs_opt_level3 hp1
           is just the next instruction and need not reference reg2 at all;
           compare super registers so differing sub-register sizes still match }
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
         assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
         { the reference in strb might not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
        sxtb reg2,reg1
        sxth reg3,reg2
        dealloc reg2
        to
        sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         { reg3 is now written at the position of p, so nothing between p and
           hp1 may read or write it }
         not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxth2Sxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        sxtb reg2,reg1
        sxtb reg3,reg2
        dealloc reg2
        to
        sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         { reg3 is now written at the position of p, so nothing between p and
           hp1 may read or write it }
         not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        sxtb reg2,reg1
        and  reg3,reg2,#0xFF
        dealloc reg2
        to
        uxtb reg3,reg1

        Masking with exactly $FF throws the sign extension away again, so the
        pair yields the ZERO-extended byte; replacing it by sxtb would give a
        wrong result for negative bytes. Masks with further bits set keep
        some of the sign bits and cannot be expressed by a single extension.
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         (taicpu(hp1).oper[2]^.val=$FF) and
  {$ifdef AARCH64}
         { NOTE(review): uxtb is assumed to need a 32 bit destination on
           AArch64, so 64 bit destinations are left alone - confirm }
         (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  {$endif AARCH64}
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
              RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
        Result:=true;
    end;
  { Pass-1 peephole patterns that start at a sxth instruction p.
    Returns true when a pattern was applied; p may then point to a different
    instruction (the old p is freed by the patterns that remove it). }
  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      {
        change
        sxth reg2,reg1
        strh reg2,[...]
        dealloc reg2
        to
        strh reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
         { the stored register must really be reg2: without cs_opt_level3 hp1
           is just the next instruction and need not reference reg2 at all;
           compare super registers so differing sub-register sizes still match }
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { the reference in strh might not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SXTHStrh2Strh done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        sxth reg2,reg1
        sxth reg3,reg2
        dealloc reg2
        to
        sxth reg3,reg1
      }
      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops=2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxthSxth2Sxth done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).opcode:=A_SXTH;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        sxth reg2,reg1
        and  reg3,reg2,#65535
        dealloc reg2
        to
        uxth reg3,reg1

        Masking with exactly $FFFF throws the sign extension away again, so
        the pair yields the ZERO-extended halfword; replacing it by sxth would
        give a wrong result for negative halfwords. Masks with further bits
        set keep some of the sign bits and cannot be expressed by a single
        extension.
      }
      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         (taicpu(hp1).oper[2]^.val=$FFFF) and
  {$ifdef AARCH64}
         { NOTE(review): uxth is assumed to need a 32 bit destination on
           AArch64, so 64 bit destinations are left alone - confirm }
         (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  {$endif AARCH64}
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxthAndImm2Uxth done', p);
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
              RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
        Result:=true;
    end;
  716. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  717. var
  718. hp1, hp2: tai;
  719. i: longint;
  720. begin
  721. Result:=false;
  722. {
  723. optimize
  724. and reg2,reg1,const1
  725. ...
  726. }
  727. if (taicpu(p).ops>2) and
  728. (taicpu(p).oper[1]^.typ = top_reg) and
  729. (taicpu(p).oper[2]^.typ = top_const) then
  730. begin
  731. {
  732. change
  733. and reg2,reg1,const1
  734. ...
  735. and reg3,reg2,const2
  736. to
  737. and reg3,reg1,(const1 and const2)
  738. }
  739. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  740. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  741. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  742. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  743. (taicpu(hp1).oper[2]^.typ = top_const) then
  744. begin
  745. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  746. begin
  747. DebugMsg('Peephole AndAnd2And done', p);
  748. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  749. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  750. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  751. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  752. asml.remove(hp1);
  753. hp1.free;
  754. Result:=true;
  755. exit;
  756. end
  757. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  758. begin
  759. DebugMsg('Peephole AndAnd2And done', hp1);
  760. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  761. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  762. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  763. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  764. GetNextInstruction(p, hp1);
  765. RemoveCurrentP(p);
  766. p:=hp1;
  767. Result:=true;
  768. exit;
  769. end;
  770. end
  771. {
  772. change
  773. and reg2,reg1,$xxxxxxFF
  774. strb reg2,[...]
  775. dealloc reg2
  776. to
  777. strb reg1,[...]
  778. }
  779. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  780. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  781. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  782. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  783. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  784. { the reference in strb might not use reg2 }
  785. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  786. { reg1 might not be modified inbetween }
  787. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  788. begin
  789. DebugMsg('Peephole AndStrb2Strb done', p);
  790. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  791. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  792. RemoveCurrentP(p);
  793. result:=true;
  794. exit;
  795. end
  796. {
  797. change
  798. and reg2,reg1,x (x<=255)
  799. uxtb/uxth reg3,reg2
  800. dealloc reg2
  801. to
  802. and reg3,reg1,x
  803. }
  804. else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
  805. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  806. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  807. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  808. (taicpu(hp1).ops = 2) and
  809. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  810. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  811. { reg1 must not be modified in between }
  812. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  813. begin
  814. DebugMsg('Peephole AndUxt2And done', p);
  815. taicpu(hp1).opcode:=A_AND;
  816. taicpu(hp1).ops:=3;
  817. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  818. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  819. GetNextInstruction(p,hp1);
  820. asml.remove(p);
  821. p.Free;
  822. p:=hp1;
  823. result:=true;
  824. exit;
  825. end
  826. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  827. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  828. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  829. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  830. (taicpu(hp1).ops = 2) and
  831. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  832. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  833. { reg1 must not be modified in between }
  834. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  835. begin
  836. DebugMsg('Peephole AndSxt2And done', p);
  837. taicpu(hp1).opcode:=A_AND;
  838. taicpu(hp1).ops:=3;
  839. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  840. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  841. GetNextInstruction(p,hp1);
  842. asml.remove(p);
  843. p.Free;
  844. p:=hp1;
  845. result:=true;
  846. exit;
  847. end
  848. {
  849. from
  850. and reg1,reg0,2^n-1
  851. mov reg2,reg1, lsl imm1
  852. (mov reg3,reg2, lsr/asr imm1)
  853. remove either the and or the lsl/xsr sequence if possible
  854. }
  855. else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  856. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  857. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  858. (taicpu(hp1).ops=3) and
  859. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  860. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  861. {$ifdef ARM}
  862. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  863. {$endif ARM}
  864. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  865. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  866. begin
  867. {
  868. and reg1,reg0,2^n-1
  869. mov reg2,reg1, lsl imm1
  870. mov reg3,reg2, lsr/asr imm1
  871. =>
  872. and reg3,reg0,2^n-1
  873. if (lsr and n<=32-imm1) or (asr and n<32-imm1)
  874. }
  875. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  876. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  877. (taicpu(hp2).ops=3) and
  878. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  879. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  880. {$ifdef ARM}
  881. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  882. {$endif ARM}
  883. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  884. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  885. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  886. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  887. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  888. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  889. begin
  890. DebugMsg('Peephole AndLslXsr2And done', p);
  891. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  892. asml.Remove(hp1);
  893. asml.Remove(hp2);
  894. hp1.free;
  895. hp2.free;
  896. result:=true;
  897. exit;
  898. end
  899. {
  900. and reg1,reg0,2^n-1
  901. mov reg2,reg1, lsl imm1
  902. =>
  903. mov reg2,reg0, lsl imm1
  904. if n>32-imm1
  905. }
  906. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  907. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  908. begin
  909. DebugMsg('Peephole AndLsl2Lsl done', p);
  910. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  911. GetNextInstruction(p, hp1);
  912. asml.Remove(p);
  913. p.free;
  914. p:=hp1;
  915. result:=true;
  916. exit;
  917. end
  918. end;
  919. end;
  920. {
  921. change
  922. and reg1, ...
  923. mov reg2, reg1
  924. to
  925. and reg2, ...
  926. }
  927. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  928. (taicpu(p).ops>=3) and
  929. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  930. Result:=true;
  931. end;
  932. end.