aoptarm.pas 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270
{
    Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
    Development Team

    This unit implements an ARM optimizer object used commonly for ARM and AArch64

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  { Although ARM and AArch64 do not look very similar at first glance,
    several optimizations can be shared between both of them }
  TARMAsmOptimizer = class(TAsmOptimizer)
    { inserts s as an assembler comment in front of p if DEBUG_AOPTCPU is
      defined, otherwise it does nothing }
    procedure DebugMsg(const s : string; p : tai);

    { tries to let p write directly into the destination of the mov movp so
      that movp can be deleted; optimizer is only used in the debug message }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;

    { p is a mov, hp1 the instruction found after it: tries to replace reads
      of the mov's destination by the mov's source and to delete the mov;
      on success p might not be a mov anymore }
    function RedundantMovProcess(var p: tai; var hp1: tai): boolean;

    { walks forward from Current and returns true with Next set as soon as an
      instruction using reg is found }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;

    { first pass peephole optimizations for the zero/sign extension
      instructions and for AND }
    function OptPass1UXTB(var p: tai): Boolean;
    function OptPass1UXTH(var p: tai): Boolean;
    function OptPass1SXTB(var p: tai): Boolean;
    function OptPass1SXTH(var p: tai): Boolean;
    function OptPass1And(var p: tai): Boolean; virtual;
  end;
  { instr is an instruction whose opcode is in op (if op is not empty), whose
    condition is in cond (if not empty) and whose postfix is in postfix
    (if not empty) }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps;
    const cond: TAsmConds; const postfix: TOpPostfixes): boolean;

  { same as above, but for exactly one opcode }
  function MatchInstruction(const instr: tai; const op: TAsmOp;
    const cond: TAsmConds; const postfix: TOpPostfixes): boolean;

{$ifdef AARCH64}
  { instr is an instruction with one of the opcodes listed in ops; the
    condition is not checked }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp;
    const postfix: TOpPostfixes): boolean;
{$endif AARCH64}

  { instr is an instruction with opcode op; the condition is not checked }
  function MatchInstruction(const instr: tai; const op: TAsmOp;
    const postfix: TOpPostfixes): boolean;

  { field-wise comparison of two references, volatile references never match }
  function RefsEqual(const r1, r2: treference): boolean;

  { oper is the register reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;

  { both operands have the same type and the same contents }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  48. Implementation
  49. uses
  50. cutils,verbose,globtype,globals,
  51. systems,
  52. cpuinfo,
  53. cgobj,procinfo,
  54. aasmbase,aasmdata;
  {$ifdef DEBUG_AOPTCPU}
  { debug build: put the message s as a comment into the assembler list,
    right in front of p }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { regular build: intentionally empty }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if instr is an instruction which passes all three filters.
    An empty set disables the corresponding filter. Opcodes above
    LastCommonAsmOp never match a non-empty op set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      insn:=taicpu(instr);
      { opcode filter }
      if (op<>[]) and
         not((insn.opcode<=LastCommonAsmOp) and (insn.opcode in op)) then
        exit;
      { condition filter }
      if (cond<>[]) and not(insn.condition in cond) then
        exit;
      { postfix filter }
      Result:=(postfix=[]) or (insn.oppostfix in postfix);
    end;
  { Returns true if instr is an instruction with opcode op whose condition
    and postfix pass the given filters. An empty set disables the
    corresponding filter. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      insn:=taicpu(instr);
      if insn.opcode<>op then
        exit;
      if (cond<>[]) and not(insn.condition in cond) then
        exit;
      Result:=(postfix=[]) or (insn.oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { Returns true if instr is an instruction whose opcode is one of ops and
    whose postfix passes the filter (an empty postfix set matches any
    postfix). The condition is not looked at. }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      idx : Integer;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the postfix test does not depend on the opcode, so do it once }
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      for idx:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[idx] then
          begin
            Result:=true;
            exit;
          end;
    end;
  {$endif AARCH64}
  { Returns true if instr is an instruction with opcode op whose postfix
    passes the filter (an empty postfix set matches any postfix). The
    condition is not looked at. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Compares two references field by field. A reference carrying any
    volatility flag is never reported as equal to anything. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { volatile accesses must not be merged }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { address registers and displacement }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
  {$ifdef ARM}
      if r1.signindex<>r2.signindex then
        exit;
  {$endif ARM}
      { shifter and addressing mode }
      Result:=
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { Returns true if both operands have the same type and the same contents.
    Only constants, registers, condition codes, real constants and
    references can match; every other operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        begin
          Result:=false;
          exit;
        end;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
        top_realconst:
          Result:=oper1.val_real=oper2.val_real;
        else
          Result:=false;
      end;
    end;
  { Starting behind Current, looks for the next instruction in which reg
    appears. Returns true and leaves that instruction in Next on success.

    Without cs_opt_level3 only the directly following instruction is
    inspected. With it the search goes on until a non-instruction, a call or
    jump or (ARM only) an instruction modifying PC is reached. On failure
    Next holds whatever the last GetNextInstruction call delivered. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=false;
      while GetNextInstruction(Next,Next) do
        begin
          if RegInInstruction(reg,Next) then
            begin
              { Found something }
              Result:=true;
              exit;
            end;
          { decide whether it is allowed to look any further }
          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ<>ait_instruction) or
             is_calljmp(taicpu(Next).opcode)
  {$ifdef ARM}
             or RegModifiedByInstruction(NR_PC,Next)
  {$endif ARM}
             then
            break;
        end;
    end;
  { Tries to get rid of movp in a sequence like

      <op>  reg0, ...        (p)
      ...
      mov   reg1, reg0       (movp)
      dealloc reg0

    by letting p write to reg1 directly. Returns true if movp was removed
    (movp is freed in that case). optimizer is only used to build the debug
    message. }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      srcalloc,
      srcdealloc,
      dstalloc : tai_regalloc;
      beforemov : tai;
      srcreg,
      dstreg : TRegister;
    begin
      Result:=false;

      { movp must be a plain mov with the same condition as p; three operand
        forms carry a shifter operand and cannot be handled }
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) or
         (taicpu(movp).ops<>2) then
        exit;

      { the mov has to read the register written by p }
      srcreg:=taicpu(p).oper[0]^.reg;
      if not MatchOperand(taicpu(movp).oper[1]^, srcreg) then
        exit;

      dstreg:=taicpu(movp).oper[0]^.reg;
      { don't mess with moves to fp }
      if dstreg=current_procinfo.framepointer then
        exit;
      { the destination register of the mov might not be used between p and movp }
      if RegUsedBetween(dstreg,p,movp) then
        exit;
  {$ifdef ARM}
      { PC should be changed only by moves }
      if dstreg=NR_PC then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (taicpu(p).opcode=A_CBZ) or (taicpu(p).opcode=A_CBNZ) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = dstreg) and
         (current_settings.cputype < cpu_armv6) then
        exit;
  {$endif ARM}
      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        will be optimized to
          str reg2, [reg1]
      }
      if not RegLoadedWithNewValue(srcreg, p) then
        exit;

      { the old register has to die behind the mov }
      srcdealloc:=FindRegDeAlloc(srcreg,tai(movp.Next));
      if not assigned(srcdealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { srcreg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(srcdealloc);
      srcalloc:=FindRegAllocBackward(srcreg,tai(p.previous));
      if assigned(srcalloc) then
        begin
          asml.Remove(srcalloc);
          srcalloc.free;
          srcdealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but directly behind p }
        asml.InsertAfter(srcdealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,beforemov);
      dstalloc:=FindRegAlloc(dstreg,tai(beforemov.Next));
      if assigned(dstalloc) then
        begin
          asml.Remove(dstalloc);
          asml.InsertBefore(dstalloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstreg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstreg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]

        Preindexing must be removed there, since the same register is used as the base and as the target.
        Such case is not allowed for ARM CPU and produces crash. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (dstreg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      asml.remove(movp);
      movp.free;
    end;
  { p is expected to be a mov instruction, hp1 the instruction the caller
    found after it. If p is a plain register-to-register mov, the function
    tries to replace reads of the mov's destination in following
    instructions by the mov's source and to remove the mov.

    Returns true if p was changed. On success p might point to a different
    instruction, and hp1 is set to nil if the instruction it referred to got
    deleted.

    Two operand type checks were added compared with the previous version:
      - in the data processing case operand 2 may be a constant or a shifter
        operand; it is only compared and patched as a register if it really
        is one
      - in the ldr/str case the second operand is only dereferenced as a
        reference if it really is one }
  function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
    var
      I: Integer;
      current_hp, next_hp: tai;
      LDRChange: Boolean;
    begin
      Result:=false;
      {
        change
        mov r1, r0
        add r1, r1, #1
        to
        add r1, r0, #1

        Todo: Make it work for mov+cmp too

        CAUTION! If this one is successful p might not be a mov instruction anymore!
      }
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oppostfix = PF_NONE) then
        begin
          if
            MatchInstruction(hp1, [A_ADD, A_ADC,
  {$ifdef ARM}
                                   A_RSB, A_RSC,
  {$endif ARM}
                                   A_SUB, A_SBC,
                                   A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
                             [taicpu(p).condition], []) and
            { MOV and MVN might only have 2 ops }
            (taicpu(hp1).ops >= 2) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (
              (taicpu(hp1).ops = 2) or
              (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
            ) and
  {$ifdef AARCH64}
            (taicpu(p).oper[1]^.reg<>NR_SP) and
  {$endif AARCH64}
            not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              { When we get here we still don't know if the registers match }
              for I:=1 to 2 do
                {
                  If the first loop was successful p will be replaced with hp1.
                  The checks will still be ok, because all required information
                  will also be in hp1 then.
                }
                if (taicpu(hp1).ops > I) and
                   { operand 2 might be a constant or a shifter operand, its reg
                     field is only meaningful for a register operand }
                   (taicpu(hp1).oper[I]^.typ = top_reg) and
                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  {$ifdef ARM}
                   { prevent certain combinations on thumb(2), this is only a safe approximation }
                   and (not(GenerateThumbCode or GenerateThumb2Code) or
                    ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
                     (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  {$endif ARM}
                   then
                  begin
                    DebugMsg('Peephole RedundantMovProcess done', hp1);
                    taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
                    if p<>hp1 then
                      begin
                        asml.remove(p);
                        p.free;
                        p:=hp1;
                        Result:=true;
                      end;
                  end;

              if Result then Exit;
            end
          { Change:                        Change:
              mov     r1, r0                 mov r1, r0
              ...                            ...
              ldr/str r2, [r1, etc.]         mov r2, r1
            To:                            To:
              ldr/str r2, [r0, etc.]         mov r2, r0
          }
          else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  {$ifdef ARM}
            and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
            and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
            { Thumb does not support references with base and index one being SP }
            and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  {$endif ARM}
  {$ifdef AARCH64}
            and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  {$endif AARCH64}
            then
            begin
              current_hp := p;
              TransferUsedRegs(TmpUsedRegs);

              { Search local instruction block }
              while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
                begin
                  UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
                  LDRChange := False;

                  if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2) and
                    { everything below dereferences oper[1]^.ref }
                    (taicpu(next_hp).oper[1]^.typ = top_ref) then
                    begin

                      { Change the registers from r1 to r0 }
                      if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  {$ifdef ARM}
                        { This optimisation conflicts with something and raises
                          an access violation - needs further investigation. [Kit] }
                        (taicpu(next_hp).opcode <> A_LDR) and
  {$endif ARM}
                        { Don't mess around with the base register if the
                          reference is pre- or post-indexed }
                        (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange then
                        DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);

                      { Drop out if we're dealing with pre-indexed references }
                      if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
                        (
                          RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
                          RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
                        ) then
                        begin
                          { Remember to update register allocations }
                          if LDRChange then
                            AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                          Break;
                        end;

                      { The register being stored can be potentially changed (as long as it's not the stack pointer) }
                      if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                        MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
                          taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
                        begin
                          AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
                          if (taicpu(p).oppostfix = PF_None) and
                            (
                              (
                                (taicpu(next_hp).opcode = A_LDR) and
                                MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
                              ) or
                              not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
                            ) and
                            { Double-check to see if the old registers were actually
                              changed (e.g. if the super registers matched, but not
                              the sizes, they won't be changed). }
                            (
                              (taicpu(next_hp).opcode = A_LDR) or
                              not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
                            ) and
                            not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
                            begin
                              DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
                              RemoveCurrentP(p);
                              Result := True;
                              Exit;
                            end;
                        end;
                    end
                  else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
                    (taicpu(next_hp).ops = 2) then
                    begin
                      if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          { Found another mov that writes entirely to the register }
                          if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
                            begin
                              { Register was used beforehand }
                              if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
                                begin
                                  { This MOV is exactly the same as the first one.
                                    Since none of the registers have changed value
                                    at this point, we can remove it. }
                                  DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', next_hp);

                                  if (next_hp = hp1) then
                                    { Don't let hp1 become a dangling pointer }
                                    hp1 := nil;

                                  asml.Remove(next_hp);
                                  next_hp.Free;

                                  { We still have the original p, so we can continue optimising;
                                   if it was -O2 or below, this instruction appeared immediately
                                   after the first MOV, so we're technically not looking more
                                   than one instruction ahead after it's removed! [Kit] }
                                  Continue;
                                end
                              else
                                { Register changes value - drop out }
                                Break;
                            end;

                          { We can delete the first MOV (only if the second MOV is unconditional) }
  {$ifdef ARM}
                          if (taicpu(p).oppostfix = PF_None) and
                            (taicpu(next_hp).condition = C_None) then
  {$endif ARM}
                            begin
                              DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
                              RemoveCurrentP(p);
                              Result := True;
                            end;
                          Exit;
                        end
                      else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
                        begin
                          if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
                            { Be careful - if the entire register is not used, removing this
                              instruction will leave the unused part uninitialised }
  {$ifdef AARCH64}
                            and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  {$endif AARCH64}
                            then
                            begin
                              { Instruction will become mov r1,r1 }
                              DebugMsg('Peephole Optimization: Mov2None 2 done', next_hp);

                              if (next_hp = hp1) then
                                { Don't let hp1 become a dangling pointer }
                                hp1 := nil;

                              asml.Remove(next_hp);
                              next_hp.Free;
                              Continue;
                            end;

                          { Change the old register (checking the first operand again
                            forces it to be left alone if the full register is not
                            used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  {$ifdef AARCH64}
                          if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  {$endif AARCH64}
                            begin
                              DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
                              taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              { If this was the only reference to the old register,
                                then we can remove the original MOV now }

                              if (taicpu(p).oppostfix = PF_None) and
                                { A bit of a hack - sometimes registers aren't tracked properly, so do not
                                  remove if the register was apparently not allocated when its value is
                                  first set at the MOV command (this is especially true for the stack
                                  register). [Kit] }
                                (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                                RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
                                not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
                                begin
                                  DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
                                  RemoveCurrentP(p);
                                  Result := True;
                                  Exit;
                                end;
                            end;
                        end;
                    end;

                  { On low optimisation settings, don't search more than one instruction ahead }
                  if not(cs_opt_level3 in current_settings.optimizerswitches) or
                    { Stop at procedure calls and jumps }
                    is_calljmp(taicpu(next_hp).opcode) or
                    { If the read register has changed value, or the MOV
                      destination register has been used, drop out }
                    RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
                    RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
                    Break;

                  current_hp := next_hp;
                end;
            end;
        end;
    end;
  { First pass peephole optimizations for an uxtb instruction p.

    Returns true if something was changed. If p itself gets removed, p is
    set to the instruction following it.

    In the UxtbUxth2Uxtb and UxtbUxtb2Uxtb cases the write to reg3 is moved
    up from hp1 to p. With cs_opt_level3 hp1 is not necessarily the
    instruction directly behind p, so reg3 must not be used between the two
    instructions: otherwise an instruction in between would see the new
    value of reg3 too early. This is checked with RegUsedBetween, the same
    way RemoveSuperfluousMove does it. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
        uxtb reg2,reg1
        strb reg2,[...]
        dealloc reg2
        to
        strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb might not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        uxth reg3,reg2
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written at p afterwards, so it might not be used inbetween }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        uxtb reg3,reg2
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written at p afterwards, so it might not be used inbetween }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        and reg3,reg2,#0x*FF
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          { here the and is turned into the uxtb and p is dropped, so the
            write to reg3 stays where it was }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
           RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
        Result:=true;
    end;
  { First pass peephole optimizations for an uxth instruction p.

    Returns true if something was changed. In the three pattern cases p is
    removed and set to the instruction which followed it. }
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      user: tai;
      extreg: TRegister;
      userfound,
      plainp,
      plainuxth: Boolean;

    { unlinks and frees p, afterwards p is the instruction which followed it }
    procedure DropCurrent;
      var
        following: tai;
      begin
        GetNextInstruction(p, following);
        asml.remove(p);
        p.free;
        p:=following;
      end;

    begin
      Result:=false;

      { every case below starts with looking for the next user of the
        register written by p, so do this once }
      extreg:=taicpu(p).oper[0]^.reg;
      userfound:=GetNextInstructionUsingReg(p,user,extreg);

      { p is unconditional, has no postfix and two operands }
      plainp:=MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2);
      { ... and really is an uxth }
      plainuxth:=plainp and (taicpu(p).opcode=A_UXTH);

      {
        change
        uxth reg2,reg1
        strh reg2,[...]
        dealloc reg2
        to
        strh reg1,[...]
      }
      if plainp and userfound and
        MatchInstruction(user, A_STR, [C_None], [PF_H]) and
        RegEndofLife(extreg,taicpu(user)) and
        { the reference in strh might not use reg2 }
        not(RegInRef(extreg,taicpu(user).oper[1]^.ref^)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,user)) then
        begin
          DebugMsg('Peephole UXTHStrh2Strh done', p);
          taicpu(user).loadReg(0,taicpu(p).oper[1]^.reg);
          DropCurrent;
          Result:=true;
        end
      {
        change
        uxth reg2,reg1
        uxth reg3,reg2
        dealloc reg2
        to
        uxth reg3,reg1
      }
      else if plainuxth and userfound and
        MatchInstruction(user, A_UXTH, [C_None], [PF_None]) and
        (taicpu(user).ops=2) and
        MatchOperand(taicpu(user).oper[1]^, extreg) and
        RegEndofLife(extreg,taicpu(user)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,user)) then
        begin
          DebugMsg('Peephole UxthUxth2Uxth done', p);
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,user,UsedRegs);
          taicpu(user).opcode:=A_UXTH;
          taicpu(user).loadReg(1,taicpu(p).oper[1]^.reg);
          DropCurrent;
          Result:=true;
        end
      {
        change
        uxth reg2,reg1
        and reg3,reg2,#65535
        dealloc reg2
        to
        uxth reg3,reg1
      }
      else if plainuxth and userfound and
        MatchInstruction(user, A_AND, [C_None], [PF_None]) and
        (taicpu(user).ops=3) and
        (taicpu(user).oper[2]^.typ=top_const) and
        ((taicpu(user).oper[2]^.val and $FFFF)=$FFFF) and
        MatchOperand(taicpu(user).oper[1]^, extreg) and
        RegEndofLife(extreg,taicpu(user)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,user)) then
        begin
          DebugMsg('Peephole UxthAndImm2Uxth done', p);
          { the and becomes the uxth, its constant operand is dropped }
          taicpu(user).opcode:=A_UXTH;
          taicpu(user).ops:=2;
          taicpu(user).loadReg(1,taicpu(p).oper[1]^.reg);
          DropCurrent;
          Result:=true;
        end
      { last resort: a mov copying the result somewhere else }
      else if userfound and
        RemoveSuperfluousMove(p, user, 'UxthMov2Data') then
        Result:=true;
    end;
  728. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  729. var
  730. hp1, hp2: tai;
  731. begin
  732. Result:=false;
  733. {
  734. change
  735. sxtb reg2,reg1
  736. strb reg2,[...]
  737. dealloc reg2
  738. to
  739. strb reg1,[...]
  740. }
  741. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  742. (taicpu(p).ops=2) and
  743. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  744. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  745. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  746. { the reference in strb might not use reg2 }
  747. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  748. { reg1 might not be modified inbetween }
  749. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  750. begin
  751. DebugMsg('Peephole SxtbStrb2Strb done', p);
  752. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  753. GetNextInstruction(p,hp2);
  754. asml.remove(p);
  755. p.free;
  756. p:=hp2;
  757. result:=true;
  758. end
  759. {
  760. change
  761. sxtb reg2,reg1
  762. sxth reg3,reg2
  763. dealloc reg2
  764. to
  765. sxtb reg3,reg1
  766. }
  767. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  768. (taicpu(p).ops=2) and
  769. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  770. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  771. (taicpu(hp1).ops = 2) and
  772. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  773. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  774. { reg1 might not be modified inbetween }
  775. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  776. begin
  777. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  778. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  779. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  780. asml.remove(hp1);
  781. hp1.free;
  782. result:=true;
  783. end
  784. {
  785. change
  786. sxtb reg2,reg1
  787. sxtb reg3,reg2
  788. dealloc reg2
  789. to
  790. uxtb reg3,reg1
  791. }
  792. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  793. (taicpu(p).ops=2) and
  794. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  795. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  796. (taicpu(hp1).ops = 2) and
  797. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  798. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  799. { reg1 might not be modified inbetween }
  800. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  801. begin
  802. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  803. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  804. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  805. asml.remove(hp1);
  806. hp1.free;
  807. result:=true;
  808. end
  809. {
  810. change
  811. sxtb reg2,reg1
  812. and reg3,reg2,#0x*FF
  813. dealloc reg2
  814. to
  815. uxtb reg3,reg1
  816. }
  817. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  818. (taicpu(p).ops=2) and
  819. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  820. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  821. (taicpu(hp1).ops=3) and
  822. (taicpu(hp1).oper[2]^.typ=top_const) and
  823. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  824. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  825. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  826. { reg1 might not be modified inbetween }
  827. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  828. begin
  829. DebugMsg('Peephole SxtbAndImm2Sxtb done', p);
  830. taicpu(hp1).opcode:=A_SXTB;
  831. taicpu(hp1).ops:=2;
  832. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  833. GetNextInstruction(p,hp2);
  834. asml.remove(p);
  835. p.free;
  836. p:=hp2;
  837. result:=true;
  838. end
  839. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  840. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
  841. Result:=true;
  842. end;
  843. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  844. var
  845. hp1: tai;
  846. begin
  847. Result:=false;
  848. {
  849. change
  850. sxth reg2,reg1
  851. strh reg2,[...]
  852. dealloc reg2
  853. to
  854. strh reg1,[...]
  855. }
  856. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  857. (taicpu(p).ops=2) and
  858. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  859. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  860. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  861. { the reference in strb might not use reg2 }
  862. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  863. { reg1 might not be modified inbetween }
  864. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  865. begin
  866. DebugMsg('Peephole SXTHStrh2Strh done', p);
  867. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  868. GetNextInstruction(p, hp1);
  869. asml.remove(p);
  870. p.free;
  871. p:=hp1;
  872. result:=true;
  873. end
  874. {
  875. change
  876. sxth reg2,reg1
  877. sxth reg3,reg2
  878. dealloc reg2
  879. to
  880. sxth reg3,reg1
  881. }
  882. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  883. (taicpu(p).ops=2) and
  884. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  885. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  886. (taicpu(hp1).ops=2) and
  887. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  888. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  889. { reg1 might not be modified inbetween }
  890. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  891. begin
  892. DebugMsg('Peephole SxthSxth2Sxth done', p);
  893. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  894. taicpu(hp1).opcode:=A_SXTH;
  895. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  896. GetNextInstruction(p, hp1);
  897. asml.remove(p);
  898. p.free;
  899. p:=hp1;
  900. result:=true;
  901. end
  902. {
  903. change
  904. sxth reg2,reg1
  905. and reg3,reg2,#65535
  906. dealloc reg2
  907. to
  908. sxth reg3,reg1
  909. }
  910. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  911. (taicpu(p).ops=2) and
  912. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  913. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  914. (taicpu(hp1).ops=3) and
  915. (taicpu(hp1).oper[2]^.typ=top_const) and
  916. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  917. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  918. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  919. { reg1 might not be modified inbetween }
  920. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  921. begin
  922. DebugMsg('Peephole SxthAndImm2Sxth done', p);
  923. taicpu(hp1).opcode:=A_SXTH;
  924. taicpu(hp1).ops:=2;
  925. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  926. GetNextInstruction(p, hp1);
  927. asml.remove(p);
  928. p.free;
  929. p:=hp1;
  930. result:=true;
  931. end
  932. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  933. RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
  934. Result:=true;
  935. end;
  936. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  937. var
  938. hp1, hp2: tai;
  939. i: longint;
  940. begin
  941. Result:=false;
  942. {
  943. optimize
  944. and reg2,reg1,const1
  945. ...
  946. }
  947. if (taicpu(p).ops>2) and
  948. (taicpu(p).oper[1]^.typ = top_reg) and
  949. (taicpu(p).oper[2]^.typ = top_const) then
  950. begin
  951. {
  952. change
  953. and reg2,reg1,const1
  954. ...
  955. and reg3,reg2,const2
  956. to
  957. and reg3,reg1,(const1 and const2)
  958. }
  959. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  960. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  961. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  962. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  963. (taicpu(hp1).oper[2]^.typ = top_const)
  964. {$ifdef AARCH64}
  965. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  966. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  967. ) or
  968. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  969. {$endif AARCH64}
  970. then
  971. begin
  972. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  973. begin
  974. DebugMsg('Peephole AndAnd2And done', p);
  975. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  976. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  977. begin
  978. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  979. taicpu(p).opcode:=A_MOV;
  980. taicpu(p).ops:=2;
  981. taicpu(p).loadConst(1,0);
  982. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  983. end
  984. else
  985. begin
  986. DebugMsg('Peephole AndAnd2And 1 done', p);
  987. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  988. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  989. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  990. end;
  991. asml.remove(hp1);
  992. hp1.free;
  993. Result:=true;
  994. exit;
  995. end
  996. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  997. begin
  998. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  999. begin
  1000. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1001. taicpu(hp1).opcode:=A_MOV;
  1002. taicpu(hp1).loadConst(1,0);
  1003. taicpu(hp1).ops:=2;
  1004. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1005. end
  1006. else
  1007. begin
  1008. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1009. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1010. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1011. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1012. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1013. end;
  1014. GetNextInstruction(p, hp1);
  1015. RemoveCurrentP(p);
  1016. p:=hp1;
  1017. Result:=true;
  1018. exit;
  1019. end;
  1020. end
  1021. {
  1022. change
  1023. and reg2,reg1,$xxxxxxFF
  1024. strb reg2,[...]
  1025. dealloc reg2
  1026. to
  1027. strb reg1,[...]
  1028. }
  1029. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1030. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1031. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1032. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1033. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1034. { the reference in strb might not use reg2 }
  1035. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1036. { reg1 might not be modified inbetween }
  1037. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1038. begin
  1039. DebugMsg('Peephole AndStrb2Strb done', p);
  1040. {$ifdef AARCH64}
  1041. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1042. {$else AARCH64}
  1043. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1044. {$endif AARCH64}
  1045. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1046. RemoveCurrentP(p);
  1047. result:=true;
  1048. exit;
  1049. end
  1050. {
  1051. change
  1052. and reg2,reg1,255
  1053. uxtb/uxth reg3,reg2
  1054. dealloc reg2
  1055. to
  1056. and reg3,reg1,x
  1057. }
  1058. else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
  1059. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1060. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1061. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1062. (taicpu(hp1).ops = 2) and
  1063. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1064. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1065. { reg1 might not be modified inbetween }
  1066. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1067. begin
  1068. DebugMsg('Peephole AndUxt2And done', p);
  1069. taicpu(hp1).opcode:=A_AND;
  1070. taicpu(hp1).ops:=3;
  1071. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1072. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1073. GetNextInstruction(p,hp1);
  1074. asml.remove(p);
  1075. p.Free;
  1076. p:=hp1;
  1077. result:=true;
  1078. exit;
  1079. end
  1080. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1081. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1082. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1083. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1084. (taicpu(hp1).ops = 2) and
  1085. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1086. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1087. { reg1 might not be modified inbetween }
  1088. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1089. begin
  1090. DebugMsg('Peephole AndSxt2And done', p);
  1091. taicpu(hp1).opcode:=A_AND;
  1092. taicpu(hp1).ops:=3;
  1093. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1094. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1095. GetNextInstruction(p,hp1);
  1096. asml.remove(p);
  1097. p.Free;
  1098. p:=hp1;
  1099. result:=true;
  1100. exit;
  1101. end
  1102. {
  1103. from
  1104. and reg1,reg0,2^n-1
  1105. mov reg2,reg1, lsl imm1
  1106. (mov reg3,reg2, lsr/asr imm1)
  1107. remove either the and or the lsl/xsr sequence if possible
  1108. }
  1109. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1110. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1111. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1112. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1113. (taicpu(hp1).ops=3) and
  1114. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1115. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1116. {$ifdef ARM}
  1117. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1118. {$endif ARM}
  1119. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1120. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1121. begin
  1122. {
  1123. and reg1,reg0,2^n-1
  1124. mov reg2,reg1, lsl imm1
  1125. mov reg3,reg2, lsr/asr imm1
  1126. =>
  1127. and reg1,reg0,2^n-1
  1128. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1129. }
  1130. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1131. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1132. (taicpu(hp2).ops=3) and
  1133. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1134. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1135. {$ifdef ARM}
  1136. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1137. {$endif ARM}
  1138. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1139. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1140. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1141. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1142. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1143. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1144. begin
  1145. DebugMsg('Peephole AndLslXsr2And done', p);
  1146. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1147. asml.Remove(hp1);
  1148. asml.Remove(hp2);
  1149. hp1.free;
  1150. hp2.free;
  1151. result:=true;
  1152. exit;
  1153. end
  1154. {
  1155. and reg1,reg0,2^n-1
  1156. mov reg2,reg1, lsl imm1
  1157. =>
  1158. mov reg2,reg0, lsl imm1
  1159. if imm1>i
  1160. }
  1161. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1162. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1163. begin
  1164. DebugMsg('Peephole AndLsl2Lsl done', p);
  1165. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1166. GetNextInstruction(p, hp1);
  1167. asml.Remove(p);
  1168. p.free;
  1169. p:=hp1;
  1170. result:=true;
  1171. exit;
  1172. end
  1173. end;
  1174. end;
  1175. {
  1176. change
  1177. and reg1, ...
  1178. mov reg2, reg1
  1179. to
  1180. and reg2, ...
  1181. }
  1182. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1183. (taicpu(p).ops>=3) and
  1184. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1185. Result:=true;
  1186. end;
  1187. end.