aoptarm.pas 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AArch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  { ARM and AArch64 may not look very similar at first glance, but several
    peephole optimizations can nevertheless be shared between both targets }
  TARMAsmOptimizer = class(TAsmOptimizer)
    { Debug builds insert s as an assembler comment in front of p;
      in release builds the implementation is empty }
    procedure DebugMsg(const s : string; p : tai);

    { Tries to fold a "mov x,reg" (movp) that follows p into p itself by
      letting p write to x directly; optimizer is only used for the debug
      message. Returns true if movp was removed }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;

    { Forwards the source register of the mov in p into following
      instructions. CAUTION: on success p may no longer be a mov }
    function RedundantMovProcess(var p: tai; var hp1: tai): boolean;

    { Searches forward from Current for an instruction that references reg }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;

{$ifdef AARCH64}
    { Folds the extension in p into hp1 as an extended-register operand }
    function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
{$endif AARCH64}

    { Per-instruction optimization entry points }
    function OptPreSBFXUBFX(var p: tai): Boolean;
    function OptPass1UXTB(var p: tai): Boolean;
    function OptPass1UXTH(var p: tai): Boolean;
    function OptPass1SXTB(var p: tai): Boolean;
    function OptPass1SXTH(var p: tai): Boolean;

    { These can be overridden by the CPU specific optimizers }
    function OptPass1LDR(var p: tai): Boolean; virtual;
    function OptPass1STR(var p: tai): Boolean; virtual;
    function OptPass1And(var p: tai): Boolean; virtual;
  End;
  { Instruction matchers: each returns true if instr is an instruction whose
    opcode, condition and postfix satisfy the given filters. An empty set for
    cond or postfix (and for the opcode set variant, op) means "don't care" }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
{$ifdef AARCH64}
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
{$endif AARCH64}
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;

  { Field-wise comparison of two references; references marked volatile
    never compare equal }
  function RefsEqual(const r1, r2: treference): boolean;

  { Operand matchers: against a specific register, or against another operand }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  56. Implementation
  57. uses
  58. cutils,verbose,globtype,globals,
  59. systems,
  60. cpuinfo,
  61. cgobj,procinfo,
  62. aasmbase,aasmdata,itcpugas;
  {$ifdef DEBUG_AOPTCPU}
  const
    { Prefix used by the peephole debug messages }
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';

  { Debug variant: emits s as an assembler comment directly in front of p }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { An empty prefix helps the optimizer to remove string concatenations that
    will never be shown to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';

  { Release variant: intentionally does nothing }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if instr is an instruction and passes all three filters.
    op, cond and postfix are sets; an empty set disables the respective
    filter. Opcodes beyond LastCommonAsmOp never match a non-empty op set }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      { everything below casts to taicpu, so rule out non-instructions first }
      if instr.typ <> ait_instruction then
        Exit;
      { the range check has to come first: the opcode must fit into the set type }
      if (op <> []) and
         ((taicpu(instr).opcode > LastCommonAsmOp) or
          not (taicpu(instr).opcode in op)) then
        Exit;
      if (cond <> []) and not (taicpu(instr).condition in cond) then
        Exit;
      Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true if instr is an instruction with exactly the opcode op whose
    condition and postfix pass the given sets (an empty set matches anything) }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      { only real instructions may be inspected as taicpu }
      if instr.typ <> ait_instruction then
        Exit;
      if taicpu(instr).opcode <> op then
        Exit;
      if (cond <> []) and not (taicpu(instr).condition in cond) then
        Exit;
      Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { Returns true if instr is an instruction whose opcode is one of ops and
    whose postfix passes the postfix set (an empty set matches anything) }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      idx : Integer;
    begin
      Result:=false;
      { the postfix test does not depend on the candidate opcode, so it is
        checked once up front }
      if (instr.typ = ait_instruction) and
         ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
        for idx:=Low(ops) to High(ops) do
          if taicpu(instr).opcode = ops[idx] then
            begin
              Result:=true;
              Break;
            end;
    end;
  {$endif AARCH64}
  { Returns true if instr is an instruction with exactly the opcode op whose
    postfix passes the postfix set (an empty set matches anything); the
    condition is not looked at }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      { only real instructions may be inspected as taicpu }
      if instr.typ <> ait_instruction then
        Exit;
      if taicpu(instr).opcode = op then
        Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        Result := oper.reg = reg
      else
        { any other operand kind can never match a register }
        Result := False;
    end;
  { Returns true if both references agree in every compared field.
    A reference carrying any volatility flag never compares equal, not even
    to itself }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := False;

      { volatile accesses must never be treated as interchangeable }
      if (r1.volatility <> []) or (r2.volatility <> []) then
        Exit;

      { address components }
      if (r1.offset <> r2.offset) or
         (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        Exit;

      { symbolic part }
      if (r1.symbol <> r2.symbol) or
         (r1.refaddr <> r2.refaddr) or
         (r1.relsymbol <> r2.relsymbol) then
        Exit;

{$ifdef ARM}
      if r1.signindex <> r2.signindex then
        Exit;
{$endif ARM}

      { addressing mode and index shift }
      Result :=
        (r1.shiftimm = r2.shiftimm) and
        (r1.addressmode = r2.addressmode) and
        (r1.shiftmode = r2.shiftmode);
    end;
  { Returns true if both operands are of the same kind and carry the same
    value. Only constants, registers, condition codes, real constants and
    references are compared; every other operand kind yields false }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ <> oper2.typ then
        Result := false
      else
        case oper1.typ of
          top_reg:
            Result := oper1.reg = oper2.reg;
          top_const:
            Result := oper1.val = oper2.val;
          top_ref:
            Result := RefsEqual(oper1.ref^, oper2.ref^);
          top_conditioncode:
            Result := oper1.cc = oper2.cc;
          top_realconst:
            Result := oper1.val_real = oper2.val_real;
          else
            Result := false;
        end;
    end;
  {$ifdef AARCH64}
  { Folds the extension instruction p (ext reg2,reg1) into the following
    instruction hp1 when hp1 is

      add/sub[s] regX,regY,reg2   (regY <> reg2)   or
      cmp/cmn    regY,reg2

    reg2 dies at hp1 and reg1 is not modified in between. hp1 then uses reg1
    with an extended-register operand of kind shiftmode, and p is removed.
    Returns the result of RemoveCurrentP on success, false otherwise }
  function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
    var
      extend: tshifterop;
      regslot: Integer;
      iscompare,
      foldable: Boolean;
    begin
      Result:=false;
      if taicpu(p).ops<>2 then
        exit;

      { hp1 is either a compare or an arithmetic instruction, never both, so
        the kind can be determined once and reused further down }
      iscompare:=MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]);
      if iscompare then
        foldable:=
          (taicpu(hp1).ops=2) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)
      else
        foldable:=
          MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
          (taicpu(hp1).ops=3) and
          MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
          not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg));

      if not foldable then
        exit;
      if not RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
        exit;
      { reg1 might not be modified inbetween }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
        exit;

      DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

      { compares have no destination operand, so the operand being replaced
        sits one slot earlier than in add/sub }
      if iscompare then
        regslot:=1
      else
        regslot:=2;

      taicpu(hp1).loadReg(regslot,taicpu(p).oper[1]^.reg);
      { one additional operand for the extension }
      taicpu(hp1).ops:=regslot+2;

      shifterop_reset(extend);
      extend.shiftmode:=shiftmode;
      extend.shiftimm:=0;
      taicpu(hp1).loadshifterop(regslot+1,extend);

      result:=RemoveCurrentP(p);
    end;
  {$endif AARCH64}
  { Searches forward from Current for an instruction that references reg.

    Next receives the last entry visited. The result is true only if that
    entry references reg. Without -O3 only the directly following
    instruction is inspected; with -O3 the search continues until it runs
    out of instructions, hits a non-instruction, a call/jump or (ARM only)
    an instruction modifying PC }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; const reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=False;
      while GetNextInstruction(Next,Next) do
        begin
          if RegInInstruction(reg,Next) then
            begin
              { Found something }
              Result:=True;
              Exit;
            end;

          { decide whether it is allowed to look past this entry }
          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ<>ait_instruction) or
             is_calljmp(taicpu(Next).opcode)
{$ifdef ARM}
             or RegModifiedByInstruction(NR_PC,Next)
{$endif ARM}
             then
            Break;
        end;
    end;
  { Tries to turn

      <op>  reg0, ...          (p)
      mov   reg2, reg0         (movp)

    into

      <op>  reg2, ...

    p        : instruction that writes its first operand
    movp     : candidate mov following p
    optimizer: name of the calling optimization, only used in the debug message

    Returns true if movp was removed (and freed) and p was retargeted }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      regalloc,
      regdealloc : tai_regalloc;
      loadedreg,
      targetreg : TRegister;
    begin
      Result:=false;

      { movp has to be a plain mov carrying the same condition as p;
        we can't optimize if there is a shiftop }
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) or
         (taicpu(movp).ops<>2) then
        exit;

      loadedreg:=taicpu(p).oper[0]^.reg;
      targetreg:=taicpu(movp).oper[0]^.reg;

      { the mov must read the register written by p }
      if not MatchOperand(taicpu(movp).oper[1]^, loadedreg) then
        exit;
      { don't mess with moves to fp }
      if targetreg=current_procinfo.framepointer then
        exit;
      { the destination register of the mov might not be used between p and movp }
      if RegUsedBetween(targetreg,p,movp) then
        exit;
{$ifdef ARM}
      { PC should be changed only by moves }
      if targetreg=NR_PC then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (taicpu(p).opcode=A_CBZ) or (taicpu(p).opcode=A_CBNZ) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = targetreg) and
         (current_settings.cputype < cpu_armv6) then
        exit;
{$endif ARM}
      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        will be optimized to
          str reg2, [reg1]
      }
      if not RegLoadedWithNewValue(loadedreg, p) then
        exit;

      { reg0 has to be released after the mov, otherwise its value is still needed }
      regdealloc:=FindRegDeAlloc(loadedreg,tai(movp.Next));
      if not assigned(regdealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      result:=true;

      { reg0 is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regdealloc);
      regalloc:=FindRegAllocBackward(loadedreg,tai(p.previous));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          regalloc.free;
          regdealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but move it right behind p }
        asml.InsertAfter(regdealloc,p);

      AllocRegBetween(targetreg,p,movp,UsedRegs);

      { finally get rid of the mov }
      taicpu(p).loadreg(0,targetreg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]

        Preindexing must be removed there, since the same register is used as the base and as the target.
        Such case is not allowed for ARM CPU and produces crash. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (targetreg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      asml.remove(movp);
      movp.free;
    end;
  { Tries to make the register-to-register move in p redundant by forwarding
    its source register into the instructions that follow.

    p  : the mov instruction (mov r1, r0). CAUTION: on success p may be
         replaced by another instruction and is then no longer a mov
    hp1: the instruction the caller found after p; set to nil if this
         routine removes and frees it

    Returns true if p was removed or replaced. Instructions following p can
    be modified or removed even when the result is false. }
  function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
    var
      I: Integer;
      current_hp, next_hp: tai;
      LDRChange: Boolean;
    begin
      Result:=false;
      {
        change
        mov r1, r0
        add r1, r1, #1
        to
        add r1, r0, #1

        Todo: Make it work for mov+cmp too

        CAUTION! If this one is successful p might not be a mov instruction anymore!
      }
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oppostfix = PF_NONE) then
        begin
          if
            MatchInstruction(hp1, [A_ADD, A_ADC,
{$ifdef ARM}
                                   A_RSB, A_RSC,
{$endif ARM}
                                   A_SUB, A_SBC,
                                   A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
                             [taicpu(p).condition], []) and
            { MOV and MVN might only have 2 ops }
            (taicpu(hp1).ops >= 2) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (
              (taicpu(hp1).ops = 2) or
              (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
            ) and
{$ifdef AARCH64}
            (taicpu(p).oper[1]^.reg<>NR_SP) and
            { in this case you have to transform it to movk or the like }
            (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
{$endif AARCH64}
            not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              { When we get here we still don't know if the registers match }
              for I:=1 to 2 do
                {
                  If the first loop was successful p will be replaced with hp1.
                  The checks will still be ok, because all required information
                  will also be in hp1 then.
                }
                if (taicpu(hp1).ops > I) and
                   { operand 2 may be a constant or a shifter operand; its reg
                     field is only meaningful (and may only be overwritten
                     below) if the operand really is a register }
                   (taicpu(hp1).oper[I]^.typ = top_reg) and
                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
{$ifdef ARM}
                   { prevent certain combinations on thumb(2), this is only a safe approximation }
                   and (not(GenerateThumbCode or GenerateThumb2Code) or
                    ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
                     (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
{$endif ARM}
                   then
                  begin
                    DebugMsg('Peephole RedundantMovProcess done', hp1);
                    taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
                    if p<>hp1 then
                      begin
                        asml.remove(p);
                        p.free;
                        p:=hp1;
                        Result:=true;
                      end;
                  end;

              if Result then Exit;
            end
          { Change:                        Change:
              mov     r1, r0                 mov r1, r0
              ...                            ...
              ldr/str r2, [r1, etc.]         mov r2, r1
            To:                            To:
              ldr/str r2, [r0, etc.]         mov r2, r0
          }
          else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
{$ifdef ARM}
            and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
            and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
            { Thumb does not support references with base and index one being SP }
            and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
{$endif ARM}
{$ifdef AARCH64}
            and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
{$endif AARCH64}
            then
            begin
              current_hp := p;
              TransferUsedRegs(TmpUsedRegs);

              { Search local instruction block }
              while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
                begin
                  UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
                  LDRChange := False;

                  if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
{$ifdef AARCH64}
                    { If r0 is the zero register, then this sequence of instructions will cause
                      an access violation, but that's better than an assembler error caused by
                      changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
                    and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
{$endif AARCH64}
                    then
                    begin

                      { Change the registers from r1 to r0 }
                      if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
{$ifdef ARM}
                        { This optimisation conflicts with something and raises
                          an access violation - needs further investigation. [Kit] }
                        (taicpu(next_hp).opcode <> A_LDR) and
{$endif ARM}
                        { Don't mess around with the base register if the
                          reference is pre- or post-indexed }
                        (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange then
                        DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);

                      { Drop out if we're dealing with pre-indexed references }
                      if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
                        (
                          RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
                          RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
                        ) then
                        begin
                          { Remember to update register allocations }
                          if LDRChange then
                            AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                          Break;
                        end;

                      { The register being stored can be potentially changed (as long as it's not the stack pointer) }
                      if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                        MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
                          taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
                        begin
                          AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
                          if (taicpu(p).oppostfix = PF_None) and
                            (
                              (
                                (taicpu(next_hp).opcode = A_LDR) and
                                MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
                              ) or
                              not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
                            ) and
                            { Double-check to see if the old registers were actually
                              changed (e.g. if the super registers matched, but not
                              the sizes, they won't be changed). }
                            (
                              (taicpu(next_hp).opcode = A_LDR) or
                              not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
                            ) and
                            not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
                            begin
                              DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2a done', p);
                              RemoveCurrentP(p);
                              Result := True;
                              Exit;
                            end;
                        end;
                    end
                  else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
                    (taicpu(next_hp).ops = 2) then
                    begin
                      if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          { Found another mov that writes entirely to the register }
                          if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
                            begin
                              { Register was used beforehand }
                              if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
                                begin
                                  { This MOV is exactly the same as the first one.
                                    Since none of the registers have changed value
                                    at this point, we can remove it. }
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);

                                  if (next_hp = hp1) then
                                    { Don't let hp1 become a dangling pointer }
                                    hp1 := nil;

                                  asml.Remove(next_hp);
                                  next_hp.Free;

                                  { We still have the original p, so we can continue optimising;
                                    if it was -O2 or below, this instruction appeared immediately
                                    after the first MOV, so we're technically not looking more
                                    than one instruction ahead after it's removed! [Kit] }
                                  Continue;
                                end
                              else
                                { Register changes value - drop out }
                                Break;
                            end;

                          { We can delete the first MOV (only if the second MOV is unconditional) }
{$ifdef ARM}
                          if (taicpu(p).oppostfix = PF_None) and
                            (taicpu(next_hp).condition = C_None) then
{$endif ARM}
                            begin
                              DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2b done', p);
                              RemoveCurrentP(p);
                              Result := True;
                            end;
                          Exit;
                        end
                      else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
                        begin
                          if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
                            { Be careful - if the entire register is not used, removing this
                              instruction will leave the unused part uninitialised }
{$ifdef AARCH64}
                            and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
{$endif AARCH64}
                            then
                            begin
                              { Instruction will become mov r1,r1 }
                              DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);

                              { Allocate r1 between the instructions; not doing
                                so may cause problems when removing superfluous
                                MOVs later (i38055) }
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              if (next_hp = hp1) then
                                { Don't let hp1 become a dangling pointer }
                                hp1 := nil;

                              asml.Remove(next_hp);
                              next_hp.Free;
                              Continue;
                            end;

                          { Change the old register (checking the first operand again
                            forces it to be left alone if the full register is not
                            used, lest mov w1,w1 gets optimised out by mistake). [Kit] }
{$ifdef AARCH64}
                          if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
{$endif AARCH64}
                            begin
                              DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
                              taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              { If this was the only reference to the old register,
                                then we can remove the original MOV now }

                              if (taicpu(p).oppostfix = PF_None) and
                                { A bit of a hack - sometimes registers aren't tracked properly, so do not
                                  remove if the register was apparently not allocated when its value is
                                  first set at the MOV command (this is especially true for the stack
                                  register). [Kit] }
                                (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                                RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
                                not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
                                begin
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
                                  RemoveCurrentP(p);
                                  Result := True;
                                  Exit;
                                end;
                            end;
                        end;
                    end;

                  { On low optimisation settings, don't search more than one instruction ahead }
                  if not(cs_opt_level3 in current_settings.optimizerswitches) or
                    { Stop at procedure calls and jumps }
                    is_calljmp(taicpu(next_hp).opcode) or
                    { If the read register has changed value, or the MOV
                      destination register has been used, drop out }
                    RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
                    RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
                    Break;

                  current_hp := next_hp;
                end;
            end;
        end;
    end;
  { Peephole optimizations starting at a UXTB-style instruction p
    (ext reg2,reg1). The next instruction using reg2 is looked up and, if one
    of the patterns below applies, both instructions are merged into one.

    Returns true if the instruction stream was changed }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1: tai;
      extreg: TRegister;
      plainform,
      plainuxtb: Boolean;
    begin
      Result:=false;

      { reg2, the register written by p }
      extreg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p,hp1,extreg) then
        exit;

      { requirements shared by the patterns below: p is unconditional, has no
        postfix and takes exactly two operands ... }
      plainform:=
        MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2);
      { ... and, for all but the first pattern, really is a uxtb }
      plainuxtb:=plainform and (taicpu(p).opcode=A_UXTB);

      {
        change
        uxtb reg2,reg1
        strb reg2,[...]
        dealloc reg2
        to
        strb reg1,[...]
      }
      if plainform and
         MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
         assigned(FindRegDealloc(extreg,tai(hp1.Next))) and
         { the reference in strb might not use reg2 }
         not(RegInRef(extreg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          result:=RemoveCurrentP(p);
        end
      {
        change
        uxtb      reg2,reg1
        uxth/uxtb reg3,reg2
        dealloc reg2
        to
        uxtb reg3,reg1

        both variants are handled identically apart from the debug message
      }
      else if plainuxtb and
         (MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) or
          MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, extreg) and
         RegEndofLife(extreg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          if taicpu(hp1).opcode=A_UXTH then
            DebugMsg('Peephole UxtbUxth2Uxtb done', p)
          else
            DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          { p takes over the destination of hp1, which is dropped }
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
        uxtb reg2,reg1
        and reg3,reg2,#0x*FF
        dealloc reg2
        to
        uxtb reg3,reg1
      }
      else if plainuxtb and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         { the mask must keep all of the low eight bits }
         ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
         MatchOperand(taicpu(hp1).oper[1]^, extreg) and
         RegEndofLife(extreg,taicpu(hp1)) and
         { reg1 might not be modified inbetween }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          { here hp1 survives: it is turned into the uxtb and p is dropped }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          result:=RemoveCurrentP(p);
        end
{$ifdef AARCH64}
      else if USxtOp2Op(p,hp1,SM_UXTB) then
        Result:=true
{$endif AARCH64}
      else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
        Result:=true;
    end;
  684. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  685. var
  686. hp1: tai;
  687. so: tshifterop;
  688. begin
  689. Result:=false;
  690. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  691. begin
  692. {
  693. change
  694. uxth reg2,reg1
  695. strh reg2,[...]
  696. dealloc reg2
  697. to
  698. strh reg1,[...]
  699. }
  700. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  701. (taicpu(p).ops=2) and
  702. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  703. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  704. { the reference in strb might not use reg2 }
  705. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  706. { reg1 might not be modified inbetween }
  707. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  708. begin
  709. DebugMsg('Peephole UXTHStrh2Strh done', p);
  710. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  711. result:=RemoveCurrentP(p);
  712. end
  713. {
  714. change
  715. uxth reg2,reg1
  716. uxth reg3,reg2
  717. dealloc reg2
  718. to
  719. uxth reg3,reg1
  720. }
  721. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  722. (taicpu(p).ops=2) and
  723. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  724. (taicpu(hp1).ops=2) and
  725. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  726. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  727. { reg1 might not be modified inbetween }
  728. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  729. begin
  730. DebugMsg('Peephole UxthUxth2Uxth done', p);
  731. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  732. taicpu(hp1).opcode:=A_UXTH;
  733. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  734. result:=RemoveCurrentP(p);
  735. end
  736. {
  737. change
  738. uxth reg2,reg1
  739. and reg3,reg2,#65535
  740. dealloc reg2
  741. to
  742. uxth reg3,reg1
  743. }
  744. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  745. (taicpu(p).ops=2) and
  746. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  747. (taicpu(hp1).ops=3) and
  748. (taicpu(hp1).oper[2]^.typ=top_const) and
  749. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  750. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  751. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  752. { reg1 might not be modified inbetween }
  753. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  754. begin
  755. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  756. taicpu(hp1).opcode:=A_UXTH;
  757. taicpu(hp1).ops:=2;
  758. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  759. result:=RemoveCurrentP(p);
  760. end
  761. {$ifdef AARCH64}
  762. else if USxtOp2Op(p,hp1,SM_UXTH) then
  763. Result:=true
  764. {$endif AARCH64}
  765. else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  766. Result:=true;
  767. end;
  768. end;
  769. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  770. var
  771. hp1, hp2: tai;
  772. so: tshifterop;
  773. begin
  774. Result:=false;
  775. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  776. begin
  777. {
  778. change
  779. sxtb reg2,reg1
  780. strb reg2,[...]
  781. dealloc reg2
  782. to
  783. strb reg1,[...]
  784. }
  785. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  786. (taicpu(p).ops=2) and
  787. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  788. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  789. { the reference in strb might not use reg2 }
  790. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  791. { reg1 might not be modified inbetween }
  792. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  793. begin
  794. DebugMsg('Peephole SxtbStrb2Strb done', p);
  795. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  796. result:=RemoveCurrentP(p);
  797. end
  798. {
  799. change
  800. sxtb reg2,reg1
  801. sxth reg3,reg2
  802. dealloc reg2
  803. to
  804. sxtb reg3,reg1
  805. }
  806. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  807. (taicpu(p).ops=2) and
  808. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  809. (taicpu(hp1).ops = 2) and
  810. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  811. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  812. { reg1 might not be modified inbetween }
  813. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  814. begin
  815. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  816. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  817. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  818. asml.remove(hp1);
  819. hp1.free;
  820. result:=true;
  821. end
  822. {
  823. change
  824. sxtb reg2,reg1
  825. sxtb reg3,reg2
  826. dealloc reg2
  827. to
  828. uxtb reg3,reg1
  829. }
  830. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  831. (taicpu(p).ops=2) and
  832. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  833. (taicpu(hp1).ops = 2) and
  834. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  835. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  836. { reg1 might not be modified inbetween }
  837. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  838. begin
  839. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  840. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  841. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  842. asml.remove(hp1);
  843. hp1.free;
  844. result:=true;
  845. end
  846. {
  847. change
  848. sxtb reg2,reg1
  849. and reg3,reg2,#0x*FF
  850. dealloc reg2
  851. to
  852. uxtb reg3,reg1
  853. }
  854. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  855. (taicpu(p).ops=2) and
  856. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  857. (taicpu(hp1).ops=3) and
  858. (taicpu(hp1).oper[2]^.typ=top_const) and
  859. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  860. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  861. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  862. { reg1 might not be modified inbetween }
  863. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  864. begin
  865. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  866. taicpu(hp1).opcode:=A_UXTB;
  867. taicpu(hp1).ops:=2;
  868. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  869. result:=RemoveCurrentP(p);
  870. end
  871. {$ifdef AARCH64}
  872. else if USxtOp2Op(p,hp1,SM_SXTB) then
  873. Result:=true
  874. {$endif AARCH64}
  875. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  876. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  877. Result:=true;
  878. end;
  879. end;
  880. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  881. var
  882. hp1: tai;
  883. so: tshifterop;
  884. begin
  885. Result:=false;
  886. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  887. begin
  888. {
  889. change
  890. sxth reg2,reg1
  891. strh reg2,[...]
  892. dealloc reg2
  893. to
  894. strh reg1,[...]
  895. }
  896. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  897. (taicpu(p).ops=2) and
  898. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  899. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  900. { the reference in strb might not use reg2 }
  901. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  902. { reg1 might not be modified inbetween }
  903. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  904. begin
  905. DebugMsg('Peephole SxthStrh2Strh done', p);
  906. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  907. result:=RemoveCurrentP(p);
  908. end
  909. {
  910. change
  911. sxth reg2,reg1
  912. sxth reg3,reg2
  913. dealloc reg2
  914. to
  915. sxth reg3,reg1
  916. }
  917. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  918. (taicpu(p).ops=2) and
  919. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  920. (taicpu(hp1).ops=2) and
  921. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  922. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  923. { reg1 might not be modified inbetween }
  924. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  925. begin
  926. DebugMsg('Peephole SxthSxth2Sxth done', p);
  927. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  928. taicpu(hp1).opcode:=A_SXTH;
  929. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  930. result:=RemoveCurrentP(p);
  931. end
  932. {$ifdef AARCH64}
  933. {
  934. change
  935. sxth reg2,reg1
  936. sxtw reg3,reg2
  937. dealloc reg2
  938. to
  939. sxth reg3,reg1
  940. }
  941. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  942. (taicpu(p).ops=2) and
  943. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  944. (taicpu(hp1).ops=2) and
  945. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  946. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  947. { reg1 might not be modified inbetween }
  948. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  949. begin
  950. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  951. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  952. taicpu(hp1).opcode:=A_SXTH;
  953. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  954. result:=RemoveCurrentP(p);
  955. end
  956. {$endif AARCH64}
  957. {
  958. change
  959. sxth reg2,reg1
  960. and reg3,reg2,#65535
  961. dealloc reg2
  962. to
  963. uxth reg3,reg1
  964. }
  965. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  966. (taicpu(p).ops=2) and
  967. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  968. (taicpu(hp1).ops=3) and
  969. (taicpu(hp1).oper[2]^.typ=top_const) and
  970. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  971. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  972. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  973. { reg1 might not be modified inbetween }
  974. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  975. begin
  976. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  977. taicpu(hp1).opcode:=A_UXTH;
  978. taicpu(hp1).ops:=2;
  979. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  980. result:=RemoveCurrentP(p);
  981. end
  982. {$ifdef AARCH64}
  983. else if USxtOp2Op(p,hp1,SM_SXTH) then
  984. Result:=true
  985. {$endif AARCH64}
  986. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  987. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  988. Result:=true;
  989. end;
  990. end;
  991. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  992. begin
  993. Result := False;
  994. { Convert:
  995. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  996. To:
  997. mov reg1,reg2
  998. }
  999. if (taicpu(p).oper[2]^.val = 0) and
  1000. {$ifdef AARCH64}
  1001. (
  1002. (
  1003. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1004. (taicpu(p).oper[3]^.val = 64)
  1005. ) or
  1006. (
  1007. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1008. (taicpu(p).oper[3]^.val = 32)
  1009. )
  1010. )
  1011. {$else AARCH64}
  1012. (taicpu(p).oper[3]^.val = 32)
  1013. {$endif AARCH64}
  1014. then
  1015. begin
  1016. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1017. taicpu(p).opcode := A_MOV;
  1018. taicpu(p).ops := 2;
  1019. taicpu(p).clearop(2);
  1020. taicpu(p).clearop(3);
  1021. Result := True;
  1022. Exit;
  1023. end;
  1024. end;
  1025. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1026. var
  1027. hp1: tai;
  1028. Reference: TReference;
  1029. NewOp: TAsmOp;
  1030. begin
  1031. Result := False;
  1032. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1033. Exit;
  1034. Reference := taicpu(p).oper[1]^.ref^;
  1035. if (Reference.addressmode = AM_OFFSET) and
  1036. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1037. { Delay calling GetNextInstruction for as long as possible }
  1038. GetNextInstruction(p, hp1) and
  1039. (hp1.typ = ait_instruction) and
  1040. (taicpu(hp1).condition = C_None) and
  1041. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1042. begin
  1043. if (taicpu(hp1).opcode = A_STR) and
  1044. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1045. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1046. begin
  1047. { With:
  1048. ldr reg1,[ref]
  1049. str reg2,[ref]
  1050. If reg1 = reg2, Remove str
  1051. }
  1052. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1053. begin
  1054. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1055. RemoveInstruction(hp1);
  1056. Result := True;
  1057. Exit;
  1058. end;
  1059. end
  1060. else if (taicpu(hp1).opcode = A_LDR) and
  1061. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1062. begin
  1063. { With:
  1064. ldr reg1,[ref]
  1065. ldr reg2,[ref]
  1066. If reg1 = reg2, delete the second ldr
  1067. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1068. a dependency, but it will likely open up new optimisations, so
  1069. do it for now and handle any new dependencies later.
  1070. }
  1071. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1072. begin
  1073. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1074. RemoveInstruction(hp1);
  1075. Result := True;
  1076. Exit;
  1077. end
  1078. else if
  1079. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1080. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1081. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1082. begin
  1083. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1084. taicpu(hp1).opcode := A_MOV;
  1085. taicpu(hp1).oppostfix := PF_None;
  1086. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1087. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1088. Result := True;
  1089. Exit;
  1090. end;
  1091. end;
  1092. end;
  1093. end;
  1094. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1095. var
  1096. hp1: tai;
  1097. Reference: TReference;
  1098. SizeMismatch: Boolean;
  1099. SrcReg, DstReg: TRegister;
  1100. NewOp: TAsmOp;
  1101. begin
  1102. Result := False;
  1103. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1104. Exit;
  1105. Reference := taicpu(p).oper[1]^.ref^;
  1106. if (Reference.addressmode = AM_OFFSET) and
  1107. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1108. { Delay calling GetNextInstruction for as long as possible }
  1109. GetNextInstruction(p, hp1) and
  1110. (hp1.typ = ait_instruction) and
  1111. (taicpu(hp1).condition = C_None) and
  1112. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1113. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1114. begin
  1115. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1116. SrcReg := taicpu(p).oper[0]^.reg;
  1117. DstReg := taicpu(hp1).oper[0]^.reg;
  1118. if (taicpu(hp1).opcode = A_LDR) and
  1119. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1120. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1121. (
  1122. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1123. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1124. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1125. {$ifdef AARCH64}
  1126. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1127. {$endif AARCH64}
  1128. ) then
  1129. begin
  1130. { With:
  1131. str reg1,[ref]
  1132. ldr reg2,[ref]
  1133. If reg1 = reg2, Remove ldr.
  1134. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1135. }
  1136. if (SrcReg = DstReg) and
  1137. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1138. (taicpu(p).oppostfix=PF_None) then
  1139. begin
  1140. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1141. RemoveInstruction(hp1);
  1142. Result := True;
  1143. Exit;
  1144. end
  1145. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1146. (getregtype(DstReg) = R_INTREGISTER) and
  1147. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1148. begin
  1149. NewOp:=A_NONE;
  1150. if taicpu(hp1).oppostfix=PF_None then
  1151. NewOp:=A_MOV
  1152. else
  1153. {$ifdef ARM}
  1154. if (current_settings.cputype < cpu_armv6) then
  1155. begin
  1156. { The zero- and sign-extension operations were only
  1157. introduced under ARMv6 }
  1158. case taicpu(hp1).oppostfix of
  1159. PF_B:
  1160. begin
  1161. { The if-block afterwards will set the middle operand to the correct register }
  1162. taicpu(hp1).allocate_oper(3);
  1163. taicpu(hp1).ops := 3;
  1164. taicpu(hp1).loadconst(2, $FF);
  1165. NewOp := A_AND;
  1166. end;
  1167. PF_H:
  1168. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1169. PF_SB,
  1170. PF_SH:
  1171. { Do nothing - can't easily encode sign-extensions };
  1172. else
  1173. InternalError(2021043002);
  1174. end;
  1175. end
  1176. else
  1177. {$endif ARM}
  1178. case taicpu(hp1).oppostfix of
  1179. PF_B:
  1180. NewOp := A_UXTB;
  1181. PF_SB:
  1182. NewOp := A_SXTB;
  1183. PF_H:
  1184. NewOp := A_UXTH;
  1185. PF_SH:
  1186. NewOp := A_SXTH;
  1187. {$ifdef AARCH64}
  1188. PF_SW:
  1189. NewOp := A_SXTW;
  1190. PF_W:
  1191. NewOp := A_MOV;
  1192. {$endif AARCH64}
  1193. else
  1194. InternalError(2021043001);
  1195. end;
  1196. if (NewOp<>A_None) then
  1197. begin
  1198. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1199. taicpu(hp1).oppostfix := PF_None;
  1200. taicpu(hp1).opcode := NewOp;
  1201. taicpu(hp1).loadreg(1, SrcReg);
  1202. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1203. Result := True;
  1204. Exit;
  1205. end;
  1206. end
  1207. end
  1208. else if (taicpu(hp1).opcode = A_STR) and
  1209. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1210. begin
  1211. { With:
  1212. str reg1,[ref]
  1213. str reg2,[ref]
  1214. If reg1 <> reg2, delete the first str
  1215. IF reg1 = reg2, delete the second str
  1216. }
  1217. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1218. begin
  1219. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1220. RemoveInstruction(hp1);
  1221. Result := True;
  1222. Exit;
  1223. end
  1224. else if
  1225. { Registers same byte size? }
  1226. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1227. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1228. begin
  1229. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1230. RemoveCurrentP(p, hp1);
  1231. Result := True;
  1232. Exit;
  1233. end;
  1234. end;
  1235. end;
  1236. end;
  1237. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1238. var
  1239. hp1, hp2: tai;
  1240. i: longint;
  1241. begin
  1242. Result:=false;
  1243. {
  1244. optimize
  1245. and reg2,reg1,const1
  1246. ...
  1247. }
  1248. if (taicpu(p).ops>2) and
  1249. (taicpu(p).oper[1]^.typ = top_reg) and
  1250. (taicpu(p).oper[2]^.typ = top_const) then
  1251. begin
  1252. {
  1253. change
  1254. and reg2,reg1,const1
  1255. ...
  1256. and reg3,reg2,const2
  1257. to
  1258. and reg3,reg1,(const1 and const2)
  1259. }
  1260. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1261. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1262. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1263. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1264. (taicpu(hp1).oper[2]^.typ = top_const)
  1265. {$ifdef AARCH64}
  1266. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1267. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1268. ) or
  1269. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1270. {$endif AARCH64}
  1271. then
  1272. begin
  1273. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1274. begin
  1275. DebugMsg('Peephole AndAnd2And done', p);
  1276. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1277. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1278. begin
  1279. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1280. taicpu(p).opcode:=A_MOV;
  1281. taicpu(p).ops:=2;
  1282. taicpu(p).loadConst(1,0);
  1283. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1284. end
  1285. else
  1286. begin
  1287. DebugMsg('Peephole AndAnd2And 1 done', p);
  1288. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1289. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1290. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1291. end;
  1292. asml.remove(hp1);
  1293. hp1.free;
  1294. Result:=true;
  1295. exit;
  1296. end
  1297. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1298. begin
  1299. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1300. begin
  1301. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1302. taicpu(hp1).opcode:=A_MOV;
  1303. taicpu(hp1).loadConst(1,0);
  1304. taicpu(hp1).ops:=2;
  1305. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1306. end
  1307. else
  1308. begin
  1309. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1310. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1311. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1312. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1313. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1314. end;
  1315. GetNextInstruction(p, hp1);
  1316. RemoveCurrentP(p);
  1317. p:=hp1;
  1318. Result:=true;
  1319. exit;
  1320. end;
  1321. end
  1322. {
  1323. change
  1324. and reg2,reg1,$xxxxxxFF
  1325. strb reg2,[...]
  1326. dealloc reg2
  1327. to
  1328. strb reg1,[...]
  1329. }
  1330. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1331. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1332. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1333. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1334. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1335. { the reference in strb might not use reg2 }
  1336. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1337. { reg1 might not be modified inbetween }
  1338. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1339. begin
  1340. DebugMsg('Peephole AndStrb2Strb done', p);
  1341. {$ifdef AARCH64}
  1342. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1343. {$else AARCH64}
  1344. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1345. {$endif AARCH64}
  1346. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1347. RemoveCurrentP(p);
  1348. result:=true;
  1349. exit;
  1350. end
  1351. {
  1352. change
  1353. and reg2,reg1,255
  1354. uxtb/uxth reg3,reg2
  1355. dealloc reg2
  1356. to
  1357. and reg3,reg1,x
  1358. }
  1359. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1360. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1361. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1362. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1363. (taicpu(hp1).ops = 2) and
  1364. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1365. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1366. { reg1 might not be modified inbetween }
  1367. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1368. begin
  1369. DebugMsg('Peephole AndUxt2And done', p);
  1370. taicpu(hp1).opcode:=A_AND;
  1371. taicpu(hp1).ops:=3;
  1372. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1373. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1374. GetNextInstruction(p,hp1);
  1375. asml.remove(p);
  1376. p.Free;
  1377. p:=hp1;
  1378. result:=true;
  1379. exit;
  1380. end
  1381. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1382. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1383. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1384. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1385. (taicpu(hp1).ops = 2) and
  1386. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1387. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1388. { reg1 might not be modified inbetween }
  1389. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1390. begin
  1391. DebugMsg('Peephole AndSxt2And done', p);
  1392. taicpu(hp1).opcode:=A_AND;
  1393. taicpu(hp1).ops:=3;
  1394. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1395. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1396. GetNextInstruction(p,hp1);
  1397. asml.remove(p);
  1398. p.Free;
  1399. p:=hp1;
  1400. result:=true;
  1401. exit;
  1402. end
  1403. {
  1404. from
  1405. and reg1,reg0,2^n-1
  1406. mov reg2,reg1, lsl imm1
  1407. (mov reg3,reg2, lsr/asr imm1)
  1408. remove either the and or the lsl/xsr sequence if possible
  1409. }
  1410. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1411. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1412. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1413. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1414. (taicpu(hp1).ops=3) and
  1415. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1416. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1417. {$ifdef ARM}
  1418. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1419. {$endif ARM}
  1420. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1421. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1422. begin
  1423. {
  1424. and reg1,reg0,2^n-1
  1425. mov reg2,reg1, lsl imm1
  1426. mov reg3,reg2, lsr/asr imm1
  1427. =>
  1428. and reg1,reg0,2^n-1
  1429. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1430. }
  1431. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1432. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1433. (taicpu(hp2).ops=3) and
  1434. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1435. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1436. {$ifdef ARM}
  1437. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1438. {$endif ARM}
  1439. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1440. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1441. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1442. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1443. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1444. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1445. begin
  1446. DebugMsg('Peephole AndLslXsr2And done', p);
  1447. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1448. asml.Remove(hp1);
  1449. asml.Remove(hp2);
  1450. hp1.free;
  1451. hp2.free;
  1452. result:=true;
  1453. exit;
  1454. end
  1455. {
  1456. and reg1,reg0,2^n-1
  1457. mov reg2,reg1, lsl imm1
  1458. =>
  1459. mov reg2,reg0, lsl imm1
  1460. if imm1>i
  1461. }
  1462. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1463. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1464. begin
  1465. DebugMsg('Peephole AndLsl2Lsl done', p);
  1466. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1467. GetNextInstruction(p, hp1);
  1468. asml.Remove(p);
  1469. p.free;
  1470. p:=hp1;
  1471. result:=true;
  1472. exit;
  1473. end
  1474. end;
  1475. end;
  1476. {
  1477. change
  1478. and reg1, ...
  1479. mov reg2, reg1
  1480. to
  1481. and reg2, ...
  1482. }
  1483. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1484. (taicpu(p).ops>=3) and
  1485. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1486. Result:=true;
  1487. end;
  1488. end.