aoptarm.pas 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object shared by the ARM and AArch64 targets
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AArch64 do not look very similar at first glance,
  29. several optimizations can be shared between both }
  { TARMAsmOptimizer extends TAsmOptimizer with peephole helpers used by both
    targets.  The three methods declared virtual below may be overridden by
    descendant classes. }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  { Debug builds (DEBUG_AOPTCPU): inserts s as an assembler comment before p.
    Other builds: empty procedure. }
  31. procedure DebugMsg(const s : string; p : tai);
  { Retargets p to write directly into the destination of the MOV movp and
    deletes movp; optimizer is only used in the debug message. }
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  { Forwards the source register of the MOV p into later instructions and
    removes p when it becomes redundant; may set hp1 to nil. }
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  { Scans forward from Current for an instruction that references reg. }
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  { Folds the extension p into a following ADD/SUB/CMP/CMN as an extended-register operand. }
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  { Per-opcode peephole entry points.  OptPass1UXTB returns True when it
    changed the instruction list; NOTE(review): the others are presumably
    analogous - their bodies are not all visible here, confirm. }
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. function OptPass2AND(var p: tai): Boolean;
  47. End;
  { MatchInstruction overloads: True when instr is an instruction (ait_instruction)
    whose opcode, condition and postfix satisfy the given criteria.  An empty
    set ([]) for op/cond/postfix accepts any value for that criterion. }
  48. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  49. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  50. {$ifdef AARCH64}
  { Variant taking an open array of opcodes; no condition check. }
  51. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  52. {$endif AARCH64}
  53. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  { Field-wise comparison of two references; always False if either one has a
    non-empty volatility set. }
  54. function RefsEqual(const r1, r2: treference): boolean;
  { MatchOperand overloads: compare an operand against a register, another
    operand, or an integer constant respectively. }
  55. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  56. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  57. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  58. Implementation
  59. uses
  60. cutils,verbose,globals,
  61. systems,
  62. cpuinfo,
  63. cgobj,procinfo,
  64. aasmbase,aasmdata,itcpugas;
  { Two build variants of the debug-message support follow; exactly one is
    compiled depending on DEBUG_AOPTCPU (defined above when EXTDEBUG is set). }
  65. {$ifdef DEBUG_AOPTCPU}
  66. const
  67. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  { Debug variant: inserts the message s as a tai_comment node in front of p. }
  68. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  69. begin
  70. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  71. end;
  72. {$else DEBUG_AOPTCPU}
  73. { Empty strings help the optimizer to remove string concatenations that won't
  74. ever appear to the user on release builds. [Kit] }
  75. const
  76. SPeepholeOptimization = '';
  { Non-debug variant: intentionally empty, declared inline. }
  77. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  78. begin
  79. end;
  80. {$endif DEBUG_AOPTCPU}
  { Returns True when instr is an instruction whose opcode, condition and
    postfix are each accepted by the corresponding set.  An empty set accepts
    any value for that criterion. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the opcode is only tested against the set when it does not exceed LastCommonAsmOp }
      if (op<>[]) and
         not((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns True when instr is an instruction with exactly the opcode op whose
    condition and postfix are accepted by cond and postfix (empty set = any). }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { Returns True when instr is an instruction whose opcode is one of ops and
    whose postfix is accepted by postfix (empty set = any).  An empty ops
    array never matches. }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      idx : Integer;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the postfix test does not depend on the opcode, so do it once up front }
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      for idx:=Low(ops) to High(ops) do
        if taicpu(instr).opcode=ops[idx] then
          exit(true);
    end;
  {$endif AARCH64}
  { Returns True when instr is an instruction with exactly the opcode op whose
    postfix is accepted by postfix (empty set = any).  The condition is not
    examined. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=(oper.reg=reg)
      else
        Result:=false;
    end;
  { Compares two references field by field.  If either reference has a
    non-empty volatility set the result is always False. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit(false);
      Result:=
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.offset = r2.offset) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr) and
  {$ifdef ARM}
        (r1.signindex = r2.signindex) and
  {$endif ARM}
        (r1.addressmode = r2.addressmode) and
        (r1.shiftmode = r2.shiftmode) and
        (r1.shiftimm = r2.shiftimm);
    end;
  { True when both operands have the same type and the same payload.  Only
    register, constant, condition-code, real-constant and reference operands
    can compare equal; every other operand type yields False. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=(oper1.reg = oper2.reg);
        top_const:
          Result:=(oper1.val = oper2.val);
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result:=(oper1.cc = oper2.cc);
        top_realconst:
          Result:=(oper1.val_real = oper2.val_real);
        else
          Result:=false;
      end;
    end;
  { True when oper is a constant operand whose value equals a. }
  function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
    begin
      if oper.typ=top_const then
        Result:=(oper.val=a)
      else
        Result:=false;
    end;
  {$ifdef AARCH64}
  { Folds the two-operand extension p (dst, src) into the following
    instruction hp1:

      - ADD/SUB (optionally with S postfix) with three operands whose last
        operand is dst and whose middle operand is not dst, or
      - CMP/CMN with two operands whose last operand is dst.

    The use of dst in hp1 is replaced by src followed by a shifter operand
    with the given shiftmode and a shift amount of 0, and p is removed.
    This is only done when dst ends its life at hp1 and src is not modified
    between p and hp1.  Returns the result of RemoveCurrentP on success,
    False otherwise. }
  function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
    var
      ExtendOp: tshifterop;
      SrcIdx: Integer;
      FoldIntoArith, FoldIntoCompare: Boolean;
    begin
      Result:=false;
      if taicpu(p).ops<>2 then
        Exit;

      FoldIntoArith:=
        MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
        (taicpu(hp1).ops=3) and
        MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
        not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg));

      FoldIntoCompare:=
        not(FoldIntoArith) and
        MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
        (taicpu(hp1).ops=2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg);

      if not(FoldIntoArith or FoldIntoCompare) then
        Exit;
      if not RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
        Exit;
      { the source register must not be modified in between }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
        Exit;

      DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

      { index of the operand that currently holds the extended register }
      if FoldIntoCompare then
        SrcIdx:=1
      else
        SrcIdx:=2;

      taicpu(hp1).loadReg(SrcIdx,taicpu(p).oper[1]^.reg);
      { one extra operand: the extension itself }
      taicpu(hp1).ops:=SrcIdx+2;
      shifterop_reset(ExtendOp);
      ExtendOp.shiftmode:=shiftmode;
      ExtendOp.shiftimm:=0;
      taicpu(hp1).loadshifterop(SrcIdx+1,ExtendOp);
      Result:=RemoveCurrentP(p);
    end;
  {$endif AARCH64}
  { Walks forward from Current and returns True, with Next pointing at it, as
    soon as an entry referencing reg is found.

    The search gives up (returning False) when there is no further
    instruction, when optimisation level 3 is not enabled (so only the
    immediately following instruction is examined), when a non-instruction
    entry is reached, at a call or jump, and - on ARM - at an instruction
    that modifies PC.  On failure Next holds whatever the last
    GetNextInstruction call produced. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; const reg: TRegister): Boolean;
    begin
      Result:=False;
      Next:=Current;
      while GetNextInstruction(Next,Next) do
        begin
          if RegInInstruction(reg,Next) then
            { Found something }
            Exit(True);

          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ<>ait_instruction) or
             is_calljmp(taicpu(Next).opcode)
  {$ifdef ARM}
             or RegModifiedByInstruction(NR_PC,Next)
  {$endif ARM}
          then
            Break;
        end;
    end;
  { Tries to merge the instruction p with a following "mov dst, src" (movp)
    where src is the register written by p:

        <op>  src, ...
        mov   dst, src
        dealloc src
      becomes
        <op>  dst, ...

    p         - instruction that loads a new value into its operand 0
    movp      - candidate MOV; must have the same condition as p, no postfix
                and exactly two operands
    optimizer - name of the calling optimisation, used only in the debug message

    Returns True when movp was removed (movp is freed in that case).  Nothing
    is changed unless a deallocation of p's destination register is found
    after movp. }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      alloc,
      dealloc : tai_regalloc;
    begin
      Result:=false;
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         { We can't optimize if there is a shiftop }
         (taicpu(movp).ops=2) and
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to fp }
         (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  {$ifdef ARM}
         { PC should be changed only by moves }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
         (taicpu(p).opcode<>A_CBZ) and
         (taicpu(p).opcode<>A_CBNZ) and
         { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
           (current_settings.cputype < cpu_armv6)
         ) and
  {$endif ARM}
         { Take care to only do this for instructions which REALLY load to the first register.
           Otherwise
             str reg0, [reg1]
             mov reg2, reg0
           will be optimized to
             str reg2, [reg1]
         }
         RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              result:=true;

              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
              if assigned(alloc) then
                begin
                  { matching alloc/dealloc pair: the register is not needed at all any more }
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { no allocation found: end the register's life right after p instead }
                asml.InsertAfter(dealloc,p);

              AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);

              { Remove preindexing and postindexing for LDR in some cases.
                For example:
                  ldr reg2,[reg1, xxx]!
                  mov reg1,reg2
                must be translated to:
                  ldr reg1,[reg1, xxx]

                Preindexing must be removed there, since the same register is used as the base and as the target.
                Such case is not allowed for ARM CPU and produces crash. }
              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
              then
                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  { Tries to make the register-to-register move p ("mov r1, r0") redundant by
    forwarding r0 into the instructions that read r1.

    p   - the MOV to process; on success it is removed and p refers to a
          different instruction afterwards
    hp1 - an instruction following p that the first transformation inspects
          (NOTE(review): presumably the next instruction using p's
          destination - confirm at the call sites).  It is set to nil if the
          instruction it refers to gets deleted by this routine.

    Returns True only when the original p was removed.  Later instructions may
    have had operands rewritten even when False is returned. }
  function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
    var
      I: Integer;
      current_hp, next_hp: tai;
      LDRChange: Boolean;
    begin
      Result:=false;
      {
        change
          mov r1, r0
          add r1, r1, #1
        to
          add r1, r0, #1

        Todo: Make it work for mov+cmp too

        CAUTION! If this one is successful p might not be a mov instruction anymore!
      }
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oppostfix = PF_NONE) then
        begin
          if
            MatchInstruction(hp1, [A_ADD, A_ADC,
  {$ifdef ARM}
                                   A_RSB, A_RSC,
  {$endif ARM}
                                   A_SUB, A_SBC,
                                   A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
                             [taicpu(p).condition], []) and
            { MOV and MVN might only have 2 ops }
            (taicpu(hp1).ops >= 2) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (
              (taicpu(hp1).ops = 2) or
              (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
            ) and
  {$ifdef AARCH64}
            (taicpu(p).oper[1]^.reg<>NR_SP) and
            { in this case you have to transform it to movk or the like }
            (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  {$endif AARCH64}
            not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              { When we get here we still don't know if the registers match }
              for I:=1 to 2 do
                {
                  If the first loop was successful p will be replaced with hp1.
                  The checks will still be ok, because all required information
                  will also be in hp1 then.
                }
                if (taicpu(hp1).ops > I) and
                   { oper[2] may be a constant or a shifter operand (see the
                     check above); its reg field is only meaningful - and
                     only safe to overwrite - for a register operand }
                   (taicpu(hp1).oper[I]^.typ = top_reg) and
                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  {$ifdef ARM}
                   { prevent certain combinations on thumb(2), this is only a safe approximation }
                   and (not(GenerateThumbCode or GenerateThumb2Code) or
                    ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
                     (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  {$endif ARM}
                then
                  begin
                    DebugMsg('Peephole RedundantMovProcess done', hp1);
                    taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
                    if p<>hp1 then
                      begin
                        asml.remove(p);
                        p.free;
                        p:=hp1;
                        Result:=true;
                      end;
                  end;

              if Result then Exit;
            end
          { Change:                          Change:
              mov     r1, r0                   mov r1, r0
              ...                              ...
              ldr/str r2, [r1, etc.]           mov r2, r1
            To:                              To:
              ldr/str r2, [r0, etc.]           mov r2, r0
          }
          else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  {$ifdef ARM}
            and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
            and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
            { Thumb does not support references with base and index one being SP }
            and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  {$endif ARM}
  {$ifdef AARCH64}
            and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  {$endif AARCH64}
            then
            begin
              current_hp := p;
              TransferUsedRegs(TmpUsedRegs);

              { Search local instruction block }
              while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
                begin
                  UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
                  LDRChange := False;

                  if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  {$ifdef AARCH64}
                    { If r0 is the zero register, then this sequence of instructions will cause
                      an access violation, but that's better than an assembler error caused by
                      changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
                    and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  {$endif AARCH64}
                    then
                    begin

                      { Change the registers from r1 to r0 }
                      if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  {$ifdef ARM}
                        { This optimisation conflicts with something and raises
                          an access violation - needs further investigation. [Kit] }
                        (taicpu(next_hp).opcode <> A_LDR) and
  {$endif ARM}
                        { Don't mess around with the base register if the
                          reference is pre- or post-indexed }
                        (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange then
                        DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);

                      { Drop out if we're dealing with pre-indexed references }
                      if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
                        (
                          RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
                          RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
                        ) then
                        begin
                          { Remember to update register allocations }
                          if LDRChange then
                            AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                          Break;
                        end;

                      { The register being stored can be potentially changed (as long as it's not the stack pointer) }
                      if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                        MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
                          taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
                        begin
                          AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
                          if (taicpu(p).oppostfix = PF_None) and
                            (
                              (
                                (taicpu(next_hp).opcode = A_LDR) and
                                MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
                              ) or
                              not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
                            ) and
                            { Double-check to see if the old registers were actually
                              changed (e.g. if the super registers matched, but not
                              the sizes, they won't be changed). }
                            (
                              (taicpu(next_hp).opcode = A_LDR) or
                              not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
                            ) and
                            not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
                            begin
                              DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
                              RemoveCurrentP(p);
                              Result := True;
                              Exit;
                            end;
                        end;
                    end
                  else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
                    (taicpu(next_hp).ops = 2) then
                    begin
                      if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          { Found another mov that writes entirely to the register }
                          if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
                            begin
                              { Register was used beforehand }
                              if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
                                begin
                                  { This MOV is exactly the same as the first one.
                                    Since none of the registers have changed value
                                    at this point, we can remove it. }
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);

                                  if (next_hp = hp1) then
                                    { Don't let hp1 become a dangling pointer }
                                    hp1 := nil;

                                  asml.Remove(next_hp);
                                  next_hp.Free;

                                  { We still have the original p, so we can continue optimising;
                                    if it was -O2 or below, this instruction appeared immediately
                                    after the first MOV, so we're technically not looking more
                                    than one instruction ahead after it's removed! [Kit] }
                                  Continue;
                                end
                              else
                                { Register changes value - drop out }
                                Break;
                            end;

                          { We can delete the first MOV (only if the second MOV is unconditional) }
  {$ifdef ARM}
                          if (taicpu(p).oppostfix = PF_None) and
                            (taicpu(next_hp).condition = C_None) then
  {$endif ARM}
                            begin
                              DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
                              RemoveCurrentP(p);
                              Result := True;
                            end;
                          Exit;
                        end
                      else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
                        begin
                          if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
                            { Be careful - if the entire register is not used, removing this
                              instruction will leave the unused part uninitialised }
  {$ifdef AARCH64}
                            and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  {$endif AARCH64}
                            then
                            begin
                              { Instruction will become mov r1,r1 }
                              DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);

                              { Allocate r1 between the instructions; not doing
                                so may cause problems when removing superfluous
                                MOVs later (i38055) }
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              if (next_hp = hp1) then
                                { Don't let hp1 become a dangling pointer }
                                hp1 := nil;

                              asml.Remove(next_hp);
                              next_hp.Free;
                              Continue;
                            end;

                          { Change the old register (checking the first operand again
                            forces it to be left alone if the full register is not
                            used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  {$ifdef AARCH64}
                          if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  {$endif AARCH64}
                            begin
                              DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
                              taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              { If this was the only reference to the old register,
                                then we can remove the original MOV now }

                              if (taicpu(p).oppostfix = PF_None) and
                                { A bit of a hack - sometimes registers aren't tracked properly, so do not
                                  remove if the register was apparently not allocated when its value is
                                  first set at the MOV command (this is especially true for the stack
                                  register). [Kit] }
                                (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                                RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
                                not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
                                begin
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
                                  RemoveCurrentP(p);
                                  Result := True;
                                  Exit;
                                end;
                            end;
                        end;
                    end;

                  { On low optimisation settings, don't search more than one instruction ahead }
                  if not(cs_opt_level3 in current_settings.optimizerswitches) or
                    { Stop at procedure calls and jumps }
                    is_calljmp(taicpu(next_hp).opcode) or
                    { If the read register has changed value, or the MOV
                      destination register has been used, drop out }
                    RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
                    RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
                    Break;

                  current_hp := next_hp;
                end;
            end;
        end;
    end;
  { Pass-1 peephole optimisations for a byte zero-extension "uxtb reg2,reg1" (p).

    Looks up the next instruction that uses reg2 (via GetNextInstructionUsingReg)
    and tries, in this order:
      - UxtbStrb2Strb:   store reg1 directly with a following STRB
      - UxtbUxth2Uxtb:   drop a following UXTH of reg2
      - UxtbUxtb2Uxtb:   drop a following UXTB of reg2
      - UxtbAndImm2Uxtb: replace a following AND with a constant whose low
                         byte is $FF
      - AArch64 only:    fold the extension into ADD/SUB/CMP/CMN (USxtOp2Op)
      - UxtbMov2Uxtb:    remove a superfluous MOV of reg2

    Returns True when the instruction list was changed; p may then refer to a
    different instruction. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
              uxtb reg2,reg1
              strb reg2,[...]
              dealloc reg2
            to
              strb reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb must not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole UxtbStrb2Strb done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
              uxtb reg2,reg1
              uxth reg3,reg2
              dealloc reg2
            to
              uxtb reg3,reg1
          }
          else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole UxtbUxth2Uxtb done', p);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
              uxtb reg2,reg1
              uxtb reg3,reg2
              dealloc reg2
            to
              uxtb reg3,reg1
          }
          else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
              uxtb reg2,reg1
              and reg3,reg2,#0x*FF
              dealloc reg2
            to
              uxtb reg3,reg1
          }
          else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            { every bit of the low byte is kept, so the AND cannot change the
              already zero-extended value }
            ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
              taicpu(hp1).opcode:=A_UXTB;
              taicpu(hp1).ops:=2;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
  {$ifdef AARCH64}
          else if USxtOp2Op(p,hp1,SM_UXTB) then
            Result:=true
  {$endif AARCH64}
          else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
            Result:=true;
        end;
    end;
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      NextUse: tai;
      ExtDest: TRegister;
    begin
      { Pass-1 peephole folds for the UXTH (zero-extend halfword) instruction
        at p.  Each fold looks at NextUse, the instruction that
        GetNextInstructionUsingReg reports for p's destination register.
        Result is True when one of the folds was applied. }
      Result := False;
      ExtDest := taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p, NextUse, ExtDest) then
        Exit;

      {
        uxth reg2,reg1
        strh reg2,[...]
        dealloc reg2
          =>
        strh reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops = 2) and
        MatchInstruction(NextUse, A_STR, [C_None], [PF_H]) and
        RegEndofLife(ExtDest, taicpu(NextUse)) and
        { reg2 must not take part in the store's address and reg1 must
          still hold its value when the store is reached }
        not (RegInRef(ExtDest, taicpu(NextUse).oper[1]^.ref^) or
             RegModifiedBetween(taicpu(p).oper[1]^.reg, p, NextUse)) then
        begin
          DebugMsg('Peephole UXTHStrh2Strh done', p);
          taicpu(NextUse).loadReg(0, taicpu(p).oper[1]^.reg);
          Result := RemoveCurrentP(p);
          Exit;
        end;

      {
        uxth reg2,reg1
        uxth reg3,reg2
        dealloc reg2
          =>
        uxth reg3,reg1
      }
      if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
        (taicpu(p).ops = 2) and
        MatchInstruction(NextUse, A_UXTH, [C_None], [PF_None]) and
        (taicpu(NextUse).ops = 2) and
        MatchOperand(taicpu(NextUse).oper[1]^, ExtDest) and
        RegEndofLife(ExtDest, taicpu(NextUse)) and
        { reg1 must not be modified in between }
        not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, NextUse) then
        begin
          DebugMsg('Peephole UxthUxth2Uxth done', p);
          AllocRegBetween(taicpu(p).oper[1]^.reg, p, NextUse, UsedRegs);
          taicpu(NextUse).opcode := A_UXTH;
          taicpu(NextUse).loadReg(1, taicpu(p).oper[1]^.reg);
          Result := RemoveCurrentP(p);
          Exit;
        end;

      {
        uxth reg2,reg1
        and reg3,reg2,#65535
        dealloc reg2
          =>
        uxth reg3,reg1
      }
      if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
        (taicpu(p).ops = 2) and
        MatchInstruction(NextUse, A_AND, [C_None], [PF_None]) and
        (taicpu(NextUse).ops = 3) and
        (taicpu(NextUse).oper[2]^.typ = top_const) and
        ((taicpu(NextUse).oper[2]^.val and $FFFF) = $FFFF) and
        MatchOperand(taicpu(NextUse).oper[1]^, ExtDest) and
        RegEndofLife(ExtDest, taicpu(NextUse)) and
        { reg1 must not be modified in between }
        not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, NextUse) then
        begin
          DebugMsg('Peephole UxthAndImm2Uxth done', p);
          { the AND is recycled as the surviving two-operand UXTH }
          taicpu(NextUse).opcode := A_UXTH;
          taicpu(NextUse).ops := 2;
          taicpu(NextUse).loadReg(1, taicpu(p).oper[1]^.reg);
          Result := RemoveCurrentP(p);
          Exit;
        end;

{$ifdef AARCH64}
      if USxtOp2Op(p, NextUse, SM_UXTH) then
        begin
          Result := True;
          Exit;
        end;
{$endif AARCH64}

      { last resort: try to merge a following register move into p }
      if RemoveSuperfluousMove(p, NextUse, 'UxthMov2Data') then
        Result := True;
    end;
  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      { Pass-1 peephole optimisations for the SXTB (sign-extend byte)
        instruction at p.  hp1 is the instruction that
        GetNextInstructionUsingReg reports for p's destination register.
        Returns True if the instruction stream was changed. }
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
            sxtb reg2,reg1
            strb reg2,[...]
            dealloc reg2
            to
            strb reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbStrb2Strb done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
            sxtb reg2,reg1
            sxth reg3,reg2
            dealloc reg2
            to
            sxtb reg3,reg1
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbSxth2Sxtb done', p);
              { p is kept and retargeted to reg3; the second extend goes away }
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
            sxtb reg2,reg1
            sxtb reg3,reg2
            dealloc reg2
            to
            sxtb reg3,reg1
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
            sxtb reg2,reg1
            and reg3,reg2,#0xFF
            dealloc reg2
            to
            uxtb reg3,reg1

            The mask has to be exactly $FF.  sxtb fills bit 8 and everything
            above it with copies of the sign bit, so a wider mask that merely
            has its low byte set (e.g. $FFFF) would let some of those bits
            through and the result would not be a zero-extended byte.
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=$FF) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
              { the AND is recycled as the surviving two-operand UXTB }
              taicpu(hp1).opcode:=A_UXTB;
              taicpu(hp1).ops:=2;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          else if USxtOp2Op(p,hp1,SM_SXTB) then
            Result:=true
{$endif AARCH64}
          { hp1 is looked up again before the move check, exactly as the
            code did before }
          else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
            RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
            Result:=true;
        end;
    end;
  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      { Pass-1 peephole optimisations for the SXTH (sign-extend halfword)
        instruction at p.  hp1 is the instruction that
        GetNextInstructionUsingReg reports for p's destination register.
        Returns True if the instruction stream was changed. }
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
            sxth reg2,reg1
            strh reg2,[...]
            dealloc reg2
            to
            strh reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { the reference in strh might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthStrh2Strh done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
            sxth reg2,reg1
            sxth reg3,reg2
            dealloc reg2
            to
            sxth reg3,reg1
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthSxth2Sxth done', p);
              { hp1 is kept and reads reg1 directly; p goes away }
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              taicpu(hp1).opcode:=A_SXTH;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          {
            change
            sxth reg2,reg1
            sxtw reg3,reg2
            dealloc reg2
            to
            sxth reg3,reg1
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthSxtw2Sxth done', p);
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              taicpu(hp1).opcode:=A_SXTH;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$endif AARCH64}
          {
            change
            sxth reg2,reg1
            and reg3,reg2,#65535
            dealloc reg2
            to
            uxth reg3,reg1

            The mask has to be exactly $FFFF.  sxth fills bit 16 and
            everything above it with copies of the sign bit, so a wider mask
            that merely has its low halfword set would let some of those bits
            through and the result would not be a zero-extended halfword.
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=$FFFF) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthAndImm2Uxth done', p);
              { the AND is recycled as the surviving two-operand UXTH }
              taicpu(hp1).opcode:=A_UXTH;
              taicpu(hp1).ops:=2;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          else if USxtOp2Op(p,hp1,SM_SXTH) then
            Result:=true
{$endif AARCH64}
          { hp1 is looked up again before the move check, exactly as the
            code did before }
          else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
            RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
            Result:=true;
        end;
    end;
  function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
    var
      FullWidth: longint;
    begin
      { Rewrites a bitfield extract that starts at bit 0 and spans the whole
        destination register into a plain move:

          s/ubfx reg1,reg2,#0,#64   (or #32 for 32-bit registers)
            =>
          mov reg1,reg2

        Result is True only when that rewrite happened. }
      Result := False;

      { operand 2 is the start bit; only extracts starting at bit 0 qualify }
      if taicpu(p).oper[2]^.val <> 0 then
        Exit;

{$ifdef AARCH64}
      { the qualifying width follows the destination's sub-register size }
      case getsubreg(taicpu(p).oper[0]^.reg) of
        R_SUBQ:
          FullWidth := 64;
        R_SUBD:
          FullWidth := 32;
        else
          Exit;
      end;
{$else AARCH64}
      FullWidth := 32;
{$endif AARCH64}

      { operand 3 is the field width }
      if taicpu(p).oper[3]^.val <> FullWidth then
        Exit;

      DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
      taicpu(p).opcode := A_MOV;
      taicpu(p).ops := 2;
      taicpu(p).clearop(2);
      taicpu(p).clearop(3);
      Result := True;
    end;
  function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
    var
      hp1: tai;
      Reference: TReference;
    begin
      { Pass-1 peephole optimisations for an unconditional two-operand LDR
        at p, looking only at the instruction that directly follows it:

          ldr reg1,[ref]; str reg1,[ref]   ->  the str is removed
          ldr reg1,[ref]; ldr reg1,[ref]   ->  the second ldr is removed
          ldr reg1,[ref]; ldr reg2,[ref]   ->  the second ldr becomes
                                               "mov reg2,reg1"

        Accesses through a volatile reference are never removed or
        rewritten, matching what OptPass1STR does.
        Returns True if the instruction stream was changed. }
      Result := False;
      if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
        Exit;

      Reference := taicpu(p).oper[1]^.ref^;
      if (Reference.addressmode = AM_OFFSET) and
        { every access to volatile memory has to stay in the code }
        (Reference.volatility = []) and
        { reg1 must not be part of the address, otherwise the load changes
          what the reference points to }
        not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
        { Delay calling GetNextInstruction for as long as possible }
        GetNextInstruction(p, hp1) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).condition = C_None) and
        (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
        begin
          if (taicpu(hp1).opcode = A_STR) and
            RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
            (taicpu(hp1).oper[1]^.ref^.volatility = []) and
            (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
            begin
              { With:
                  ldr reg1,[ref]
                  str reg2,[ref]

                If reg1 = reg2, Remove str
              }
              if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
                  RemoveInstruction(hp1);
                  Result := True;
                  Exit;
                end;
            end
          else if (taicpu(hp1).opcode = A_LDR) and
            RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
            (taicpu(hp1).oper[1]^.ref^.volatility = []) then
            begin
              { With:
                  ldr reg1,[ref]
                  ldr reg2,[ref]

                If reg1 = reg2, delete the second ldr
                If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
                a dependency, but it will likely open up new optimisations, so
                do it for now and handle any new dependencies later.
              }
              if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
                  RemoveInstruction(hp1);
                  Result := True;
                  Exit;
                end
              else if
                (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
                (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
                (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
                begin
                  DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
                  taicpu(hp1).opcode := A_MOV;
                  taicpu(hp1).oppostfix := PF_None;
                  taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
                  { reg1 is now read by the mov, so keep it allocated up to there }
                  AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
                  Result := True;
                  Exit;
                end;
            end;
        end;
    end;
  function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
    var
      FollowUp: tai;
      StoreRef: TReference;
      StoredReg, OtherReg: TRegister;
      Replacement: TAsmOp;
    begin
      { Pass-1 peephole optimisations for an unconditional two-operand STR
        at p, looking only at the instruction that directly follows it
        (FollowUp).  Result is True when the instruction stream was changed. }
      Result := False;

      if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
        Exit;

      StoreRef := taicpu(p).oper[1]^.ref^;
      if (StoreRef.addressmode <> AM_OFFSET) or
        RegInRef(taicpu(p).oper[0]^.reg, StoreRef) then
        Exit;

      { Delay calling GetNextInstruction for as long as possible }
      if not GetNextInstruction(p, FollowUp) then
        Exit;

      if (FollowUp.typ <> ait_instruction) or
        (taicpu(FollowUp).condition <> C_None) or
        (taicpu(FollowUp).oppostfix <> taicpu(p).oppostfix) or
        (taicpu(FollowUp).ops <= 0) or
        (taicpu(FollowUp).oper[0]^.typ <> top_reg) then
        Exit;

      { Saves constant dereferencing }
      StoredReg := taicpu(p).oper[0]^.reg;
      OtherReg := taicpu(FollowUp).oper[0]^.reg;

      if taicpu(FollowUp).opcode = A_LDR then
        begin
          { With:
              str reg1,[ref]
              ldr reg2,[ref]

            If reg1 = reg2, Remove ldr.
            If reg1 <> reg2, replace ldr with "mov reg2,reg1"
          }
          if not(RefsEqual(taicpu(FollowUp).oper[1]^.ref^, StoreRef)) or
            (taicpu(FollowUp).oper[1]^.ref^.volatility <> []) or
            not(
              (taicpu(FollowUp).oppostfix = taicpu(p).oppostfix) or
              ((taicpu(p).oppostfix = PF_B) and (taicpu(FollowUp).oppostfix = PF_SB)) or
              ((taicpu(p).oppostfix = PF_H) and (taicpu(FollowUp).oppostfix = PF_SH))
{$ifdef AARCH64}
              or ((taicpu(p).oppostfix = PF_W) and (taicpu(FollowUp).oppostfix = PF_SW))
{$endif AARCH64}
            ) then
            Exit;

          if (StoredReg = OtherReg) and
            { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
            (taicpu(p).oppostfix = PF_None) then
            begin
              DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', FollowUp);
              RemoveInstruction(FollowUp);
              Result := True;
              Exit;
            end;

          { A register-to-register replacement is only attempted between
            integer registers of the same sub-register size }
          if (getregtype(StoredReg) <> R_INTREGISTER) or
            (getregtype(OtherReg) <> R_INTREGISTER) or
            (getsubreg(StoredReg) <> getsubreg(OtherReg)) then
            Exit;

          { Pick the instruction that reproduces what the load would have
            delivered; A_NONE means "leave the load alone" }
          Replacement := A_NONE;
          if taicpu(FollowUp).oppostfix = PF_None then
            Replacement := A_MOV
          else
{$ifdef ARM}
            if (current_settings.cputype < cpu_armv6) then
              begin
                { The zero- and sign-extension operations were only
                  introduced under ARMv6 }
                case taicpu(FollowUp).oppostfix of
                  PF_B:
                    begin
                      { The middle operand is set to the correct register further down }
                      taicpu(FollowUp).allocate_oper(3);
                      taicpu(FollowUp).ops := 3;
                      taicpu(FollowUp).loadconst(2, $FF);
                      Replacement := A_AND;
                    end;
                  PF_H:
                    { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
                  PF_SB,
                  PF_SH:
                    { Do nothing - can't easily encode sign-extensions };
                  else
                    InternalError(2021043002);
                end;
              end
            else
{$endif ARM}
              case taicpu(FollowUp).oppostfix of
                PF_B:
                  Replacement := A_UXTB;
                PF_SB:
                  Replacement := A_SXTB;
                PF_H:
                  Replacement := A_UXTH;
                PF_SH:
                  Replacement := A_SXTH;
{$ifdef AARCH64}
                PF_SW:
                  Replacement := A_SXTW;
                PF_W:
                  Replacement := A_MOV;
{$endif AARCH64}
                else
                  InternalError(2021043001);
              end;

          if Replacement = A_None then
            Exit;

          DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(FollowUp).oppostfix] + ' to ' + gas_op2str[Replacement] + ' (store/load -> store/move)', FollowUp);
          taicpu(FollowUp).oppostfix := PF_None;
          taicpu(FollowUp).opcode := Replacement;
          taicpu(FollowUp).loadreg(1, StoredReg);
          AllocRegBetween(StoredReg, p, FollowUp, UsedRegs);
          Result := True;
        end
      else if (taicpu(FollowUp).opcode = A_STR) and
        RefsEqual(taicpu(FollowUp).oper[1]^.ref^, StoreRef) then
        begin
          { With:
              str reg1,[ref]
              str reg2,[ref]

            If reg1 <> reg2, delete the first str
            If reg1 = reg2, delete the second str
          }
          if (StoredReg = OtherReg) and
            (taicpu(FollowUp).oper[1]^.ref^.volatility = []) then
            begin
              DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', FollowUp);
              RemoveInstruction(FollowUp);
              Result := True;
            end
          else if
            { Registers same byte size? }
            (tcgsize2size[reg_cgsize(StoredReg)] = tcgsize2size[reg_cgsize(OtherReg)]) and
            (StoreRef.volatility = []) then
            begin
              DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
              RemoveCurrentP(p, FollowUp);
              Result := True;
            end;
        end;
    end;
  function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
      i: longint;
    begin
      { Pass-1 peephole optimisations for the AND instruction at p.
        The folds that need an immediate third operand come first; a
        generic "and + mov" merge is tried last.
        Returns True if the instruction stream was changed. }
      Result:=false;
      {
        optimize
        and reg2,reg1,const1
        ...
      }
      if (taicpu(p).ops>2) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (taicpu(p).oper[2]^.typ = top_const) then
        begin
          {
            change
            and reg2,reg1,const1
            ...
            and reg3,reg2,const2
            to
            and reg3,reg1,(const1 and const2)
          }
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_const)
{$ifdef AARCH64}
            { the combined mask must be encodable, or be zero (which turns
              into a mov below).
              NOTE(review): the 32-bit test uses R_SUBL while the rest of
              this file uses R_SUBD for 32-bit registers - confirm which
              one is intended. }
            and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
                  ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
                 ) or
                 ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
{$endif AARCH64}
            then
            begin
              { Variant 1: reg3 is untouched between the two ANDs, so the
                first AND can produce reg3 directly and the second one is
                removed }
              if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                begin
                  DebugMsg('Peephole AndAnd2And done', p);
                  AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 1 done', p);
                      taicpu(p).opcode:=A_MOV;
                      taicpu(p).ops:=2;
                      taicpu(p).loadConst(1,0);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                      { hp1 is removed below, so the zero has to be written
                        to reg3 here; otherwise reg3 would never be set }
                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 1 done', p);
                      taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                    end;
                  asml.remove(hp1);
                  hp1.free;
                  Result:=true;
                  exit;
                end
              { Variant 2: reg1 is untouched between the two ANDs, so the
                second AND can read reg1 directly and the first one is
                removed }
              else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                begin
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
                      taicpu(hp1).opcode:=A_MOV;
                      taicpu(hp1).loadConst(1,0);
                      taicpu(hp1).ops:=2;
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 2 done', hp1);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                      taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                    end;
                  GetNextInstruction(p, hp1);
                  RemoveCurrentP(p);
                  p:=hp1;
                  Result:=true;
                  exit;
                end;
            end
          {
            change
            and reg2,reg1,$xxxxxxFF
            strb reg2,[...]
            dealloc reg2
            to
            strb reg1,[...]
          }
          else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndStrb2Strb done', p);
{$ifdef AARCH64}
              { the store gets the R_SUBD view of reg1 }
              taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
{$else AARCH64}
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
{$endif AARCH64}
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              RemoveCurrentP(p);
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,255
            uxtb/uxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            { the mask must already fit into the width the extend keeps }
            ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
             (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndUxt2And done', p);
              { the extend is recycled as the surviving AND }
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,x      (x has bit 7 and everything above clear)
            sxtb/sxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndSxt2And done', p);
              { the extend is recycled as the surviving AND }
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            from
            and reg1,reg0,2^n-1
            mov reg2,reg1, lsl imm1
            (mov reg3,reg2, lsr/asr imm1)
            remove either the and or the lsl/xsr sequence if possible

            ispowerof2 stores n, the number of bits the mask keeps, in i
          }
          else if (taicpu(p).oper[2]^.val < high(int64)) and
            cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
            (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
            (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
            begin
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                mov reg3,reg2, lsr/asr imm1
                =>
                and reg3,reg0,2^n-1

                if n < 32-imm1, or n = 32-imm1 and the right shift is lsr

                NOTE(review): the lookup below searches for the next user
                of reg1 although the instruction wanted is the next user
                of reg2 - confirm this is intended.
              }
              if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
                MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
                (taicpu(hp2).ops=3) and
                MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
                (taicpu(hp2).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
                (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
                (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
                (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
                RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
                ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
                 ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                  (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
                begin
                  DebugMsg('Peephole AndLslXsr2And done', p);
                  taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
                  asml.Remove(hp1);
                  asml.Remove(hp2);
                  hp1.free;
                  hp2.free;
                  result:=true;
                  exit;
                end
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                =>
                mov reg2,reg0, lsl imm1

                if n > 32-imm1
              }
              else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
                begin
                  DebugMsg('Peephole AndLsl2Lsl done', p);
                  taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
                  GetNextInstruction(p, hp1);
                  asml.Remove(p);
                  p.free;
                  p:=hp1;
                  result:=true;
                  exit;
                end
            end;
        end;
      {
        change
        and reg1, ...
        mov reg2, reg1
        to
        and reg2, ...
      }
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        (taicpu(p).ops>=3) and
        RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
        Result:=true;
    end;
  function TARMAsmOptimizer.OptPass2AND(var p: tai): Boolean;
    var
      hp1, hp2: tai;
      WorkingReg: TRegister;
    begin
      { Pass-2 peephole optimisations for the AND instruction at p.
        Returns True if the instruction stream was changed. }
      Result := False;
      {
        change
        and reg1, ...
        ...
        cmp reg1, #0
        b<ne/eq> @Lbl
        to
        ands reg1, ...

        Also:

        and reg1, ...
        ...
        cmp reg1, #0
        (reg1 end of life)
        b<ne/eq> @Lbl
        to
        tst reg1, ...
      }
      if (taicpu(p).condition = C_None) and
        (taicpu(p).ops>=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
        MatchOperand(taicpu(hp1).oper[1]^, 0) and
{$ifdef AARCH64}
        (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
        (
          (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
          or
          { a differing sub-register is accepted when the mask is a constant
            in the range 0..$FFFFFFFF }
          (
            (taicpu(p).oper[2]^.typ = top_const) and
            (taicpu(p).oper[2]^.val >= 0) and
            (taicpu(p).oper[2]^.val <= $FFFFFFFF)
          )
        ) and
{$else AARCH64}
        (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
{$endif AARCH64}
        { the flags must be free between the AND and the CMP, because the
          AND is going to set them instead }
        not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2, A_B, [C_EQ, C_NE], [PF_None]) then
        begin
          AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);

          WorkingReg := taicpu(p).oper[0]^.reg;

          { The TST rewrite below keeps exactly two operands, so it is only
            applied to the three-operand AND.  With more operands (a
            trailing shifter operand) the extra operand would be dropped,
            so those forms take the ANDS route. }
          if (taicpu(p).ops = 3) and
            RegEndOfLife(WorkingReg, taicpu(hp1)) then
            begin
              taicpu(p).opcode := A_TST;
              taicpu(p).oppostfix := PF_None;
              taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
              taicpu(p).loadoper(1, taicpu(p).oper[2]^);
              taicpu(p).ops := 2;
              DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
            end
          else
            begin
              taicpu(p).oppostfix := PF_S;
              DebugMsg(SPeepholeOptimization + 'AND; CMP -> ANDS', p);
            end;

          RemoveInstruction(hp1);

          { If a temporary register was used for and/cmp before, we might be
            able to deallocate the register so it can be used for other
            optimisations later }
          if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
            ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);

          Result := True;
          Exit;
        end;

      {
        change
        and reg1, ...
        mov reg2, reg1
        to
        and reg2, ...
      }
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        (taicpu(p).ops>=3) and
        RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
        Result:=true;
    end;
  1577. end.