aoptarm.pas 62 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. function OptPreSBFXUBFX(var p: tai): Boolean;
  36. function OptPass1UXTB(var p: tai): Boolean;
  37. function OptPass1UXTH(var p: tai): Boolean;
  38. function OptPass1SXTB(var p: tai): Boolean;
  39. function OptPass1SXTH(var p: tai): Boolean;
  40. function OptPass1LDR(var p: tai): Boolean; virtual;
  41. function OptPass1STR(var p: tai): Boolean; virtual;
  42. function OptPass1And(var p: tai): Boolean; virtual;
  43. End;
  44. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  45. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  46. {$ifdef AARCH64}
  47. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  48. {$endif AARCH64}
  49. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  50. function RefsEqual(const r1, r2: treference): boolean;
  51. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  52. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  53. Implementation
  54. uses
  55. cutils,verbose,globtype,globals,
  56. systems,
  57. cpuinfo,
  58. cgobj,procinfo,
  59. aasmbase,aasmdata,itcpugas;
  60. {$ifdef DEBUG_AOPTCPU}
  61. const
  62. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  63. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  64. begin
  65. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  66. end;
  67. {$else DEBUG_AOPTCPU}
  68. { Empty strings help the optimizer to remove string concatenations that won't
  69. ever appear to the user on release builds. [Kit] }
  70. const
  71. SPeepholeOptimization = '';
  72. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  73. begin
  74. end;
  75. {$endif DEBUG_AOPTCPU}
  76. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  77. begin
  78. result :=
  79. (instr.typ = ait_instruction) and
  80. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  81. ((cond = []) or (taicpu(instr).condition in cond)) and
  82. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  83. end;
  84. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  85. begin
  86. result :=
  87. (instr.typ = ait_instruction) and
  88. (taicpu(instr).opcode = op) and
  89. ((cond = []) or (taicpu(instr).condition in cond)) and
  90. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  91. end;
  92. {$ifdef AARCH64}
  93. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  94. var
  95. op : TAsmOp;
  96. begin
  97. result:=false;
  98. if instr.typ <> ait_instruction then
  99. exit;
  100. for op in ops do
  101. begin
  102. if (taicpu(instr).opcode = op) and
  103. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  104. begin
  105. result:=true;
  106. exit;
  107. end;
  108. end;
  109. end;
  110. {$endif AARCH64}
  111. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  112. begin
  113. result :=
  114. (instr.typ = ait_instruction) and
  115. (taicpu(instr).opcode = op) and
  116. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  117. end;
  118. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  119. begin
  120. result := (oper.typ = top_reg) and (oper.reg = reg);
  121. end;
  122. function RefsEqual(const r1, r2: treference): boolean;
  123. begin
  124. refsequal :=
  125. (r1.offset = r2.offset) and
  126. (r1.base = r2.base) and
  127. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  128. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  129. (r1.relsymbol = r2.relsymbol) and
  130. {$ifdef ARM}
  131. (r1.signindex = r2.signindex) and
  132. {$endif ARM}
  133. (r1.shiftimm = r2.shiftimm) and
  134. (r1.addressmode = r2.addressmode) and
  135. (r1.shiftmode = r2.shiftmode) and
  136. (r1.volatility=[]) and
  137. (r2.volatility=[]);
  138. end;
  139. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  140. begin
  141. result := oper1.typ = oper2.typ;
  142. if result then
  143. case oper1.typ of
  144. top_const:
  145. Result:=oper1.val = oper2.val;
  146. top_reg:
  147. Result:=oper1.reg = oper2.reg;
  148. top_conditioncode:
  149. Result:=oper1.cc = oper2.cc;
  150. top_realconst:
  151. Result:=oper1.val_real = oper2.val_real;
  152. top_ref:
  153. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  154. else Result:=false;
  155. end
  156. end;
  157. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  158. Out Next: tai; const reg: TRegister): Boolean;
  159. var
  160. gniResult: Boolean;
  161. begin
  162. Next:=Current;
  163. Result := False;
  164. repeat
  165. gniResult:=GetNextInstruction(Next,Next);
  166. if gniResult and RegInInstruction(reg,Next) then
  167. { Found something }
  168. Exit(True);
  169. until not gniResult or
  170. not(cs_opt_level3 in current_settings.optimizerswitches) or
  171. (Next.typ<>ait_instruction) or
  172. is_calljmp(taicpu(Next).opcode)
  173. {$ifdef ARM}
  174. or RegModifiedByInstruction(NR_PC,Next)
  175. {$endif ARM}
  176. ;
  177. end;
  178. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  179. var
  180. alloc,
  181. dealloc : tai_regalloc;
  182. hp1 : tai;
  183. begin
  184. Result:=false;
  185. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  186. { We can't optimize if there is a shiftop }
  187. (taicpu(movp).ops=2) and
  188. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  189. { don't mess with moves to fp }
  190. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  191. { the destination register of the mov might not be used beween p and movp }
  192. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  193. {$ifdef ARM}
  194. { PC should be changed only by moves }
  195. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  196. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  197. (taicpu(p).opcode<>A_CBZ) and
  198. (taicpu(p).opcode<>A_CBNZ) and
  199. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  200. not (
  201. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  202. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  203. (current_settings.cputype < cpu_armv6)
  204. ) and
  205. {$endif ARM}
  206. { Take care to only do this for instructions which REALLY load to the first register.
  207. Otherwise
  208. str reg0, [reg1]
  209. mov reg2, reg0
  210. will be optimized to
  211. str reg2, [reg1]
  212. }
  213. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  214. begin
  215. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  216. if assigned(dealloc) then
  217. begin
  218. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  219. result:=true;
  220. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  221. and remove it if possible }
  222. asml.Remove(dealloc);
  223. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  224. if assigned(alloc) then
  225. begin
  226. asml.Remove(alloc);
  227. alloc.free;
  228. dealloc.free;
  229. end
  230. else
  231. asml.InsertAfter(dealloc,p);
  232. { try to move the allocation of the target register }
  233. GetLastInstruction(movp,hp1);
  234. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  235. if assigned(alloc) then
  236. begin
  237. asml.Remove(alloc);
  238. asml.InsertBefore(alloc,p);
  239. { adjust used regs }
  240. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  241. end;
  242. { finally get rid of the mov }
  243. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  244. { Remove preindexing and postindexing for LDR in some cases.
  245. For example:
  246. ldr reg2,[reg1, xxx]!
  247. mov reg1,reg2
  248. must be translated to:
  249. ldr reg1,[reg1, xxx]
  250. Preindexing must be removed there, since the same register is used as the base and as the target.
  251. Such case is not allowed for ARM CPU and produces crash. }
  252. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  253. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  254. then
  255. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  256. asml.remove(movp);
  257. movp.free;
  258. end;
  259. end;
  260. end;
  261. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  262. var
  263. I: Integer;
  264. current_hp, next_hp: tai;
  265. LDRChange: Boolean;
  266. begin
  267. Result:=false;
  268. {
  269. change
  270. mov r1, r0
  271. add r1, r1, #1
  272. to
  273. add r1, r0, #1
  274. Todo: Make it work for mov+cmp too
  275. CAUTION! If this one is successful p might not be a mov instruction anymore!
  276. }
  277. if (taicpu(p).ops = 2) and
  278. (taicpu(p).oper[1]^.typ = top_reg) and
  279. (taicpu(p).oppostfix = PF_NONE) then
  280. begin
  281. if
  282. MatchInstruction(hp1, [A_ADD, A_ADC,
  283. {$ifdef ARM}
  284. A_RSB, A_RSC,
  285. {$endif ARM}
  286. A_SUB, A_SBC,
  287. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  288. [taicpu(p).condition], []) and
  289. { MOV and MVN might only have 2 ops }
  290. (taicpu(hp1).ops >= 2) and
  291. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  292. (taicpu(hp1).oper[1]^.typ = top_reg) and
  293. (
  294. (taicpu(hp1).ops = 2) or
  295. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  296. ) and
  297. {$ifdef AARCH64}
  298. (taicpu(p).oper[1]^.reg<>NR_SP) and
  299. { in this case you have to transform it to movk or the like }
  300. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  301. {$endif AARCH64}
  302. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  303. begin
  304. { When we get here we still don't know if the registers match }
  305. for I:=1 to 2 do
  306. {
  307. If the first loop was successful p will be replaced with hp1.
  308. The checks will still be ok, because all required information
  309. will also be in hp1 then.
  310. }
  311. if (taicpu(hp1).ops > I) and
  312. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  313. {$ifdef ARM}
  314. { prevent certain combinations on thumb(2), this is only a safe approximation }
  315. and (not(GenerateThumbCode or GenerateThumb2Code) or
  316. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  317. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  318. {$endif ARM}
  319. then
  320. begin
  321. DebugMsg('Peephole RedundantMovProcess done', hp1);
  322. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  323. if p<>hp1 then
  324. begin
  325. asml.remove(p);
  326. p.free;
  327. p:=hp1;
  328. Result:=true;
  329. end;
  330. end;
  331. if Result then Exit;
  332. end
  333. { Change: Change:
  334. mov r1, r0 mov r1, r0
  335. ... ...
  336. ldr/str r2, [r1, etc.] mov r2, r1
  337. To: To:
  338. ldr/str r2, [r0, etc.] mov r2, r0
  339. }
  340. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  341. {$ifdef ARM}
  342. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  343. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  344. { Thumb does not support references with base and index one being SP }
  345. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  346. {$endif ARM}
  347. {$ifdef AARCH64}
  348. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  349. {$endif AARCH64}
  350. then
  351. begin
  352. current_hp := p;
  353. TransferUsedRegs(TmpUsedRegs);
  354. { Search local instruction block }
  355. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  356. begin
  357. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  358. LDRChange := False;
  359. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  360. {$ifdef AARCH64}
  361. { If r0 is the zero register, then this sequence of instructions will cause
  362. an access violation, but that's better than an assembler error caused by
  363. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  364. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  365. {$endif AARCH64}
  366. then
  367. begin
  368. { Change the registers from r1 to r0 }
  369. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  370. {$ifdef ARM}
  371. { This optimisation conflicts with something and raises
  372. an access violation - needs further investigation. [Kit] }
  373. (taicpu(next_hp).opcode <> A_LDR) and
  374. {$endif ARM}
  375. { Don't mess around with the base register if the
  376. reference is pre- or post-indexed }
  377. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  378. begin
  379. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  380. LDRChange := True;
  381. end;
  382. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  383. begin
  384. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  385. LDRChange := True;
  386. end;
  387. if LDRChange then
  388. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  389. { Drop out if we're dealing with pre-indexed references }
  390. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  391. (
  392. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  393. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  394. ) then
  395. begin
  396. { Remember to update register allocations }
  397. if LDRChange then
  398. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  399. Break;
  400. end;
  401. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  402. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  403. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  404. begin
  405. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  406. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  407. LDRChange := True;
  408. end;
  409. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  410. begin
  411. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  412. if (taicpu(p).oppostfix = PF_None) and
  413. (
  414. (
  415. (taicpu(next_hp).opcode = A_LDR) and
  416. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  417. ) or
  418. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  419. ) and
  420. { Double-check to see if the old registers were actually
  421. changed (e.g. if the super registers matched, but not
  422. the sizes, they won't be changed). }
  423. (
  424. (taicpu(next_hp).opcode = A_LDR) or
  425. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  426. ) and
  427. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  428. begin
  429. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  430. RemoveCurrentP(p);
  431. Result := True;
  432. Exit;
  433. end;
  434. end;
  435. end
  436. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  437. (taicpu(next_hp).ops = 2) then
  438. begin
  439. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  440. begin
  441. { Found another mov that writes entirely to the register }
  442. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  443. begin
  444. { Register was used beforehand }
  445. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  446. begin
  447. { This MOV is exactly the same as the first one.
  448. Since none of the registers have changed value
  449. at this point, we can remove it. }
  450. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  451. if (next_hp = hp1) then
  452. { Don't let hp1 become a dangling pointer }
  453. hp1 := nil;
  454. asml.Remove(next_hp);
  455. next_hp.Free;
  456. { We still have the original p, so we can continue optimising;
  457. if it was -O2 or below, this instruction appeared immediately
  458. after the first MOV, so we're technically not looking more
  459. than one instruction ahead after it's removed! [Kit] }
  460. Continue;
  461. end
  462. else
  463. { Register changes value - drop out }
  464. Break;
  465. end;
  466. { We can delete the first MOV (only if the second MOV is unconditional) }
  467. {$ifdef ARM}
  468. if (taicpu(p).oppostfix = PF_None) and
  469. (taicpu(next_hp).condition = C_None) then
  470. {$endif ARM}
  471. begin
  472. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  473. RemoveCurrentP(p);
  474. Result := True;
  475. end;
  476. Exit;
  477. end
  478. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  479. begin
  480. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  481. { Be careful - if the entire register is not used, removing this
  482. instruction will leave the unused part uninitialised }
  483. {$ifdef AARCH64}
  484. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  485. {$endif AARCH64}
  486. then
  487. begin
  488. { Instruction will become mov r1,r1 }
  489. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  490. { Allocate r1 between the instructions; not doing
  491. so may cause problems when removing superfluous
  492. MOVs later (i38055) }
  493. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  494. if (next_hp = hp1) then
  495. { Don't let hp1 become a dangling pointer }
  496. hp1 := nil;
  497. asml.Remove(next_hp);
  498. next_hp.Free;
  499. Continue;
  500. end;
  501. { Change the old register (checking the first operand again
  502. forces it to be left alone if the full register is not
  503. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  504. {$ifdef AARCH64}
  505. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  506. {$endif AARCH64}
  507. begin
  508. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  509. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  510. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  511. { If this was the only reference to the old register,
  512. then we can remove the original MOV now }
  513. if (taicpu(p).oppostfix = PF_None) and
  514. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  515. remove if the register was apparently not allocated when its value is
  516. first set at the MOV command (this is especially true for the stack
  517. register). [Kit] }
  518. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  519. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  520. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  521. begin
  522. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  523. RemoveCurrentP(p);
  524. Result := True;
  525. Exit;
  526. end;
  527. end;
  528. end;
  529. end;
  530. { On low optimisation settions, don't search more than one instruction ahead }
  531. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  532. { Stop at procedure calls and jumps }
  533. is_calljmp(taicpu(next_hp).opcode) or
  534. { If the read register has changed value, or the MOV
  535. destination register has been used, drop out }
  536. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  537. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  538. Break;
  539. current_hp := next_hp;
  540. end;
  541. end;
  542. end;
  543. end;
  544. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  545. var
  546. hp1, hp2: tai;
  547. begin
  548. Result:=false;
  549. {
  550. change
  551. uxtb reg2,reg1
  552. strb reg2,[...]
  553. dealloc reg2
  554. to
  555. strb reg1,[...]
  556. }
  557. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  558. (taicpu(p).ops=2) and
  559. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  560. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  561. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  562. { the reference in strb might not use reg2 }
  563. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  564. { reg1 might not be modified inbetween }
  565. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  566. begin
  567. DebugMsg('Peephole UxtbStrb2Strb done', p);
  568. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  569. GetNextInstruction(p,hp2);
  570. asml.remove(p);
  571. p.free;
  572. p:=hp2;
  573. result:=true;
  574. end
  575. {
  576. change
  577. uxtb reg2,reg1
  578. uxth reg3,reg2
  579. dealloc reg2
  580. to
  581. uxtb reg3,reg1
  582. }
  583. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  584. (taicpu(p).ops=2) and
  585. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  586. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  587. (taicpu(hp1).ops = 2) and
  588. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  589. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  590. { reg1 might not be modified inbetween }
  591. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  592. begin
  593. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  594. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  595. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  596. asml.remove(hp1);
  597. hp1.free;
  598. result:=true;
  599. end
  600. {
  601. change
  602. uxtb reg2,reg1
  603. uxtb reg3,reg2
  604. dealloc reg2
  605. to
  606. uxtb reg3,reg1
  607. }
  608. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  609. (taicpu(p).ops=2) and
  610. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  611. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  612. (taicpu(hp1).ops = 2) and
  613. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  614. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  615. { reg1 might not be modified inbetween }
  616. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  617. begin
  618. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  619. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  620. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  621. asml.remove(hp1);
  622. hp1.free;
  623. result:=true;
  624. end
  625. {
  626. change
  627. uxtb reg2,reg1
  628. and reg3,reg2,#0x*FF
  629. dealloc reg2
  630. to
  631. uxtb reg3,reg1
  632. }
  633. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  634. (taicpu(p).ops=2) and
  635. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  636. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  637. (taicpu(hp1).ops=3) and
  638. (taicpu(hp1).oper[2]^.typ=top_const) and
  639. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  640. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  641. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  642. { reg1 might not be modified inbetween }
  643. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  644. begin
  645. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  646. taicpu(hp1).opcode:=A_UXTB;
  647. taicpu(hp1).ops:=2;
  648. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  649. GetNextInstruction(p,hp2);
  650. asml.remove(p);
  651. p.free;
  652. p:=hp2;
  653. result:=true;
  654. end
  655. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  656. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  657. Result:=true;
  658. end;
  659. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  660. var
  661. hp1: tai;
  662. begin
  663. Result:=false;
  664. {
  665. change
  666. uxth reg2,reg1
  667. strh reg2,[...]
  668. dealloc reg2
  669. to
  670. strh reg1,[...]
  671. }
  672. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  673. (taicpu(p).ops=2) and
  674. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  675. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  676. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  677. { the reference in strb might not use reg2 }
  678. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  679. { reg1 might not be modified inbetween }
  680. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  681. begin
  682. DebugMsg('Peephole UXTHStrh2Strh done', p);
  683. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  684. GetNextInstruction(p, hp1);
  685. asml.remove(p);
  686. p.free;
  687. p:=hp1;
  688. result:=true;
  689. end
  690. {
  691. change
  692. uxth reg2,reg1
  693. uxth reg3,reg2
  694. dealloc reg2
  695. to
  696. uxth reg3,reg1
  697. }
  698. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  699. (taicpu(p).ops=2) and
  700. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  701. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  702. (taicpu(hp1).ops=2) and
  703. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  704. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  705. { reg1 might not be modified inbetween }
  706. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  707. begin
  708. DebugMsg('Peephole UxthUxth2Uxth done', p);
  709. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  710. taicpu(hp1).opcode:=A_UXTH;
  711. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  712. GetNextInstruction(p, hp1);
  713. asml.remove(p);
  714. p.free;
  715. p:=hp1;
  716. result:=true;
  717. end
  718. {
  719. change
  720. uxth reg2,reg1
  721. and reg3,reg2,#65535
  722. dealloc reg2
  723. to
  724. uxth reg3,reg1
  725. }
  726. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  727. (taicpu(p).ops=2) and
  728. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  729. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  730. (taicpu(hp1).ops=3) and
  731. (taicpu(hp1).oper[2]^.typ=top_const) and
  732. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  733. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  734. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  735. { reg1 might not be modified inbetween }
  736. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  737. begin
  738. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  739. taicpu(hp1).opcode:=A_UXTH;
  740. taicpu(hp1).ops:=2;
  741. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  742. GetNextInstruction(p, hp1);
  743. asml.remove(p);
  744. p.free;
  745. p:=hp1;
  746. result:=true;
  747. end
  748. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  749. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  750. Result:=true;
  751. end;
  752. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  753. var
  754. hp1, hp2: tai;
  755. begin
  756. Result:=false;
  757. {
  758. change
  759. sxtb reg2,reg1
  760. strb reg2,[...]
  761. dealloc reg2
  762. to
  763. strb reg1,[...]
  764. }
  765. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  766. (taicpu(p).ops=2) and
  767. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  768. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  769. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  770. { the reference in strb might not use reg2 }
  771. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  772. { reg1 might not be modified inbetween }
  773. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  774. begin
  775. DebugMsg('Peephole SxtbStrb2Strb done', p);
  776. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  777. GetNextInstruction(p,hp2);
  778. asml.remove(p);
  779. p.free;
  780. p:=hp2;
  781. result:=true;
  782. end
  783. {
  784. change
  785. sxtb reg2,reg1
  786. sxth reg3,reg2
  787. dealloc reg2
  788. to
  789. sxtb reg3,reg1
  790. }
  791. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  792. (taicpu(p).ops=2) and
  793. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  794. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  795. (taicpu(hp1).ops = 2) and
  796. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  797. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  798. { reg1 might not be modified inbetween }
  799. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  800. begin
  801. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  802. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  803. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  804. asml.remove(hp1);
  805. hp1.free;
  806. result:=true;
  807. end
  808. {
  809. change
  810. sxtb reg2,reg1
  811. sxtb reg3,reg2
  812. dealloc reg2
  813. to
  814. uxtb reg3,reg1
  815. }
  816. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  817. (taicpu(p).ops=2) and
  818. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  819. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  820. (taicpu(hp1).ops = 2) and
  821. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  822. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  823. { reg1 might not be modified inbetween }
  824. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  825. begin
  826. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  827. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  828. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  829. asml.remove(hp1);
  830. hp1.free;
  831. result:=true;
  832. end
  833. {
  834. change
  835. sxtb reg2,reg1
  836. and reg3,reg2,#0x*FF
  837. dealloc reg2
  838. to
  839. uxtb reg3,reg1
  840. }
  841. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  842. (taicpu(p).ops=2) and
  843. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  844. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  845. (taicpu(hp1).ops=3) and
  846. (taicpu(hp1).oper[2]^.typ=top_const) and
  847. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  848. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  849. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  850. { reg1 might not be modified inbetween }
  851. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  852. begin
  853. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  854. taicpu(hp1).opcode:=A_UXTB;
  855. taicpu(hp1).ops:=2;
  856. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  857. GetNextInstruction(p,hp2);
  858. asml.remove(p);
  859. p.free;
  860. p:=hp2;
  861. result:=true;
  862. end
  863. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  864. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  865. Result:=true;
  866. end;
  867. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  868. var
  869. hp1: tai;
  870. begin
  871. Result:=false;
  872. {
  873. change
  874. sxth reg2,reg1
  875. strh reg2,[...]
  876. dealloc reg2
  877. to
  878. strh reg1,[...]
  879. }
  880. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  881. (taicpu(p).ops=2) and
  882. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  883. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  884. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  885. { the reference in strb might not use reg2 }
  886. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  887. { reg1 might not be modified inbetween }
  888. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  889. begin
  890. DebugMsg('Peephole SxthStrh2Strh done', p);
  891. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  892. GetNextInstruction(p, hp1);
  893. asml.remove(p);
  894. p.free;
  895. p:=hp1;
  896. result:=true;
  897. end
  898. {
  899. change
  900. sxth reg2,reg1
  901. sxth reg3,reg2
  902. dealloc reg2
  903. to
  904. sxth reg3,reg1
  905. }
  906. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  907. (taicpu(p).ops=2) and
  908. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  909. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  910. (taicpu(hp1).ops=2) and
  911. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  912. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  913. { reg1 might not be modified inbetween }
  914. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  915. begin
  916. DebugMsg('Peephole SxthSxth2Sxth done', p);
  917. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  918. taicpu(hp1).opcode:=A_SXTH;
  919. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  920. GetNextInstruction(p, hp1);
  921. asml.remove(p);
  922. p.free;
  923. p:=hp1;
  924. result:=true;
  925. end
  926. {$ifdef AARCH64}
  927. {
  928. change
  929. sxth reg2,reg1
  930. sxtw reg3,reg2
  931. dealloc reg2
  932. to
  933. sxth reg3,reg1
  934. }
  935. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  936. (taicpu(p).ops=2) and
  937. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  938. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  939. (taicpu(hp1).ops=2) and
  940. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  941. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  942. { reg1 might not be modified inbetween }
  943. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  944. begin
  945. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  946. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  947. taicpu(hp1).opcode:=A_SXTH;
  948. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  949. GetNextInstruction(p, hp1);
  950. asml.remove(p);
  951. p.free;
  952. p:=hp1;
  953. result:=true;
  954. end
  955. {$endif AARCH64}
  956. {
  957. change
  958. sxth reg2,reg1
  959. and reg3,reg2,#65535
  960. dealloc reg2
  961. to
  962. uxth reg3,reg1
  963. }
  964. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  965. (taicpu(p).ops=2) and
  966. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  967. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  968. (taicpu(hp1).ops=3) and
  969. (taicpu(hp1).oper[2]^.typ=top_const) and
  970. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  971. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  972. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  973. { reg1 might not be modified inbetween }
  974. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  975. begin
  976. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  977. taicpu(hp1).opcode:=A_UXTH;
  978. taicpu(hp1).ops:=2;
  979. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  980. GetNextInstruction(p, hp1);
  981. asml.remove(p);
  982. p.free;
  983. p:=hp1;
  984. result:=true;
  985. end
  986. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  987. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  988. Result:=true;
  989. end;
  990. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  991. begin
  992. Result := False;
  993. { Convert:
  994. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  995. To:
  996. mov reg1,reg2
  997. }
  998. if (taicpu(p).oper[2]^.val = 0) and
  999. {$ifdef AARCH64}
  1000. (
  1001. (
  1002. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1003. (taicpu(p).oper[3]^.val = 64)
  1004. ) or
  1005. (
  1006. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1007. (taicpu(p).oper[3]^.val = 32)
  1008. )
  1009. )
  1010. {$else AARCH64}
  1011. (taicpu(p).oper[3]^.val = 32)
  1012. {$endif AARCH64}
  1013. then
  1014. begin
  1015. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1016. taicpu(p).opcode := A_MOV;
  1017. taicpu(p).ops := 2;
  1018. taicpu(p).clearop(2);
  1019. taicpu(p).clearop(3);
  1020. Result := True;
  1021. Exit;
  1022. end;
  1023. end;
  1024. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1025. var
  1026. hp1: tai;
  1027. Reference: TReference;
  1028. NewOp: TAsmOp;
  1029. begin
  1030. Result := False;
  1031. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1032. Exit;
  1033. Reference := taicpu(p).oper[1]^.ref^;
  1034. if (Reference.addressmode = AM_OFFSET) and
  1035. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1036. { Delay calling GetNextInstruction for as long as possible }
  1037. GetNextInstruction(p, hp1) and
  1038. (hp1.typ = ait_instruction) and
  1039. (taicpu(hp1).condition = C_None) and
  1040. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1041. begin
  1042. if (taicpu(hp1).opcode = A_STR) and
  1043. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1044. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1045. begin
  1046. { With:
  1047. ldr reg1,[ref]
  1048. str reg2,[ref]
  1049. If reg1 = reg2, Remove str
  1050. }
  1051. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1052. begin
  1053. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1054. RemoveInstruction(hp1);
  1055. Result := True;
  1056. Exit;
  1057. end;
  1058. end
  1059. else if (taicpu(hp1).opcode = A_LDR) and
  1060. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1061. begin
  1062. { With:
  1063. ldr reg1,[ref]
  1064. ldr reg2,[ref]
  1065. If reg1 = reg2, delete the second ldr
  1066. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1067. a dependency, but it will likely open up new optimisations, so
  1068. do it for now and handle any new dependencies later.
  1069. }
  1070. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1071. begin
  1072. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1073. RemoveInstruction(hp1);
  1074. Result := True;
  1075. Exit;
  1076. end
  1077. else if
  1078. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1079. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1080. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1081. begin
  1082. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1083. taicpu(hp1).opcode := A_MOV;
  1084. taicpu(hp1).oppostfix := PF_None;
  1085. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1086. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1087. Result := True;
  1088. Exit;
  1089. end;
  1090. end;
  1091. end;
  1092. end;
  1093. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1094. var
  1095. hp1: tai;
  1096. Reference: TReference;
  1097. SizeMismatch: Boolean;
  1098. SrcReg, DstReg: TRegister;
  1099. NewOp: TAsmOp;
  1100. begin
  1101. Result := False;
  1102. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1103. Exit;
  1104. Reference := taicpu(p).oper[1]^.ref^;
  1105. if (Reference.addressmode = AM_OFFSET) and
  1106. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1107. { Delay calling GetNextInstruction for as long as possible }
  1108. GetNextInstruction(p, hp1) and
  1109. (hp1.typ = ait_instruction) and
  1110. (taicpu(hp1).condition = C_None) and
  1111. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1112. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1113. begin
  1114. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1115. SrcReg := taicpu(p).oper[0]^.reg;
  1116. DstReg := taicpu(hp1).oper[0]^.reg;
  1117. if (taicpu(hp1).opcode = A_LDR) and
  1118. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1119. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1120. (
  1121. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1122. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1123. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1124. {$ifdef AARCH64}
  1125. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1126. {$endif AARCH64}
  1127. ) then
  1128. begin
  1129. { With:
  1130. str reg1,[ref]
  1131. ldr reg2,[ref]
  1132. If reg1 = reg2, Remove ldr.
  1133. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1134. }
  1135. if (SrcReg = DstReg) and
  1136. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1137. (taicpu(p).oppostfix=PF_None) then
  1138. begin
  1139. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1140. RemoveInstruction(hp1);
  1141. Result := True;
  1142. Exit;
  1143. end
  1144. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1145. (getregtype(DstReg) = R_INTREGISTER) and
  1146. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1147. begin
  1148. NewOp:=A_NONE;
  1149. if taicpu(hp1).oppostfix=PF_None then
  1150. NewOp:=A_MOV
  1151. else
  1152. {$ifdef ARM}
  1153. if (current_settings.cputype < cpu_armv6) then
  1154. begin
  1155. { The zero- and sign-extension operations were only
  1156. introduced under ARMv6 }
  1157. case taicpu(hp1).oppostfix of
  1158. PF_B:
  1159. begin
  1160. { The if-block afterwards will set the middle operand to the correct register }
  1161. taicpu(hp1).allocate_oper(3);
  1162. taicpu(hp1).ops := 3;
  1163. taicpu(hp1).loadconst(2, $FF);
  1164. NewOp := A_AND;
  1165. end;
  1166. PF_H:
  1167. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1168. PF_SB,
  1169. PF_SH:
  1170. { Do nothing - can't easily encode sign-extensions };
  1171. else
  1172. InternalError(2021043002);
  1173. end;
  1174. end
  1175. else
  1176. {$endif ARM}
  1177. case taicpu(hp1).oppostfix of
  1178. PF_B:
  1179. NewOp := A_UXTB;
  1180. PF_SB:
  1181. NewOp := A_SXTB;
  1182. PF_H:
  1183. NewOp := A_UXTH;
  1184. PF_SH:
  1185. NewOp := A_SXTH;
  1186. {$ifdef AARCH64}
  1187. PF_SW:
  1188. NewOp := A_SXTW;
  1189. PF_W:
  1190. NewOp := A_MOV;
  1191. {$endif AARCH64}
  1192. else
  1193. InternalError(2021043001);
  1194. end;
  1195. if (NewOp<>A_None) then
  1196. begin
  1197. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1198. taicpu(hp1).oppostfix := PF_None;
  1199. taicpu(hp1).opcode := NewOp;
  1200. taicpu(hp1).loadreg(1, SrcReg);
  1201. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1202. Result := True;
  1203. Exit;
  1204. end;
  1205. end
  1206. end
  1207. else if (taicpu(hp1).opcode = A_STR) and
  1208. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1209. begin
  1210. { With:
  1211. str reg1,[ref]
  1212. str reg2,[ref]
  1213. If reg1 <> reg2, delete the first str
  1214. IF reg1 = reg2, delete the second str
  1215. }
  1216. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1217. begin
  1218. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1219. RemoveInstruction(hp1);
  1220. Result := True;
  1221. Exit;
  1222. end
  1223. else if
  1224. { Registers same byte size? }
  1225. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1226. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1227. begin
  1228. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1229. RemoveCurrentP(p, hp1);
  1230. Result := True;
  1231. Exit;
  1232. end;
  1233. end;
  1234. end;
  1235. end;
  1236. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1237. var
  1238. hp1, hp2: tai;
  1239. i: longint;
  1240. begin
  1241. Result:=false;
  1242. {
  1243. optimize
  1244. and reg2,reg1,const1
  1245. ...
  1246. }
  1247. if (taicpu(p).ops>2) and
  1248. (taicpu(p).oper[1]^.typ = top_reg) and
  1249. (taicpu(p).oper[2]^.typ = top_const) then
  1250. begin
  1251. {
  1252. change
  1253. and reg2,reg1,const1
  1254. ...
  1255. and reg3,reg2,const2
  1256. to
  1257. and reg3,reg1,(const1 and const2)
  1258. }
  1259. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1260. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1261. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1262. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1263. (taicpu(hp1).oper[2]^.typ = top_const)
  1264. {$ifdef AARCH64}
  1265. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1266. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1267. ) or
  1268. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1269. {$endif AARCH64}
  1270. then
  1271. begin
  1272. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1273. begin
  1274. DebugMsg('Peephole AndAnd2And done', p);
  1275. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1276. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1277. begin
  1278. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1279. taicpu(p).opcode:=A_MOV;
  1280. taicpu(p).ops:=2;
  1281. taicpu(p).loadConst(1,0);
  1282. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1283. end
  1284. else
  1285. begin
  1286. DebugMsg('Peephole AndAnd2And 1 done', p);
  1287. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1288. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1289. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1290. end;
  1291. asml.remove(hp1);
  1292. hp1.free;
  1293. Result:=true;
  1294. exit;
  1295. end
  1296. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1297. begin
  1298. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1299. begin
  1300. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1301. taicpu(hp1).opcode:=A_MOV;
  1302. taicpu(hp1).loadConst(1,0);
  1303. taicpu(hp1).ops:=2;
  1304. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1305. end
  1306. else
  1307. begin
  1308. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1309. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1310. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1311. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1312. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1313. end;
  1314. GetNextInstruction(p, hp1);
  1315. RemoveCurrentP(p);
  1316. p:=hp1;
  1317. Result:=true;
  1318. exit;
  1319. end;
  1320. end
  1321. {
  1322. change
  1323. and reg2,reg1,$xxxxxxFF
  1324. strb reg2,[...]
  1325. dealloc reg2
  1326. to
  1327. strb reg1,[...]
  1328. }
  1329. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1330. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1331. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1332. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1333. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1334. { the reference in strb might not use reg2 }
  1335. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1336. { reg1 might not be modified inbetween }
  1337. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1338. begin
  1339. DebugMsg('Peephole AndStrb2Strb done', p);
  1340. {$ifdef AARCH64}
  1341. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1342. {$else AARCH64}
  1343. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1344. {$endif AARCH64}
  1345. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1346. RemoveCurrentP(p);
  1347. result:=true;
  1348. exit;
  1349. end
  1350. {
  1351. change
  1352. and reg2,reg1,255
  1353. uxtb/uxth reg3,reg2
  1354. dealloc reg2
  1355. to
  1356. and reg3,reg1,x
  1357. }
  1358. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1359. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1360. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1361. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1362. (taicpu(hp1).ops = 2) and
  1363. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1364. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1365. { reg1 might not be modified inbetween }
  1366. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1367. begin
  1368. DebugMsg('Peephole AndUxt2And done', p);
  1369. taicpu(hp1).opcode:=A_AND;
  1370. taicpu(hp1).ops:=3;
  1371. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1372. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1373. GetNextInstruction(p,hp1);
  1374. asml.remove(p);
  1375. p.Free;
  1376. p:=hp1;
  1377. result:=true;
  1378. exit;
  1379. end
  1380. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1381. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1382. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1383. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1384. (taicpu(hp1).ops = 2) and
  1385. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1386. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1387. { reg1 might not be modified inbetween }
  1388. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1389. begin
  1390. DebugMsg('Peephole AndSxt2And done', p);
  1391. taicpu(hp1).opcode:=A_AND;
  1392. taicpu(hp1).ops:=3;
  1393. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1394. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1395. GetNextInstruction(p,hp1);
  1396. asml.remove(p);
  1397. p.Free;
  1398. p:=hp1;
  1399. result:=true;
  1400. exit;
  1401. end
  1402. {
  1403. from
  1404. and reg1,reg0,2^n-1
  1405. mov reg2,reg1, lsl imm1
  1406. (mov reg3,reg2, lsr/asr imm1)
  1407. remove either the and or the lsl/xsr sequence if possible
  1408. }
  1409. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1410. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1411. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1412. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1413. (taicpu(hp1).ops=3) and
  1414. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1415. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1416. {$ifdef ARM}
  1417. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1418. {$endif ARM}
  1419. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1420. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1421. begin
  1422. {
  1423. and reg1,reg0,2^n-1
  1424. mov reg2,reg1, lsl imm1
  1425. mov reg3,reg2, lsr/asr imm1
  1426. =>
  1427. and reg1,reg0,2^n-1
  1428. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1429. }
  1430. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1431. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1432. (taicpu(hp2).ops=3) and
  1433. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1434. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1435. {$ifdef ARM}
  1436. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1437. {$endif ARM}
  1438. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1439. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1440. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1441. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1442. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1443. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1444. begin
  1445. DebugMsg('Peephole AndLslXsr2And done', p);
  1446. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1447. asml.Remove(hp1);
  1448. asml.Remove(hp2);
  1449. hp1.free;
  1450. hp2.free;
  1451. result:=true;
  1452. exit;
  1453. end
  1454. {
  1455. and reg1,reg0,2^n-1
  1456. mov reg2,reg1, lsl imm1
  1457. =>
  1458. mov reg2,reg0, lsl imm1
  1459. if imm1>i
  1460. }
  1461. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1462. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1463. begin
  1464. DebugMsg('Peephole AndLsl2Lsl done', p);
  1465. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1466. GetNextInstruction(p, hp1);
  1467. asml.Remove(p);
  1468. p.free;
  1469. p:=hp1;
  1470. result:=true;
  1471. exit;
  1472. end
  1473. end;
  1474. end;
  1475. {
  1476. change
  1477. and reg1, ...
  1478. mov reg2, reg1
  1479. to
  1480. and reg2, ...
  1481. }
  1482. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1483. (taicpu(p).ops>=3) and
  1484. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1485. Result:=true;
  1486. end;
  1487. end.