aoptarm.pas 64 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. End;
  47. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  48. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  49. {$ifdef AARCH64}
  50. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  51. {$endif AARCH64}
  52. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  53. function RefsEqual(const r1, r2: treference): boolean;
  54. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  55. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  56. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  57. Implementation
  58. uses
  59. cutils,verbose,globals,
  60. systems,
  61. cpuinfo,
  62. cgobj,procinfo,
  63. aasmbase,aasmdata,itcpugas;
  64. {$ifdef DEBUG_AOPTCPU}
  65. const
  66. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  67. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  68. begin
  69. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  70. end;
  71. {$else DEBUG_AOPTCPU}
  72. { Empty strings help the optimizer to remove string concatenations that won't
  73. ever appear to the user on release builds. [Kit] }
  74. const
  75. SPeepholeOptimization = '';
  76. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  77. begin
  78. end;
  79. {$endif DEBUG_AOPTCPU}
  80. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  81. begin
  82. result :=
  83. (instr.typ = ait_instruction) and
  84. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  85. ((cond = []) or (taicpu(instr).condition in cond)) and
  86. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  87. end;
  88. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  89. begin
  90. result :=
  91. (instr.typ = ait_instruction) and
  92. (taicpu(instr).opcode = op) and
  93. ((cond = []) or (taicpu(instr).condition in cond)) and
  94. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  95. end;
  96. {$ifdef AARCH64}
  97. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  98. var
  99. op : TAsmOp;
  100. begin
  101. result:=false;
  102. if instr.typ <> ait_instruction then
  103. exit;
  104. for op in ops do
  105. begin
  106. if (taicpu(instr).opcode = op) and
  107. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  108. begin
  109. result:=true;
  110. exit;
  111. end;
  112. end;
  113. end;
  114. {$endif AARCH64}
  115. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  116. begin
  117. result :=
  118. (instr.typ = ait_instruction) and
  119. (taicpu(instr).opcode = op) and
  120. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  121. end;
  122. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  123. begin
  124. result := (oper.typ = top_reg) and (oper.reg = reg);
  125. end;
  126. function RefsEqual(const r1, r2: treference): boolean;
  127. begin
  128. refsequal :=
  129. (r1.offset = r2.offset) and
  130. (r1.base = r2.base) and
  131. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  132. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  133. (r1.relsymbol = r2.relsymbol) and
  134. {$ifdef ARM}
  135. (r1.signindex = r2.signindex) and
  136. {$endif ARM}
  137. (r1.shiftimm = r2.shiftimm) and
  138. (r1.addressmode = r2.addressmode) and
  139. (r1.shiftmode = r2.shiftmode) and
  140. (r1.volatility=[]) and
  141. (r2.volatility=[]);
  142. end;
  143. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  144. begin
  145. result := oper1.typ = oper2.typ;
  146. if result then
  147. case oper1.typ of
  148. top_const:
  149. Result:=oper1.val = oper2.val;
  150. top_reg:
  151. Result:=oper1.reg = oper2.reg;
  152. top_conditioncode:
  153. Result:=oper1.cc = oper2.cc;
  154. top_realconst:
  155. Result:=oper1.val_real = oper2.val_real;
  156. top_ref:
  157. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  158. else Result:=false;
  159. end
  160. end;
  161. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  162. begin
  163. result := (oper.typ = top_const) and (oper.val = a);
  164. end;
  165. {$ifdef AARCH64}
  166. function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
  167. var
  168. so: tshifterop;
  169. opoffset: Integer;
  170. begin
  171. Result:=false;
  172. if (taicpu(p).ops=2) and
  173. ((MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  174. (taicpu(hp1).ops=3) and
  175. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  176. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  177. (MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
  178. (taicpu(hp1).ops=2) and
  179. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))
  180. ) and
  181. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  182. { reg1 might not be modified inbetween }
  183. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  184. begin
  185. DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
  186. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  187. if MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) then
  188. opoffset:=0
  189. else
  190. opoffset:=1;
  191. taicpu(hp1).loadReg(opoffset+1,taicpu(p).oper[1]^.reg);
  192. taicpu(hp1).ops:=opoffset+3;
  193. shifterop_reset(so);
  194. so.shiftmode:=shiftmode;
  195. so.shiftimm:=0;
  196. taicpu(hp1).loadshifterop(opoffset+2,so);
  197. result:=RemoveCurrentP(p);
  198. end;
  199. end;
  200. {$endif AARCH64}
  201. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  202. Out Next: tai; const reg: TRegister): Boolean;
  203. var
  204. gniResult: Boolean;
  205. begin
  206. Next:=Current;
  207. Result := False;
  208. repeat
  209. gniResult:=GetNextInstruction(Next,Next);
  210. if gniResult and RegInInstruction(reg,Next) then
  211. { Found something }
  212. Exit(True);
  213. until not gniResult or
  214. not(cs_opt_level3 in current_settings.optimizerswitches) or
  215. (Next.typ<>ait_instruction) or
  216. is_calljmp(taicpu(Next).opcode)
  217. {$ifdef ARM}
  218. or RegModifiedByInstruction(NR_PC,Next)
  219. {$endif ARM}
  220. ;
  221. end;
  222. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  223. var
  224. alloc,
  225. dealloc : tai_regalloc;
  226. hp1 : tai;
  227. begin
  228. Result:=false;
  229. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  230. { We can't optimize if there is a shiftop }
  231. (taicpu(movp).ops=2) and
  232. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  233. { don't mess with moves to fp }
  234. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  235. { the destination register of the mov might not be used beween p and movp }
  236. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  237. {$ifdef ARM}
  238. { PC should be changed only by moves }
  239. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  240. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  241. (taicpu(p).opcode<>A_CBZ) and
  242. (taicpu(p).opcode<>A_CBNZ) and
  243. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  244. not (
  245. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  246. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  247. (current_settings.cputype < cpu_armv6)
  248. ) and
  249. {$endif ARM}
  250. { Take care to only do this for instructions which REALLY load to the first register.
  251. Otherwise
  252. str reg0, [reg1]
  253. mov reg2, reg0
  254. will be optimized to
  255. str reg2, [reg1]
  256. }
  257. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  258. begin
  259. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  260. if assigned(dealloc) then
  261. begin
  262. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  263. result:=true;
  264. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  265. and remove it if possible }
  266. asml.Remove(dealloc);
  267. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  268. if assigned(alloc) then
  269. begin
  270. asml.Remove(alloc);
  271. alloc.free;
  272. dealloc.free;
  273. end
  274. else
  275. asml.InsertAfter(dealloc,p);
  276. AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);
  277. { finally get rid of the mov }
  278. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  279. { Remove preindexing and postindexing for LDR in some cases.
  280. For example:
  281. ldr reg2,[reg1, xxx]!
  282. mov reg1,reg2
  283. must be translated to:
  284. ldr reg1,[reg1, xxx]
  285. Preindexing must be removed there, since the same register is used as the base and as the target.
  286. Such case is not allowed for ARM CPU and produces crash. }
  287. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  288. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  289. then
  290. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  291. asml.remove(movp);
  292. movp.free;
  293. end;
  294. end;
  295. end;
  296. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  297. var
  298. I: Integer;
  299. current_hp, next_hp: tai;
  300. LDRChange: Boolean;
  301. begin
  302. Result:=false;
  303. {
  304. change
  305. mov r1, r0
  306. add r1, r1, #1
  307. to
  308. add r1, r0, #1
  309. Todo: Make it work for mov+cmp too
  310. CAUTION! If this one is successful p might not be a mov instruction anymore!
  311. }
  312. if (taicpu(p).ops = 2) and
  313. (taicpu(p).oper[1]^.typ = top_reg) and
  314. (taicpu(p).oppostfix = PF_NONE) then
  315. begin
  316. if
  317. MatchInstruction(hp1, [A_ADD, A_ADC,
  318. {$ifdef ARM}
  319. A_RSB, A_RSC,
  320. {$endif ARM}
  321. A_SUB, A_SBC,
  322. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  323. [taicpu(p).condition], []) and
  324. { MOV and MVN might only have 2 ops }
  325. (taicpu(hp1).ops >= 2) and
  326. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  327. (taicpu(hp1).oper[1]^.typ = top_reg) and
  328. (
  329. (taicpu(hp1).ops = 2) or
  330. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  331. ) and
  332. {$ifdef AARCH64}
  333. (taicpu(p).oper[1]^.reg<>NR_SP) and
  334. { in this case you have to transform it to movk or the like }
  335. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  336. {$endif AARCH64}
  337. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  338. begin
  339. { When we get here we still don't know if the registers match }
  340. for I:=1 to 2 do
  341. {
  342. If the first loop was successful p will be replaced with hp1.
  343. The checks will still be ok, because all required information
  344. will also be in hp1 then.
  345. }
  346. if (taicpu(hp1).ops > I) and
  347. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  348. {$ifdef ARM}
  349. { prevent certain combinations on thumb(2), this is only a safe approximation }
  350. and (not(GenerateThumbCode or GenerateThumb2Code) or
  351. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  352. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  353. {$endif ARM}
  354. then
  355. begin
  356. DebugMsg('Peephole RedundantMovProcess done', hp1);
  357. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  358. if p<>hp1 then
  359. begin
  360. asml.remove(p);
  361. p.free;
  362. p:=hp1;
  363. Result:=true;
  364. end;
  365. end;
  366. if Result then Exit;
  367. end
  368. { Change: Change:
  369. mov r1, r0 mov r1, r0
  370. ... ...
  371. ldr/str r2, [r1, etc.] mov r2, r1
  372. To: To:
  373. ldr/str r2, [r0, etc.] mov r2, r0
  374. }
  375. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  376. {$ifdef ARM}
  377. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  378. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  379. { Thumb does not support references with base and index one being SP }
  380. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  381. {$endif ARM}
  382. {$ifdef AARCH64}
  383. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  384. {$endif AARCH64}
  385. then
  386. begin
  387. current_hp := p;
  388. TransferUsedRegs(TmpUsedRegs);
  389. { Search local instruction block }
  390. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  391. begin
  392. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  393. LDRChange := False;
  394. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  395. {$ifdef AARCH64}
  396. { If r0 is the zero register, then this sequence of instructions will cause
  397. an access violation, but that's better than an assembler error caused by
  398. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  399. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  400. {$endif AARCH64}
  401. then
  402. begin
  403. { Change the registers from r1 to r0 }
  404. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  405. {$ifdef ARM}
  406. { This optimisation conflicts with something and raises
  407. an access violation - needs further investigation. [Kit] }
  408. (taicpu(next_hp).opcode <> A_LDR) and
  409. {$endif ARM}
  410. { Don't mess around with the base register if the
  411. reference is pre- or post-indexed }
  412. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  413. begin
  414. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  415. LDRChange := True;
  416. end;
  417. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  418. begin
  419. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  420. LDRChange := True;
  421. end;
  422. if LDRChange then
  423. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  424. { Drop out if we're dealing with pre-indexed references }
  425. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  426. (
  427. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  428. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  429. ) then
  430. begin
  431. { Remember to update register allocations }
  432. if LDRChange then
  433. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  434. Break;
  435. end;
  436. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  437. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  438. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  439. begin
  440. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  441. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  442. LDRChange := True;
  443. end;
  444. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  445. begin
  446. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  447. if (taicpu(p).oppostfix = PF_None) and
  448. (
  449. (
  450. (taicpu(next_hp).opcode = A_LDR) and
  451. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  452. ) or
  453. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  454. ) and
  455. { Double-check to see if the old registers were actually
  456. changed (e.g. if the super registers matched, but not
  457. the sizes, they won't be changed). }
  458. (
  459. (taicpu(next_hp).opcode = A_LDR) or
  460. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  461. ) and
  462. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  463. begin
  464. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  465. RemoveCurrentP(p);
  466. Result := True;
  467. Exit;
  468. end;
  469. end;
  470. end
  471. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  472. (taicpu(next_hp).ops = 2) then
  473. begin
  474. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  475. begin
  476. { Found another mov that writes entirely to the register }
  477. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  478. begin
  479. { Register was used beforehand }
  480. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  481. begin
  482. { This MOV is exactly the same as the first one.
  483. Since none of the registers have changed value
  484. at this point, we can remove it. }
  485. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  486. if (next_hp = hp1) then
  487. { Don't let hp1 become a dangling pointer }
  488. hp1 := nil;
  489. asml.Remove(next_hp);
  490. next_hp.Free;
  491. { We still have the original p, so we can continue optimising;
  492. if it was -O2 or below, this instruction appeared immediately
  493. after the first MOV, so we're technically not looking more
  494. than one instruction ahead after it's removed! [Kit] }
  495. Continue;
  496. end
  497. else
  498. { Register changes value - drop out }
  499. Break;
  500. end;
  501. { We can delete the first MOV (only if the second MOV is unconditional) }
  502. {$ifdef ARM}
  503. if (taicpu(p).oppostfix = PF_None) and
  504. (taicpu(next_hp).condition = C_None) then
  505. {$endif ARM}
  506. begin
  507. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  508. RemoveCurrentP(p);
  509. Result := True;
  510. end;
  511. Exit;
  512. end
  513. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  514. begin
  515. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  516. { Be careful - if the entire register is not used, removing this
  517. instruction will leave the unused part uninitialised }
  518. {$ifdef AARCH64}
  519. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  520. {$endif AARCH64}
  521. then
  522. begin
  523. { Instruction will become mov r1,r1 }
  524. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  525. { Allocate r1 between the instructions; not doing
  526. so may cause problems when removing superfluous
  527. MOVs later (i38055) }
  528. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  529. if (next_hp = hp1) then
  530. { Don't let hp1 become a dangling pointer }
  531. hp1 := nil;
  532. asml.Remove(next_hp);
  533. next_hp.Free;
  534. Continue;
  535. end;
  536. { Change the old register (checking the first operand again
  537. forces it to be left alone if the full register is not
  538. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  539. {$ifdef AARCH64}
  540. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  541. {$endif AARCH64}
  542. begin
  543. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  544. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  545. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  546. { If this was the only reference to the old register,
  547. then we can remove the original MOV now }
  548. if (taicpu(p).oppostfix = PF_None) and
  549. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  550. remove if the register was apparently not allocated when its value is
  551. first set at the MOV command (this is especially true for the stack
  552. register). [Kit] }
  553. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  554. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  555. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  556. begin
  557. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  558. RemoveCurrentP(p);
  559. Result := True;
  560. Exit;
  561. end;
  562. end;
  563. end;
  564. end;
  565. { On low optimisation settions, don't search more than one instruction ahead }
  566. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  567. { Stop at procedure calls and jumps }
  568. is_calljmp(taicpu(next_hp).opcode) or
  569. { If the read register has changed value, or the MOV
  570. destination register has been used, drop out }
  571. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  572. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  573. Break;
  574. current_hp := next_hp;
  575. end;
  576. end;
  577. end;
  578. end;
  579. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  580. var
  581. hp1, hp2: tai;
  582. so: tshifterop;
  583. begin
  584. Result:=false;
  585. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  586. begin
  587. {
  588. change
  589. uxtb reg2,reg1
  590. strb reg2,[...]
  591. dealloc reg2
  592. to
  593. strb reg1,[...]
  594. }
  595. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  596. (taicpu(p).ops=2) and
  597. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  598. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  599. { the reference in strb might not use reg2 }
  600. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  601. { reg1 might not be modified inbetween }
  602. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  603. begin
  604. DebugMsg('Peephole UxtbStrb2Strb done', p);
  605. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  606. result:=RemoveCurrentP(p);
  607. end
  608. {
  609. change
  610. uxtb reg2,reg1
  611. uxth reg3,reg2
  612. dealloc reg2
  613. to
  614. uxtb reg3,reg1
  615. }
  616. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  617. (taicpu(p).ops=2) and
  618. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  619. (taicpu(hp1).ops = 2) and
  620. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  621. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  622. { reg1 might not be modified inbetween }
  623. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  624. begin
  625. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  626. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  627. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  628. asml.remove(hp1);
  629. hp1.free;
  630. result:=true;
  631. end
  632. {
  633. change
  634. uxtb reg2,reg1
  635. uxtb reg3,reg2
  636. dealloc reg2
  637. to
  638. uxtb reg3,reg1
  639. }
  640. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  641. (taicpu(p).ops=2) and
  642. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  643. (taicpu(hp1).ops = 2) and
  644. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  645. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  646. { reg1 might not be modified inbetween }
  647. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  648. begin
  649. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  650. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  651. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  652. asml.remove(hp1);
  653. hp1.free;
  654. result:=true;
  655. end
  656. {
  657. change
  658. uxtb reg2,reg1
  659. and reg3,reg2,#0x*FF
  660. dealloc reg2
  661. to
  662. uxtb reg3,reg1
  663. }
  664. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  665. (taicpu(p).ops=2) and
  666. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  667. (taicpu(hp1).ops=3) and
  668. (taicpu(hp1).oper[2]^.typ=top_const) and
  669. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  670. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  671. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  672. { reg1 might not be modified inbetween }
  673. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  674. begin
  675. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  676. taicpu(hp1).opcode:=A_UXTB;
  677. taicpu(hp1).ops:=2;
  678. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  679. result:=RemoveCurrentP(p);
  680. end
  681. {$ifdef AARCH64}
  682. else if USxtOp2Op(p,hp1,SM_UXTB) then
  683. Result:=true
  684. {$endif AARCH64}
  685. else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  686. Result:=true;
  687. end;
  688. end;
  689. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  690. var
  691. hp1: tai;
  692. so: tshifterop;
  693. begin
  694. Result:=false;
  695. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  696. begin
  697. {
  698. change
  699. uxth reg2,reg1
  700. strh reg2,[...]
  701. dealloc reg2
  702. to
  703. strh reg1,[...]
  704. }
  705. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  706. (taicpu(p).ops=2) and
  707. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  708. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  709. { the reference in strb might not use reg2 }
  710. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  711. { reg1 might not be modified inbetween }
  712. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  713. begin
  714. DebugMsg('Peephole UXTHStrh2Strh done', p);
  715. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  716. result:=RemoveCurrentP(p);
  717. end
  718. {
  719. change
  720. uxth reg2,reg1
  721. uxth reg3,reg2
  722. dealloc reg2
  723. to
  724. uxth reg3,reg1
  725. }
  726. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  727. (taicpu(p).ops=2) and
  728. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  729. (taicpu(hp1).ops=2) and
  730. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  731. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  732. { reg1 might not be modified inbetween }
  733. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  734. begin
  735. DebugMsg('Peephole UxthUxth2Uxth done', p);
  736. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  737. taicpu(hp1).opcode:=A_UXTH;
  738. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  739. result:=RemoveCurrentP(p);
  740. end
  741. {
  742. change
  743. uxth reg2,reg1
  744. and reg3,reg2,#65535
  745. dealloc reg2
  746. to
  747. uxth reg3,reg1
  748. }
  749. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  750. (taicpu(p).ops=2) and
  751. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  752. (taicpu(hp1).ops=3) and
  753. (taicpu(hp1).oper[2]^.typ=top_const) and
  754. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  755. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  756. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  757. { reg1 might not be modified inbetween }
  758. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  759. begin
  760. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  761. taicpu(hp1).opcode:=A_UXTH;
  762. taicpu(hp1).ops:=2;
  763. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  764. result:=RemoveCurrentP(p);
  765. end
  766. {$ifdef AARCH64}
  767. else if USxtOp2Op(p,hp1,SM_UXTH) then
  768. Result:=true
  769. {$endif AARCH64}
  770. else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  771. Result:=true;
  772. end;
  773. end;
  774. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  775. var
  776. hp1, hp2: tai;
  777. so: tshifterop;
  778. begin
  779. Result:=false;
  780. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  781. begin
  782. {
  783. change
  784. sxtb reg2,reg1
  785. strb reg2,[...]
  786. dealloc reg2
  787. to
  788. strb reg1,[...]
  789. }
  790. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  791. (taicpu(p).ops=2) and
  792. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  793. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  794. { the reference in strb might not use reg2 }
  795. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  796. { reg1 might not be modified inbetween }
  797. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  798. begin
  799. DebugMsg('Peephole SxtbStrb2Strb done', p);
  800. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  801. result:=RemoveCurrentP(p);
  802. end
  803. {
  804. change
  805. sxtb reg2,reg1
  806. sxth reg3,reg2
  807. dealloc reg2
  808. to
  809. sxtb reg3,reg1
  810. }
  811. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  812. (taicpu(p).ops=2) and
  813. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  814. (taicpu(hp1).ops = 2) and
  815. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  816. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  817. { reg1 might not be modified inbetween }
  818. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  819. begin
  820. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  821. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  822. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  823. asml.remove(hp1);
  824. hp1.free;
  825. result:=true;
  826. end
  827. {
  828. change
  829. sxtb reg2,reg1
  830. sxtb reg3,reg2
  831. dealloc reg2
  832. to
  833. uxtb reg3,reg1
  834. }
  835. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  836. (taicpu(p).ops=2) and
  837. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  838. (taicpu(hp1).ops = 2) and
  839. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  840. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  841. { reg1 might not be modified inbetween }
  842. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  843. begin
  844. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  845. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  846. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  847. asml.remove(hp1);
  848. hp1.free;
  849. result:=true;
  850. end
  851. {
  852. change
  853. sxtb reg2,reg1
  854. and reg3,reg2,#0x*FF
  855. dealloc reg2
  856. to
  857. uxtb reg3,reg1
  858. }
  859. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  860. (taicpu(p).ops=2) and
  861. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  862. (taicpu(hp1).ops=3) and
  863. (taicpu(hp1).oper[2]^.typ=top_const) and
  864. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  865. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  866. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  867. { reg1 might not be modified inbetween }
  868. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  869. begin
  870. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  871. taicpu(hp1).opcode:=A_UXTB;
  872. taicpu(hp1).ops:=2;
  873. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  874. result:=RemoveCurrentP(p);
  875. end
  876. {$ifdef AARCH64}
  877. else if USxtOp2Op(p,hp1,SM_SXTB) then
  878. Result:=true
  879. {$endif AARCH64}
  880. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  881. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  882. Result:=true;
  883. end;
  884. end;
  885. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  886. var
  887. hp1: tai;
  888. so: tshifterop;
  889. begin
  890. Result:=false;
  891. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  892. begin
  893. {
  894. change
  895. sxth reg2,reg1
  896. strh reg2,[...]
  897. dealloc reg2
  898. to
  899. strh reg1,[...]
  900. }
  901. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  902. (taicpu(p).ops=2) and
  903. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  904. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  905. { the reference in strb might not use reg2 }
  906. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  907. { reg1 might not be modified inbetween }
  908. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  909. begin
  910. DebugMsg('Peephole SxthStrh2Strh done', p);
  911. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  912. result:=RemoveCurrentP(p);
  913. end
  914. {
  915. change
  916. sxth reg2,reg1
  917. sxth reg3,reg2
  918. dealloc reg2
  919. to
  920. sxth reg3,reg1
  921. }
  922. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  923. (taicpu(p).ops=2) and
  924. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  925. (taicpu(hp1).ops=2) and
  926. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  927. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  928. { reg1 might not be modified inbetween }
  929. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  930. begin
  931. DebugMsg('Peephole SxthSxth2Sxth done', p);
  932. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  933. taicpu(hp1).opcode:=A_SXTH;
  934. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  935. result:=RemoveCurrentP(p);
  936. end
  937. {$ifdef AARCH64}
  938. {
  939. change
  940. sxth reg2,reg1
  941. sxtw reg3,reg2
  942. dealloc reg2
  943. to
  944. sxth reg3,reg1
  945. }
  946. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  947. (taicpu(p).ops=2) and
  948. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  949. (taicpu(hp1).ops=2) and
  950. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  951. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  952. { reg1 might not be modified inbetween }
  953. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  954. begin
  955. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  956. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  957. taicpu(hp1).opcode:=A_SXTH;
  958. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  959. result:=RemoveCurrentP(p);
  960. end
  961. {$endif AARCH64}
  962. {
  963. change
  964. sxth reg2,reg1
  965. and reg3,reg2,#65535
  966. dealloc reg2
  967. to
  968. uxth reg3,reg1
  969. }
  970. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  971. (taicpu(p).ops=2) and
  972. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  973. (taicpu(hp1).ops=3) and
  974. (taicpu(hp1).oper[2]^.typ=top_const) and
  975. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  976. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  977. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  978. { reg1 might not be modified inbetween }
  979. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  980. begin
  981. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  982. taicpu(hp1).opcode:=A_UXTH;
  983. taicpu(hp1).ops:=2;
  984. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  985. result:=RemoveCurrentP(p);
  986. end
  987. {$ifdef AARCH64}
  988. else if USxtOp2Op(p,hp1,SM_SXTH) then
  989. Result:=true
  990. {$endif AARCH64}
  991. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  992. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  993. Result:=true;
  994. end;
  995. end;
  996. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  997. begin
  998. Result := False;
  999. { Convert:
  1000. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1001. To:
  1002. mov reg1,reg2
  1003. }
  1004. if (taicpu(p).oper[2]^.val = 0) and
  1005. {$ifdef AARCH64}
  1006. (
  1007. (
  1008. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1009. (taicpu(p).oper[3]^.val = 64)
  1010. ) or
  1011. (
  1012. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1013. (taicpu(p).oper[3]^.val = 32)
  1014. )
  1015. )
  1016. {$else AARCH64}
  1017. (taicpu(p).oper[3]^.val = 32)
  1018. {$endif AARCH64}
  1019. then
  1020. begin
  1021. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1022. taicpu(p).opcode := A_MOV;
  1023. taicpu(p).ops := 2;
  1024. taicpu(p).clearop(2);
  1025. taicpu(p).clearop(3);
  1026. Result := True;
  1027. Exit;
  1028. end;
  1029. end;
  1030. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1031. var
  1032. hp1: tai;
  1033. Reference: TReference;
  1034. NewOp: TAsmOp;
  1035. begin
  1036. Result := False;
  1037. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1038. Exit;
  1039. Reference := taicpu(p).oper[1]^.ref^;
  1040. if (Reference.addressmode = AM_OFFSET) and
  1041. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1042. { Delay calling GetNextInstruction for as long as possible }
  1043. GetNextInstruction(p, hp1) and
  1044. (hp1.typ = ait_instruction) and
  1045. (taicpu(hp1).condition = C_None) and
  1046. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1047. begin
  1048. if (taicpu(hp1).opcode = A_STR) and
  1049. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1050. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1051. begin
  1052. { With:
  1053. ldr reg1,[ref]
  1054. str reg2,[ref]
  1055. If reg1 = reg2, Remove str
  1056. }
  1057. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1058. begin
  1059. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1060. RemoveInstruction(hp1);
  1061. Result := True;
  1062. Exit;
  1063. end;
  1064. end
  1065. else if (taicpu(hp1).opcode = A_LDR) and
  1066. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1067. begin
  1068. { With:
  1069. ldr reg1,[ref]
  1070. ldr reg2,[ref]
  1071. If reg1 = reg2, delete the second ldr
  1072. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1073. a dependency, but it will likely open up new optimisations, so
  1074. do it for now and handle any new dependencies later.
  1075. }
  1076. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1077. begin
  1078. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1079. RemoveInstruction(hp1);
  1080. Result := True;
  1081. Exit;
  1082. end
  1083. else if
  1084. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1085. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1086. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1087. begin
  1088. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1089. taicpu(hp1).opcode := A_MOV;
  1090. taicpu(hp1).oppostfix := PF_None;
  1091. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1092. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1093. Result := True;
  1094. Exit;
  1095. end;
  1096. end;
  1097. end;
  1098. end;
  1099. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1100. var
  1101. hp1: tai;
  1102. Reference: TReference;
  1103. SizeMismatch: Boolean;
  1104. SrcReg, DstReg: TRegister;
  1105. NewOp: TAsmOp;
  1106. begin
  1107. Result := False;
  1108. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1109. Exit;
  1110. Reference := taicpu(p).oper[1]^.ref^;
  1111. if (Reference.addressmode = AM_OFFSET) and
  1112. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1113. { Delay calling GetNextInstruction for as long as possible }
  1114. GetNextInstruction(p, hp1) and
  1115. (hp1.typ = ait_instruction) and
  1116. (taicpu(hp1).condition = C_None) and
  1117. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1118. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1119. begin
  1120. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1121. SrcReg := taicpu(p).oper[0]^.reg;
  1122. DstReg := taicpu(hp1).oper[0]^.reg;
  1123. if (taicpu(hp1).opcode = A_LDR) and
  1124. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1125. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1126. (
  1127. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1128. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1129. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1130. {$ifdef AARCH64}
  1131. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1132. {$endif AARCH64}
  1133. ) then
  1134. begin
  1135. { With:
  1136. str reg1,[ref]
  1137. ldr reg2,[ref]
  1138. If reg1 = reg2, Remove ldr.
  1139. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1140. }
  1141. if (SrcReg = DstReg) and
  1142. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1143. (taicpu(p).oppostfix=PF_None) then
  1144. begin
  1145. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1146. RemoveInstruction(hp1);
  1147. Result := True;
  1148. Exit;
  1149. end
  1150. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1151. (getregtype(DstReg) = R_INTREGISTER) and
  1152. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1153. begin
  1154. NewOp:=A_NONE;
  1155. if taicpu(hp1).oppostfix=PF_None then
  1156. NewOp:=A_MOV
  1157. else
  1158. {$ifdef ARM}
  1159. if (current_settings.cputype < cpu_armv6) then
  1160. begin
  1161. { The zero- and sign-extension operations were only
  1162. introduced under ARMv6 }
  1163. case taicpu(hp1).oppostfix of
  1164. PF_B:
  1165. begin
  1166. { The if-block afterwards will set the middle operand to the correct register }
  1167. taicpu(hp1).allocate_oper(3);
  1168. taicpu(hp1).ops := 3;
  1169. taicpu(hp1).loadconst(2, $FF);
  1170. NewOp := A_AND;
  1171. end;
  1172. PF_H:
  1173. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1174. PF_SB,
  1175. PF_SH:
  1176. { Do nothing - can't easily encode sign-extensions };
  1177. else
  1178. InternalError(2021043002);
  1179. end;
  1180. end
  1181. else
  1182. {$endif ARM}
  1183. case taicpu(hp1).oppostfix of
  1184. PF_B:
  1185. NewOp := A_UXTB;
  1186. PF_SB:
  1187. NewOp := A_SXTB;
  1188. PF_H:
  1189. NewOp := A_UXTH;
  1190. PF_SH:
  1191. NewOp := A_SXTH;
  1192. {$ifdef AARCH64}
  1193. PF_SW:
  1194. NewOp := A_SXTW;
  1195. PF_W:
  1196. NewOp := A_MOV;
  1197. {$endif AARCH64}
  1198. else
  1199. InternalError(2021043001);
  1200. end;
  1201. if (NewOp<>A_None) then
  1202. begin
  1203. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1204. taicpu(hp1).oppostfix := PF_None;
  1205. taicpu(hp1).opcode := NewOp;
  1206. taicpu(hp1).loadreg(1, SrcReg);
  1207. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1208. Result := True;
  1209. Exit;
  1210. end;
  1211. end
  1212. end
  1213. else if (taicpu(hp1).opcode = A_STR) and
  1214. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1215. begin
  1216. { With:
  1217. str reg1,[ref]
  1218. str reg2,[ref]
  1219. If reg1 <> reg2, delete the first str
  1220. IF reg1 = reg2, delete the second str
  1221. }
  1222. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1223. begin
  1224. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1225. RemoveInstruction(hp1);
  1226. Result := True;
  1227. Exit;
  1228. end
  1229. else if
  1230. { Registers same byte size? }
  1231. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1232. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1233. begin
  1234. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1235. RemoveCurrentP(p, hp1);
  1236. Result := True;
  1237. Exit;
  1238. end;
  1239. end;
  1240. end;
  1241. end;
  1242. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1243. var
  1244. hp1, hp2: tai;
  1245. i: longint;
  1246. begin
  1247. Result:=false;
  1248. {
  1249. optimize
  1250. and reg2,reg1,const1
  1251. ...
  1252. }
  1253. if (taicpu(p).ops>2) and
  1254. (taicpu(p).oper[1]^.typ = top_reg) and
  1255. (taicpu(p).oper[2]^.typ = top_const) then
  1256. begin
  1257. {
  1258. change
  1259. and reg2,reg1,const1
  1260. ...
  1261. and reg3,reg2,const2
  1262. to
  1263. and reg3,reg1,(const1 and const2)
  1264. }
  1265. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1266. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1267. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1268. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1269. (taicpu(hp1).oper[2]^.typ = top_const)
  1270. {$ifdef AARCH64}
  1271. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1272. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1273. ) or
  1274. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1275. {$endif AARCH64}
  1276. then
  1277. begin
  1278. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1279. begin
  1280. DebugMsg('Peephole AndAnd2And done', p);
  1281. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1282. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1283. begin
  1284. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1285. taicpu(p).opcode:=A_MOV;
  1286. taicpu(p).ops:=2;
  1287. taicpu(p).loadConst(1,0);
  1288. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1289. end
  1290. else
  1291. begin
  1292. DebugMsg('Peephole AndAnd2And 1 done', p);
  1293. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1294. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1295. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1296. end;
  1297. asml.remove(hp1);
  1298. hp1.free;
  1299. Result:=true;
  1300. exit;
  1301. end
  1302. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1303. begin
  1304. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1305. begin
  1306. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1307. taicpu(hp1).opcode:=A_MOV;
  1308. taicpu(hp1).loadConst(1,0);
  1309. taicpu(hp1).ops:=2;
  1310. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1311. end
  1312. else
  1313. begin
  1314. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1315. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1316. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1317. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1318. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1319. end;
  1320. GetNextInstruction(p, hp1);
  1321. RemoveCurrentP(p);
  1322. p:=hp1;
  1323. Result:=true;
  1324. exit;
  1325. end;
  1326. end
  1327. {
  1328. change
  1329. and reg2,reg1,$xxxxxxFF
  1330. strb reg2,[...]
  1331. dealloc reg2
  1332. to
  1333. strb reg1,[...]
  1334. }
  1335. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1336. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1337. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1338. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1339. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1340. { the reference in strb might not use reg2 }
  1341. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1342. { reg1 might not be modified inbetween }
  1343. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1344. begin
  1345. DebugMsg('Peephole AndStrb2Strb done', p);
  1346. {$ifdef AARCH64}
  1347. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1348. {$else AARCH64}
  1349. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1350. {$endif AARCH64}
  1351. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1352. RemoveCurrentP(p);
  1353. result:=true;
  1354. exit;
  1355. end
  1356. {
  1357. change
  1358. and reg2,reg1,255
  1359. uxtb/uxth reg3,reg2
  1360. dealloc reg2
  1361. to
  1362. and reg3,reg1,x
  1363. }
  1364. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1365. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1366. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1367. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1368. (taicpu(hp1).ops = 2) and
  1369. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1370. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1371. { reg1 might not be modified inbetween }
  1372. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1373. begin
  1374. DebugMsg('Peephole AndUxt2And done', p);
  1375. taicpu(hp1).opcode:=A_AND;
  1376. taicpu(hp1).ops:=3;
  1377. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1378. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1379. GetNextInstruction(p,hp1);
  1380. asml.remove(p);
  1381. p.Free;
  1382. p:=hp1;
  1383. result:=true;
  1384. exit;
  1385. end
  1386. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1387. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1388. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1389. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1390. (taicpu(hp1).ops = 2) and
  1391. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1392. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1393. { reg1 might not be modified inbetween }
  1394. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1395. begin
  1396. DebugMsg('Peephole AndSxt2And done', p);
  1397. taicpu(hp1).opcode:=A_AND;
  1398. taicpu(hp1).ops:=3;
  1399. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1400. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1401. GetNextInstruction(p,hp1);
  1402. asml.remove(p);
  1403. p.Free;
  1404. p:=hp1;
  1405. result:=true;
  1406. exit;
  1407. end
  1408. {
  1409. from
  1410. and reg1,reg0,2^n-1
  1411. mov reg2,reg1, lsl imm1
  1412. (mov reg3,reg2, lsr/asr imm1)
  1413. remove either the and or the lsl/xsr sequence if possible
  1414. }
  1415. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1416. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1417. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1418. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1419. (taicpu(hp1).ops=3) and
  1420. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1421. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1422. {$ifdef ARM}
  1423. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1424. {$endif ARM}
  1425. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1426. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1427. begin
  1428. {
  1429. and reg1,reg0,2^n-1
  1430. mov reg2,reg1, lsl imm1
  1431. mov reg3,reg2, lsr/asr imm1
  1432. =>
  1433. and reg1,reg0,2^n-1
  1434. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1435. }
  1436. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1437. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1438. (taicpu(hp2).ops=3) and
  1439. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1440. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1441. {$ifdef ARM}
  1442. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1443. {$endif ARM}
  1444. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1445. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1446. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1447. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1448. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1449. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1450. begin
  1451. DebugMsg('Peephole AndLslXsr2And done', p);
  1452. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1453. asml.Remove(hp1);
  1454. asml.Remove(hp2);
  1455. hp1.free;
  1456. hp2.free;
  1457. result:=true;
  1458. exit;
  1459. end
  1460. {
  1461. and reg1,reg0,2^n-1
  1462. mov reg2,reg1, lsl imm1
  1463. =>
  1464. mov reg2,reg0, lsl imm1
  1465. if imm1>i
  1466. }
  1467. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1468. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1469. begin
  1470. DebugMsg('Peephole AndLsl2Lsl done', p);
  1471. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1472. GetNextInstruction(p, hp1);
  1473. asml.Remove(p);
  1474. p.free;
  1475. p:=hp1;
  1476. result:=true;
  1477. exit;
  1478. end
  1479. end;
  1480. end;
  1481. {
  1482. change
  1483. and reg1, ...
  1484. mov reg2, reg1
  1485. to
  1486. and reg2, ...
  1487. }
  1488. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1489. (taicpu(p).ops>=3) and
  1490. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1491. Result:=true;
  1492. end;
  1493. end.