aoptarm.pas 74 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. function OptPass2Bitwise(var p: tai): Boolean;
  47. function OptPass2TST(var p: tai): Boolean;
  48. End;
  49. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  50. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  51. {$ifdef AARCH64}
  52. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  53. {$endif AARCH64}
  54. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  55. function RefsEqual(const r1, r2: treference): boolean;
  56. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  57. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  58. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  59. Implementation
  60. uses
  61. cutils,verbose,globals,
  62. systems,
  63. cpuinfo,
  64. cgobj,procinfo,
  65. aasmbase,aasmdata,itcpugas;
  66. {$ifdef DEBUG_AOPTCPU}
  67. const
  68. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  69. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  70. begin
  71. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  72. end;
  73. {$else DEBUG_AOPTCPU}
  74. { Empty strings help the optimizer to remove string concatenations that won't
  75. ever appear to the user on release builds. [Kit] }
  76. const
  77. SPeepholeOptimization = '';
  78. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  79. begin
  80. end;
  81. {$endif DEBUG_AOPTCPU}
  82. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  83. begin
  84. result :=
  85. (instr.typ = ait_instruction) and
  86. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  87. ((cond = []) or (taicpu(instr).condition in cond)) and
  88. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  89. end;
  90. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  91. begin
  92. result :=
  93. (instr.typ = ait_instruction) and
  94. (taicpu(instr).opcode = op) and
  95. ((cond = []) or (taicpu(instr).condition in cond)) and
  96. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  97. end;
  98. {$ifdef AARCH64}
  99. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  100. var
  101. op : TAsmOp;
  102. begin
  103. result:=false;
  104. if instr.typ <> ait_instruction then
  105. exit;
  106. for op in ops do
  107. begin
  108. if (taicpu(instr).opcode = op) and
  109. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  110. begin
  111. result:=true;
  112. exit;
  113. end;
  114. end;
  115. end;
  116. {$endif AARCH64}
  117. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  118. begin
  119. result :=
  120. (instr.typ = ait_instruction) and
  121. (taicpu(instr).opcode = op) and
  122. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  123. end;
  124. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  125. begin
  126. result := (oper.typ = top_reg) and (oper.reg = reg);
  127. end;
  128. function RefsEqual(const r1, r2: treference): boolean;
  129. begin
  130. refsequal :=
  131. (r1.offset = r2.offset) and
  132. (r1.base = r2.base) and
  133. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  134. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  135. (r1.relsymbol = r2.relsymbol) and
  136. {$ifdef ARM}
  137. (r1.signindex = r2.signindex) and
  138. {$endif ARM}
  139. (r1.shiftimm = r2.shiftimm) and
  140. (r1.addressmode = r2.addressmode) and
  141. (r1.shiftmode = r2.shiftmode) and
  142. (r1.volatility=[]) and
  143. (r2.volatility=[]);
  144. end;
  145. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  146. begin
  147. result := oper1.typ = oper2.typ;
  148. if result then
  149. case oper1.typ of
  150. top_const:
  151. Result:=oper1.val = oper2.val;
  152. top_reg:
  153. Result:=oper1.reg = oper2.reg;
  154. top_conditioncode:
  155. Result:=oper1.cc = oper2.cc;
  156. top_realconst:
  157. Result:=oper1.val_real = oper2.val_real;
  158. top_ref:
  159. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  160. else Result:=false;
  161. end
  162. end;
  163. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  164. begin
  165. result := (oper.typ = top_const) and (oper.val = a);
  166. end;
  167. {$ifdef AARCH64}
  168. function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
  169. var
  170. so: tshifterop;
  171. opoffset: Integer;
  172. begin
  173. Result:=false;
  174. if (taicpu(p).ops=2) and
  175. ((MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  176. (taicpu(hp1).ops=3) and
  177. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  178. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  179. (MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
  180. (taicpu(hp1).ops=2) and
  181. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))
  182. ) and
  183. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  184. { reg1 might not be modified inbetween }
  185. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  186. begin
  187. DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
  188. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  189. if MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) then
  190. opoffset:=0
  191. else
  192. opoffset:=1;
  193. taicpu(hp1).loadReg(opoffset+1,taicpu(p).oper[1]^.reg);
  194. if not(shiftmode in [SM_SXTX,SM_UXTX,SM_LSL]) then
  195. setsubreg(taicpu(hp1).oper[opoffset+1]^.reg,R_SUBD);
  196. taicpu(hp1).ops:=opoffset+3;
  197. shifterop_reset(so);
  198. so.shiftmode:=shiftmode;
  199. so.shiftimm:=0;
  200. taicpu(hp1).loadshifterop(opoffset+2,so);
  201. result:=RemoveCurrentP(p);
  202. end;
  203. end;
  204. {$endif AARCH64}
  205. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  206. Out Next: tai; const reg: TRegister): Boolean;
  207. var
  208. gniResult: Boolean;
  209. begin
  210. Next:=Current;
  211. Result := False;
  212. repeat
  213. gniResult:=GetNextInstruction(Next,Next);
  214. if gniResult and RegInInstruction(reg,Next) then
  215. { Found something }
  216. Exit(True);
  217. until not gniResult or
  218. not(cs_opt_level3 in current_settings.optimizerswitches) or
  219. (Next.typ<>ait_instruction) or
  220. is_calljmp(taicpu(Next).opcode)
  221. {$ifdef ARM}
  222. or RegModifiedByInstruction(NR_PC,Next)
  223. {$endif ARM}
  224. ;
  225. end;
  226. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  227. var
  228. alloc,
  229. dealloc : tai_regalloc;
  230. hp1 : tai;
  231. begin
  232. Result:=false;
  233. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  234. { We can't optimize if there is a shiftop }
  235. (taicpu(movp).ops=2) and
  236. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  237. { don't mess with moves to fp }
  238. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  239. { the destination register of the mov might not be used beween p and movp }
  240. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  241. {$ifdef ARM}
  242. { PC should be changed only by moves }
  243. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  244. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  245. (taicpu(p).opcode<>A_CBZ) and
  246. (taicpu(p).opcode<>A_CBNZ) and
  247. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  248. not (
  249. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  250. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  251. (current_settings.cputype < cpu_armv6)
  252. ) and
  253. {$endif ARM}
  254. { Take care to only do this for instructions which REALLY load to the first register.
  255. Otherwise
  256. str reg0, [reg1]
  257. mov reg2, reg0
  258. will be optimized to
  259. str reg2, [reg1]
  260. }
  261. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  262. begin
  263. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  264. if assigned(dealloc) then
  265. begin
  266. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  267. result:=true;
  268. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  269. and remove it if possible }
  270. asml.Remove(dealloc);
  271. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  272. if assigned(alloc) then
  273. begin
  274. asml.Remove(alloc);
  275. alloc.free;
  276. dealloc.free;
  277. end
  278. else
  279. asml.InsertAfter(dealloc,p);
  280. AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);
  281. { finally get rid of the mov }
  282. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  283. { Remove preindexing and postindexing for LDR in some cases.
  284. For example:
  285. ldr reg2,[reg1, xxx]!
  286. mov reg1,reg2
  287. must be translated to:
  288. ldr reg1,[reg1, xxx]
  289. Preindexing must be removed there, since the same register is used as the base and as the target.
  290. Such case is not allowed for ARM CPU and produces crash. }
  291. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  292. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  293. then
  294. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  295. asml.remove(movp);
  296. movp.free;
  297. end;
  298. end;
  299. end;
  300. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  301. var
  302. I: Integer;
  303. current_hp, next_hp: tai;
  304. LDRChange: Boolean;
  305. begin
  306. Result:=false;
  307. {
  308. change
  309. mov r1, r0
  310. add r1, r1, #1
  311. to
  312. add r1, r0, #1
  313. Todo: Make it work for mov+cmp too
  314. CAUTION! If this one is successful p might not be a mov instruction anymore!
  315. }
  316. if (taicpu(p).ops = 2) and
  317. (taicpu(p).oper[1]^.typ = top_reg) and
  318. (taicpu(p).oppostfix = PF_NONE) then
  319. begin
  320. if
  321. MatchInstruction(hp1, [A_ADD, A_ADC,
  322. {$ifdef ARM}
  323. A_RSB, A_RSC,
  324. {$endif ARM}
  325. A_SUB, A_SBC,
  326. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  327. [taicpu(p).condition], []) and
  328. { MOV and MVN might only have 2 ops }
  329. (taicpu(hp1).ops >= 2) and
  330. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  331. (taicpu(hp1).oper[1]^.typ = top_reg) and
  332. (
  333. (taicpu(hp1).ops = 2) or
  334. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  335. ) and
  336. {$ifdef AARCH64}
  337. (taicpu(p).oper[1]^.reg<>NR_SP) and
  338. { in this case you have to transform it to movk or the like }
  339. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  340. {$endif AARCH64}
  341. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  342. begin
  343. { When we get here we still don't know if the registers match }
  344. for I:=1 to 2 do
  345. {
  346. If the first loop was successful p will be replaced with hp1.
  347. The checks will still be ok, because all required information
  348. will also be in hp1 then.
  349. }
  350. if (taicpu(hp1).ops > I) and
  351. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  352. {$ifdef ARM}
  353. { prevent certain combinations on thumb(2), this is only a safe approximation }
  354. and (not(GenerateThumbCode or GenerateThumb2Code) or
  355. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  356. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  357. {$endif ARM}
  358. then
  359. begin
  360. DebugMsg('Peephole RedundantMovProcess done', hp1);
  361. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  362. if p<>hp1 then
  363. begin
  364. asml.remove(p);
  365. p.free;
  366. p:=hp1;
  367. Result:=true;
  368. end;
  369. end;
  370. if Result then Exit;
  371. end
  372. { Change: Change:
  373. mov r1, r0 mov r1, r0
  374. ... ...
  375. ldr/str r2, [r1, etc.] mov r2, r1
  376. To: To:
  377. ldr/str r2, [r0, etc.] mov r2, r0
  378. }
  379. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  380. {$ifdef ARM}
  381. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  382. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  383. { Thumb does not support references with base and index one being SP }
  384. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  385. {$endif ARM}
  386. {$ifdef AARCH64}
  387. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  388. {$endif AARCH64}
  389. then
  390. begin
  391. current_hp := p;
  392. TransferUsedRegs(TmpUsedRegs);
  393. { Search local instruction block }
  394. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  395. begin
  396. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  397. LDRChange := False;
  398. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  399. {$ifdef AARCH64}
  400. { If r0 is the zero register, then this sequence of instructions will cause
  401. an access violation, but that's better than an assembler error caused by
  402. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  403. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  404. {$endif AARCH64}
  405. then
  406. begin
  407. { Change the registers from r1 to r0 }
  408. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  409. {$ifdef ARM}
  410. { This optimisation conflicts with something and raises
  411. an access violation - needs further investigation. [Kit] }
  412. (taicpu(next_hp).opcode <> A_LDR) and
  413. {$endif ARM}
  414. { Don't mess around with the base register if the
  415. reference is pre- or post-indexed }
  416. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  417. begin
  418. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  419. LDRChange := True;
  420. end;
  421. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  422. begin
  423. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  424. LDRChange := True;
  425. end;
  426. if LDRChange then
  427. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  428. { Drop out if we're dealing with pre-indexed references }
  429. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  430. (
  431. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  432. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  433. ) then
  434. begin
  435. { Remember to update register allocations }
  436. if LDRChange then
  437. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  438. Break;
  439. end;
  440. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  441. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  442. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  443. begin
  444. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  445. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  446. LDRChange := True;
  447. end;
  448. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  449. begin
  450. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  451. if (taicpu(p).oppostfix = PF_None) and
  452. (
  453. (
  454. (taicpu(next_hp).opcode = A_LDR) and
  455. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  456. ) or
  457. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  458. ) and
  459. { Double-check to see if the old registers were actually
  460. changed (e.g. if the super registers matched, but not
  461. the sizes, they won't be changed). }
  462. (
  463. (taicpu(next_hp).opcode = A_LDR) or
  464. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  465. ) and
  466. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  467. begin
  468. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  469. RemoveCurrentP(p);
  470. Result := True;
  471. Exit;
  472. end;
  473. end;
  474. end
  475. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  476. (taicpu(next_hp).ops = 2) then
  477. begin
  478. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  479. begin
  480. { mov r0,r1; mov r1,r1 - remove second MOV here so
  481. so "RedundantMovProcess 2b" doesn't get erroneously
  482. applied }
  483. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(next_hp).oper[1]^.reg) then
  484. begin
  485. DebugMsg(SPeepholeOptimization + 'Mov2None 2a done', next_hp);
  486. if (next_hp = hp1) then
  487. { Don't let hp1 become a dangling pointer }
  488. hp1 := nil;
  489. asml.Remove(next_hp);
  490. next_hp.Free;
  491. Continue;
  492. end;
  493. { Found another mov that writes entirely to the register }
  494. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  495. begin
  496. { Register was used beforehand }
  497. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  498. begin
  499. { This MOV is exactly the same as the first one.
  500. Since none of the registers have changed value
  501. at this point, we can remove it. }
  502. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  503. if (next_hp = hp1) then
  504. { Don't let hp1 become a dangling pointer }
  505. hp1 := nil;
  506. asml.Remove(next_hp);
  507. next_hp.Free;
  508. { We still have the original p, so we can continue optimising;
  509. if it was -O2 or below, this instruction appeared immediately
  510. after the first MOV, so we're technically not looking more
  511. than one instruction ahead after it's removed! [Kit] }
  512. Continue;
  513. end
  514. else
  515. { Register changes value - drop out }
  516. Break;
  517. end;
  518. { We can delete the first MOV (only if the second MOV is unconditional) }
  519. {$ifdef ARM}
  520. if (taicpu(p).oppostfix = PF_None) and
  521. (taicpu(next_hp).condition = C_None) then
  522. {$endif ARM}
  523. begin
  524. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  525. RemoveCurrentP(p);
  526. Result := True;
  527. end;
  528. Exit;
  529. end
  530. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  531. begin
  532. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  533. { Be careful - if the entire register is not used, removing this
  534. instruction will leave the unused part uninitialised }
  535. {$ifdef AARCH64}
  536. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  537. {$endif AARCH64}
  538. then
  539. begin
  540. { Instruction will become mov r1,r1 }
  541. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  542. { Allocate r1 between the instructions; not doing
  543. so may cause problems when removing superfluous
  544. MOVs later (i38055) }
  545. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  546. if (next_hp = hp1) then
  547. { Don't let hp1 become a dangling pointer }
  548. hp1 := nil;
  549. asml.Remove(next_hp);
  550. next_hp.Free;
  551. Continue;
  552. end;
  553. { Change the old register (checking the first operand again
  554. forces it to be left alone if the full register is not
  555. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  556. {$ifdef AARCH64}
  557. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  558. {$endif AARCH64}
  559. begin
  560. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  561. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  562. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  563. { If this was the only reference to the old register,
  564. then we can remove the original MOV now }
  565. if (taicpu(p).oppostfix = PF_None) and
  566. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  567. remove if the register was apparently not allocated when its value is
  568. first set at the MOV command (this is especially true for the stack
  569. register). [Kit] }
  570. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  571. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  572. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  573. begin
  574. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  575. RemoveCurrentP(p);
  576. Result := True;
  577. Exit;
  578. end;
  579. end;
  580. end;
  581. end;
  582. { On low optimisation settions, don't search more than one instruction ahead }
  583. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  584. { Stop at procedure calls and jumps }
  585. is_calljmp(taicpu(next_hp).opcode) or
  586. { If the read register has changed value, or the MOV
  587. destination register has been used, drop out }
  588. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  589. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  590. Break;
  591. current_hp := next_hp;
  592. end;
  593. end;
  594. end;
  595. end;
  596. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  597. var
  598. hp1, hp2: tai;
  599. so: tshifterop;
  600. begin
  601. Result:=false;
  602. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  603. begin
  604. {
  605. change
  606. uxtb reg2,reg1
  607. strb reg2,[...]
  608. dealloc reg2
  609. to
  610. strb reg1,[...]
  611. }
  612. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  613. (taicpu(p).ops=2) and
  614. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  615. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  616. { the reference in strb might not use reg2 }
  617. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  618. { reg1 might not be modified inbetween }
  619. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  620. begin
  621. DebugMsg('Peephole UxtbStrb2Strb done', p);
  622. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  623. result:=RemoveCurrentP(p);
  624. end
  625. {
  626. change
  627. uxtb reg2,reg1
  628. uxth reg3,reg2
  629. dealloc reg2
  630. to
  631. uxtb reg3,reg1
  632. }
  633. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  634. (taicpu(p).ops=2) and
  635. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  636. (taicpu(hp1).ops = 2) and
  637. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  638. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  639. { reg1 might not be modified inbetween }
  640. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  641. begin
  642. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  643. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  644. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  645. asml.remove(hp1);
  646. hp1.free;
  647. result:=true;
  648. end
  649. {
  650. change
  651. uxtb reg2,reg1
  652. uxtb reg3,reg2
  653. dealloc reg2
  654. to
  655. uxtb reg3,reg1
  656. }
  657. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  658. (taicpu(p).ops=2) and
  659. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  660. (taicpu(hp1).ops = 2) and
  661. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  662. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  663. { reg1 might not be modified inbetween }
  664. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  665. begin
  666. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  667. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  668. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  669. asml.remove(hp1);
  670. hp1.free;
  671. result:=true;
  672. end
  673. {
  674. change
  675. uxtb reg2,reg1
  676. and reg3,reg2,#0x*FF
  677. dealloc reg2
  678. to
  679. uxtb reg3,reg1
  680. }
  681. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  682. (taicpu(p).ops=2) and
  683. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  684. (taicpu(hp1).ops=3) and
  685. (taicpu(hp1).oper[2]^.typ=top_const) and
  686. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  687. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  688. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  689. { reg1 might not be modified inbetween }
  690. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  691. begin
  692. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  693. taicpu(hp1).opcode:=A_UXTB;
  694. taicpu(hp1).ops:=2;
  695. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  696. result:=RemoveCurrentP(p);
  697. end
  698. {$ifdef AARCH64}
  699. else if USxtOp2Op(p,hp1,SM_UXTB) then
  700. Result:=true
  701. {$endif AARCH64}
  702. else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  703. Result:=true;
  704. end;
  705. end;
  706. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  707. var
  708. hp1: tai;
  709. so: tshifterop;
  710. begin
  711. Result:=false;
  712. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  713. begin
  714. {
  715. change
  716. uxth reg2,reg1
  717. strh reg2,[...]
  718. dealloc reg2
  719. to
  720. strh reg1,[...]
  721. }
  722. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  723. (taicpu(p).ops=2) and
  724. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  725. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  726. { the reference in strb might not use reg2 }
  727. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  728. { reg1 might not be modified inbetween }
  729. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  730. begin
  731. DebugMsg('Peephole UXTHStrh2Strh done', p);
  732. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  733. result:=RemoveCurrentP(p);
  734. end
  735. {
  736. change
  737. uxth reg2,reg1
  738. uxth reg3,reg2
  739. dealloc reg2
  740. to
  741. uxth reg3,reg1
  742. }
  743. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  744. (taicpu(p).ops=2) and
  745. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  746. (taicpu(hp1).ops=2) and
  747. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  748. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  749. { reg1 might not be modified inbetween }
  750. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  751. begin
  752. DebugMsg('Peephole UxthUxth2Uxth done', p);
  753. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  754. taicpu(hp1).opcode:=A_UXTH;
  755. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  756. result:=RemoveCurrentP(p);
  757. end
  758. {
  759. change
  760. uxth reg2,reg1
  761. and reg3,reg2,#65535
  762. dealloc reg2
  763. to
  764. uxth reg3,reg1
  765. }
  766. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  767. (taicpu(p).ops=2) and
  768. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  769. (taicpu(hp1).ops=3) and
  770. (taicpu(hp1).oper[2]^.typ=top_const) and
  771. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  772. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  773. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  774. { reg1 might not be modified inbetween }
  775. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  776. begin
  777. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  778. taicpu(hp1).opcode:=A_UXTH;
  779. taicpu(hp1).ops:=2;
  780. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  781. result:=RemoveCurrentP(p);
  782. end
  783. {$ifdef AARCH64}
  784. else if USxtOp2Op(p,hp1,SM_UXTH) then
  785. Result:=true
  786. {$endif AARCH64}
  787. else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  788. Result:=true;
  789. end;
  790. end;
  791. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  792. var
  793. hp1, hp2: tai;
  794. so: tshifterop;
  795. begin
  796. Result:=false;
  797. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  798. begin
  799. {
  800. change
  801. sxtb reg2,reg1
  802. strb reg2,[...]
  803. dealloc reg2
  804. to
  805. strb reg1,[...]
  806. }
  807. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  808. (taicpu(p).ops=2) and
  809. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  810. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  811. { the reference in strb might not use reg2 }
  812. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  813. { reg1 might not be modified inbetween }
  814. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  815. begin
  816. DebugMsg('Peephole SxtbStrb2Strb done', p);
  817. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  818. result:=RemoveCurrentP(p);
  819. end
  820. {
  821. change
  822. sxtb reg2,reg1
  823. sxth reg3,reg2
  824. dealloc reg2
  825. to
  826. sxtb reg3,reg1
  827. }
  828. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  829. (taicpu(p).ops=2) and
  830. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  831. (taicpu(hp1).ops = 2) and
  832. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  833. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  834. { reg1 might not be modified inbetween }
  835. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  836. begin
  837. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  838. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  839. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  840. asml.remove(hp1);
  841. hp1.free;
  842. result:=true;
  843. end
  844. {
  845. change
  846. sxtb reg2,reg1
  847. sxtb reg3,reg2
  848. dealloc reg2
  849. to
  850. uxtb reg3,reg1
  851. }
  852. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  853. (taicpu(p).ops=2) and
  854. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  855. (taicpu(hp1).ops = 2) and
  856. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  857. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  858. { reg1 might not be modified inbetween }
  859. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  860. begin
  861. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  862. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  863. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  864. asml.remove(hp1);
  865. hp1.free;
  866. result:=true;
  867. end
  868. {
  869. change
  870. sxtb reg2,reg1
  871. and reg3,reg2,#0x*FF
  872. dealloc reg2
  873. to
  874. uxtb reg3,reg1
  875. }
  876. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  877. (taicpu(p).ops=2) and
  878. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  879. (taicpu(hp1).ops=3) and
  880. (taicpu(hp1).oper[2]^.typ=top_const) and
  881. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  882. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  883. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  884. { reg1 might not be modified inbetween }
  885. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  886. begin
  887. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  888. taicpu(hp1).opcode:=A_UXTB;
  889. taicpu(hp1).ops:=2;
  890. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  891. result:=RemoveCurrentP(p);
  892. end
  893. {$ifdef AARCH64}
  894. else if USxtOp2Op(p,hp1,SM_SXTB) then
  895. Result:=true
  896. {$endif AARCH64}
  897. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  898. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  899. Result:=true;
  900. end;
  901. end;
  902. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  903. var
  904. hp1: tai;
  905. so: tshifterop;
  906. begin
  907. Result:=false;
  908. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  909. begin
  910. {
  911. change
  912. sxth reg2,reg1
  913. strh reg2,[...]
  914. dealloc reg2
  915. to
  916. strh reg1,[...]
  917. }
  918. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  919. (taicpu(p).ops=2) and
  920. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  921. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  922. { the reference in strb might not use reg2 }
  923. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  924. { reg1 might not be modified inbetween }
  925. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  926. begin
  927. DebugMsg('Peephole SxthStrh2Strh done', p);
  928. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  929. result:=RemoveCurrentP(p);
  930. end
  931. {
  932. change
  933. sxth reg2,reg1
  934. sxth reg3,reg2
  935. dealloc reg2
  936. to
  937. sxth reg3,reg1
  938. }
  939. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  940. (taicpu(p).ops=2) and
  941. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  942. (taicpu(hp1).ops=2) and
  943. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  944. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  945. { reg1 might not be modified inbetween }
  946. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  947. begin
  948. DebugMsg('Peephole SxthSxth2Sxth done', p);
  949. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  950. taicpu(hp1).opcode:=A_SXTH;
  951. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  952. result:=RemoveCurrentP(p);
  953. end
  954. {$ifdef AARCH64}
  955. {
  956. change
  957. sxth reg2,reg1
  958. sxtw reg3,reg2
  959. dealloc reg2
  960. to
  961. sxth reg3,reg1
  962. }
  963. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  964. (taicpu(p).ops=2) and
  965. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  966. (taicpu(hp1).ops=2) and
  967. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  968. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  969. { reg1 might not be modified inbetween }
  970. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  971. begin
  972. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  973. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  974. taicpu(hp1).opcode:=A_SXTH;
  975. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  976. result:=RemoveCurrentP(p);
  977. end
  978. {$endif AARCH64}
  979. {
  980. change
  981. sxth reg2,reg1
  982. and reg3,reg2,#65535
  983. dealloc reg2
  984. to
  985. uxth reg3,reg1
  986. }
  987. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  988. (taicpu(p).ops=2) and
  989. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  990. (taicpu(hp1).ops=3) and
  991. (taicpu(hp1).oper[2]^.typ=top_const) and
  992. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  993. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  994. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  995. { reg1 might not be modified inbetween }
  996. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  997. begin
  998. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  999. taicpu(hp1).opcode:=A_UXTH;
  1000. taicpu(hp1).ops:=2;
  1001. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1002. result:=RemoveCurrentP(p);
  1003. end
  1004. {$ifdef AARCH64}
  1005. else if USxtOp2Op(p,hp1,SM_SXTH) then
  1006. Result:=true
  1007. {$endif AARCH64}
  1008. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1009. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  1010. Result:=true;
  1011. end;
  1012. end;
  1013. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  1014. begin
  1015. Result := False;
  1016. { Convert:
  1017. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1018. To:
  1019. mov reg1,reg2
  1020. }
  1021. if (taicpu(p).oper[2]^.val = 0) and
  1022. {$ifdef AARCH64}
  1023. (
  1024. (
  1025. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1026. (taicpu(p).oper[3]^.val = 64)
  1027. ) or
  1028. (
  1029. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1030. (taicpu(p).oper[3]^.val = 32)
  1031. )
  1032. )
  1033. {$else AARCH64}
  1034. (taicpu(p).oper[3]^.val = 32)
  1035. {$endif AARCH64}
  1036. then
  1037. begin
  1038. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1039. taicpu(p).opcode := A_MOV;
  1040. taicpu(p).ops := 2;
  1041. taicpu(p).clearop(2);
  1042. taicpu(p).clearop(3);
  1043. Result := True;
  1044. Exit;
  1045. end;
  1046. end;
  1047. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1048. var
  1049. hp1: tai;
  1050. Reference: TReference;
  1051. NewOp: TAsmOp;
  1052. begin
  1053. Result := False;
  1054. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1055. Exit;
  1056. Reference := taicpu(p).oper[1]^.ref^;
  1057. if (Reference.addressmode = AM_OFFSET) and
  1058. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1059. { Delay calling GetNextInstruction for as long as possible }
  1060. GetNextInstruction(p, hp1) and
  1061. (hp1.typ = ait_instruction) and
  1062. (taicpu(hp1).condition = C_None) and
  1063. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1064. begin
  1065. if (taicpu(hp1).opcode = A_STR) and
  1066. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1067. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1068. begin
  1069. { With:
  1070. ldr reg1,[ref]
  1071. str reg2,[ref]
  1072. If reg1 = reg2, Remove str
  1073. }
  1074. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1075. begin
  1076. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1077. RemoveInstruction(hp1);
  1078. Result := True;
  1079. Exit;
  1080. end;
  1081. end
  1082. else if (taicpu(hp1).opcode = A_LDR) and
  1083. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1084. begin
  1085. { With:
  1086. ldr reg1,[ref]
  1087. ldr reg2,[ref]
  1088. If reg1 = reg2, delete the second ldr
  1089. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1090. a dependency, but it will likely open up new optimisations, so
  1091. do it for now and handle any new dependencies later.
  1092. }
  1093. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1094. begin
  1095. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1096. RemoveInstruction(hp1);
  1097. Result := True;
  1098. Exit;
  1099. end
  1100. else if
  1101. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1102. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1103. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1104. begin
  1105. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1106. taicpu(hp1).opcode := A_MOV;
  1107. taicpu(hp1).oppostfix := PF_None;
  1108. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1109. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1110. Result := True;
  1111. Exit;
  1112. end;
  1113. end;
  1114. end;
  1115. end;
  1116. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1117. var
  1118. hp1: tai;
  1119. Reference: TReference;
  1120. SizeMismatch: Boolean;
  1121. SrcReg, DstReg: TRegister;
  1122. NewOp: TAsmOp;
  1123. begin
  1124. Result := False;
  1125. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1126. Exit;
  1127. Reference := taicpu(p).oper[1]^.ref^;
  1128. if (Reference.addressmode = AM_OFFSET) and
  1129. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1130. { Delay calling GetNextInstruction for as long as possible }
  1131. GetNextInstruction(p, hp1) and
  1132. (hp1.typ = ait_instruction) and
  1133. (taicpu(hp1).condition = C_None) and
  1134. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1135. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1136. begin
  1137. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1138. SrcReg := taicpu(p).oper[0]^.reg;
  1139. DstReg := taicpu(hp1).oper[0]^.reg;
  1140. if (taicpu(hp1).opcode = A_LDR) and
  1141. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1142. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1143. (
  1144. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1145. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1146. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1147. {$ifdef AARCH64}
  1148. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1149. {$endif AARCH64}
  1150. ) then
  1151. begin
  1152. { With:
  1153. str reg1,[ref]
  1154. ldr reg2,[ref]
  1155. If reg1 = reg2, Remove ldr.
  1156. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1157. }
  1158. if (SrcReg = DstReg) and
  1159. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1160. (taicpu(p).oppostfix=PF_None) then
  1161. begin
  1162. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1163. RemoveInstruction(hp1);
  1164. Result := True;
  1165. Exit;
  1166. end
  1167. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1168. (getregtype(DstReg) = R_INTREGISTER) and
  1169. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1170. begin
  1171. NewOp:=A_NONE;
  1172. if taicpu(hp1).oppostfix=PF_None then
  1173. NewOp:=A_MOV
  1174. else
  1175. {$ifdef ARM}
  1176. if (current_settings.cputype < cpu_armv6) then
  1177. begin
  1178. { The zero- and sign-extension operations were only
  1179. introduced under ARMv6 }
  1180. case taicpu(hp1).oppostfix of
  1181. PF_B:
  1182. begin
  1183. { The if-block afterwards will set the middle operand to the correct register }
  1184. taicpu(hp1).allocate_oper(3);
  1185. taicpu(hp1).ops := 3;
  1186. taicpu(hp1).loadconst(2, $FF);
  1187. NewOp := A_AND;
  1188. end;
  1189. PF_H:
  1190. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1191. PF_SB,
  1192. PF_SH:
  1193. { Do nothing - can't easily encode sign-extensions };
  1194. else
  1195. InternalError(2021043002);
  1196. end;
  1197. end
  1198. else
  1199. {$endif ARM}
  1200. case taicpu(hp1).oppostfix of
  1201. PF_B:
  1202. NewOp := A_UXTB;
  1203. PF_SB:
  1204. NewOp := A_SXTB;
  1205. PF_H:
  1206. NewOp := A_UXTH;
  1207. PF_SH:
  1208. NewOp := A_SXTH;
  1209. {$ifdef AARCH64}
  1210. PF_SW:
  1211. NewOp := A_SXTW;
  1212. PF_W:
  1213. NewOp := A_MOV;
  1214. {$endif AARCH64}
  1215. else
  1216. InternalError(2021043001);
  1217. end;
  1218. if (NewOp<>A_None) then
  1219. begin
  1220. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1221. taicpu(hp1).oppostfix := PF_None;
  1222. taicpu(hp1).opcode := NewOp;
  1223. taicpu(hp1).loadreg(1, SrcReg);
  1224. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1225. Result := True;
  1226. Exit;
  1227. end;
  1228. end
  1229. end
  1230. else if (taicpu(hp1).opcode = A_STR) and
  1231. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1232. begin
  1233. { With:
  1234. str reg1,[ref]
  1235. str reg2,[ref]
  1236. If reg1 <> reg2, delete the first str
  1237. IF reg1 = reg2, delete the second str
  1238. }
  1239. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1240. begin
  1241. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1242. RemoveInstruction(hp1);
  1243. Result := True;
  1244. Exit;
  1245. end
  1246. else if
  1247. { Registers same byte size? }
  1248. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1249. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1250. begin
  1251. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1252. RemoveCurrentP(p, hp1);
  1253. Result := True;
  1254. Exit;
  1255. end;
  1256. end;
  1257. end;
  1258. end;
  1259. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1260. var
  1261. hp1, hp2: tai;
  1262. i: longint;
  1263. begin
  1264. Result:=false;
  1265. {
  1266. optimize
  1267. and reg2,reg1,const1
  1268. ...
  1269. }
  1270. if (taicpu(p).ops>2) and
  1271. (taicpu(p).oper[1]^.typ = top_reg) and
  1272. (taicpu(p).oper[2]^.typ = top_const) then
  1273. begin
  1274. {
  1275. change
  1276. and reg2,reg1,const1
  1277. ...
  1278. and reg3,reg2,const2
  1279. to
  1280. and reg3,reg1,(const1 and const2)
  1281. }
  1282. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1283. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1284. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1285. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1286. (taicpu(hp1).oper[2]^.typ = top_const)
  1287. {$ifdef AARCH64}
  1288. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1289. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1290. ) or
  1291. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1292. {$endif AARCH64}
  1293. then
  1294. begin
  1295. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1296. begin
  1297. DebugMsg('Peephole AndAnd2And done', p);
  1298. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1299. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1300. begin
  1301. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1302. taicpu(p).opcode:=A_MOV;
  1303. taicpu(p).ops:=2;
  1304. taicpu(p).loadConst(1,0);
  1305. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1306. end
  1307. else
  1308. begin
  1309. DebugMsg('Peephole AndAnd2And 1 done', p);
  1310. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1311. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1312. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1313. end;
  1314. asml.remove(hp1);
  1315. hp1.free;
  1316. Result:=true;
  1317. exit;
  1318. end
  1319. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1320. begin
  1321. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1322. begin
  1323. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1324. taicpu(hp1).opcode:=A_MOV;
  1325. taicpu(hp1).loadConst(1,0);
  1326. taicpu(hp1).ops:=2;
  1327. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1328. end
  1329. else
  1330. begin
  1331. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1332. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1333. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1334. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1335. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1336. end;
  1337. GetNextInstruction(p, hp1);
  1338. RemoveCurrentP(p);
  1339. p:=hp1;
  1340. Result:=true;
  1341. exit;
  1342. end;
  1343. end
  1344. {
  1345. change
  1346. and reg2,reg1,$xxxxxxFF
  1347. strb reg2,[...]
  1348. dealloc reg2
  1349. to
  1350. strb reg1,[...]
  1351. }
  1352. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1353. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1354. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1355. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1356. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1357. { the reference in strb might not use reg2 }
  1358. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1359. { reg1 might not be modified inbetween }
  1360. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1361. begin
  1362. DebugMsg('Peephole AndStrb2Strb done', p);
  1363. {$ifdef AARCH64}
  1364. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1365. {$else AARCH64}
  1366. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1367. {$endif AARCH64}
  1368. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1369. RemoveCurrentP(p);
  1370. result:=true;
  1371. exit;
  1372. end
  1373. {
  1374. change
  1375. and reg2,reg1,255
  1376. uxtb/uxth reg3,reg2
  1377. dealloc reg2
  1378. to
  1379. and reg3,reg1,x
  1380. }
  1381. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1382. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1383. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1384. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1385. (taicpu(hp1).ops = 2) and
  1386. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1387. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1388. { reg1 might not be modified inbetween }
  1389. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1390. begin
  1391. DebugMsg('Peephole AndUxt2And done', p);
  1392. taicpu(hp1).opcode:=A_AND;
  1393. taicpu(hp1).ops:=3;
  1394. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1395. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1396. GetNextInstruction(p,hp1);
  1397. asml.remove(p);
  1398. p.Free;
  1399. p:=hp1;
  1400. result:=true;
  1401. exit;
  1402. end
  1403. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1404. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1405. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1406. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1407. (taicpu(hp1).ops = 2) and
  1408. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1409. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1410. { reg1 might not be modified inbetween }
  1411. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1412. begin
  1413. DebugMsg('Peephole AndSxt2And done', p);
  1414. taicpu(hp1).opcode:=A_AND;
  1415. taicpu(hp1).ops:=3;
  1416. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1417. setsubreg(taicpu(hp1).oper[1]^.reg,getsubreg(taicpu(hp1).oper[0]^.reg));
  1418. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1419. GetNextInstruction(p,hp1);
  1420. asml.remove(p);
  1421. p.Free;
  1422. p:=hp1;
  1423. result:=true;
  1424. exit;
  1425. end
  1426. {
  1427. from
  1428. and reg1,reg0,2^n-1
  1429. mov reg2,reg1, lsl imm1
  1430. (mov reg3,reg2, lsr/asr imm1)
  1431. remove either the and or the lsl/xsr sequence if possible
  1432. }
  1433. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1434. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1435. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1436. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1437. (taicpu(hp1).ops=3) and
  1438. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1439. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1440. {$ifdef ARM}
  1441. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1442. {$endif ARM}
  1443. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1444. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1445. begin
  1446. {
  1447. and reg1,reg0,2^n-1
  1448. mov reg2,reg1, lsl imm1
  1449. mov reg3,reg2, lsr/asr imm1
  1450. =>
  1451. and reg1,reg0,2^n-1
  1452. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1453. }
  1454. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1455. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1456. (taicpu(hp2).ops=3) and
  1457. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1458. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1459. {$ifdef ARM}
  1460. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1461. {$endif ARM}
  1462. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1463. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1464. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1465. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1466. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1467. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1468. begin
  1469. DebugMsg('Peephole AndLslXsr2And done', p);
  1470. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1471. asml.Remove(hp1);
  1472. asml.Remove(hp2);
  1473. hp1.free;
  1474. hp2.free;
  1475. result:=true;
  1476. exit;
  1477. end
  1478. {
  1479. and reg1,reg0,2^n-1
  1480. mov reg2,reg1, lsl imm1
  1481. =>
  1482. mov reg2,reg0, lsl imm1
  1483. if imm1>i
  1484. }
  1485. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1486. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1487. begin
  1488. DebugMsg('Peephole AndLsl2Lsl done', p);
  1489. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1490. GetNextInstruction(p, hp1);
  1491. asml.Remove(p);
  1492. p.free;
  1493. p:=hp1;
  1494. result:=true;
  1495. exit;
  1496. end
  1497. end;
  1498. end;
  1499. {
  1500. change
  1501. and reg1, ...
  1502. mov reg2, reg1
  1503. to
  1504. and reg2, ...
  1505. }
  1506. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1507. (taicpu(p).ops>=3) and
  1508. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1509. Result:=true;
  1510. end;
  1511. function TARMAsmOptimizer.OptPass2Bitwise(var p: tai): Boolean;
  1512. var
  1513. hp1, hp2: tai;
  1514. WorkingReg: TRegister;
  1515. begin
  1516. Result := False;
  1517. {
  1518. change
  1519. and/bic reg1, ...
  1520. ...
  1521. cmp reg1, #0
  1522. b<ne/eq> @Lbl
  1523. to
  1524. ands/bics reg1, ...
  1525. Also:
  1526. and/bic reg1, ...
  1527. ...
  1528. cmp reg1, #0
  1529. (reg1 end of life)
  1530. b<ne/eq> @Lbl
  1531. to
  1532. tst reg1, ...
  1533. or
  1534. bics xzr, reg1, ... under AArch64
  1535. For ARM, also include OR, EOR and ORN
  1536. }
  1537. if (taicpu(p).condition = C_None) and
  1538. (taicpu(p).ops>=3) and
  1539. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1540. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  1541. MatchOperand(taicpu(hp1).oper[1]^, 0) and
  1542. {$ifdef AARCH64}
  1543. (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
  1544. (
  1545. (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
  1546. or
  1547. (
  1548. (taicpu(p).oper[2]^.typ = top_const) and
  1549. (taicpu(p).oper[2]^.val >= 0) and
  1550. (taicpu(p).oper[2]^.val <= $FFFFFFFF)
  1551. )
  1552. ) and
  1553. {$else AARCH64}
  1554. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1555. {$endif AARCH64}
  1556. not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
  1557. GetNextInstruction(hp1, hp2) then
  1558. begin
  1559. if MatchInstruction(hp2, [A_B, A_CMP, A_CMN, A_TST{$ifndef AARCH64}, A_TEQ{$endif not AARCH64}], [C_EQ, C_NE], [PF_None]) then
  1560. begin
  1561. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  1562. WorkingReg := taicpu(p).oper[0]^.reg;
  1563. if
  1564. {$ifndef AARCH64}
  1565. (taicpu(p).opcode = A_AND) and
  1566. {$endif AARCH64}
  1567. RegEndOfLife(WorkingReg, taicpu(hp1)) then
  1568. begin
  1569. {$ifdef AARCH64}
  1570. if (taicpu(p).opcode <> A_AND) then
  1571. begin
  1572. setsupreg(taicpu(p).oper[0]^.reg, RS_XZR);
  1573. taicpu(p).oppostfix := PF_S;
  1574. DebugMsg(SPeepholeOptimization + 'BIC; CMP -> BICS ' + gas_regname(taicpu(p).oper[0]^.reg), p);
  1575. end
  1576. else
  1577. {$endif AARCH64}
  1578. begin
  1579. taicpu(p).opcode := A_TST;
  1580. taicpu(p).oppostfix := PF_None;
  1581. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  1582. taicpu(p).loadoper(1, taicpu(p).oper[2]^);
  1583. if (taicpu(p).ops = 4) then
  1584. begin
  1585. { Make sure any shifter operator is also transferred }
  1586. taicpu(p).loadshifterop(2, taicpu(p).oper[3]^.shifterop^);
  1587. taicpu(p).ops := 3;
  1588. end
  1589. else
  1590. taicpu(p).ops := 2;
  1591. DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
  1592. end;
  1593. end
  1594. else
  1595. begin
  1596. taicpu(p).oppostfix := PF_S;
  1597. {$ifdef AARCH64}
  1598. DebugMsg(SPeepholeOptimization + 'AND/BIC; CMP -> ANDS/BICS', p);
  1599. {$else AARCH64}
  1600. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> Bitwise+S', p);
  1601. {$endif AARCH64}
  1602. end;
  1603. RemoveInstruction(hp1);
  1604. { If a temporary register was used for and/cmp before, we might be
  1605. able to deallocate the register so it can be used for other
  1606. optimisations later }
  1607. if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
  1608. ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);
  1609. Result := True;
  1610. Exit;
  1611. end
  1612. else if
  1613. (hp2.typ = ait_label) or
  1614. { Conditional comparison instructions have already been covered }
  1615. RegModifiedByInstruction(NR_DEFAULTFLAGS, hp2) then
  1616. begin
  1617. { The comparison is a null operation }
  1618. if RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1619. begin
  1620. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> nop', p);
  1621. RemoveInstruction(hp1);
  1622. RemoveCurrentP(p);
  1623. end
  1624. else
  1625. begin
  1626. DebugMsg(SPeepholeOptimization + 'CMP/BIC -> nop', hp1);
  1627. RemoveInstruction(hp1);
  1628. end;
  1629. Result := True;
  1630. Exit;
  1631. end;
  1632. end;
  1633. end;
  1634. function TARMAsmOptimizer.OptPass2TST(var p: tai): Boolean;
  1635. var
  1636. hp1, hp2: tai;
  1637. begin
  1638. Result := False;
  1639. if
  1640. {$ifndef AARCH64}
  1641. (taicpu(p).condition = C_None) and
  1642. {$endif AARCH64}
  1643. GetNextInstruction(p, hp1) and
  1644. MatchInstruction(hp1, A_B, [C_EQ, C_NE], [PF_None]) and
  1645. GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[0]^.reg) then
  1646. begin
  1647. case taicpu(hp2).opcode of
  1648. A_AND:
  1649. { Change:
  1650. tst r1,##
  1651. (r2 not in use, or r2 = r1)
  1652. b.c .Lbl
  1653. ...
  1654. and r2,r1,##
  1655. Optimise to:
  1656. ands r2,r1,##
  1657. b.c .Lbl
  1658. ...
  1659. }
  1660. if (taicpu(hp2).oppostfix in [PF_None, PF_S]) and
  1661. {$ifndef AARCH64}
  1662. (taicpu(hp2).condition = C_None) and
  1663. {$endif AARCH64}
  1664. (taicpu(hp2).ops = taicpu(p).ops + 1) and
  1665. not RegInUsedRegs(taicpu(hp2).oper[0]^.reg, UsedRegs) and
  1666. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^.reg) and
  1667. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[1]^) and
  1668. (
  1669. (taicpu(hp2).ops = 3) or
  1670. MatchOperand(taicpu(hp2).oper[3]^, taicpu(p).oper[2]^)
  1671. ) and
  1672. (
  1673. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1674. (
  1675. { Make sure the target register isn't used in between }
  1676. not RegUsedBetween(taicpu(hp2).oper[0]^.reg, hp1, hp2) and
  1677. (
  1678. { If the second operand is a register, make sure it isn't modified in between }
  1679. (taicpu(p).oper[1]^.typ <> top_reg) or
  1680. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1681. )
  1682. )
  1683. ) then
  1684. begin
  1685. AllocRegBetween(taicpu(hp2).oper[0]^.reg, p, hp2, UsedRegs);
  1686. if (taicpu(hp2).oppostfix = PF_S) then
  1687. AllocRegBetween(NR_DEFAULTFLAGS, p, hp2, UsedRegs);
  1688. DebugMsg(SPeepholeOptimization + 'TST; B.c; AND -> ANDS; B.c (TstBcAnd2AndsBc)', p);
  1689. taicpu(hp2).oppostfix := PF_S;
  1690. Asml.Remove(hp2);
  1691. Asml.InsertAfter(hp2, p);
  1692. RemoveCurrentP(p, hp2);
  1693. Result := True;
  1694. Exit;
  1695. end;
  1696. A_TST:
  1697. { Change:
  1698. tst r1,##
  1699. b.c .Lbl
  1700. ... (flags not modified)
  1701. tst r1,##
  1702. Remove second tst
  1703. }
  1704. if
  1705. {$ifndef AARCH64}
  1706. (taicpu(hp2).condition = C_None) and
  1707. {$endif AARCH64}
  1708. (taicpu(hp2).ops = taicpu(p).ops) and
  1709. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) and
  1710. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) and
  1711. (
  1712. (taicpu(hp2).ops = 2) or
  1713. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[2]^)
  1714. ) and
  1715. (
  1716. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1717. (
  1718. { Make sure the flags aren't modified in between }
  1719. not RegModifiedBetween(NR_DEFAULTFLAGS, hp1, hp2) and
  1720. (
  1721. { If the second operand is a register, make sure it isn't modified in between }
  1722. (taicpu(p).oper[1]^.typ <> top_reg) or
  1723. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1724. )
  1725. )
  1726. ) then
  1727. begin
  1728. DebugMsg(SPeepholeOptimization + 'TST; B.c; TST -> TST; B.c (TstBcTst2TstBc)', p);
  1729. AllocRegBetween(NR_DEFAULTFLAGS, hp1, hp2, UsedRegs);
  1730. RemoveInstruction(hp2);
  1731. Result := True;
  1732. Exit;
  1733. end;
  1734. else
  1735. ;
  1736. end;
  1737. end;
  1738. end;
  1739. end.