aoptarm.pas 79 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. function OptPass2Bitwise(var p: tai): Boolean;
  47. function OptPass2TST(var p: tai): Boolean;
  48. protected
  49. function DoXTArithOp(var p: tai; hp1: tai): Boolean;
  50. End;
  51. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  52. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  53. {$ifdef AARCH64}
  54. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  55. {$endif AARCH64}
  56. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  57. function RefsEqual(const r1, r2: treference): boolean;
  58. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  59. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  60. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  61. Implementation
  62. uses
  63. cutils,verbose,globals,
  64. systems,
  65. cpuinfo,
  66. cgobj,procinfo,
  67. aasmbase,aasmdata,itcpugas;
  68. {$ifdef DEBUG_AOPTCPU}
  69. const
  70. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  71. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  72. begin
  73. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  74. end;
  75. {$else DEBUG_AOPTCPU}
  76. { Empty strings help the optimizer to remove string concatenations that won't
  77. ever appear to the user on release builds. [Kit] }
  78. const
  79. SPeepholeOptimization = '';
  80. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  81. begin
  82. end;
  83. {$endif DEBUG_AOPTCPU}
  84. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  85. begin
  86. result :=
  87. (instr.typ = ait_instruction) and
  88. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  89. ((cond = []) or (taicpu(instr).condition in cond)) and
  90. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  91. end;
  92. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  93. begin
  94. result :=
  95. (instr.typ = ait_instruction) and
  96. (taicpu(instr).opcode = op) and
  97. ((cond = []) or (taicpu(instr).condition in cond)) and
  98. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  99. end;
  100. {$ifdef AARCH64}
  101. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  102. var
  103. op : TAsmOp;
  104. begin
  105. result:=false;
  106. if instr.typ <> ait_instruction then
  107. exit;
  108. for op in ops do
  109. begin
  110. if (taicpu(instr).opcode = op) and
  111. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  112. begin
  113. result:=true;
  114. exit;
  115. end;
  116. end;
  117. end;
  118. {$endif AARCH64}
  119. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  120. begin
  121. result :=
  122. (instr.typ = ait_instruction) and
  123. (taicpu(instr).opcode = op) and
  124. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  125. end;
  126. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  127. begin
  128. result := (oper.typ = top_reg) and (oper.reg = reg);
  129. end;
  130. function RefsEqual(const r1, r2: treference): boolean;
  131. begin
  132. refsequal :=
  133. (r1.offset = r2.offset) and
  134. (r1.base = r2.base) and
  135. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  136. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  137. (r1.relsymbol = r2.relsymbol) and
  138. {$ifdef ARM}
  139. (r1.signindex = r2.signindex) and
  140. {$endif ARM}
  141. (r1.shiftimm = r2.shiftimm) and
  142. (r1.addressmode = r2.addressmode) and
  143. (r1.shiftmode = r2.shiftmode) and
  144. (r1.volatility=[]) and
  145. (r2.volatility=[]);
  146. end;
  147. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  148. begin
  149. result := oper1.typ = oper2.typ;
  150. if result then
  151. case oper1.typ of
  152. top_const:
  153. Result:=oper1.val = oper2.val;
  154. top_reg:
  155. Result:=oper1.reg = oper2.reg;
  156. top_conditioncode:
  157. Result:=oper1.cc = oper2.cc;
  158. top_realconst:
  159. Result:=oper1.val_real = oper2.val_real;
  160. top_ref:
  161. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  162. else Result:=false;
  163. end
  164. end;
  165. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  166. begin
  167. result := (oper.typ = top_const) and (oper.val = a);
  168. end;
  169. {$ifdef AARCH64}
  170. function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
  171. var
  172. so: tshifterop;
  173. opoffset: Integer;
  174. begin
  175. Result:=false;
  176. if ((MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  177. (taicpu(hp1).ops=3) and
  178. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  179. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  180. (MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
  181. (taicpu(hp1).ops=2) and
  182. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))
  183. ) and
  184. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  185. { reg1 might not be modified inbetween }
  186. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  187. begin
  188. DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
  189. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  190. if MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) then
  191. opoffset:=0
  192. else
  193. opoffset:=1;
  194. taicpu(hp1).loadReg(opoffset+1,taicpu(p).oper[1]^.reg);
  195. if not(shiftmode in [SM_SXTX,SM_UXTX,SM_LSL]) then
  196. setsubreg(taicpu(hp1).oper[opoffset+1]^.reg,R_SUBD);
  197. taicpu(hp1).ops:=opoffset+3;
  198. shifterop_reset(so);
  199. so.shiftmode:=shiftmode;
  200. so.shiftimm:=0;
  201. taicpu(hp1).loadshifterop(opoffset+2,so);
  202. result:=RemoveCurrentP(p);
  203. end;
  204. end;
  205. {$endif AARCH64}
  206. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  207. Out Next: tai; const reg: TRegister): Boolean;
  208. var
  209. gniResult: Boolean;
  210. begin
  211. Next:=Current;
  212. Result := False;
  213. repeat
  214. gniResult:=GetNextInstruction(Next,Next);
  215. if gniResult and RegInInstruction(reg,Next) then
  216. { Found something }
  217. Exit(True);
  218. until not gniResult or
  219. not(cs_opt_level3 in current_settings.optimizerswitches) or
  220. (Next.typ<>ait_instruction) or
  221. is_calljmp(taicpu(Next).opcode)
  222. {$ifdef ARM}
  223. or RegModifiedByInstruction(NR_PC,Next)
  224. {$endif ARM}
  225. ;
  226. end;
  227. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  228. var
  229. alloc,
  230. dealloc : tai_regalloc;
  231. hp1 : tai;
  232. begin
  233. Result:=false;
  234. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  235. { We can't optimize if there is a shiftop }
  236. (taicpu(movp).ops=2) and
  237. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  238. { don't mess with moves to fp }
  239. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  240. { the destination register of the mov might not be used beween p and movp }
  241. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  242. {$ifdef ARM}
  243. { PC should be changed only by moves }
  244. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  245. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  246. (taicpu(p).opcode<>A_CBZ) and
  247. (taicpu(p).opcode<>A_CBNZ) and
  248. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  249. not (
  250. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  251. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  252. (current_settings.cputype < cpu_armv6)
  253. ) and
  254. {$endif ARM}
  255. { Take care to only do this for instructions which REALLY load to the first register.
  256. Otherwise
  257. str reg0, [reg1]
  258. mov reg2, reg0
  259. will be optimized to
  260. str reg2, [reg1]
  261. }
  262. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  263. begin
  264. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  265. if assigned(dealloc) then
  266. begin
  267. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  268. result:=true;
  269. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  270. and remove it if possible }
  271. asml.Remove(dealloc);
  272. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  273. if assigned(alloc) then
  274. begin
  275. asml.Remove(alloc);
  276. alloc.free;
  277. dealloc.free;
  278. end
  279. else
  280. asml.InsertAfter(dealloc,p);
  281. AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);
  282. { finally get rid of the mov }
  283. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  284. { Remove preindexing and postindexing for LDR in some cases.
  285. For example:
  286. ldr reg2,[reg1, xxx]!
  287. mov reg1,reg2
  288. must be translated to:
  289. ldr reg1,[reg1, xxx]
  290. Preindexing must be removed there, since the same register is used as the base and as the target.
  291. Such case is not allowed for ARM CPU and produces crash. }
  292. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  293. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  294. then
  295. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  296. asml.remove(movp);
  297. movp.free;
  298. end;
  299. end;
  300. end;
  301. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  302. var
  303. I: Integer;
  304. current_hp, next_hp: tai;
  305. LDRChange: Boolean;
  306. begin
  307. Result:=false;
  308. {
  309. change
  310. mov r1, r0
  311. add r1, r1, #1
  312. to
  313. add r1, r0, #1
  314. Todo: Make it work for mov+cmp too
  315. CAUTION! If this one is successful p might not be a mov instruction anymore!
  316. }
  317. if (taicpu(p).ops = 2) and
  318. (taicpu(p).oper[1]^.typ = top_reg) and
  319. (taicpu(p).oppostfix = PF_NONE) then
  320. begin
  321. if
  322. MatchInstruction(hp1, [A_ADD, A_ADC,
  323. {$ifdef ARM}
  324. A_RSB, A_RSC,
  325. {$endif ARM}
  326. A_SUB, A_SBC,
  327. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  328. [taicpu(p).condition], []) and
  329. { MOV and MVN might only have 2 ops }
  330. (taicpu(hp1).ops >= 2) and
  331. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  332. (taicpu(hp1).oper[1]^.typ = top_reg) and
  333. (
  334. (taicpu(hp1).ops = 2) or
  335. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  336. ) and
  337. {$ifdef AARCH64}
  338. (taicpu(p).oper[1]^.reg<>NR_SP) and
  339. { in this case you have to transform it to movk or the like }
  340. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  341. {$endif AARCH64}
  342. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  343. begin
  344. { When we get here we still don't know if the registers match }
  345. for I:=1 to 2 do
  346. {
  347. If the first loop was successful p will be replaced with hp1.
  348. The checks will still be ok, because all required information
  349. will also be in hp1 then.
  350. }
  351. if (taicpu(hp1).ops > I) and
  352. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  353. {$ifdef ARM}
  354. { prevent certain combinations on thumb(2), this is only a safe approximation }
  355. and (not(GenerateThumbCode or GenerateThumb2Code) or
  356. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  357. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  358. {$endif ARM}
  359. then
  360. begin
  361. DebugMsg('Peephole RedundantMovProcess done', hp1);
  362. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  363. if p<>hp1 then
  364. begin
  365. asml.remove(p);
  366. p.free;
  367. p:=hp1;
  368. Result:=true;
  369. end;
  370. end;
  371. if Result then Exit;
  372. end
  373. { Change: Change:
  374. mov r1, r0 mov r1, r0
  375. ... ...
  376. ldr/str r2, [r1, etc.] mov r2, r1
  377. To: To:
  378. ldr/str r2, [r0, etc.] mov r2, r0
  379. }
  380. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  381. {$ifdef ARM}
  382. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  383. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  384. { Thumb does not support references with base and index one being SP }
  385. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  386. {$endif ARM}
  387. {$ifdef AARCH64}
  388. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  389. {$endif AARCH64}
  390. then
  391. begin
  392. current_hp := p;
  393. TransferUsedRegs(TmpUsedRegs);
  394. { Search local instruction block }
  395. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  396. begin
  397. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  398. LDRChange := False;
  399. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  400. {$ifdef AARCH64}
  401. { If r0 is the zero register, then this sequence of instructions will cause
  402. an access violation, but that's better than an assembler error caused by
  403. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  404. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  405. {$endif AARCH64}
  406. then
  407. begin
  408. { Change the registers from r1 to r0 }
  409. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  410. {$ifdef ARM}
  411. { This optimisation conflicts with something and raises
  412. an access violation - needs further investigation. [Kit] }
  413. (taicpu(next_hp).opcode <> A_LDR) and
  414. {$endif ARM}
  415. { Don't mess around with the base register if the
  416. reference is pre- or post-indexed }
  417. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  418. begin
  419. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  420. LDRChange := True;
  421. end;
  422. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  423. begin
  424. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  425. LDRChange := True;
  426. end;
  427. if LDRChange then
  428. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  429. { Drop out if we're dealing with pre-indexed references }
  430. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  431. (
  432. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  433. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  434. ) then
  435. begin
  436. { Remember to update register allocations }
  437. if LDRChange then
  438. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  439. Break;
  440. end;
  441. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  442. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  443. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  444. begin
  445. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  446. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  447. LDRChange := True;
  448. end;
  449. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  450. begin
  451. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  452. if (taicpu(p).oppostfix = PF_None) and
  453. (
  454. (
  455. (taicpu(next_hp).opcode = A_LDR) and
  456. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  457. ) or
  458. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  459. ) and
  460. { Double-check to see if the old registers were actually
  461. changed (e.g. if the super registers matched, but not
  462. the sizes, they won't be changed). }
  463. (
  464. (taicpu(next_hp).opcode = A_LDR) or
  465. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  466. ) and
  467. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  468. begin
  469. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  470. RemoveCurrentP(p);
  471. Result := True;
  472. Exit;
  473. end;
  474. end;
  475. end
  476. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  477. (taicpu(next_hp).ops = 2) then
  478. begin
  479. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  480. begin
  481. { mov r0,r1; mov r1,r1 - remove second MOV here so
  482. so "RedundantMovProcess 2b" doesn't get erroneously
  483. applied }
  484. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(next_hp).oper[1]^.reg) then
  485. begin
  486. DebugMsg(SPeepholeOptimization + 'Mov2None 2a done', next_hp);
  487. if (next_hp = hp1) then
  488. { Don't let hp1 become a dangling pointer }
  489. hp1 := nil;
  490. asml.Remove(next_hp);
  491. next_hp.Free;
  492. Continue;
  493. end;
  494. { Found another mov that writes entirely to the register }
  495. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  496. begin
  497. { Register was used beforehand }
  498. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  499. begin
  500. { This MOV is exactly the same as the first one.
  501. Since none of the registers have changed value
  502. at this point, we can remove it. }
  503. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  504. if (next_hp = hp1) then
  505. { Don't let hp1 become a dangling pointer }
  506. hp1 := nil;
  507. asml.Remove(next_hp);
  508. next_hp.Free;
  509. { We still have the original p, so we can continue optimising;
  510. if it was -O2 or below, this instruction appeared immediately
  511. after the first MOV, so we're technically not looking more
  512. than one instruction ahead after it's removed! [Kit] }
  513. Continue;
  514. end
  515. else
  516. { Register changes value - drop out }
  517. Break;
  518. end;
  519. { We can delete the first MOV (only if the second MOV is unconditional) }
  520. {$ifdef ARM}
  521. if (taicpu(p).oppostfix = PF_None) and
  522. (taicpu(next_hp).condition = C_None) then
  523. {$endif ARM}
  524. begin
  525. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  526. RemoveCurrentP(p);
  527. Result := True;
  528. end;
  529. Exit;
  530. end
  531. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  532. begin
  533. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  534. { Be careful - if the entire register is not used, removing this
  535. instruction will leave the unused part uninitialised }
  536. {$ifdef AARCH64}
  537. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  538. {$endif AARCH64}
  539. then
  540. begin
  541. { Instruction will become mov r1,r1 }
  542. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  543. { Allocate r1 between the instructions; not doing
  544. so may cause problems when removing superfluous
  545. MOVs later (i38055) }
  546. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  547. if (next_hp = hp1) then
  548. { Don't let hp1 become a dangling pointer }
  549. hp1 := nil;
  550. asml.Remove(next_hp);
  551. next_hp.Free;
  552. Continue;
  553. end;
  554. { Change the old register (checking the first operand again
  555. forces it to be left alone if the full register is not
  556. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  557. {$ifdef AARCH64}
  558. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  559. {$endif AARCH64}
  560. begin
  561. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  562. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  563. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  564. { If this was the only reference to the old register,
  565. then we can remove the original MOV now }
  566. if (taicpu(p).oppostfix = PF_None) and
  567. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  568. remove if the register was apparently not allocated when its value is
  569. first set at the MOV command (this is especially true for the stack
  570. register). [Kit] }
  571. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  572. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  573. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  574. begin
  575. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  576. RemoveCurrentP(p);
  577. Result := True;
  578. Exit;
  579. end;
  580. end;
  581. end;
  582. end;
  583. { On low optimisation settions, don't search more than one instruction ahead }
  584. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  585. { Stop at procedure calls and jumps }
  586. is_calljmp(taicpu(next_hp).opcode) or
  587. { If the read register has changed value, or the MOV
  588. destination register has been used, drop out }
  589. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  590. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  591. Break;
  592. current_hp := next_hp;
  593. end;
  594. end;
  595. end;
  596. end;
  597. function TARMAsmOptimizer.DoXTArithOp(var p: tai; hp1: tai): Boolean;
  598. var
  599. hp2: tai;
  600. ConstLimit: TCGInt;
  601. ValidPostFixes: TOpPostFixes;
  602. FirstCode, SecondCode, ThirdCode, FourthCode: TAsmOp;
  603. begin
  604. Result := False;
  605. { Change:
  606. uxtb/h reg1,reg1
  607. (operation on reg1 with immediate operand where the upper 24/56
  608. bits don't affect the state of the first 8 bits )
  609. uxtb/h reg1,reg1
  610. Remove first uxtb/h
  611. }
  612. case taicpu(p).opcode of
  613. A_UXTB,
  614. A_SXTB:
  615. begin
  616. ConstLimit := $FF;
  617. ValidPostFixes := [PF_B];
  618. FirstCode := A_UXTB;
  619. SecondCode := A_SXTB;
  620. ThirdCode := A_UXTB; { Used to indicate no other valid codes }
  621. FourthCode := A_SXTB;
  622. end;
  623. A_UXTH,
  624. A_SXTH:
  625. begin
  626. ConstLimit := $FFFF;
  627. ValidPostFixes := [PF_B, PF_H];
  628. FirstCode := A_UXTH;
  629. SecondCode := A_SXTH;
  630. ThirdCode := A_UXTB;
  631. FourthCode := A_SXTB;
  632. end;
  633. else
  634. InternalError(2024051401);
  635. end;
  636. {$ifndef AARCH64}
  637. { Regular ARM doesn't have the multi-instruction MatchInstruction available }
  638. if (hp1.typ = ait_instruction) and (taicpu(hp1).oppostfix = PF_None) then
  639. case taicpu(hp1).opcode of
  640. A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN:
  641. {$endif AARCH64}
  642. if
  643. (taicpu(p).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  644. {$ifdef AARCH64}
  645. MatchInstruction(hp1, [A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON], [PF_None]) and
  646. {$endif AARCH64}
  647. (taicpu(hp1).condition = C_None) and
  648. (taicpu(hp1).ops = 3) and
  649. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  650. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  651. (taicpu(hp1).oper[2]^.typ = top_const) and
  652. (
  653. (
  654. { If the AND immediate is 8-bit, then this essentially performs
  655. the functionality of the second UXTB and so its presence is
  656. not required }
  657. (taicpu(hp1).opcode = A_AND) and
  658. (taicpu(hp1).oper[2]^.val >= 0) and
  659. (taicpu(hp1).oper[2]^.val <= ConstLimit)
  660. ) or
  661. (
  662. GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  663. (hp2.typ = ait_instruction) and
  664. (taicpu(hp2).ops = 2) and
  665. (taicpu(hp2).condition = C_None) and
  666. (
  667. (
  668. (taicpu(hp2).opcode in [FirstCode, SecondCode, ThirdCode, FourthCode]) and
  669. (taicpu(hp2).oppostfix = PF_None) and
  670. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg)
  671. { Destination is allowed to be different in this case, but
  672. only if the source is no longer in use (it being the same as
  673. the source is covered by RegEndOfLife as well) }
  674. ) or
  675. (
  676. { STRB essentially fills the same role as the second UXTB
  677. as long as the register is deallocated afterwards }
  678. MatchInstruction(hp2, A_STR, [C_None], ValidPostFixes) and
  679. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  680. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp2).oper[1]^)
  681. )
  682. ) and
  683. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp2))
  684. )
  685. ) then
  686. begin
  687. DebugMsg(SPeepholeOptimization + 'S/Uxtb/hArithUxtb/h2ArithS/Uxtb/h done', p);
  688. Result := RemoveCurrentP(p);
  689. { Simplify bitwise constants if able }
  690. {$ifdef AARCH64}
  691. if (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON]) and
  692. is_shifter_const(taicpu(hp1).oper[2]^.val and ConstLimit, OS_32) then
  693. {$else AARCH64}
  694. if (
  695. (ConstLimit = $FF) or
  696. (taicpu(hp1).oper[2]^.val <= $100)
  697. ) and
  698. (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN]) then
  699. {$endif AARCH64}
  700. taicpu(hp1).oper[2]^.val := taicpu(hp1).oper[2]^.val and ConstLimit;
  701. end;
  702. {$ifndef AARCH64}
  703. else
  704. ;
  705. end;
  706. {$endif not AARCH64}
  707. end;
  708. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  709. var
  710. hp1, hp2: tai;
  711. so: tshifterop;
  712. begin
  713. Result:=false;
  714. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  715. (taicpu(p).oppostfix = PF_None) and
  716. (taicpu(p).ops = 2) then
  717. begin
  718. if (taicpu(p).condition = C_None) then
  719. begin
  720. {
  721. change
  722. uxtb reg2,reg1
  723. strb reg2,[...]
  724. dealloc reg2
  725. to
  726. strb reg1,[...]
  727. }
  728. if MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  729. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  730. { the reference in strb might not use reg2 }
  731. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  732. { reg1 might not be modified inbetween }
  733. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  734. begin
  735. DebugMsg('Peephole UxtbStrb2Strb done', p);
  736. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  737. result:=RemoveCurrentP(p);
  738. end
  739. {
  740. change
  741. uxtb reg2,reg1
  742. uxth reg3,reg2
  743. dealloc reg2
  744. to
  745. uxtb reg3,reg1
  746. }
  747. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  748. (taicpu(hp1).ops = 2) and
  749. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  750. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  751. { reg1 might not be modified inbetween }
  752. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  753. begin
  754. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  755. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  756. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  757. asml.remove(hp1);
  758. hp1.free;
  759. result:=true;
  760. end
  761. {
  762. change
  763. uxtb reg2,reg1
  764. uxtb reg3,reg2
  765. dealloc reg2
  766. to
  767. uxtb reg3,reg1
  768. }
  769. else if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  770. (taicpu(hp1).ops = 2) and
  771. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  772. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  773. { reg1 might not be modified inbetween }
  774. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  775. begin
  776. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  777. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  778. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  779. asml.remove(hp1);
  780. hp1.free;
  781. result:=true;
  782. end
  783. {
  784. change
  785. uxtb reg2,reg1
  786. and reg3,reg2,#0x*FF
  787. dealloc reg2
  788. to
  789. uxtb reg3,reg1
  790. }
  791. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  792. (taicpu(hp1).ops=3) and
  793. (taicpu(hp1).oper[2]^.typ=top_const) and
  794. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  795. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  796. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  797. { reg1 might not be modified inbetween }
  798. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  799. begin
  800. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  801. taicpu(hp1).opcode:=A_UXTB;
  802. taicpu(hp1).ops:=2;
  803. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  804. result:=RemoveCurrentP(p);
  805. end
  806. else if DoXTArithOp(p, hp1) then
  807. Result:=true
  808. {$ifdef AARCH64}
  809. else if USxtOp2Op(p,hp1,SM_UXTB) then
  810. Result:=true
  811. {$endif AARCH64}
  812. end;
  813. { Condition doesn't have to be C_None }
  814. if not Result and
  815. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  816. Result:=true;
  817. end;
  818. end;
  819. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  820. var
  821. hp1: tai;
  822. so: tshifterop;
  823. begin
  824. Result:=false;
  825. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  826. (taicpu(p).oppostfix = PF_None) and
  827. (taicpu(p).ops = 2) then
  828. begin
  829. if (taicpu(p).condition = C_None) then
  830. begin
  831. {
  832. change
  833. uxth reg2,reg1
  834. strh reg2,[...]
  835. dealloc reg2
  836. to
  837. strh reg1,[...]
  838. }
  839. if MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  840. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  841. { the reference in strb might not use reg2 }
  842. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  843. { reg1 might not be modified inbetween }
  844. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  845. begin
  846. DebugMsg('Peephole UXTHStrh2Strh done', p);
  847. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  848. result:=RemoveCurrentP(p);
  849. end
  850. {
  851. change
  852. uxth reg2,reg1
  853. uxth reg3,reg2
  854. dealloc reg2
  855. to
  856. uxth reg3,reg1
  857. }
  858. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  859. (taicpu(hp1).ops=2) and
  860. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  861. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  862. { reg1 might not be modified inbetween }
  863. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  864. begin
  865. DebugMsg('Peephole UxthUxth2Uxth done', p);
  866. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  867. taicpu(hp1).opcode:=A_UXTH;
  868. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  869. result:=RemoveCurrentP(p);
  870. end
  871. {
  872. change
  873. uxth reg2,reg1
  874. and reg3,reg2,#65535
  875. dealloc reg2
  876. to
  877. uxth reg3,reg1
  878. }
  879. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  880. (taicpu(hp1).ops=3) and
  881. (taicpu(hp1).oper[2]^.typ=top_const) and
  882. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  883. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  884. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  885. { reg1 might not be modified inbetween }
  886. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  887. begin
  888. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  889. taicpu(hp1).opcode:=A_UXTH;
  890. taicpu(hp1).ops:=2;
  891. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  892. result:=RemoveCurrentP(p);
  893. end
  894. else if DoXTArithOp(p, hp1) then
  895. Result:=true
  896. {$ifdef AARCH64}
  897. else if USxtOp2Op(p,hp1,SM_UXTH) then
  898. Result:=true
  899. {$endif AARCH64}
  900. end;
  901. { Condition doesn't have to be C_None }
  902. if not Result and
  903. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  904. Result:=true;
  905. end;
  906. end;
  907. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  908. var
  909. hp1, hp2: tai;
  910. so: tshifterop;
  911. begin
  912. Result:=false;
  913. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  914. (taicpu(p).oppostfix = PF_None) and
  915. (taicpu(p).ops = 2) then
  916. begin
  917. if (taicpu(p).condition = C_None) then
  918. begin
  919. {
  920. change
  921. sxtb reg2,reg1
  922. strb reg2,[...]
  923. dealloc reg2
  924. to
  925. strb reg1,[...]
  926. }
  927. if MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  928. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  929. { the reference in strb might not use reg2 }
  930. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  931. { reg1 might not be modified inbetween }
  932. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  933. begin
  934. DebugMsg('Peephole SxtbStrb2Strb done', p);
  935. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  936. result:=RemoveCurrentP(p);
  937. end
  938. {
  939. change
  940. sxtb reg2,reg1
  941. sxth reg3,reg2
  942. dealloc reg2
  943. to
  944. sxtb reg3,reg1
  945. }
  946. else if MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  947. (taicpu(hp1).ops = 2) and
  948. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  949. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  950. { reg1 might not be modified inbetween }
  951. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  952. begin
  953. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  954. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  955. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  956. asml.remove(hp1);
  957. hp1.free;
  958. result:=true;
  959. end
  960. {
  961. change
  962. sxtb reg2,reg1
  963. sxtb reg3,reg2
  964. dealloc reg2
  965. to
  966. uxtb reg3,reg1
  967. }
  968. else if MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  969. (taicpu(hp1).ops = 2) and
  970. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  971. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  972. { reg1 might not be modified inbetween }
  973. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  974. begin
  975. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  976. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  977. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  978. asml.remove(hp1);
  979. hp1.free;
  980. result:=true;
  981. end
  982. {
  983. change
  984. sxtb reg2,reg1
  985. and reg3,reg2,#0x*FF
  986. dealloc reg2
  987. to
  988. uxtb reg3,reg1
  989. }
  990. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  991. (taicpu(hp1).ops=3) and
  992. (taicpu(hp1).oper[2]^.typ=top_const) and
  993. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  994. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  995. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  996. { reg1 might not be modified inbetween }
  997. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  998. begin
  999. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  1000. taicpu(hp1).opcode:=A_UXTB;
  1001. taicpu(hp1).ops:=2;
  1002. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1003. result:=RemoveCurrentP(p);
  1004. end
  1005. else if DoXTArithOp(p, hp1) then
  1006. Result:=true
  1007. {$ifdef AARCH64}
  1008. else if USxtOp2Op(p,hp1,SM_SXTB) then
  1009. Result:=true
  1010. {$endif AARCH64}
  1011. end;
  1012. { Condition doesn't have to be C_None }
  1013. if not Result and
  1014. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  1015. Result:=true;
  1016. end;
  1017. end;
  1018. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  1019. var
  1020. hp1: tai;
  1021. so: tshifterop;
  1022. begin
  1023. Result:=false;
  1024. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1025. (taicpu(p).oppostfix = PF_None) and
  1026. (taicpu(p).ops = 2) then
  1027. begin
  1028. if (taicpu(p).condition = C_None) then
  1029. begin
  1030. {
  1031. change
  1032. sxth reg2,reg1
  1033. strh reg2,[...]
  1034. dealloc reg2
  1035. to
  1036. strh reg1,[...]
  1037. }
  1038. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1039. (taicpu(p).ops=2) and
  1040. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1041. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1042. { the reference in strb might not use reg2 }
  1043. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1044. { reg1 might not be modified inbetween }
  1045. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1046. begin
  1047. DebugMsg('Peephole SxthStrh2Strh done', p);
  1048. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1049. result:=RemoveCurrentP(p);
  1050. end
  1051. {
  1052. change
  1053. sxth reg2,reg1
  1054. sxth reg3,reg2
  1055. dealloc reg2
  1056. to
  1057. sxth reg3,reg1
  1058. }
  1059. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1060. (taicpu(p).ops=2) and
  1061. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  1062. (taicpu(hp1).ops=2) and
  1063. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1064. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1065. { reg1 might not be modified inbetween }
  1066. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1067. begin
  1068. DebugMsg('Peephole SxthSxth2Sxth done', p);
  1069. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1070. taicpu(hp1).opcode:=A_SXTH;
  1071. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1072. result:=RemoveCurrentP(p);
  1073. end
  1074. {$ifdef AARCH64}
  1075. {
  1076. change
  1077. sxth reg2,reg1
  1078. sxtw reg3,reg2
  1079. dealloc reg2
  1080. to
  1081. sxth reg3,reg1
  1082. }
  1083. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1084. (taicpu(p).ops=2) and
  1085. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  1086. (taicpu(hp1).ops=2) and
  1087. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1088. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1089. { reg1 might not be modified inbetween }
  1090. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1091. begin
  1092. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  1093. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1094. taicpu(hp1).opcode:=A_SXTH;
  1095. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1096. result:=RemoveCurrentP(p);
  1097. end
  1098. {$endif AARCH64}
  1099. {
  1100. change
  1101. sxth reg2,reg1
  1102. and reg3,reg2,#65535
  1103. dealloc reg2
  1104. to
  1105. uxth reg3,reg1
  1106. }
  1107. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1108. (taicpu(p).ops=2) and
  1109. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1110. (taicpu(hp1).ops=3) and
  1111. (taicpu(hp1).oper[2]^.typ=top_const) and
  1112. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  1113. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1114. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1115. { reg1 might not be modified inbetween }
  1116. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1117. begin
  1118. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  1119. taicpu(hp1).opcode:=A_UXTH;
  1120. taicpu(hp1).ops:=2;
  1121. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1122. result:=RemoveCurrentP(p);
  1123. end
  1124. else if DoXTArithOp(p, hp1) then
  1125. Result:=true
  1126. {$ifdef AARCH64}
  1127. else if USxtOp2Op(p,hp1,SM_SXTH) then
  1128. Result:=true
  1129. {$endif AARCH64}
  1130. end;
  1131. { Condition doesn't have to be C_None }
  1132. if not Result and
  1133. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  1134. Result:=true;
  1135. end;
  1136. end;
  1137. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  1138. begin
  1139. Result := False;
  1140. { Convert:
  1141. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1142. To:
  1143. mov reg1,reg2
  1144. }
  1145. if (taicpu(p).oper[2]^.val = 0) and
  1146. {$ifdef AARCH64}
  1147. (
  1148. (
  1149. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1150. (taicpu(p).oper[3]^.val = 64)
  1151. ) or
  1152. (
  1153. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1154. (taicpu(p).oper[3]^.val = 32)
  1155. )
  1156. )
  1157. {$else AARCH64}
  1158. (taicpu(p).oper[3]^.val = 32)
  1159. {$endif AARCH64}
  1160. then
  1161. begin
  1162. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1163. taicpu(p).opcode := A_MOV;
  1164. taicpu(p).ops := 2;
  1165. taicpu(p).clearop(2);
  1166. taicpu(p).clearop(3);
  1167. Result := True;
  1168. Exit;
  1169. end;
  1170. end;
  1171. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1172. var
  1173. hp1: tai;
  1174. Reference: TReference;
  1175. NewOp: TAsmOp;
  1176. begin
  1177. Result := False;
  1178. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1179. Exit;
  1180. Reference := taicpu(p).oper[1]^.ref^;
  1181. if (Reference.addressmode = AM_OFFSET) and
  1182. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1183. { Delay calling GetNextInstruction for as long as possible }
  1184. GetNextInstruction(p, hp1) and
  1185. (hp1.typ = ait_instruction) and
  1186. (taicpu(hp1).condition = C_None) and
  1187. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1188. begin
  1189. if (taicpu(hp1).opcode = A_STR) and
  1190. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1191. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1192. begin
  1193. { With:
  1194. ldr reg1,[ref]
  1195. str reg2,[ref]
  1196. If reg1 = reg2, Remove str
  1197. }
  1198. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1199. begin
  1200. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1201. RemoveInstruction(hp1);
  1202. Result := True;
  1203. Exit;
  1204. end;
  1205. end
  1206. else if (taicpu(hp1).opcode = A_LDR) and
  1207. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1208. begin
  1209. { With:
  1210. ldr reg1,[ref]
  1211. ldr reg2,[ref]
  1212. If reg1 = reg2, delete the second ldr
  1213. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1214. a dependency, but it will likely open up new optimisations, so
  1215. do it for now and handle any new dependencies later.
  1216. }
  1217. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1218. begin
  1219. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1220. RemoveInstruction(hp1);
  1221. Result := True;
  1222. Exit;
  1223. end
  1224. else if
  1225. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1226. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1227. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1228. begin
  1229. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1230. taicpu(hp1).opcode := A_MOV;
  1231. taicpu(hp1).oppostfix := PF_None;
  1232. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1233. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1234. Result := True;
  1235. Exit;
  1236. end;
  1237. end;
  1238. end;
  1239. end;
  1240. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1241. var
  1242. hp1: tai;
  1243. Reference: TReference;
  1244. SizeMismatch: Boolean;
  1245. SrcReg, DstReg: TRegister;
  1246. NewOp: TAsmOp;
  1247. begin
  1248. Result := False;
  1249. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1250. Exit;
  1251. Reference := taicpu(p).oper[1]^.ref^;
  1252. if (Reference.addressmode = AM_OFFSET) and
  1253. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1254. { Delay calling GetNextInstruction for as long as possible }
  1255. GetNextInstruction(p, hp1) and
  1256. (hp1.typ = ait_instruction) and
  1257. (taicpu(hp1).condition = C_None) and
  1258. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1259. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1260. begin
  1261. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1262. SrcReg := taicpu(p).oper[0]^.reg;
  1263. DstReg := taicpu(hp1).oper[0]^.reg;
  1264. if (taicpu(hp1).opcode = A_LDR) and
  1265. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1266. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1267. (
  1268. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1269. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1270. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1271. {$ifdef AARCH64}
  1272. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1273. {$endif AARCH64}
  1274. ) then
  1275. begin
  1276. { With:
  1277. str reg1,[ref]
  1278. ldr reg2,[ref]
  1279. If reg1 = reg2, Remove ldr.
  1280. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1281. }
  1282. if (SrcReg = DstReg) and
  1283. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1284. (taicpu(p).oppostfix=PF_None) then
  1285. begin
  1286. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1287. RemoveInstruction(hp1);
  1288. Result := True;
  1289. Exit;
  1290. end
  1291. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1292. (getregtype(DstReg) = R_INTREGISTER) and
  1293. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1294. begin
  1295. NewOp:=A_NONE;
  1296. if taicpu(hp1).oppostfix=PF_None then
  1297. NewOp:=A_MOV
  1298. else
  1299. {$ifdef ARM}
  1300. if (current_settings.cputype < cpu_armv6) then
  1301. begin
  1302. { The zero- and sign-extension operations were only
  1303. introduced under ARMv6 }
  1304. case taicpu(hp1).oppostfix of
  1305. PF_B:
  1306. begin
  1307. { The if-block afterwards will set the middle operand to the correct register }
  1308. taicpu(hp1).allocate_oper(3);
  1309. taicpu(hp1).ops := 3;
  1310. taicpu(hp1).loadconst(2, $FF);
  1311. NewOp := A_AND;
  1312. end;
  1313. PF_H:
  1314. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1315. PF_SB,
  1316. PF_SH:
  1317. { Do nothing - can't easily encode sign-extensions };
  1318. else
  1319. InternalError(2021043002);
  1320. end;
  1321. end
  1322. else
  1323. {$endif ARM}
  1324. case taicpu(hp1).oppostfix of
  1325. PF_B:
  1326. NewOp := A_UXTB;
  1327. PF_SB:
  1328. NewOp := A_SXTB;
  1329. PF_H:
  1330. NewOp := A_UXTH;
  1331. PF_SH:
  1332. NewOp := A_SXTH;
  1333. {$ifdef AARCH64}
  1334. PF_SW:
  1335. NewOp := A_SXTW;
  1336. PF_W:
  1337. NewOp := A_MOV;
  1338. {$endif AARCH64}
  1339. else
  1340. InternalError(2021043001);
  1341. end;
  1342. if (NewOp<>A_None) then
  1343. begin
  1344. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1345. taicpu(hp1).oppostfix := PF_None;
  1346. taicpu(hp1).opcode := NewOp;
  1347. taicpu(hp1).loadreg(1, SrcReg);
  1348. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1349. Result := True;
  1350. Exit;
  1351. end;
  1352. end
  1353. end
  1354. else if (taicpu(hp1).opcode = A_STR) and
  1355. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1356. begin
  1357. { With:
  1358. str reg1,[ref]
  1359. str reg2,[ref]
  1360. If reg1 <> reg2, delete the first str
  1361. IF reg1 = reg2, delete the second str
  1362. }
  1363. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1364. begin
  1365. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1366. RemoveInstruction(hp1);
  1367. Result := True;
  1368. Exit;
  1369. end
  1370. else if
  1371. { Registers same byte size? }
  1372. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1373. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1374. begin
  1375. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1376. RemoveCurrentP(p, hp1);
  1377. Result := True;
  1378. Exit;
  1379. end;
  1380. end;
  1381. end;
  1382. end;
  1383. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1384. var
  1385. hp1, hp2: tai;
  1386. i: longint;
  1387. begin
  1388. Result:=false;
  1389. {
  1390. optimize
  1391. and reg2,reg1,const1
  1392. ...
  1393. }
  1394. if (taicpu(p).ops>2) and
  1395. (taicpu(p).oper[1]^.typ = top_reg) and
  1396. (taicpu(p).oper[2]^.typ = top_const) then
  1397. begin
  1398. {
  1399. change
  1400. and reg2,reg1,const1
  1401. ...
  1402. and reg3,reg2,const2
  1403. to
  1404. and reg3,reg1,(const1 and const2)
  1405. }
  1406. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1407. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1408. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1409. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1410. (taicpu(hp1).oper[2]^.typ = top_const)
  1411. {$ifdef AARCH64}
  1412. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1413. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1414. ) or
  1415. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1416. {$endif AARCH64}
  1417. then
  1418. begin
  1419. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1420. begin
  1421. DebugMsg('Peephole AndAnd2And done', p);
  1422. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1423. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1424. begin
  1425. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1426. taicpu(p).opcode:=A_MOV;
  1427. taicpu(p).ops:=2;
  1428. taicpu(p).loadConst(1,0);
  1429. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1430. end
  1431. else
  1432. begin
  1433. DebugMsg('Peephole AndAnd2And 1 done', p);
  1434. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1435. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1436. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1437. end;
  1438. asml.remove(hp1);
  1439. hp1.free;
  1440. Result:=true;
  1441. exit;
  1442. end
  1443. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1444. begin
  1445. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1446. begin
  1447. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1448. taicpu(hp1).opcode:=A_MOV;
  1449. taicpu(hp1).loadConst(1,0);
  1450. taicpu(hp1).ops:=2;
  1451. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1452. end
  1453. else
  1454. begin
  1455. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1456. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1457. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1458. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1459. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1460. end;
  1461. GetNextInstruction(p, hp1);
  1462. RemoveCurrentP(p);
  1463. p:=hp1;
  1464. Result:=true;
  1465. exit;
  1466. end;
  1467. end
  1468. {
  1469. change
  1470. and reg2,reg1,$xxxxxxFF
  1471. strb reg2,[...]
  1472. dealloc reg2
  1473. to
  1474. strb reg1,[...]
  1475. }
  1476. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1477. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1478. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1479. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1480. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1481. { the reference in strb might not use reg2 }
  1482. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1483. { reg1 might not be modified inbetween }
  1484. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1485. begin
  1486. DebugMsg('Peephole AndStrb2Strb done', p);
  1487. {$ifdef AARCH64}
  1488. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1489. {$else AARCH64}
  1490. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1491. {$endif AARCH64}
  1492. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1493. RemoveCurrentP(p);
  1494. result:=true;
  1495. exit;
  1496. end
  1497. {
  1498. change
  1499. and reg2,reg1,255
  1500. uxtb/uxth reg3,reg2
  1501. dealloc reg2
  1502. to
  1503. and reg3,reg1,x
  1504. }
  1505. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1506. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1507. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1508. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1509. (taicpu(hp1).ops = 2) and
  1510. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1511. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1512. { reg1 might not be modified inbetween }
  1513. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1514. begin
  1515. DebugMsg('Peephole AndUxt2And done', p);
  1516. taicpu(hp1).opcode:=A_AND;
  1517. taicpu(hp1).ops:=3;
  1518. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1519. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1520. GetNextInstruction(p,hp1);
  1521. asml.remove(p);
  1522. p.Free;
  1523. p:=hp1;
  1524. result:=true;
  1525. exit;
  1526. end
  1527. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1528. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1529. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1530. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1531. (taicpu(hp1).ops = 2) and
  1532. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1533. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1534. { reg1 might not be modified inbetween }
  1535. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1536. begin
  1537. DebugMsg('Peephole AndSxt2And done', p);
  1538. taicpu(hp1).opcode:=A_AND;
  1539. taicpu(hp1).ops:=3;
  1540. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1541. setsubreg(taicpu(hp1).oper[1]^.reg,getsubreg(taicpu(hp1).oper[0]^.reg));
  1542. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1543. GetNextInstruction(p,hp1);
  1544. asml.remove(p);
  1545. p.Free;
  1546. p:=hp1;
  1547. result:=true;
  1548. exit;
  1549. end
  1550. {
  1551. from
  1552. and reg1,reg0,2^n-1
  1553. mov reg2,reg1, lsl imm1
  1554. (mov reg3,reg2, lsr/asr imm1)
  1555. remove either the and or the lsl/xsr sequence if possible
  1556. }
  1557. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1558. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1559. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1560. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1561. (taicpu(hp1).ops=3) and
  1562. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1563. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1564. {$ifdef ARM}
  1565. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1566. {$endif ARM}
  1567. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1568. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1569. begin
  1570. {
  1571. and reg1,reg0,2^n-1
  1572. mov reg2,reg1, lsl imm1
  1573. mov reg3,reg2, lsr/asr imm1
  1574. =>
  1575. and reg1,reg0,2^n-1
  1576. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1577. }
  1578. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1579. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1580. (taicpu(hp2).ops=3) and
  1581. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1582. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1583. {$ifdef ARM}
  1584. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1585. {$endif ARM}
  1586. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1587. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1588. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1589. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1590. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1591. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1592. begin
  1593. DebugMsg('Peephole AndLslXsr2And done', p);
  1594. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1595. asml.Remove(hp1);
  1596. asml.Remove(hp2);
  1597. hp1.free;
  1598. hp2.free;
  1599. result:=true;
  1600. exit;
  1601. end
  1602. {
  1603. and reg1,reg0,2^n-1
  1604. mov reg2,reg1, lsl imm1
  1605. =>
  1606. mov reg2,reg0, lsl imm1
  1607. if imm1>i
  1608. }
  1609. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1610. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1611. begin
  1612. DebugMsg('Peephole AndLsl2Lsl done', p);
  1613. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1614. GetNextInstruction(p, hp1);
  1615. asml.Remove(p);
  1616. p.free;
  1617. p:=hp1;
  1618. result:=true;
  1619. exit;
  1620. end
  1621. end;
  1622. end;
  1623. {
  1624. change
  1625. and reg1, ...
  1626. mov reg2, reg1
  1627. to
  1628. and reg2, ...
  1629. }
  1630. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1631. (taicpu(p).ops>=3) and
  1632. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1633. Result:=true;
  1634. end;
  1635. function TARMAsmOptimizer.OptPass2Bitwise(var p: tai): Boolean;
  1636. var
  1637. hp1, hp2: tai;
  1638. WorkingReg: TRegister;
  1639. begin
  1640. Result := False;
  1641. {
  1642. change
  1643. and/bic reg1, ...
  1644. ...
  1645. cmp reg1, #0
  1646. b<ne/eq> @Lbl
  1647. to
  1648. ands/bics reg1, ...
  1649. Also:
  1650. and/bic reg1, ...
  1651. ...
  1652. cmp reg1, #0
  1653. (reg1 end of life)
  1654. b<ne/eq> @Lbl
  1655. to
  1656. tst reg1, ...
  1657. or
  1658. bics xzr, reg1, ... under AArch64
  1659. For ARM, also include OR, EOR and ORN
  1660. }
  1661. if (taicpu(p).condition = C_None) and
  1662. (taicpu(p).ops>=3) and
  1663. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1664. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  1665. MatchOperand(taicpu(hp1).oper[1]^, 0) and
  1666. {$ifdef AARCH64}
  1667. (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
  1668. (
  1669. (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
  1670. or
  1671. (
  1672. (taicpu(p).oper[2]^.typ = top_const) and
  1673. (taicpu(p).oper[2]^.val >= 0) and
  1674. (taicpu(p).oper[2]^.val <= $FFFFFFFF)
  1675. )
  1676. ) and
  1677. {$else AARCH64}
  1678. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1679. {$endif AARCH64}
  1680. not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
  1681. GetNextInstruction(hp1, hp2) then
  1682. begin
  1683. if MatchInstruction(hp2, [A_B, A_CMP, A_CMN, A_TST{$ifndef AARCH64}, A_TEQ{$endif not AARCH64}], [C_EQ, C_NE], [PF_None]) then
  1684. begin
  1685. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  1686. WorkingReg := taicpu(p).oper[0]^.reg;
  1687. if
  1688. {$ifndef AARCH64}
  1689. (taicpu(p).opcode = A_AND) and
  1690. {$endif AARCH64}
  1691. RegEndOfLife(WorkingReg, taicpu(hp1)) then
  1692. begin
  1693. {$ifdef AARCH64}
  1694. if (taicpu(p).opcode <> A_AND) then
  1695. begin
  1696. setsupreg(taicpu(p).oper[0]^.reg, RS_XZR);
  1697. taicpu(p).oppostfix := PF_S;
  1698. DebugMsg(SPeepholeOptimization + 'BIC; CMP -> BICS ' + gas_regname(taicpu(p).oper[0]^.reg), p);
  1699. end
  1700. else
  1701. {$endif AARCH64}
  1702. begin
  1703. taicpu(p).opcode := A_TST;
  1704. taicpu(p).oppostfix := PF_None;
  1705. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  1706. taicpu(p).loadoper(1, taicpu(p).oper[2]^);
  1707. if (taicpu(p).ops = 4) then
  1708. begin
  1709. { Make sure any shifter operator is also transferred }
  1710. taicpu(p).loadshifterop(2, taicpu(p).oper[3]^.shifterop^);
  1711. taicpu(p).ops := 3;
  1712. end
  1713. else
  1714. taicpu(p).ops := 2;
  1715. DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
  1716. end;
  1717. end
  1718. else
  1719. begin
  1720. taicpu(p).oppostfix := PF_S;
  1721. {$ifdef AARCH64}
  1722. DebugMsg(SPeepholeOptimization + 'AND/BIC; CMP -> ANDS/BICS', p);
  1723. {$else AARCH64}
  1724. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> Bitwise+S', p);
  1725. {$endif AARCH64}
  1726. end;
  1727. RemoveInstruction(hp1);
  1728. { If a temporary register was used for and/cmp before, we might be
  1729. able to deallocate the register so it can be used for other
  1730. optimisations later }
  1731. if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
  1732. ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);
  1733. Result := True;
  1734. Exit;
  1735. end
  1736. else if
  1737. (hp2.typ = ait_label) or
  1738. { Conditional comparison instructions have already been covered }
  1739. RegModifiedByInstruction(NR_DEFAULTFLAGS, hp2) then
  1740. begin
  1741. { The comparison is a null operation }
  1742. if RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1743. begin
  1744. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> nop', p);
  1745. RemoveInstruction(hp1);
  1746. RemoveCurrentP(p);
  1747. end
  1748. else
  1749. begin
  1750. DebugMsg(SPeepholeOptimization + 'CMP/BIC -> nop', hp1);
  1751. RemoveInstruction(hp1);
  1752. end;
  1753. Result := True;
  1754. Exit;
  1755. end;
  1756. end;
  1757. end;
  1758. function TARMAsmOptimizer.OptPass2TST(var p: tai): Boolean;
  1759. var
  1760. hp1, hp2: tai;
  1761. begin
  1762. Result := False;
  1763. if
  1764. {$ifndef AARCH64}
  1765. (taicpu(p).condition = C_None) and
  1766. {$endif AARCH64}
  1767. GetNextInstruction(p, hp1) and
  1768. MatchInstruction(hp1, A_B, [C_EQ, C_NE], [PF_None]) and
  1769. GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[0]^.reg) then
  1770. begin
  1771. case taicpu(hp2).opcode of
  1772. A_AND:
  1773. { Change:
  1774. tst r1,##
  1775. (r2 not in use, or r2 = r1)
  1776. b.c .Lbl
  1777. ...
  1778. and r2,r1,##
  1779. Optimise to:
  1780. ands r2,r1,##
  1781. b.c .Lbl
  1782. ...
  1783. }
  1784. if (taicpu(hp2).oppostfix in [PF_None, PF_S]) and
  1785. {$ifndef AARCH64}
  1786. (taicpu(hp2).condition = C_None) and
  1787. {$endif AARCH64}
  1788. (taicpu(hp2).ops = taicpu(p).ops + 1) and
  1789. not RegInUsedRegs(taicpu(hp2).oper[0]^.reg, UsedRegs) and
  1790. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^.reg) and
  1791. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[1]^) and
  1792. (
  1793. (taicpu(hp2).ops = 3) or
  1794. MatchOperand(taicpu(hp2).oper[3]^, taicpu(p).oper[2]^)
  1795. ) and
  1796. (
  1797. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1798. (
  1799. { Make sure the target register isn't used in between }
  1800. not RegUsedBetween(taicpu(hp2).oper[0]^.reg, hp1, hp2) and
  1801. (
  1802. { If the second operand is a register, make sure it isn't modified in between }
  1803. (taicpu(p).oper[1]^.typ <> top_reg) or
  1804. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1805. )
  1806. )
  1807. ) then
  1808. begin
  1809. AllocRegBetween(taicpu(hp2).oper[0]^.reg, p, hp2, UsedRegs);
  1810. if (taicpu(hp2).oppostfix = PF_S) then
  1811. AllocRegBetween(NR_DEFAULTFLAGS, p, hp2, UsedRegs);
  1812. DebugMsg(SPeepholeOptimization + 'TST; B.c; AND -> ANDS; B.c (TstBcAnd2AndsBc)', p);
  1813. taicpu(hp2).oppostfix := PF_S;
  1814. Asml.Remove(hp2);
  1815. Asml.InsertAfter(hp2, p);
  1816. RemoveCurrentP(p, hp2);
  1817. Result := True;
  1818. Exit;
  1819. end;
  1820. A_TST:
  1821. { Change:
  1822. tst r1,##
  1823. b.c .Lbl
  1824. ... (flags not modified)
  1825. tst r1,##
  1826. Remove second tst
  1827. }
  1828. if
  1829. {$ifndef AARCH64}
  1830. (taicpu(hp2).condition = C_None) and
  1831. {$endif AARCH64}
  1832. (taicpu(hp2).ops = taicpu(p).ops) and
  1833. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) and
  1834. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) and
  1835. (
  1836. (taicpu(hp2).ops = 2) or
  1837. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[2]^)
  1838. ) and
  1839. (
  1840. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1841. (
  1842. { Make sure the flags aren't modified in between }
  1843. not RegModifiedBetween(NR_DEFAULTFLAGS, hp1, hp2) and
  1844. (
  1845. { If the second operand is a register, make sure it isn't modified in between }
  1846. (taicpu(p).oper[1]^.typ <> top_reg) or
  1847. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1848. )
  1849. )
  1850. ) then
  1851. begin
  1852. DebugMsg(SPeepholeOptimization + 'TST; B.c; TST -> TST; B.c (TstBcTst2TstBc)', p);
  1853. AllocRegBetween(NR_DEFAULTFLAGS, hp1, hp2, UsedRegs);
  1854. RemoveInstruction(hp2);
  1855. Result := True;
  1856. Exit;
  1857. end;
  1858. else
  1859. ;
  1860. end;
  1861. end;
  1862. end;
  1863. end.