aoptarm.pas 131 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. function OptPass2Bitwise(var p: tai): Boolean;
  47. function OptPass2TST(var p: tai): Boolean;
  48. { Common code that tries to merge constant writes to sequential memory }
  49. function TryConstMerge(var p: tai; hp1: tai): Boolean;
  50. protected
  51. function DoXTArithOp(var p: tai; hp1: tai): Boolean;
  52. End;
  53. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  54. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  55. {$ifdef AARCH64}
  56. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  57. {$endif AARCH64}
  58. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  59. function RefsEqual(const r1, r2: treference): boolean;
  60. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  61. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  62. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  63. Implementation
  64. uses
  65. cutils,verbose,globals,aoptutils,
  66. systems,
  67. cpuinfo,
  68. cgobj,procinfo,
  69. aasmbase,aasmdata,itcpugas;
  70. {$ifdef DEBUG_AOPTCPU}
  71. const
  72. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  73. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  74. begin
  75. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  76. end;
  77. {$else DEBUG_AOPTCPU}
  78. { Empty strings help the optimizer to remove string concatenations that won't
  79. ever appear to the user on release builds. [Kit] }
  80. const
  81. SPeepholeOptimization = '';
  82. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  83. begin
  84. end;
  85. {$endif DEBUG_AOPTCPU}
  86. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  87. begin
  88. result :=
  89. (instr.typ = ait_instruction) and
  90. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  91. ((cond = []) or (taicpu(instr).condition in cond)) and
  92. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  93. end;
  94. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  95. begin
  96. result :=
  97. (instr.typ = ait_instruction) and
  98. (taicpu(instr).opcode = op) and
  99. ((cond = []) or (taicpu(instr).condition in cond)) and
  100. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  101. end;
  102. {$ifdef AARCH64}
  103. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  104. var
  105. op : TAsmOp;
  106. begin
  107. result:=false;
  108. if instr.typ <> ait_instruction then
  109. exit;
  110. for op in ops do
  111. begin
  112. if (taicpu(instr).opcode = op) and
  113. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  114. begin
  115. result:=true;
  116. exit;
  117. end;
  118. end;
  119. end;
  120. {$endif AARCH64}
  121. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  122. begin
  123. result :=
  124. (instr.typ = ait_instruction) and
  125. (taicpu(instr).opcode = op) and
  126. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  127. end;
  128. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  129. begin
  130. result := (oper.typ = top_reg) and (oper.reg = reg);
  131. end;
  132. function RefsEqual(const r1, r2: treference): boolean;
  133. begin
  134. refsequal :=
  135. (r1.offset = r2.offset) and
  136. (r1.base = r2.base) and
  137. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  138. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  139. (r1.relsymbol = r2.relsymbol) and
  140. {$ifdef ARM}
  141. (r1.signindex = r2.signindex) and
  142. {$endif ARM}
  143. (r1.shiftimm = r2.shiftimm) and
  144. (r1.addressmode = r2.addressmode) and
  145. (r1.shiftmode = r2.shiftmode) and
  146. (r1.volatility=[]) and
  147. (r2.volatility=[]);
  148. end;
  149. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  150. begin
  151. result := oper1.typ = oper2.typ;
  152. if result then
  153. case oper1.typ of
  154. top_const:
  155. Result:=oper1.val = oper2.val;
  156. top_reg:
  157. Result:=oper1.reg = oper2.reg;
  158. top_conditioncode:
  159. Result:=oper1.cc = oper2.cc;
  160. top_realconst:
  161. Result:=oper1.val_real = oper2.val_real;
  162. top_ref:
  163. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  164. else Result:=false;
  165. end
  166. end;
  167. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  168. begin
  169. result := (oper.typ = top_const) and (oper.val = a);
  170. end;
  171. {$ifdef AARCH64}
  172. function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
  173. var
  174. so: tshifterop;
  175. opoffset: Integer;
  176. begin
  177. Result:=false;
  178. if ((MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  179. (taicpu(hp1).ops=3) and
  180. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  181. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  182. (MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
  183. (taicpu(hp1).ops=2) and
  184. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))
  185. ) and
  186. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  187. { reg1 might not be modified inbetween }
  188. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  189. begin
  190. DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
  191. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  192. if MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) then
  193. opoffset:=0
  194. else
  195. opoffset:=1;
  196. taicpu(hp1).loadReg(opoffset+1,taicpu(p).oper[1]^.reg);
  197. if not(shiftmode in [SM_SXTX,SM_UXTX,SM_LSL]) then
  198. setsubreg(taicpu(hp1).oper[opoffset+1]^.reg,R_SUBD);
  199. taicpu(hp1).ops:=opoffset+3;
  200. shifterop_reset(so);
  201. so.shiftmode:=shiftmode;
  202. so.shiftimm:=0;
  203. taicpu(hp1).loadshifterop(opoffset+2,so);
  204. result:=RemoveCurrentP(p);
  205. end;
  206. end;
  207. {$endif AARCH64}
  208. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  209. Out Next: tai; const reg: TRegister): Boolean;
  210. var
  211. gniResult: Boolean;
  212. begin
  213. Next:=Current;
  214. Result := False;
  215. repeat
  216. gniResult:=GetNextInstruction(Next,Next);
  217. if gniResult and RegInInstruction(reg,Next) then
  218. { Found something }
  219. Exit(True);
  220. until not gniResult or
  221. not(cs_opt_level3 in current_settings.optimizerswitches) or
  222. (Next.typ<>ait_instruction) or
  223. is_calljmp(taicpu(Next).opcode)
  224. {$ifdef ARM}
  225. or RegModifiedByInstruction(NR_PC,Next)
  226. {$endif ARM}
  227. ;
  228. end;
  229. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  230. var
  231. alloc,
  232. dealloc : tai_regalloc;
  233. hp1 : tai;
  234. begin
  235. Result:=false;
  236. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  237. { We can't optimize if there is a shiftop }
  238. (taicpu(movp).ops=2) and
  239. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  240. { don't mess with moves to fp }
  241. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  242. { the destination register of the mov might not be used beween p and movp }
  243. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  244. {$ifdef ARM}
  245. { PC should be changed only by moves }
  246. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  247. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  248. (taicpu(p).opcode<>A_CBZ) and
  249. (taicpu(p).opcode<>A_CBNZ) and
  250. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  251. not (
  252. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  253. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  254. (current_settings.cputype < cpu_armv6)
  255. ) and
  256. {$endif ARM}
  257. { Take care to only do this for instructions which REALLY load to the first register.
  258. Otherwise
  259. str reg0, [reg1]
  260. mov reg2, reg0
  261. will be optimized to
  262. str reg2, [reg1]
  263. }
  264. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  265. begin
  266. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  267. if assigned(dealloc) then
  268. begin
  269. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  270. result:=true;
  271. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  272. and remove it if possible }
  273. asml.Remove(dealloc);
  274. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  275. if assigned(alloc) then
  276. begin
  277. asml.Remove(alloc);
  278. alloc.free;
  279. dealloc.free;
  280. end
  281. else
  282. asml.InsertAfter(dealloc,p);
  283. AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);
  284. { finally get rid of the mov }
  285. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  286. { Remove preindexing and postindexing for LDR in some cases.
  287. For example:
  288. ldr reg2,[reg1, xxx]!
  289. mov reg1,reg2
  290. must be translated to:
  291. ldr reg1,[reg1, xxx]
  292. Preindexing must be removed there, since the same register is used as the base and as the target.
  293. Such case is not allowed for ARM CPU and produces crash. }
  294. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  295. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  296. then
  297. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  298. asml.remove(movp);
  299. movp.free;
  300. end;
  301. end;
  302. end;
  303. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  304. var
  305. I: Integer;
  306. current_hp, next_hp: tai;
  307. LDRChange: Boolean;
  308. begin
  309. Result:=false;
  310. {
  311. change
  312. mov r1, r0
  313. add r1, r1, #1
  314. to
  315. add r1, r0, #1
  316. Todo: Make it work for mov+cmp too
  317. CAUTION! If this one is successful p might not be a mov instruction anymore!
  318. }
  319. if (taicpu(p).ops = 2) and
  320. (taicpu(p).oper[1]^.typ = top_reg) and
  321. (taicpu(p).oppostfix = PF_NONE) then
  322. begin
  323. if
  324. MatchInstruction(hp1, [A_ADD, A_ADC,
  325. {$ifdef ARM}
  326. A_RSB, A_RSC,
  327. {$endif ARM}
  328. A_SUB, A_SBC,
  329. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  330. [taicpu(p).condition], []) and
  331. { MOV and MVN might only have 2 ops }
  332. (taicpu(hp1).ops >= 2) and
  333. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  334. (taicpu(hp1).oper[1]^.typ = top_reg) and
  335. (
  336. (taicpu(hp1).ops = 2) or
  337. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  338. ) and
  339. {$ifdef AARCH64}
  340. (taicpu(p).oper[1]^.reg<>NR_SP) and
  341. { in this case you have to transform it to movk or the like }
  342. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  343. {$endif AARCH64}
  344. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  345. begin
  346. { When we get here we still don't know if the registers match }
  347. for I:=1 to 2 do
  348. {
  349. If the first loop was successful p will be replaced with hp1.
  350. The checks will still be ok, because all required information
  351. will also be in hp1 then.
  352. }
  353. if (taicpu(hp1).ops > I) and
  354. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  355. {$ifdef ARM}
  356. { prevent certain combinations on thumb(2), this is only a safe approximation }
  357. and (not(GenerateThumbCode or GenerateThumb2Code) or
  358. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  359. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  360. {$endif ARM}
  361. then
  362. begin
  363. DebugMsg('Peephole RedundantMovProcess done', hp1);
  364. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  365. if p<>hp1 then
  366. begin
  367. asml.remove(p);
  368. p.free;
  369. p:=hp1;
  370. Result:=true;
  371. end;
  372. end;
  373. if Result then Exit;
  374. end
  375. { Change: Change:
  376. mov r1, r0 mov r1, r0
  377. ... ...
  378. ldr/str r2, [r1, etc.] mov r2, r1
  379. To: To:
  380. ldr/str r2, [r0, etc.] mov r2, r0
  381. }
  382. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  383. {$ifdef ARM}
  384. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  385. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  386. { Thumb does not support references with base and index one being SP }
  387. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  388. {$endif ARM}
  389. {$ifdef AARCH64}
  390. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  391. {$endif AARCH64}
  392. then
  393. begin
  394. current_hp := p;
  395. TransferUsedRegs(TmpUsedRegs);
  396. { Search local instruction block }
  397. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  398. begin
  399. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  400. LDRChange := False;
  401. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  402. {$ifdef AARCH64}
  403. { If r0 is the zero register, then this sequence of instructions will cause
  404. an access violation, but that's better than an assembler error caused by
  405. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  406. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  407. {$endif AARCH64}
  408. then
  409. begin
  410. { Change the registers from r1 to r0 }
  411. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  412. {$ifdef ARM}
  413. { This optimisation conflicts with something and raises
  414. an access violation - needs further investigation. [Kit] }
  415. (taicpu(next_hp).opcode <> A_LDR) and
  416. {$endif ARM}
  417. { Don't mess around with the base register if the
  418. reference is pre- or post-indexed }
  419. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  420. begin
  421. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  422. LDRChange := True;
  423. end;
  424. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  425. begin
  426. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  427. LDRChange := True;
  428. end;
  429. if LDRChange then
  430. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  431. { Drop out if we're dealing with pre-indexed references }
  432. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  433. (
  434. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  435. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  436. ) then
  437. begin
  438. { Remember to update register allocations }
  439. if LDRChange then
  440. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  441. Break;
  442. end;
  443. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  444. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  445. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  446. begin
  447. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  448. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  449. LDRChange := True;
  450. end;
  451. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  452. begin
  453. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  454. if (taicpu(p).oppostfix = PF_None) and
  455. (
  456. (
  457. (taicpu(next_hp).opcode = A_LDR) and
  458. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  459. ) or
  460. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  461. ) and
  462. { Double-check to see if the old registers were actually
  463. changed (e.g. if the super registers matched, but not
  464. the sizes, they won't be changed). }
  465. (
  466. (taicpu(next_hp).opcode = A_LDR) or
  467. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  468. ) and
  469. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  470. begin
  471. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  472. RemoveCurrentP(p);
  473. Result := True;
  474. Exit;
  475. end;
  476. end;
  477. end
  478. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  479. (taicpu(next_hp).ops = 2) then
  480. begin
  481. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  482. begin
  483. { mov r0,r1; mov r1,r1 - remove second MOV here so
  484. so "RedundantMovProcess 2b" doesn't get erroneously
  485. applied }
  486. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(next_hp).oper[1]^.reg) then
  487. begin
  488. DebugMsg(SPeepholeOptimization + 'Mov2None 2a done', next_hp);
  489. if (next_hp = hp1) then
  490. { Don't let hp1 become a dangling pointer }
  491. hp1 := nil;
  492. asml.Remove(next_hp);
  493. next_hp.Free;
  494. Continue;
  495. end;
  496. { Found another mov that writes entirely to the register }
  497. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  498. begin
  499. { Register was used beforehand }
  500. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  501. begin
  502. { This MOV is exactly the same as the first one.
  503. Since none of the registers have changed value
  504. at this point, we can remove it. }
  505. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  506. if (next_hp = hp1) then
  507. { Don't let hp1 become a dangling pointer }
  508. hp1 := nil;
  509. asml.Remove(next_hp);
  510. next_hp.Free;
  511. { We still have the original p, so we can continue optimising;
  512. if it was -O2 or below, this instruction appeared immediately
  513. after the first MOV, so we're technically not looking more
  514. than one instruction ahead after it's removed! [Kit] }
  515. Continue;
  516. end
  517. else
  518. { Register changes value - drop out }
  519. Break;
  520. end;
  521. { We can delete the first MOV (only if the second MOV is unconditional) }
  522. {$ifdef ARM}
  523. if (taicpu(p).oppostfix = PF_None) and
  524. (taicpu(next_hp).condition = C_None) then
  525. {$endif ARM}
  526. begin
  527. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  528. RemoveCurrentP(p);
  529. Result := True;
  530. end;
  531. Exit;
  532. end
  533. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  534. begin
  535. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  536. { Be careful - if the entire register is not used, removing this
  537. instruction will leave the unused part uninitialised }
  538. {$ifdef AARCH64}
  539. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  540. {$endif AARCH64}
  541. then
  542. begin
  543. { Instruction will become mov r1,r1 }
  544. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  545. { Allocate r1 between the instructions; not doing
  546. so may cause problems when removing superfluous
  547. MOVs later (i38055) }
  548. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  549. if (next_hp = hp1) then
  550. { Don't let hp1 become a dangling pointer }
  551. hp1 := nil;
  552. asml.Remove(next_hp);
  553. next_hp.Free;
  554. Continue;
  555. end;
  556. { Change the old register (checking the first operand again
  557. forces it to be left alone if the full register is not
  558. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  559. {$ifdef AARCH64}
  560. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  561. {$endif AARCH64}
  562. begin
  563. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  564. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  565. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  566. { If this was the only reference to the old register,
  567. then we can remove the original MOV now }
  568. if (taicpu(p).oppostfix = PF_None) and
  569. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  570. remove if the register was apparently not allocated when its value is
  571. first set at the MOV command (this is especially true for the stack
  572. register). [Kit] }
  573. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  574. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  575. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  576. begin
  577. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  578. RemoveCurrentP(p);
  579. Result := True;
  580. Exit;
  581. end;
  582. end;
  583. end;
  584. end;
  585. { On low optimisation settions, don't search more than one instruction ahead }
  586. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  587. { Stop at procedure calls and jumps }
  588. is_calljmp(taicpu(next_hp).opcode) or
  589. { If the read register has changed value, or the MOV
  590. destination register has been used, drop out }
  591. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  592. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  593. Break;
  594. current_hp := next_hp;
  595. end;
  596. end;
  597. end;
  598. end;
  599. function TARMAsmOptimizer.DoXTArithOp(var p: tai; hp1: tai): Boolean;
  600. var
  601. hp2: tai;
  602. ConstLimit: TCGInt;
  603. ValidPostFixes: TOpPostFixes;
  604. FirstCode, SecondCode, ThirdCode, FourthCode: TAsmOp;
  605. begin
  606. Result := False;
  607. { Change:
  608. uxtb/h reg1,reg1
  609. (operation on reg1 with immediate operand where the upper 24/56
  610. bits don't affect the state of the first 8 bits )
  611. uxtb/h reg1,reg1
  612. Remove first uxtb/h
  613. }
  614. case taicpu(p).opcode of
  615. A_UXTB,
  616. A_SXTB:
  617. begin
  618. ConstLimit := $FF;
  619. ValidPostFixes := [PF_B];
  620. FirstCode := A_UXTB;
  621. SecondCode := A_SXTB;
  622. ThirdCode := A_UXTB; { Used to indicate no other valid codes }
  623. FourthCode := A_SXTB;
  624. end;
  625. A_UXTH,
  626. A_SXTH:
  627. begin
  628. ConstLimit := $FFFF;
  629. ValidPostFixes := [PF_B, PF_H];
  630. FirstCode := A_UXTH;
  631. SecondCode := A_SXTH;
  632. ThirdCode := A_UXTB;
  633. FourthCode := A_SXTB;
  634. end;
  635. else
  636. InternalError(2024051401);
  637. end;
  638. {$ifndef AARCH64}
  639. { Regular ARM doesn't have the multi-instruction MatchInstruction available }
  640. if (hp1.typ = ait_instruction) and (taicpu(hp1).oppostfix = PF_None) then
  641. case taicpu(hp1).opcode of
  642. A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN:
  643. {$endif AARCH64}
  644. if
  645. (taicpu(p).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  646. {$ifdef AARCH64}
  647. MatchInstruction(hp1, [A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON], [PF_None]) and
  648. {$endif AARCH64}
  649. (taicpu(hp1).condition = C_None) and
  650. (taicpu(hp1).ops = 3) and
  651. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  652. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  653. (taicpu(hp1).oper[2]^.typ = top_const) and
  654. (
  655. (
  656. { If the AND immediate is 8-bit, then this essentially performs
  657. the functionality of the second UXTB and so its presence is
  658. not required }
  659. (taicpu(hp1).opcode = A_AND) and
  660. (taicpu(hp1).oper[2]^.val >= 0) and
  661. (taicpu(hp1).oper[2]^.val <= ConstLimit)
  662. ) or
  663. (
  664. GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  665. (hp2.typ = ait_instruction) and
  666. (taicpu(hp2).ops = 2) and
  667. (taicpu(hp2).condition = C_None) and
  668. (
  669. (
  670. (taicpu(hp2).opcode in [FirstCode, SecondCode, ThirdCode, FourthCode]) and
  671. (taicpu(hp2).oppostfix = PF_None) and
  672. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg)
  673. { Destination is allowed to be different in this case, but
  674. only if the source is no longer in use (it being the same as
  675. the source is covered by RegEndOfLife as well) }
  676. ) or
  677. (
  678. { STRB essentially fills the same role as the second UXTB
  679. as long as the register is deallocated afterwards }
  680. MatchInstruction(hp2, A_STR, [C_None], ValidPostFixes) and
  681. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  682. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp2).oper[1]^)
  683. )
  684. ) and
  685. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp2))
  686. )
  687. ) then
  688. begin
  689. DebugMsg(SPeepholeOptimization + 'S/Uxtb/hArithUxtb/h2ArithS/Uxtb/h done', p);
  690. Result := RemoveCurrentP(p);
  691. { Simplify bitwise constants if able }
  692. {$ifdef AARCH64}
  693. if (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON]) and
  694. is_shifter_const(taicpu(hp1).oper[2]^.val and ConstLimit, OS_32) then
  695. {$else AARCH64}
  696. if (
  697. (ConstLimit = $FF) or
  698. (taicpu(hp1).oper[2]^.val <= $100)
  699. ) and
  700. (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN]) then
  701. {$endif AARCH64}
  702. taicpu(hp1).oper[2]^.val := taicpu(hp1).oper[2]^.val and ConstLimit;
  703. end;
  704. {$ifndef AARCH64}
  705. else
  706. ;
  707. end;
  708. {$endif not AARCH64}
  709. end;
  710. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  711. var
  712. hp1, hp2: tai;
  713. so: tshifterop;
  714. begin
  715. Result:=false;
  716. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  717. (taicpu(p).oppostfix = PF_None) and
  718. (taicpu(p).ops = 2) then
  719. begin
  720. if (taicpu(p).condition = C_None) then
  721. begin
  722. {
  723. change
  724. uxtb reg2,reg1
  725. strb reg2,[...]
  726. dealloc reg2
  727. to
  728. strb reg1,[...]
  729. }
  730. if MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  731. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  732. { the reference in strb might not use reg2 }
  733. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  734. { reg1 might not be modified inbetween }
  735. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  736. begin
  737. DebugMsg('Peephole UxtbStrb2Strb done', p);
  738. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  739. result:=RemoveCurrentP(p);
  740. end
  741. {
  742. change
  743. uxtb reg2,reg1
  744. uxth reg3,reg2
  745. dealloc reg2
  746. to
  747. uxtb reg3,reg1
  748. }
  749. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  750. (taicpu(hp1).ops = 2) and
  751. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  752. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  753. { reg1 might not be modified inbetween }
  754. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  755. begin
  756. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  757. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  758. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  759. asml.remove(hp1);
  760. hp1.free;
  761. result:=true;
  762. end
  763. {
  764. change
  765. uxtb reg2,reg1
  766. uxtb reg3,reg2
  767. dealloc reg2
  768. to
  769. uxtb reg3,reg1
  770. }
  771. else if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  772. (taicpu(hp1).ops = 2) and
  773. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  774. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  775. { reg1 might not be modified inbetween }
  776. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  777. begin
  778. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  779. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  780. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  781. asml.remove(hp1);
  782. hp1.free;
  783. result:=true;
  784. end
  785. {
  786. change
  787. uxtb reg2,reg1
  788. and reg3,reg2,#0x*FF
  789. dealloc reg2
  790. to
  791. uxtb reg3,reg1
  792. }
  793. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  794. (taicpu(hp1).ops=3) and
  795. (taicpu(hp1).oper[2]^.typ=top_const) and
  796. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  797. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  798. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  799. { reg1 might not be modified inbetween }
  800. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  801. begin
  802. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  803. taicpu(hp1).opcode:=A_UXTB;
  804. taicpu(hp1).ops:=2;
  805. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  806. result:=RemoveCurrentP(p);
  807. end
  808. else if DoXTArithOp(p, hp1) then
  809. Result:=true
  810. {$ifdef AARCH64}
  811. else if USxtOp2Op(p,hp1,SM_UXTB) then
  812. Result:=true
  813. {$endif AARCH64}
  814. end;
  815. { Condition doesn't have to be C_None }
  816. if not Result and
  817. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  818. Result:=true;
  819. end;
  820. end;
  821. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  822. var
  823. hp1: tai;
  824. so: tshifterop;
  825. begin
  826. Result:=false;
  827. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  828. (taicpu(p).oppostfix = PF_None) and
  829. (taicpu(p).ops = 2) then
  830. begin
  831. if (taicpu(p).condition = C_None) then
  832. begin
  833. {
  834. change
  835. uxth reg2,reg1
  836. strh reg2,[...]
  837. dealloc reg2
  838. to
  839. strh reg1,[...]
  840. }
  841. if MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  842. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  843. { the reference in strb might not use reg2 }
  844. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  845. { reg1 might not be modified inbetween }
  846. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  847. begin
  848. DebugMsg('Peephole UXTHStrh2Strh done', p);
  849. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  850. result:=RemoveCurrentP(p);
  851. end
  852. {
  853. change
  854. uxth reg2,reg1
  855. uxth reg3,reg2
  856. dealloc reg2
  857. to
  858. uxth reg3,reg1
  859. }
  860. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  861. (taicpu(hp1).ops=2) and
  862. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  863. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  864. { reg1 might not be modified inbetween }
  865. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  866. begin
  867. DebugMsg('Peephole UxthUxth2Uxth done', p);
  868. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  869. taicpu(hp1).opcode:=A_UXTH;
  870. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  871. result:=RemoveCurrentP(p);
  872. end
  873. {
  874. change
  875. uxth reg2,reg1
  876. and reg3,reg2,#65535
  877. dealloc reg2
  878. to
  879. uxth reg3,reg1
  880. }
  881. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  882. (taicpu(hp1).ops=3) and
  883. (taicpu(hp1).oper[2]^.typ=top_const) and
  884. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  885. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  886. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  887. { reg1 might not be modified inbetween }
  888. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  889. begin
  890. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  891. taicpu(hp1).opcode:=A_UXTH;
  892. taicpu(hp1).ops:=2;
  893. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  894. result:=RemoveCurrentP(p);
  895. end
  896. else if DoXTArithOp(p, hp1) then
  897. Result:=true
  898. {$ifdef AARCH64}
  899. else if USxtOp2Op(p,hp1,SM_UXTH) then
  900. Result:=true
  901. {$endif AARCH64}
  902. end;
  903. { Condition doesn't have to be C_None }
  904. if not Result and
  905. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  906. Result:=true;
  907. end;
  908. end;
  909. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  910. var
  911. hp1, hp2: tai;
  912. so: tshifterop;
  913. begin
  914. Result:=false;
  915. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  916. (taicpu(p).oppostfix = PF_None) and
  917. (taicpu(p).ops = 2) then
  918. begin
  919. if (taicpu(p).condition = C_None) then
  920. begin
  921. {
  922. change
  923. sxtb reg2,reg1
  924. strb reg2,[...]
  925. dealloc reg2
  926. to
  927. strb reg1,[...]
  928. }
  929. if MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  930. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  931. { the reference in strb might not use reg2 }
  932. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  933. { reg1 might not be modified inbetween }
  934. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  935. begin
  936. DebugMsg('Peephole SxtbStrb2Strb done', p);
  937. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  938. result:=RemoveCurrentP(p);
  939. end
  940. {
  941. change
  942. sxtb reg2,reg1
  943. sxth reg3,reg2
  944. dealloc reg2
  945. to
  946. sxtb reg3,reg1
  947. }
  948. else if MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  949. (taicpu(hp1).ops = 2) and
  950. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  951. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  952. { reg1 might not be modified inbetween }
  953. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  954. begin
  955. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  956. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  957. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  958. asml.remove(hp1);
  959. hp1.free;
  960. result:=true;
  961. end
  962. {
  963. change
  964. sxtb reg2,reg1
  965. sxtb reg3,reg2
  966. dealloc reg2
  967. to
  968. uxtb reg3,reg1
  969. }
  970. else if MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  971. (taicpu(hp1).ops = 2) and
  972. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  973. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  974. { reg1 might not be modified inbetween }
  975. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  976. begin
  977. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  978. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  979. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  980. asml.remove(hp1);
  981. hp1.free;
  982. result:=true;
  983. end
  984. {
  985. change
  986. sxtb reg2,reg1
  987. and reg3,reg2,#0x*FF
  988. dealloc reg2
  989. to
  990. uxtb reg3,reg1
  991. }
  992. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  993. (taicpu(hp1).ops=3) and
  994. (taicpu(hp1).oper[2]^.typ=top_const) and
  995. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  996. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  997. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  998. { reg1 might not be modified inbetween }
  999. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1000. begin
  1001. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  1002. taicpu(hp1).opcode:=A_UXTB;
  1003. taicpu(hp1).ops:=2;
  1004. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1005. result:=RemoveCurrentP(p);
  1006. end
  1007. else if DoXTArithOp(p, hp1) then
  1008. Result:=true
  1009. {$ifdef AARCH64}
  1010. else if USxtOp2Op(p,hp1,SM_SXTB) then
  1011. Result:=true
  1012. {$endif AARCH64}
  1013. end;
  1014. { Condition doesn't have to be C_None }
  1015. if not Result and
  1016. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  1017. Result:=true;
  1018. end;
  1019. end;
  1020. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  1021. var
  1022. hp1: tai;
  1023. so: tshifterop;
  1024. begin
  1025. Result:=false;
  1026. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1027. (taicpu(p).oppostfix = PF_None) and
  1028. (taicpu(p).ops = 2) then
  1029. begin
  1030. if (taicpu(p).condition = C_None) then
  1031. begin
  1032. {
  1033. change
  1034. sxth reg2,reg1
  1035. strh reg2,[...]
  1036. dealloc reg2
  1037. to
  1038. strh reg1,[...]
  1039. }
  1040. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1041. (taicpu(p).ops=2) and
  1042. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1043. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1044. { the reference in strb might not use reg2 }
  1045. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1046. { reg1 might not be modified inbetween }
  1047. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1048. begin
  1049. DebugMsg('Peephole SxthStrh2Strh done', p);
  1050. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1051. result:=RemoveCurrentP(p);
  1052. end
  1053. {
  1054. change
  1055. sxth reg2,reg1
  1056. sxth reg3,reg2
  1057. dealloc reg2
  1058. to
  1059. sxth reg3,reg1
  1060. }
  1061. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1062. (taicpu(p).ops=2) and
  1063. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  1064. (taicpu(hp1).ops=2) and
  1065. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1066. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1067. { reg1 might not be modified inbetween }
  1068. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1069. begin
  1070. DebugMsg('Peephole SxthSxth2Sxth done', p);
  1071. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1072. taicpu(hp1).opcode:=A_SXTH;
  1073. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1074. result:=RemoveCurrentP(p);
  1075. end
  1076. {$ifdef AARCH64}
  1077. {
  1078. change
  1079. sxth reg2,reg1
  1080. sxtw reg3,reg2
  1081. dealloc reg2
  1082. to
  1083. sxth reg3,reg1
  1084. }
  1085. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1086. (taicpu(p).ops=2) and
  1087. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  1088. (taicpu(hp1).ops=2) and
  1089. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1090. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1091. { reg1 might not be modified inbetween }
  1092. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1093. begin
  1094. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  1095. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1096. taicpu(hp1).opcode:=A_SXTH;
  1097. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1098. result:=RemoveCurrentP(p);
  1099. end
  1100. {$endif AARCH64}
  1101. {
  1102. change
  1103. sxth reg2,reg1
  1104. and reg3,reg2,#65535
  1105. dealloc reg2
  1106. to
  1107. uxth reg3,reg1
  1108. }
  1109. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  1110. (taicpu(p).ops=2) and
  1111. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1112. (taicpu(hp1).ops=3) and
  1113. (taicpu(hp1).oper[2]^.typ=top_const) and
  1114. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  1115. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1116. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1117. { reg1 might not be modified inbetween }
  1118. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1119. begin
  1120. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  1121. taicpu(hp1).opcode:=A_UXTH;
  1122. taicpu(hp1).ops:=2;
  1123. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1124. result:=RemoveCurrentP(p);
  1125. end
  1126. else if DoXTArithOp(p, hp1) then
  1127. Result:=true
  1128. {$ifdef AARCH64}
  1129. else if USxtOp2Op(p,hp1,SM_SXTH) then
  1130. Result:=true
  1131. {$endif AARCH64}
  1132. end;
  1133. { Condition doesn't have to be C_None }
  1134. if not Result and
  1135. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  1136. Result:=true;
  1137. end;
  1138. end;
  1139. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  1140. begin
  1141. Result := False;
  1142. { Convert:
  1143. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1144. To:
  1145. mov reg1,reg2
  1146. }
  1147. if (taicpu(p).oper[2]^.val = 0) and
  1148. {$ifdef AARCH64}
  1149. (
  1150. (
  1151. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1152. (taicpu(p).oper[3]^.val = 64)
  1153. ) or
  1154. (
  1155. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1156. (taicpu(p).oper[3]^.val = 32)
  1157. )
  1158. )
  1159. {$else AARCH64}
  1160. (taicpu(p).oper[3]^.val = 32)
  1161. {$endif AARCH64}
  1162. then
  1163. begin
  1164. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1165. taicpu(p).opcode := A_MOV;
  1166. taicpu(p).ops := 2;
  1167. taicpu(p).clearop(2);
  1168. taicpu(p).clearop(3);
  1169. Result := True;
  1170. Exit;
  1171. end;
  1172. end;
  1173. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1174. var
  1175. hp1: tai;
  1176. Reference: TReference;
  1177. NewOp: TAsmOp;
  1178. begin
  1179. Result := False;
  1180. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1181. Exit;
  1182. Reference := taicpu(p).oper[1]^.ref^;
  1183. if (Reference.addressmode = AM_OFFSET) and
  1184. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1185. { Delay calling GetNextInstruction for as long as possible }
  1186. GetNextInstruction(p, hp1) and
  1187. (hp1.typ = ait_instruction) and
  1188. (taicpu(hp1).condition = C_None) and
  1189. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1190. begin
  1191. if (taicpu(hp1).opcode = A_STR) and
  1192. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1193. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1194. begin
  1195. { With:
  1196. ldr reg1,[ref]
  1197. str reg2,[ref]
  1198. If reg1 = reg2, Remove str
  1199. }
  1200. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1201. begin
  1202. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1203. RemoveInstruction(hp1);
  1204. Result := True;
  1205. Exit;
  1206. end;
  1207. end
  1208. else if (taicpu(hp1).opcode = A_LDR) and
  1209. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1210. begin
  1211. { With:
  1212. ldr reg1,[ref]
  1213. ldr reg2,[ref]
  1214. If reg1 = reg2, delete the second ldr
  1215. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1216. a dependency, but it will likely open up new optimisations, so
  1217. do it for now and handle any new dependencies later.
  1218. }
  1219. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1220. begin
  1221. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1222. RemoveInstruction(hp1);
  1223. Result := True;
  1224. Exit;
  1225. end
  1226. else if
  1227. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1228. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1229. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1230. begin
  1231. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1232. taicpu(hp1).opcode := A_MOV;
  1233. taicpu(hp1).oppostfix := PF_None;
  1234. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1235. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1236. Result := True;
  1237. Exit;
  1238. end;
  1239. end;
  1240. end;
  1241. end;
  1242. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1243. var
  1244. hp1: tai;
  1245. Reference: TReference;
  1246. SizeMismatch: Boolean;
  1247. SrcReg, DstReg: TRegister;
  1248. NewOp: TAsmOp;
  1249. begin
  1250. Result := False;
  1251. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1252. Exit;
  1253. Reference := taicpu(p).oper[1]^.ref^;
  1254. if (Reference.addressmode = AM_OFFSET) and
  1255. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1256. { Delay calling GetNextInstruction for as long as possible }
  1257. GetNextInstruction(p, hp1) and
  1258. (hp1.typ = ait_instruction) and
  1259. (taicpu(hp1).condition = C_None) and
  1260. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1261. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1262. begin
  1263. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1264. SrcReg := taicpu(p).oper[0]^.reg;
  1265. DstReg := taicpu(hp1).oper[0]^.reg;
  1266. if (taicpu(hp1).opcode = A_LDR) and
  1267. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1268. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1269. (
  1270. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1271. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1272. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1273. {$ifdef AARCH64}
  1274. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1275. {$endif AARCH64}
  1276. ) then
  1277. begin
  1278. { With:
  1279. str reg1,[ref]
  1280. ldr reg2,[ref]
  1281. If reg1 = reg2, Remove ldr.
  1282. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1283. }
  1284. if (SrcReg = DstReg) and
  1285. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1286. (taicpu(p).oppostfix=PF_None) then
  1287. begin
  1288. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1289. RemoveInstruction(hp1);
  1290. Result := True;
  1291. Exit;
  1292. end
  1293. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1294. (getregtype(DstReg) = R_INTREGISTER) and
  1295. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1296. begin
  1297. NewOp:=A_NONE;
  1298. if taicpu(hp1).oppostfix=PF_None then
  1299. NewOp:=A_MOV
  1300. else
  1301. {$ifdef ARM}
  1302. if (current_settings.cputype < cpu_armv6) then
  1303. begin
  1304. { The zero- and sign-extension operations were only
  1305. introduced under ARMv6 }
  1306. case taicpu(hp1).oppostfix of
  1307. PF_B:
  1308. begin
  1309. { The if-block afterwards will set the middle operand to the correct register }
  1310. taicpu(hp1).allocate_oper(3);
  1311. taicpu(hp1).ops := 3;
  1312. taicpu(hp1).loadconst(2, $FF);
  1313. NewOp := A_AND;
  1314. end;
  1315. PF_H:
  1316. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1317. PF_SB,
  1318. PF_SH:
  1319. { Do nothing - can't easily encode sign-extensions };
  1320. else
  1321. InternalError(2021043002);
  1322. end;
  1323. end
  1324. else
  1325. {$endif ARM}
  1326. case taicpu(hp1).oppostfix of
  1327. PF_B:
  1328. NewOp := A_UXTB;
  1329. PF_SB:
  1330. NewOp := A_SXTB;
  1331. PF_H:
  1332. NewOp := A_UXTH;
  1333. PF_SH:
  1334. NewOp := A_SXTH;
  1335. {$ifdef AARCH64}
  1336. PF_SW:
  1337. NewOp := A_SXTW;
  1338. PF_W:
  1339. NewOp := A_MOV;
  1340. {$endif AARCH64}
  1341. else
  1342. InternalError(2021043001);
  1343. end;
  1344. if (NewOp<>A_None) then
  1345. begin
  1346. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1347. taicpu(hp1).oppostfix := PF_None;
  1348. taicpu(hp1).opcode := NewOp;
  1349. taicpu(hp1).loadreg(1, SrcReg);
  1350. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1351. Result := True;
  1352. Exit;
  1353. end;
  1354. end
  1355. end
  1356. else if (taicpu(hp1).opcode = A_STR) and
  1357. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1358. begin
  1359. { With:
  1360. str reg1,[ref]
  1361. str reg2,[ref]
  1362. If reg1 <> reg2, delete the first str
  1363. IF reg1 = reg2, delete the second str
  1364. }
  1365. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1366. begin
  1367. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1368. RemoveInstruction(hp1);
  1369. Result := True;
  1370. Exit;
  1371. end
  1372. else if
  1373. { Registers same byte size? }
  1374. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1375. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1376. begin
  1377. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1378. RemoveCurrentP(p, hp1);
  1379. Result := True;
  1380. Exit;
  1381. end;
  1382. end;
  1383. end;
  1384. end;
  1385. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1386. var
  1387. hp1, hp2: tai;
  1388. i: longint;
  1389. begin
  1390. Result:=false;
  1391. {
  1392. optimize
  1393. and reg2,reg1,const1
  1394. ...
  1395. }
  1396. if (taicpu(p).ops>2) and
  1397. (taicpu(p).oper[1]^.typ = top_reg) and
  1398. (taicpu(p).oper[2]^.typ = top_const) then
  1399. begin
  1400. {
  1401. change
  1402. and reg2,reg1,const1
  1403. ...
  1404. and reg3,reg2,const2
  1405. to
  1406. and reg3,reg1,(const1 and const2)
  1407. }
  1408. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1409. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1410. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1411. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1412. (taicpu(hp1).oper[2]^.typ = top_const)
  1413. {$ifdef AARCH64}
  1414. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1415. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1416. ) or
  1417. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1418. {$endif AARCH64}
  1419. then
  1420. begin
  1421. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1422. begin
  1423. DebugMsg('Peephole AndAnd2And done', p);
  1424. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1425. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1426. begin
  1427. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1428. taicpu(p).opcode:=A_MOV;
  1429. taicpu(p).ops:=2;
  1430. taicpu(p).loadConst(1,0);
  1431. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1432. end
  1433. else
  1434. begin
  1435. DebugMsg('Peephole AndAnd2And 1 done', p);
  1436. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1437. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1438. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1439. end;
  1440. asml.remove(hp1);
  1441. hp1.free;
  1442. Result:=true;
  1443. exit;
  1444. end
  1445. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1446. begin
  1447. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1448. begin
  1449. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1450. taicpu(hp1).opcode:=A_MOV;
  1451. taicpu(hp1).loadConst(1,0);
  1452. taicpu(hp1).ops:=2;
  1453. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1454. end
  1455. else
  1456. begin
  1457. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1458. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1459. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1460. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1461. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1462. end;
  1463. GetNextInstruction(p, hp1);
  1464. RemoveCurrentP(p);
  1465. p:=hp1;
  1466. Result:=true;
  1467. exit;
  1468. end;
  1469. end
  1470. {
  1471. change
  1472. and reg2,reg1,$xxxxxxFF
  1473. strb reg2,[...]
  1474. dealloc reg2
  1475. to
  1476. strb reg1,[...]
  1477. }
  1478. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1479. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1480. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1481. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1482. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1483. { the reference in strb might not use reg2 }
  1484. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1485. { reg1 might not be modified inbetween }
  1486. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1487. begin
  1488. DebugMsg('Peephole AndStrb2Strb done', p);
  1489. {$ifdef AARCH64}
  1490. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1491. {$else AARCH64}
  1492. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1493. {$endif AARCH64}
  1494. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1495. RemoveCurrentP(p);
  1496. result:=true;
  1497. exit;
  1498. end
  1499. {
  1500. change
  1501. and reg2,reg1,255
  1502. uxtb/uxth reg3,reg2
  1503. dealloc reg2
  1504. to
  1505. and reg3,reg1,x
  1506. }
  1507. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1508. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1509. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1510. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1511. (taicpu(hp1).ops = 2) and
  1512. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1513. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1514. { reg1 might not be modified inbetween }
  1515. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1516. begin
  1517. DebugMsg('Peephole AndUxt2And done', p);
  1518. taicpu(hp1).opcode:=A_AND;
  1519. taicpu(hp1).ops:=3;
  1520. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1521. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1522. GetNextInstruction(p,hp1);
  1523. asml.remove(p);
  1524. p.Free;
  1525. p:=hp1;
  1526. result:=true;
  1527. exit;
  1528. end
  1529. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1530. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1531. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1532. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1533. (taicpu(hp1).ops = 2) and
  1534. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1535. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1536. { reg1 might not be modified inbetween }
  1537. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1538. begin
  1539. DebugMsg('Peephole AndSxt2And done', p);
  1540. taicpu(hp1).opcode:=A_AND;
  1541. taicpu(hp1).ops:=3;
  1542. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1543. setsubreg(taicpu(hp1).oper[1]^.reg,getsubreg(taicpu(hp1).oper[0]^.reg));
  1544. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1545. GetNextInstruction(p,hp1);
  1546. asml.remove(p);
  1547. p.Free;
  1548. p:=hp1;
  1549. result:=true;
  1550. exit;
  1551. end
  1552. {
  1553. from
  1554. and reg1,reg0,2^n-1
  1555. mov reg2,reg1, lsl imm1
  1556. (mov reg3,reg2, lsr/asr imm1)
  1557. remove either the and or the lsl/xsr sequence if possible
  1558. }
  1559. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1560. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1561. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1562. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1563. (taicpu(hp1).ops=3) and
  1564. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1565. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1566. {$ifdef ARM}
  1567. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1568. {$endif ARM}
  1569. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1570. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1571. begin
  1572. {
  1573. and reg1,reg0,2^n-1
  1574. mov reg2,reg1, lsl imm1
  1575. mov reg3,reg2, lsr/asr imm1
  1576. =>
  1577. and reg1,reg0,2^n-1
  1578. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1579. }
  1580. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1581. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1582. (taicpu(hp2).ops=3) and
  1583. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1584. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1585. {$ifdef ARM}
  1586. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1587. {$endif ARM}
  1588. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1589. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1590. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1591. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1592. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1593. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1594. begin
  1595. DebugMsg('Peephole AndLslXsr2And done', p);
  1596. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1597. asml.Remove(hp1);
  1598. asml.Remove(hp2);
  1599. hp1.free;
  1600. hp2.free;
  1601. result:=true;
  1602. exit;
  1603. end
  1604. {
  1605. and reg1,reg0,2^n-1
  1606. mov reg2,reg1, lsl imm1
  1607. =>
  1608. mov reg2,reg0, lsl imm1
  1609. if imm1>i
  1610. }
  1611. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1612. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1613. begin
  1614. DebugMsg('Peephole AndLsl2Lsl done', p);
  1615. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1616. GetNextInstruction(p, hp1);
  1617. asml.Remove(p);
  1618. p.free;
  1619. p:=hp1;
  1620. result:=true;
  1621. exit;
  1622. end
  1623. end;
  1624. end;
  1625. {
  1626. change
  1627. and reg1, ...
  1628. mov reg2, reg1
  1629. to
  1630. and reg2, ...
  1631. }
  1632. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1633. (taicpu(p).ops>=3) and
  1634. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1635. Result:=true;
  1636. end;
  1637. function TARMAsmOptimizer.OptPass2Bitwise(var p: tai): Boolean;
  1638. var
  1639. hp1, hp2: tai;
  1640. WorkingReg: TRegister;
  1641. begin
  1642. Result := False;
  1643. {
  1644. change
  1645. and/bic reg1, ...
  1646. ...
  1647. cmp reg1, #0
  1648. b<ne/eq> @Lbl
  1649. to
  1650. ands/bics reg1, ...
  1651. Also:
  1652. and/bic reg1, ...
  1653. ...
  1654. cmp reg1, #0
  1655. (reg1 end of life)
  1656. b<ne/eq> @Lbl
  1657. to
  1658. tst reg1, ...
  1659. or
  1660. bics xzr, reg1, ... under AArch64
  1661. For ARM, also include OR, EOR and ORN
  1662. }
  1663. if (taicpu(p).condition = C_None) and
  1664. (taicpu(p).ops>=3) and
  1665. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1666. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  1667. MatchOperand(taicpu(hp1).oper[1]^, 0) and
  1668. {$ifdef AARCH64}
  1669. (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
  1670. (
  1671. (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
  1672. or
  1673. (
  1674. (taicpu(p).oper[2]^.typ = top_const) and
  1675. (taicpu(p).oper[2]^.val >= 0) and
  1676. (taicpu(p).oper[2]^.val <= $FFFFFFFF)
  1677. )
  1678. ) and
  1679. {$else AARCH64}
  1680. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1681. {$endif AARCH64}
  1682. not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
  1683. GetNextInstruction(hp1, hp2) then
  1684. begin
  1685. if MatchInstruction(hp2, [A_B, A_CMP, A_CMN, A_TST{$ifndef AARCH64}, A_TEQ{$endif not AARCH64}], [C_EQ, C_NE], [PF_None]) then
  1686. begin
  1687. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  1688. WorkingReg := taicpu(p).oper[0]^.reg;
  1689. if
  1690. {$ifndef AARCH64}
  1691. (taicpu(p).opcode = A_AND) and
  1692. {$endif AARCH64}
  1693. RegEndOfLife(WorkingReg, taicpu(hp1)) then
  1694. begin
  1695. {$ifdef AARCH64}
  1696. if (taicpu(p).opcode <> A_AND) then
  1697. begin
  1698. setsupreg(taicpu(p).oper[0]^.reg, RS_XZR);
  1699. taicpu(p).oppostfix := PF_S;
  1700. DebugMsg(SPeepholeOptimization + 'BIC; CMP -> BICS ' + gas_regname(taicpu(p).oper[0]^.reg), p);
  1701. end
  1702. else
  1703. {$endif AARCH64}
  1704. begin
  1705. taicpu(p).opcode := A_TST;
  1706. taicpu(p).oppostfix := PF_None;
  1707. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  1708. taicpu(p).loadoper(1, taicpu(p).oper[2]^);
  1709. if (taicpu(p).ops = 4) then
  1710. begin
  1711. { Make sure any shifter operator is also transferred }
  1712. taicpu(p).loadshifterop(2, taicpu(p).oper[3]^.shifterop^);
  1713. taicpu(p).ops := 3;
  1714. end
  1715. else
  1716. taicpu(p).ops := 2;
  1717. DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
  1718. end;
  1719. end
  1720. else
  1721. begin
  1722. taicpu(p).oppostfix := PF_S;
  1723. {$ifdef AARCH64}
  1724. DebugMsg(SPeepholeOptimization + 'AND/BIC; CMP -> ANDS/BICS', p);
  1725. {$else AARCH64}
  1726. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> Bitwise+S', p);
  1727. {$endif AARCH64}
  1728. end;
  1729. RemoveInstruction(hp1);
  1730. { If a temporary register was used for and/cmp before, we might be
  1731. able to deallocate the register so it can be used for other
  1732. optimisations later }
  1733. if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
  1734. ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);
  1735. Result := True;
  1736. Exit;
  1737. end
  1738. else if
  1739. (hp2.typ = ait_label) or
  1740. { Conditional comparison instructions have already been covered }
  1741. RegModifiedByInstruction(NR_DEFAULTFLAGS, hp2) then
  1742. begin
  1743. { The comparison is a null operation }
  1744. if RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1745. begin
  1746. DebugMsg(SPeepholeOptimization + 'Bitwise; CMP -> nop', p);
  1747. RemoveInstruction(hp1);
  1748. RemoveCurrentP(p);
  1749. end
  1750. else
  1751. begin
  1752. DebugMsg(SPeepholeOptimization + 'CMP/BIC -> nop', hp1);
  1753. RemoveInstruction(hp1);
  1754. end;
  1755. Result := True;
  1756. Exit;
  1757. end;
  1758. end;
  1759. end;
  1760. function TARMAsmOptimizer.OptPass2TST(var p: tai): Boolean;
  1761. var
  1762. hp1, hp2: tai;
  1763. begin
  1764. Result := False;
  1765. if
  1766. {$ifndef AARCH64}
  1767. (taicpu(p).condition = C_None) and
  1768. {$endif AARCH64}
  1769. GetNextInstruction(p, hp1) and
  1770. MatchInstruction(hp1, A_B, [C_EQ, C_NE], [PF_None]) and
  1771. GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[0]^.reg) then
  1772. begin
  1773. case taicpu(hp2).opcode of
  1774. A_AND:
  1775. { Change:
  1776. tst r1,##
  1777. (r2 not in use, or r2 = r1)
  1778. b.c .Lbl
  1779. ...
  1780. and r2,r1,##
  1781. Optimise to:
  1782. ands r2,r1,##
  1783. b.c .Lbl
  1784. ...
  1785. }
  1786. if (taicpu(hp2).oppostfix in [PF_None, PF_S]) and
  1787. {$ifndef AARCH64}
  1788. (taicpu(hp2).condition = C_None) and
  1789. {$endif AARCH64}
  1790. (taicpu(hp2).ops = taicpu(p).ops + 1) and
  1791. not RegInUsedRegs(taicpu(hp2).oper[0]^.reg, UsedRegs) and
  1792. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^.reg) and
  1793. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[1]^) and
  1794. (
  1795. (taicpu(hp2).ops = 3) or
  1796. MatchOperand(taicpu(hp2).oper[3]^, taicpu(p).oper[2]^)
  1797. ) and
  1798. (
  1799. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1800. (
  1801. { Make sure the target register isn't used in between }
  1802. not RegUsedBetween(taicpu(hp2).oper[0]^.reg, hp1, hp2) and
  1803. (
  1804. { If the second operand is a register, make sure it isn't modified in between }
  1805. (taicpu(p).oper[1]^.typ <> top_reg) or
  1806. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1807. )
  1808. )
  1809. ) then
  1810. begin
  1811. AllocRegBetween(taicpu(hp2).oper[0]^.reg, p, hp2, UsedRegs);
  1812. if (taicpu(hp2).oppostfix = PF_S) then
  1813. AllocRegBetween(NR_DEFAULTFLAGS, p, hp2, UsedRegs);
  1814. DebugMsg(SPeepholeOptimization + 'TST; B.c; AND -> ANDS; B.c (TstBcAnd2AndsBc)', p);
  1815. taicpu(hp2).oppostfix := PF_S;
  1816. Asml.Remove(hp2);
  1817. Asml.InsertAfter(hp2, p);
  1818. RemoveCurrentP(p, hp2);
  1819. Result := True;
  1820. Exit;
  1821. end;
  1822. A_TST:
  1823. { Change:
  1824. tst r1,##
  1825. b.c .Lbl
  1826. ... (flags not modified)
  1827. tst r1,##
  1828. Remove second tst
  1829. }
  1830. if
  1831. {$ifndef AARCH64}
  1832. (taicpu(hp2).condition = C_None) and
  1833. {$endif AARCH64}
  1834. (taicpu(hp2).ops = taicpu(p).ops) and
  1835. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) and
  1836. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) and
  1837. (
  1838. (taicpu(hp2).ops = 2) or
  1839. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[2]^)
  1840. ) and
  1841. (
  1842. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1843. (
  1844. { Make sure the flags aren't modified in between }
  1845. not RegModifiedBetween(NR_DEFAULTFLAGS, hp1, hp2) and
  1846. (
  1847. { If the second operand is a register, make sure it isn't modified in between }
  1848. (taicpu(p).oper[1]^.typ <> top_reg) or
  1849. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1850. )
  1851. )
  1852. ) then
  1853. begin
  1854. DebugMsg(SPeepholeOptimization + 'TST; B.c; TST -> TST; B.c (TstBcTst2TstBc)', p);
  1855. AllocRegBetween(NR_DEFAULTFLAGS, hp1, hp2, UsedRegs);
  1856. RemoveInstruction(hp2);
  1857. Result := True;
  1858. Exit;
  1859. end;
  1860. else
  1861. ;
  1862. end;
  1863. end;
  1864. end;
  1865. function TARMAsmOptimizer.TryConstMerge(var p: tai; hp1: tai): Boolean;
  1866. const
  1867. {$ifdef ARM}
  1868. LO_16_WRITE: TAsmOp = A_MOVW;
  1869. HI_16_WRITE: TAsmOp = A_MOVT;
  1870. {$endif ARM}
  1871. {$ifdef AARCH64}
  1872. LO_16_WRITE: TAsmOp = A_MOVZ;
  1873. HI_16_WRITE: TAsmOp = A_MOVK;
  1874. {$endif AARCH64}
  1875. var
  1876. hp2, hp2_second, hp3, hp3_second, p_second, hp1_second: tai;
  1877. ThisReg: TRegister;
  1878. ThisRef: TReference;
  1879. so: TShifterOp;
  1880. procedure SearchAhead;
  1881. begin
  1882. { If p.opcode = A_STR, then ThisReg will be NR_NO }
  1883. if
  1884. {$ifdef ARM}
  1885. Assigned(hp1) and
  1886. {$endif ARM}
  1887. {$ifdef AARCH64}
  1888. (
  1889. (
  1890. MatchInstruction(p, A_MOVZ, []) and
  1891. Assigned(hp1)
  1892. ) or
  1893. (
  1894. MatchInstruction(p, A_STR, []) and
  1895. SetAndTest(p, hp1)
  1896. )
  1897. ) and
  1898. {$endif AARCH64}
  1899. (
  1900. (
  1901. (ThisReg <> NR_NO) and
  1902. (
  1903. {$ifdef AARCH64}
  1904. (
  1905. (getsubreg(ThisReg) = R_SUBD) and
  1906. MatchInstruction(hp1, A_MOVK, []) and
  1907. (taicpu(hp1).oper[0]^.reg = ThisReg) and
  1908. GetNextInstruction(hp1, hp2) and
  1909. MatchInstruction(hp2, A_STR, []) and
  1910. (taicpu(hp2).oper[0]^.reg = ThisReg) and
  1911. GetNextInstruction(hp2, p_second)
  1912. ) or
  1913. {$endif AARCH64}
  1914. (
  1915. MatchInstruction(hp1, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, []) and
  1916. (taicpu(hp1).oper[0]^.reg = ThisReg) and
  1917. GetNextInstruction(hp1, p_second)
  1918. )
  1919. )
  1920. ) or (
  1921. { Just search one ahead if ThisReg is NR_NO }
  1922. (ThisReg = NR_NO) and
  1923. GetNextInstruction(hp1, p_second)
  1924. )
  1925. ) and
  1926. (
  1927. (
  1928. {$ifdef ARM}
  1929. (
  1930. MatchInstruction(p_second, A_MOV, [taicpu(p).condition], []) or
  1931. MatchInstruction(p_second, A_MOVW, [taicpu(p).condition], [])
  1932. ) and
  1933. {$endif ARM}
  1934. {$ifdef AARCH64}
  1935. MatchInstruction(p_second, A_MOVZ, []) and
  1936. {$endif AARCH64}
  1937. { Don't use ThisReg because it may be NR_NO }
  1938. GetNextInstruction(p_second, hp1_second) and
  1939. (
  1940. {$ifdef AARCH64}
  1941. (
  1942. MatchInstruction(hp1_second, A_MOVK, []) and
  1943. GetNextInstruction(hp1_second, hp2_second) and
  1944. MatchInstruction(hp2_second, A_STR, [PF_None])
  1945. ) or
  1946. {$endif AARCH64}
  1947. MatchInstruction(hp1_second, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [])
  1948. )
  1949. )
  1950. {$ifdef AARCH64}
  1951. or (
  1952. MatchInstruction(p_second, A_STR, []) and
  1953. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) and
  1954. { Negate the result because we're setting hp1_second to nil }
  1955. not SetAndTest(nil, hp1_second)
  1956. )
  1957. {$endif AARCH64}
  1958. ) then
  1959. TryConstMerge(p_second, hp1_second);
  1960. end;
  1961. begin
  1962. Result := False;
  1963. {$ifdef ARM}
  1964. { We need a Cortex-A ARM processor that supports MOVW and MOVT }
  1965. if not (CPUARM_HAS_EXTENDED_CONSTANTS in cpu_capabilities[current_settings.cputype]) then
  1966. Exit;
  1967. {$endif ARM}
  1968. ThisReg := NR_NO; { Safe initialisation }
  1969. case taicpu(p).opcode of
  1970. {$ifdef ARM}
  1971. A_MOV,
  1972. A_MOVW:
  1973. if (taicpu(p).opcode <> A_MOV) or (taicpu(p).oper[1]^.typ = top_const) then
  1974. {$endif ARM}
  1975. {$ifdef AARCH64}
  1976. A_MOVZ:
  1977. {$endif AARCH64}
  1978. begin
  1979. ThisReg := taicpu(p).oper[0]^.reg;
  1980. if Assigned(hp1){$ifdef ARM} and (taicpu(hp1).condition = taicpu(p).condition){$endif ARM} then
  1981. case taicpu(hp1).opcode of
  1982. A_STR:
  1983. if {$ifdef ARM}(taicpu(hp1).ops = 2) and {$endif ARM}SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, ThisReg) then
  1984. begin
  1985. ThisRef := taicpu(hp1).oper[1]^.ref^;
  1986. if (ThisRef.addressmode = AM_OFFSET) and
  1987. (ThisRef.index = NR_NO) and
  1988. { Only permit writes to the stack, since we can guarantee alignment with that }
  1989. (
  1990. (ThisRef.base = NR_STACK_POINTER_REG) or
  1991. (ThisRef.base = current_procinfo.framepointer)
  1992. ) then
  1993. begin
  1994. case taicpu(hp1).oppostfix of
  1995. PF_B:
  1996. {
  1997. With sequences such as:
  1998. movz w0,x
  1999. strb w0,[sp, #ofs]
  2000. movz w0,y
  2001. strb w0,[sp, #ofs+1]
  2002. Merge the constants to:
  2003. movz w0,x + (y shl 8)
  2004. strh w0,[sp, #ofs]
  2005. Only use the stack pointer or frame pointer and an even offset though
  2006. to guarantee alignment
  2007. }
  2008. if ((ThisRef.offset mod 2) = 0) and
  2009. GetNextInstruction(hp1, p_second) and
  2010. (p_second.typ = ait_instruction)
  2011. {$ifdef ARM}
  2012. and (taicpu(p_second).condition = taicpu(p).condition)
  2013. {$endif ARM}
  2014. then
  2015. begin
  2016. case taicpu(p_second).opcode of
  2017. {$ifdef ARM}
  2018. A_MOV,
  2019. A_MOVW:
  2020. if (taicpu(p_second).oppostfix = PF_None) and
  2021. ((taicpu(p_second).opcode <> A_MOV) or (taicpu(p_second).oper[1]^.typ = top_const)) then
  2022. {$endif ARM}
  2023. {$ifdef AARCH64}
  2024. A_MOVZ:
  2025. {$endif AARCH64}
  2026. begin
  2027. if SuperRegistersEqual(taicpu(p_second).oper[0]^.reg, ThisReg) and
  2028. GetNextInstruction(p_second, hp1_second) and
  2029. MatchInstruction(hp1_second, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [PF_B]) and
  2030. SuperRegistersEqual(taicpu(hp1_second).oper[0]^.reg, ThisReg) then
  2031. begin
  2032. { Is the second storage location exactly one byte ahead? }
  2033. Inc(ThisRef.offset);
  2034. if RefsEqual(taicpu(hp1_second).oper[1]^.ref^, ThisRef) and
  2035. { The final safety check... make sure the register used
  2036. to store the constant isn't used afterwards }
  2037. RegEndOfLife(ThisReg, taicpu(hp1_second)) then
  2038. begin
  2039. { See if we can merge 4 bytes at once (this benefits ARM mostly, but provides a speed boost for AArch64 too) }
  2040. if GetNextInstruction(hp1_second, hp2) and
  2041. (
  2042. {$ifdef ARM}
  2043. MatchInstruction(hp2, A_MOVW, [taicpu(p).condition], []) or
  2044. {$endif ARM}
  2045. (
  2046. MatchInstruction(hp2, LO_16_WRITE{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [])
  2047. {$ifdef ARM}
  2048. and (taicpu(hp2).oper[1]^.typ = top_const)
  2049. {$endif ARM}
  2050. )
  2051. ) and
  2052. SuperRegistersEqual(taicpu(hp2).oper[0]^.reg, ThisReg) and
  2053. GetNextInstruction(hp2, hp2_second) and
  2054. MatchInstruction(hp2_second, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [PF_B]) and
  2055. SuperRegistersEqual(taicpu(hp2_second).oper[0]^.reg, ThisReg) and
  2056. GetNextInstruction(hp2_second, hp3) and
  2057. (
  2058. {$ifdef ARM}
  2059. MatchInstruction(hp3, A_MOVW, [taicpu(p).condition], []) or
  2060. {$endif ARM}
  2061. (
  2062. MatchInstruction(hp3, LO_16_WRITE{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [])
  2063. {$ifdef ARM}
  2064. and (taicpu(hp3).oper[1]^.typ = top_const)
  2065. {$endif ARM}
  2066. )
  2067. ) and
  2068. SuperRegistersEqual(taicpu(hp3).oper[0]^.reg, ThisReg) and
  2069. GetNextInstruction(hp3, hp3_second) and
  2070. MatchInstruction(hp3_second, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [PF_B]) and
  2071. SuperRegistersEqual(taicpu(hp3_second).oper[0]^.reg, ThisReg) then
  2072. begin
  2073. Inc(ThisRef.offset);
  2074. if RefsEqual(taicpu(hp2_second).oper[1]^.ref^, ThisRef) then
  2075. begin
  2076. Inc(ThisRef.offset);
  2077. if RefsEqual(taicpu(hp3_second).oper[1]^.ref^, ThisRef) then
  2078. begin
  2079. { Merge the constants }
  2080. DebugMsg(SPeepholeOptimization + 'Merged four byte-writes to memory into a single word-write (MovzStrbMovzStrbMovzStrbMovzStrb2MovzMovkStr)', p);
  2081. {$ifdef ARM}
  2082. taicpu(p).opcode := A_MOVW;
  2083. {$endif ARM}
  2084. taicpu(p).oper[1]^.val := (taicpu(p).oper[1]^.val and $FF) or ((taicpu(p_second).oper[1]^.val and $FF) shl 8);
  2085. taicpu(hp2).opcode := HI_16_WRITE;
  2086. taicpu(hp2).oper[1]^.val := (taicpu(hp2).oper[1]^.val and $FF) or ((taicpu(hp3).oper[1]^.val and $FF) shl 8);
  2087. so.shiftimm := 16;
  2088. so.shiftmode := SM_LSL;
  2089. taicpu(hp2).loadshifterop(2, so);
  2090. taicpu(hp2).ops := 3;
  2091. taicpu(hp1).oppostfix := PF_None;
  2092. AsmL.Remove(hp2);
  2093. AsmL.InsertAfter(hp2, p);
  2094. RemoveInstruction(p_second);
  2095. RemoveInstruction(hp1_second);
  2096. RemoveInstruction(hp2_second);
  2097. RemoveInstruction(hp3);
  2098. RemoveInstruction(hp3_second);
  2099. Result := True;
  2100. {$ifdef AARCH64}
  2101. { Searching ahead only benefits AArch64 here }
  2102. hp1 := hp2; { Since hp2 now appears immediately after p }
  2103. SearchAhead;
  2104. {$endif AARCH64}
  2105. Exit;
  2106. end;
  2107. { Reset the offset so the range check below is correct }
  2108. Dec(ThisRef.offset);
  2109. end;
  2110. Dec(ThisRef.offset);
  2111. end;
  2112. {$ifdef ARM}
  2113. { Be careful. strb and str support offsets between -4095 and +4095, but
  2114. strh only supports offsets between -255 and +255. However, we might be
  2115. able to bypass this if there are four bytes in a row (for AArch64, just
  2116. use SearchAhead below }
  2117. if { Remember we added 1 to the offset }
  2118. (ThisRef.offset >= -254) and (ThisRef.offset <= 256) then
  2119. {$endif ARM}
  2120. begin
  2121. { Merge the constants and remove the second pair of instructions }
  2122. DebugMsg(SPeepholeOptimization + 'Merged two byte-writes to memory into a single half-write (MovzStrbMovzStrb2MovzStrh)', p);
  2123. {$ifdef ARM}
  2124. taicpu(p).opcode := A_MOVW;
  2125. {$endif ARM}
  2126. taicpu(p).oper[1]^.val := (taicpu(p).oper[1]^.val and $FF) or ((taicpu(p_second).oper[1]^.val and $FF) shl 8);
  2127. taicpu(hp1).oppostfix := PF_H;
  2128. RemoveInstruction(p_second);
  2129. RemoveInstruction(hp1_second);
  2130. Result := True;
  2131. end;
  2132. end;
  2133. end;
  2134. end;
  2135. {$ifdef AARCH64}
  2136. A_STR:
  2137. { Sometimes, the second mov might not be present as we're writing the
  2138. zero register to the next address - that is:
  2139. movz w0,x
  2140. strb w0,[sp, #ofs]
  2141. strb wzr,[sp, #ofs+1]
  2142. Which becomes:
  2143. movz w0,x
  2144. strh w0,[sp, #ofs]
  2145. }
  2146. if RegEndOfLife(ThisReg, taicpu(hp1)) and
  2147. (taicpu(p_second).oppostfix = PF_B) and
  2148. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) then
  2149. begin
  2150. { Is the second storage location exactly one byte ahead? }
  2151. Inc(ThisRef.offset);
  2152. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2153. begin
  2154. { Merge the constants and remove the second pair of instructions }
  2155. DebugMsg(SPeepholeOptimization + 'Merged a byte-write and a zero-register byte-write to memory into a single half-write (MovzStrbStrb2MovzStrh 1)', p);
  2156. taicpu(p).oper[1]^.val := taicpu(p).oper[1]^.val and $FF; { In case there's some extraneous bits }
  2157. taicpu(hp1).oppostfix := PF_H;
  2158. RemoveInstruction(p_second);
  2159. Result := True;
  2160. end;
  2161. end;
  2162. {$endif AARCH64}
  2163. else
  2164. ;
  2165. end;
  2166. { Search ahead to see if more bytes are written individually,
  2167. because then we may be able to merge 4 bytes into a full
  2168. word write in a single pass }
  2169. if Result then
  2170. begin
  2171. SearchAhead;
  2172. Exit;
  2173. end;
  2174. end;
  2175. PF_H:
  2176. {
  2177. With sequences such as:
  2178. movz w0,x
  2179. strh w0,[sp, #ofs]
  2180. movz w0,y
  2181. strh w0,[sp, #ofs+2]
  2182. Merge the constants to:
  2183. movz w0,x
  2184. movk w0,y,lsl #16
  2185. str w0,[sp, #ofs]
  2186. Only use the stack pointer or frame pointer and an offset
  2187. that's a multiple of 4 though to guarantee alignment
  2188. }
  2189. if ((ThisRef.offset mod 4) = 0) and
  2190. GetNextInstruction(hp1, p_second) and
  2191. (p_second.typ = ait_instruction)
  2192. {$ifdef ARM}
  2193. and (taicpu(p_second).condition = taicpu(p).condition)
  2194. {$endif ARM}
  2195. then
  2196. begin
  2197. case taicpu(p_second).opcode of
  2198. {$ifdef ARM}
  2199. A_MOV,
  2200. A_MOVW:
  2201. if (taicpu(p).oppostfix = PF_None) and
  2202. ((taicpu(p).opcode <> A_MOV) or (taicpu(p).oper[1]^.typ = top_const)) then
  2203. {$endif ARM}
  2204. {$ifdef AARCH64}
  2205. A_MOVZ:
  2206. {$endif AARCH64}
  2207. begin
  2208. if SuperRegistersEqual(taicpu(p_second).oper[0]^.reg, ThisReg) and
  2209. GetNextInstruction(p_second, hp1_second) and
  2210. MatchInstruction(hp1_second, A_STR{$ifdef ARM}, [taicpu(p).condition]{$endif ARM}, [PF_H]) and
  2211. SuperRegistersEqual(taicpu(hp1_second).oper[0]^.reg, ThisReg) then
  2212. begin
  2213. { Is the second storage location exactly one byte ahead? }
  2214. Inc(ThisRef.offset, 2);
  2215. if RefsEqual(taicpu(hp1_second).oper[1]^.ref^, ThisRef) and
  2216. { The final safety check... make sure the register used
  2217. to store the constant isn't used afterwards }
  2218. RegEndOfLife(ThisReg, taicpu(hp1_second)) then
  2219. begin
  2220. { Merge the constants }
  2221. DebugMsg(SPeepholeOptimization + 'Merged two half-writes to memory into a single word-write (MovzStrhMovzStrh2MovzMovkStr)', p);
  2222. { Repurpose the second MOVZ instruction into a MOVK instruction }
  2223. if taicpu(p_second).oper[1]^.val = 0 then
  2224. begin
  2225. { Or just remove it if it's not needed }
  2226. RemoveInstruction(p_second);
  2227. {$ifdef ARM}
  2228. { If within the range 0..255, MOV suffices (256 can also be encoded this way) }
  2229. if (taicpu(p).oper[1]^.val < 0) or (taicpu(p).oper[1]^.val > 256) then
  2230. taicpu(p).opcode := A_MOVW;
  2231. {$endif ARM}
  2232. taicpu(hp1).oppostfix := PF_None;
  2233. end
  2234. else
  2235. begin
  2236. asml.Remove(p_second);
  2237. asml.InsertAfter(p_second, p);
  2238. {$ifdef ARM}
  2239. taicpu(p).opcode := A_MOVW;
  2240. {$endif ARM}
  2241. taicpu(p_second).opcode := HI_16_WRITE;
  2242. {$ifdef AARCH64}
  2243. so.shiftmode := SM_LSL;
  2244. so.shiftimm := 16;
  2245. taicpu(p_second).ops := 3;
  2246. taicpu(p_second).loadshifterop(2, so);
  2247. { Make doubly sure we're only using the 32-bit register, otherwise STR could write 64 bits }
  2248. setsubreg(ThisReg, R_SUBD);
  2249. taicpu(p).oper[0]^.reg := ThisReg;
  2250. taicpu(p_second).oper[0]^.reg := ThisReg;
  2251. taicpu(hp1).oper[0]^.reg := ThisReg;
  2252. {$endif AARCH64}
  2253. taicpu(hp1).oppostfix := PF_None;
  2254. {$ifdef AARCH64}
  2255. hp1 := p_second; { Since p_second now appears immediately after p }
  2256. p_second := hp1;
  2257. {$endif AARCH64}
  2258. { TODO: Confirm that the A_MOVZ / A_MOVK combination is the most efficient }
  2259. end;
  2260. RemoveInstruction(hp1_second);
  2261. Result := True;
  2262. end;
  2263. end;
  2264. end;
  2265. {$ifdef AARCH64}
  2266. A_STR:
  2267. { Sometimes, the second mov might not be present as we're writing the
  2268. zero register to the next address - that is:
  2269. movz w0,x
  2270. strh w0,[sp, #ofs]
  2271. strh wzr,[sp, #ofs+1]
  2272. Which becomes:
  2273. movz w0,x
  2274. str w0,[sp, #ofs]
  2275. }
  2276. if RegEndOfLife(ThisReg, taicpu(hp1)) and
  2277. (taicpu(p_second).oppostfix = PF_H) and
  2278. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) then
  2279. begin
  2280. { Is the second storage location exactly one byte ahead? }
  2281. Inc(ThisRef.offset, 2);
  2282. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2283. begin
  2284. { Merge the constants and remove the second pair of instructions }
  2285. DebugMsg(SPeepholeOptimization + 'Merged a half-write and a zero-register half-write to memory into a single word-write (MovzStrhStrh2MovzStr)', p);
  2286. { Make doubly sure we're only using the 32-bit register, otherwise STR could write 64 bits }
  2287. setsubreg(ThisReg, R_SUBD);
  2288. taicpu(p).oper[0]^.reg := ThisReg;
  2289. taicpu(hp1).oper[0]^.reg := ThisReg;
  2290. taicpu(hp1).oppostfix := PF_None;
  2291. RemoveInstruction(p_second);
  2292. Result := True;
  2293. end;
  2294. end;
  2295. {$endif AARCH64}
  2296. else
  2297. ;
  2298. end;
  2299. {$ifdef AARCH64}
  2300. { Search ahead to see if more half-words are written
  2301. individually, because then we may be able to merge
  2302. 4 words into a full extended write in a single pass }
  2303. if Result then
  2304. begin
  2305. SearchAhead;
  2306. Exit;
  2307. end;
  2308. {$endif AARCH64}
  2309. end;
  2310. else
  2311. ;
  2312. end;
  2313. end;
  2314. end;
  2315. {$ifdef AARCH64}
  2316. A_MOVK:
  2317. if (getsubreg(ThisReg) = R_SUBD) and
  2318. Assigned(hp1) and
  2319. (taicpu(hp1).oper[0]^.reg = ThisReg) and
  2320. (taicpu(hp1).ops = 3) and
  2321. (taicpu(hp1).oper[2]^.shifterop^.shiftmode = SM_LSL) and
  2322. (taicpu(hp1).oper[2]^.shifterop^.shiftimm = 16) and
  2323. GetNextInstruction(hp1, hp2) and
  2324. MatchInstruction(hp2, A_STR, [PF_None]) and
  2325. (taicpu(hp2).oper[0]^.reg = ThisReg) then
  2326. begin
  2327. {
  2328. With sequences such as:
  2329. movz w0,x
  2330. movk w0,y,lsl #16
  2331. str w0,[sp, #ofs]
  2332. movz w0,z
  2333. movk w0,q,lsl #16
  2334. str w0,[sp, #ofs+4]
  2335. Merge the constants to:
  2336. movz x0,x
  2337. movk x0,y,lsl #16
  2338. movk x0,z,lsl #32
  2339. movk x0,q,lsl #48
  2340. str x0,[sp, #ofs]
  2341. Only use the stack pointer or frame pointer and an offset
  2342. that's a multiple of 8 though to guarantee alignment
  2343. }
  2344. ThisRef := taicpu(hp2).oper[1]^.ref^;
  2345. if ((ThisRef.offset mod 8) = 0) and
  2346. GetNextInstruction(hp2, p_second) and
  2347. (p_second.typ = ait_instruction) then
  2348. case taicpu(p_second).opcode of
  2349. A_MOVZ:
  2350. if (
  2351. (taicpu(p_second).oper[0]^.reg = ThisReg) or
  2352. (
  2353. RegEndOfLife(ThisReg, taicpu(hp2)) and
  2354. (getsubreg(taicpu(p_second).oper[0]^.reg) = R_SUBD)
  2355. )
  2356. ) and GetNextInstruction(p_second, hp1_second) then
  2357. begin
  2358. case taicpu(hp1_second).opcode of
  2359. A_MOVK:
  2360. if (taicpu(p_second).oper[1]^.val <= $FFFF) and
  2361. (taicpu(hp1_second).oper[0]^.reg = taicpu(p_second).oper[0]^.reg) and
  2362. (taicpu(hp1_second).ops = 3) and
  2363. (taicpu(hp1_second).oper[2]^.shifterop^.shiftmode = SM_LSL) and
  2364. (taicpu(hp1_second).oper[2]^.shifterop^.shiftimm = 16) and
  2365. GetNextInstruction(hp1_second, hp2_second) and
  2366. MatchInstruction(hp2_second, A_STR, [PF_None]) and
  2367. (taicpu(hp1_second).oper[0]^.reg = taicpu(p_second).oper[0]^.reg) then
  2368. begin
  2369. Inc(ThisRef.offset, 4);
  2370. if RefsEqual(taicpu(hp2_second).oper[1]^.ref^, ThisRef) and
  2371. { The final safety check... make sure the register used
  2372. to store the constant isn't used afterwards }
  2373. RegEndOfLife(taicpu(p_second).oper[0]^.reg, taicpu(hp2_second)) then
  2374. begin
  2375. DebugMsg(SPeepholeOptimization + 'Merged two word-writes to memory into a single extended-write (MovzMovkStrMovzMovkStr2MovzMovkMovkMovkStr)', p);
  2376. { Extend register to 64-bit and repurpose second MOVZ to a MOVK with lsl 32 }
  2377. setsubreg(ThisReg, R_SUBQ);
  2378. taicpu(p).oper[0]^.reg := ThisReg;
  2379. taicpu(hp1).oper[0]^.reg := ThisReg;
  2380. { If the 3rd word is zero, we can remove the instruction entirely }
  2381. if taicpu(p_second).oper[1]^.val = 0 then
  2382. RemoveInstruction(p_second)
  2383. else
  2384. begin
  2385. taicpu(p_second).oper[0]^.reg := ThisReg;
  2386. so.shiftimm := 32;
  2387. so.shiftmode := SM_LSL;
  2388. taicpu(p_second).opcode := A_MOVK;
  2389. taicpu(p_second).ops := 3;
  2390. taicpu(p_second).loadshifterop(2, so);
  2391. AsmL.Remove(p_second);
  2392. AsmL.InsertBefore(p_second, hp2);
  2393. end;
  2394. taicpu(hp1_second).oper[0]^.reg := ThisReg;
  2395. taicpu(hp1_second).oper[2]^.shifterop^.shiftimm := 48;
  2396. taicpu(hp2).oper[0]^.reg := ThisReg;
  2397. AsmL.Remove(hp1_second);
  2398. AsmL.InsertBefore(hp1_second, hp2);
  2399. RemoveInstruction(hp2_second);
  2400. Result := True;
  2401. end;
  2402. end;
  2403. else
  2404. ;
  2405. end;
  2406. end;
  2407. A_STR:
  2408. { Sometimes, the second mov might not be present as we're writing the
  2409. zero register to the next address - that is:
  2410. movz w0,x
  2411. movk w0,y,lsl #16
  2412. str w0,[sp, #ofs]
  2413. str wzr,[sp, #ofs+4]
  2414. Which becomes:
  2415. movz x0,x
  2416. movk x0,y,lsl #16
  2417. str x0,[sp, #ofs]
  2418. }
  2419. begin
  2420. { Sometimes, the second mov might not be present as we're writing the
  2421. zero register to the next address - that is:
  2422. movz w0,x
  2423. strh w0,[sp, #ofs]
  2424. strh wzr,[sp, #ofs+1]
  2425. Which becomes:
  2426. movz w0,x
  2427. str w0,[sp, #ofs]
  2428. }
  2429. { Don't need to check end-of-life because the upper 32 bits are zero
  2430. and the overall value isn't being modified }
  2431. if (taicpu(p_second).oppostfix = PF_None) and
  2432. (taicpu(p_second).oper[0]^.reg = NR_WZR) then
  2433. begin
  2434. { Is the second storage location exactly one byte ahead? }
  2435. Inc(ThisRef.offset, 4);
  2436. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2437. begin
  2438. { Merge the constants and remove the second pair of instructions }
  2439. DebugMsg(SPeepholeOptimization + 'Merged a word-write and a zero-register word-write to memory into a single extended-write (MovzStrStr2MovzStr)', p);
  2440. setsubreg(taicpu(p).oper[0]^.reg, R_SUBQ);
  2441. setsubreg(taicpu(hp1).oper[0]^.reg, R_SUBQ);
  2442. setsubreg(taicpu(hp2).oper[0]^.reg, R_SUBQ);
  2443. RemoveInstruction(p_second);
  2444. Result := True;
  2445. end;
  2446. end;
  2447. end
  2448. else
  2449. ;
  2450. end;
  2451. end;
  2452. {$endif AARCH64}
  2453. else
  2454. ;
  2455. end;
  2456. end;
  2457. {$ifdef AARCH64}
  2458. A_STR:
  2459. { hp1 is probably nil }
  2460. if getsupreg(taicpu(p).oper[0]^.reg) = RS_WZR then
  2461. begin
  2462. ThisRef := taicpu(p).oper[1]^.ref^;
  2463. if (ThisRef.addressmode = AM_OFFSET) and
  2464. (ThisRef.index = NR_NO) and
  2465. { Only permit writes to the stack, since we can guarantee alignment with that }
  2466. (
  2467. (ThisRef.base = NR_STACK_POINTER_REG) or
  2468. (ThisRef.base = current_procinfo.framepointer)
  2469. ) then
  2470. begin
  2471. case taicpu(p).oppostfix of
  2472. PF_B:
  2473. {
  2474. With sequences such as:
  2475. strb wzr,[sp, #ofs]
  2476. movz w0,x
  2477. strb w0,[sp, #ofs+1]
  2478. Merge the constants to:
  2479. movz w0,x shl 8
  2480. strh w0,[sp, #ofs]
  2481. Only use the stack pointer or frame pointer and an even offset though
  2482. to guarantee alignment
  2483. }
  2484. if ((ThisRef.offset mod 2) = 0) and
  2485. GetNextInstruction(p, p_second) and
  2486. (p_second.typ = ait_instruction) then
  2487. begin
  2488. case taicpu(p_second).opcode of
  2489. A_MOVZ:
  2490. begin
  2491. ThisReg := taicpu(p_second).oper[0]^.reg;
  2492. if GetNextInstruction(p_second, hp1_second) and
  2493. MatchInstruction(hp1_second, A_STR, [PF_B]) and
  2494. SuperRegistersEqual(taicpu(hp1_second).oper[0]^.reg, ThisReg) then
  2495. begin
  2496. { Is the second storage location exactly one byte ahead? }
  2497. Inc(ThisRef.offset);
  2498. if RefsEqual(taicpu(hp1_second).oper[1]^.ref^, ThisRef) and
  2499. { The final safety check... make sure the register used
  2500. to store the constant isn't used afterwards }
  2501. RegEndOfLife(ThisReg, taicpu(hp1_second)) then
  2502. begin
  2503. { Merge the constants by repurposing the 2nd move, changing the register in the first STR and removing the second STR }
  2504. DebugMsg(SPeepholeOptimization + 'Merged a zero-register byte-write and a byte-write to memory into a single half-write (MovzStrbStrb2MovzStrh 2)', p);
  2505. taicpu(p_second).oper[1]^.val := (taicpu(p_second).oper[1]^.val and $FF) shl 8;
  2506. taicpu(hp1_second).oppostfix := PF_H;
  2507. Dec(taicpu(hp1_second).oper[1]^.ref^.offset, 1);
  2508. RemoveCurrentP(p, p_second);
  2509. Result := True;
  2510. hp1 := hp1_second; { So SearchAhead works properly below }
  2511. end;
  2512. end;
  2513. end;
  2514. A_STR:
  2515. { Change:
  2516. strb wzr,[sp, #ofs]
  2517. strb wzr,[sp, #ofs+1]
  2518. To:
  2519. strh wzr,[sp, #ofs]
  2520. }
  2521. if (taicpu(p_second).oppostfix = PF_B) and
  2522. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) then
  2523. begin
  2524. { Is the second storage location exactly one byte ahead? }
  2525. Inc(ThisRef.offset);
  2526. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2527. begin
  2528. DebugMsg(SPeepholeOptimization + 'Merged two zero-register byte-writes to memory into a single zero-register half-write (StrbStrb2Strh)', p);
  2529. taicpu(p).oppostfix := PF_H;
  2530. RemoveInstruction(p_second);
  2531. if hp1 = p_second then { Make sure hp1 deson't become a dangling pointer }
  2532. GetNextInstruction(p, hp1);
  2533. Result := True;
  2534. end;
  2535. end;
  2536. else
  2537. ;
  2538. end;
  2539. { Search ahead to see if more bytes are written individually,
  2540. because then we may be able to merge 4 bytes into a full
  2541. word write in a single pass }
  2542. if Result then
  2543. begin
  2544. SearchAhead;
  2545. Exit;
  2546. end;
  2547. end;
  2548. PF_H:
  2549. {
  2550. With sequences such as:
  2551. strh wzr,[sp, #ofs]
  2552. movz w0,x
  2553. strh w0,[sp, #ofs+2]
  2554. Merge the constants to:
  2555. movz w0,#0
  2556. movk w0,x,lsl #16
  2557. str w0,[sp, #ofs]
  2558. Only use the stack pointer or frame pointer and an offset
  2559. that's a multiple of 4 though to guarantee alignment
  2560. }
  2561. if ((ThisRef.offset mod 4) = 0) and
  2562. GetNextInstruction(p, p_second) and
  2563. (p_second.typ = ait_instruction) then
  2564. begin
  2565. case taicpu(p_second).opcode of
  2566. A_MOVZ:
  2567. begin
  2568. ThisReg := taicpu(p_second).oper[0]^.reg;
  2569. if GetNextInstruction(p_second, hp1_second) and
  2570. MatchInstruction(hp1_second, A_STR, [PF_H]) and
  2571. SuperRegistersEqual(taicpu(hp1_second).oper[0]^.reg, ThisReg) then
  2572. begin
  2573. { Is the second storage location exactly two bytes ahead? }
  2574. Inc(ThisRef.offset, 2);
  2575. if RefsEqual(taicpu(hp1_second).oper[1]^.ref^, ThisRef) and
  2576. { The final safety check... make sure the register used
  2577. to store the constant isn't used afterwards }
  2578. RegEndOfLife(ThisReg, taicpu(hp1_second)) then
  2579. begin
  2580. { Merge the constants }
  2581. DebugMsg(SPeepholeOptimization + 'Merged a zero-register half-write and a half-write to memory into a single word-write (StrhMovzStrh2MovzMovkStr)', p);
  2582. { Repurpose the first STR to a MOVZ instruction }
  2583. taicpu(p).opcode := A_MOVZ;
  2584. taicpu(p).oppostfix := PF_None;
  2585. taicpu(p).oper[0]^.reg := ThisReg;
  2586. taicpu(p).loadconst(1, 0);
  2587. so.shiftmode := SM_LSL;
  2588. so.shiftimm := 16;
  2589. taicpu(p_second).opcode := A_MOVK;
  2590. taicpu(p_second).ops := 3;
  2591. taicpu(p_second).loadshifterop(2, so);
  2592. { Make doubly sure we're only using the 32-bit register, otherwise STR could write 64 bits }
  2593. setsubreg(ThisReg, R_SUBD);
  2594. taicpu(p).oper[0]^.reg := ThisReg;
  2595. taicpu(p_second).oper[0]^.reg := ThisReg;
  2596. taicpu(hp1_second).oper[0]^.reg := ThisReg;
  2597. { TODO: Confirm that the A_MOVZ / A_MOVK combination is the most efficient }
  2598. taicpu(hp1_second).oppostfix := PF_None;
  2599. Dec(taicpu(hp1_second).oper[1]^.ref^.offset, 2);
  2600. Result := True;
  2601. end;
  2602. end;
  2603. end;
  2604. A_STR:
  2605. { Change:
  2606. strh wzr,[sp, #ofs]
  2607. strh wzr,[sp, #ofs+2]
  2608. To:
  2609. str wzr,[sp, #ofs]
  2610. }
  2611. if (taicpu(p_second).oppostfix = PF_H) and
  2612. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) then
  2613. begin
  2614. { Is the second storage location exactly one byte ahead? }
  2615. Inc(ThisRef.offset, 2);
  2616. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2617. begin
  2618. DebugMsg(SPeepholeOptimization + 'Merged two zero-register half-writes to memory into a single zero-register word-write (StrhStrh2Str)', p);
  2619. { Make doubly sure we're only using the 32-bit register, otherwise STR could write 64 bits }
  2620. taicpu(p).oper[0]^.reg := NR_WZR;
  2621. taicpu(p).oppostfix := PF_None;
  2622. RemoveInstruction(p_second);
  2623. if hp1 = p_second then { Make sure hp1 deson't become a dangling pointer }
  2624. GetNextInstruction(p, hp1);
  2625. Result := True;
  2626. end;
  2627. end;
  2628. else
  2629. ;
  2630. end;
  2631. end;
  2632. PF_None:
  2633. {
  2634. With sequences such as:
  2635. str wzr,[sp, #ofs]
  2636. movz w0,x
  2637. movk w0,y,lsl #16
  2638. str w0,[sp, #ofs+4]
  2639. Merge the constants to:
  2640. movz x0,#0
  2641. movk x0,x,lsl #32
  2642. movk x0,y,lsl #48
  2643. str x0,[sp, #ofs]
  2644. Only use the stack pointer or frame pointer and an offset
  2645. that's a multiple of 8 though to guarantee alignment
  2646. }
  2647. if ((ThisRef.offset mod 8) = 0) and
  2648. GetNextInstruction(p, p_second) and
  2649. (p_second.typ = ait_instruction) then
  2650. begin
  2651. case taicpu(p_second).opcode of
  2652. A_MOVZ:
  2653. begin
  2654. ThisReg := taicpu(p_second).oper[0]^.reg;
  2655. if GetNextInstruction(p_second, hp1_second) and
  2656. MatchInstruction(hp1_second, A_MOVK, []) and
  2657. GetNextInstruction(hp1_second, hp2_second) and
  2658. MatchInstruction(hp2_second, A_STR, [PF_None]) and
  2659. (taicpu(hp2_second).oper[0]^.reg = ThisReg) then
  2660. begin
  2661. { Is the second storage location exactly four bytes ahead? }
  2662. Inc(ThisRef.offset, 4);
  2663. if RefsEqual(taicpu(hp2_second).oper[1]^.ref^, ThisRef) and
  2664. { The final safety check... make sure the register used
  2665. to store the constant isn't used afterwards }
  2666. RegEndOfLife(ThisReg, taicpu(hp1_second)) then
  2667. begin
  2668. { Merge the constants }
  2669. DebugMsg(SPeepholeOptimization + 'Merged a zero-register word-write and a word-write to memory into a single extended-write (StrMovzMovkStr2MovzMovkMovkStr)', p);
  2670. setsubreg(ThisReg, R_SUBQ);
  2671. { Repurpose the first STR to a MOVZ instruction }
  2672. taicpu(p).opcode := A_MOVZ;
  2673. taicpu(p).oppostfix := PF_None;
  2674. taicpu(p).oper[0]^.reg := ThisReg;
  2675. taicpu(p).loadconst(1, 0);
  2676. { If the 3rd word is zero, we can remove the instruction entirely }
  2677. if taicpu(p_second).oper[1]^.val = 0 then
  2678. begin
  2679. RemoveInstruction(p_second);
  2680. if hp1 = p_second then { Make sure hp1 deson't become a dangling pointer }
  2681. GetNextInstruction(p, hp1);
  2682. end
  2683. else
  2684. begin
  2685. so.shiftmode := SM_LSL;
  2686. so.shiftimm := 32;
  2687. taicpu(p_second).opcode := A_MOVK;
  2688. taicpu(p_second).ops := 3;
  2689. taicpu(p_second).loadshifterop(2, so);
  2690. taicpu(p_second).oper[0]^.reg := ThisReg;
  2691. end;
  2692. taicpu(p).oper[0]^.reg := ThisReg;
  2693. taicpu(hp1_second).oper[0]^.reg := ThisReg;
  2694. taicpu(hp1_second).oper[2]^.shifterop^.shiftimm := 48;
  2695. { TODO: Confirm that the A_MOVZ / A_MOVK / A_MOVK combination is the most efficient }
  2696. taicpu(hp2_second).oppostfix := PF_None;
  2697. Dec(taicpu(hp2_second).oper[1]^.ref^.offset, 4);
  2698. taicpu(hp2_second).oper[0]^.reg := ThisReg; { Remember to change the register to its 64-bit counterpart }
  2699. Result := True;
  2700. end;
  2701. end;
  2702. end;
  2703. A_STR:
  2704. { Change:
  2705. str wzr,[sp, #ofs]
  2706. str wzr,[sp, #ofs+4]
  2707. To:
  2708. str xzr,[sp, #ofs]
  2709. }
  2710. if (taicpu(p_second).oppostfix = PF_None) and
  2711. (getsupreg(taicpu(p_second).oper[0]^.reg) = RS_WZR) then
  2712. begin
  2713. { Is the second storage location exactly one byte ahead? }
  2714. Inc(ThisRef.offset, 4);
  2715. if RefsEqual(taicpu(p_second).oper[1]^.ref^, ThisRef) then
  2716. begin
  2717. DebugMsg(SPeepholeOptimization + 'Merged two zero-register word-writes to memory into a single zero-register extended-write (StrStr2Str)', p);
  2718. taicpu(p).oper[0]^.reg := NR_XZR;
  2719. RemoveInstruction(p_second);
  2720. if hp1 = p_second then { Make sure hp1 deson't become a dangling pointer }
  2721. GetNextInstruction(p, hp1);
  2722. Result := True;
  2723. end;
  2724. end;
  2725. else
  2726. ;
  2727. end;
  2728. end;
  2729. else
  2730. ;
  2731. end;
  2732. end;
  2733. end;
  2734. {$endif AARCH64}
  2735. else
  2736. ;
  2737. end;
  2738. end;
  2739. end.