aoptcpu.pas
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. globtype, globals,
  26. cutils,
  27. cgbase, cpubase, aasmtai, aasmcpu,
  28. aopt, aoptcpub, aoptarm, aoptobj;
  29. Type
  30. TCpuAsmOptimizer = class(TARMAsmOptimizer)
  31. { uses the same constructor as TAopObj }
  32. function PrePeepHoleOptsCpu(var p: tai): boolean; override;
  33. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  34. function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
  35. function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  36. function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
  37. function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
  38. function LookForPostindexedPattern(var p : tai) : boolean;
  39. public
  40. { These routines override optimisation code that is shared by all ARM platforms }
  41. function OptPass1LDR(var p: tai): Boolean; override;
  42. function OptPass1STR(var p: tai): Boolean; override;
  43. private
  44. function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
  45. function OptPass1Shift(var p: tai): boolean;
  46. function OptPass1Data(var p: tai): boolean;
  47. function OptPass1FData(var p: tai): Boolean;
  48. function OptPass1STP(var p: tai): boolean;
  49. function OptPass1Mov(var p: tai): boolean;
  50. function OptPass1MOVZ(var p: tai): boolean;
  51. function OptPass1FMov(var p: tai): Boolean;
  52. function OptPass1B(var p: tai): boolean;
  53. function OptPass1SXTW(var p: tai): Boolean;
  54. function OptPass2CSEL(var p: tai): Boolean;
  55. function OptPass2B(var p: tai): Boolean;
  56. function OptPass2LDRSTR(var p: tai): boolean;
  57. function OptPass2MOV(var p: tai): Boolean;
  58. function PostPeepholeOptAND(var p: tai): Boolean;
  59. function PostPeepholeOptCMP(var p: tai): boolean;
  60. function PostPeepholeOptTST(var p: tai): Boolean;
  61. protected
  62. { Like UpdateUsedRegs, but ignores deallocations }
  63. class procedure UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai); static;
  64. { Attempts to allocate a volatile integer register for use between p and hp,
  65. using AUsedRegs for the current register usage information. Returns NR_NO
  66. if no free register could be found }
  67. function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  68. End;
  69. Implementation
  70. uses
  71. aasmbase,
  72. aoptbase,
  73. aoptutils,
  74. cgutils,
  75. procinfo,
  76. paramgr,
  77. verbose;
  78. {$ifdef DEBUG_AOPTCPU}
  79. const
  80. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  81. {$else DEBUG_AOPTCPU}
  82. { Empty strings help the optimizer to remove string concatenations that won't
  83. ever appear to the user on release builds. [Kit] }
  84. const
  85. SPeepholeOptimization = '';
  86. {$endif DEBUG_AOPTCPU}
  87. MAX_CSEL_INSTRUCTIONS = 8;
  88. MAX_CSEL_REGISTERS = 30;
  89. type
  90. TCSELTrackingState = (tsInvalid, tsSimple, tsDetour, tsBranching,
  91. tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching,
  92. tsProcessed);
  93. { Used by OptPass2B to turn conditionally-skipped MOV blocks into CSEL sequences }
  94. TCSELTracking = object
  95. private
  96. CSELScore, ConstCount: LongInt;
  97. RegWrites: array[0..MAX_CSEL_INSTRUCTIONS*2 - 1] of TRegister;
  98. ConstRegs: array[0..MAX_CSEL_REGISTERS - 1] of TRegister;
  99. ConstVals: array[0..MAX_CSEL_REGISTERS - 1] of TCGInt;
  100. ConstSizes: array[0..MAX_CSEL_REGISTERS - 1] of TSubRegister; { May not match ConstRegs if one is shared over multiple CSELs. }
  101. ConstMovs: array[0..MAX_CSEL_REGISTERS - 1] of tai; { Location of initialisation instruction }
  102. ConstWriteSizes: array[0..first_int_imreg - 1] of TSubRegister; { Largest size of register written. }
  103. fOptimizer: TCpuAsmOptimizer;
  104. fLabel: TAsmSymbol;
  105. fInsertionPoint,
  106. fCondition,
  107. fInitialJump,
  108. fFirstMovBlock,
  109. fFirstMovBlockStop,
  110. fSecondJump,
  111. fThirdJump,
  112. fSecondMovBlock,
  113. fSecondMovBlockStop,
  114. fMidLabel,
  115. fEndLabel,
  116. fAllocationRange: tai;
  117. fState: TCSELTrackingState;
  118. function TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
  119. function InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
  120. function AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
  121. public
  122. RegisterTracking: TAllUsedRegs;
  123. constructor Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
  124. destructor Done;
  125. procedure Process(out new_p: tai);
  126. property State: TCSELTrackingState read fState;
  127. end;
  128. PCSELTracking = ^TCSELTracking;
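  { True if p is an instruction that does not already carry a condition
    (and so could still be made conditional) }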
  129. function CanBeCond(p : tai) : boolean;
  130. begin
  131. result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
  132. end;
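  { True if instruction hp loads reg with a new value, i.e. reg (or a pre-/post-
    indexed reference based on reg) is hp's destination operand; stores, compares,
    branches, BL, MSR and FCMP never qualify }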
  133. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  134. var
  135. p: taicpu;
  136. begin
  137. Result := false;
  138. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  139. exit;
  140. p := taicpu(hp);
  141. case p.opcode of
  142. { These operations do not write into a register at all
  143. LDR/STR with post/pre-indexed operations do not need special treatment
  144. because post-/preindexed does not mean that a register
  145. is loaded with a new value, it is only modified }
  146. A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
  147. exit;
  148. else
  149. ;
  150. end;
  151. if p.ops=0 then
  152. exit;
  153. case p.oper[0]^.typ of
  154. top_reg:
  155. Result := SuperRegistersEqual(p.oper[0]^.reg,reg);
  156. top_ref:
  157. Result :=
  158. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  159. (taicpu(p).oper[0]^.ref^.base = reg);
  160. else
  161. ;
  162. end;
  163. end;
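  { True if instruction hp reads reg, either directly as a source operand or as
    the base/index register of a memory reference }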
  164. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  165. var
  166. p: taicpu;
  167. i: longint;
  168. begin
  169. instructionLoadsFromReg := false;
  170. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  171. exit;
  172. p:=taicpu(hp);
  173. i:=1;
  174. { Start on oper[0]? }
  175. if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
  176. i:=0;
  177. while(i<p.ops) do
  178. begin
  179. case p.oper[I]^.typ of
  180. top_reg:
  181. Result := (p.oper[I]^.reg = reg);
  182. top_ref:
  183. Result :=
  184. (p.oper[I]^.ref^.base = reg) or
  185. (p.oper[I]^.ref^.index = reg);
  186. else
  187. ;
  188. end;
  189. { Bail out if we found something }
  190. if Result then
  191. exit;
  192. Inc(I);
  193. end;
  194. end;
  195. {
  196. optimize
  197. ldr/str regX,[reg1]
  198. ...
  199. add/sub reg1,reg1,regY/const
  200. into
  201. ldr/str regX,[reg1], regY/const
  202. }
  203. function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
  204. var
  205. hp1 : tai;
  206. begin
  207. Result:=false;
  208. if (taicpu(p).oper[1]^.typ = top_ref) and
  209. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  210. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  211. (taicpu(p).oper[1]^.ref^.offset=0) and
  212. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
  213. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  214. MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
  215. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
  216. (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
  217. (
  218. { valid offset? }
  219. (taicpu(hp1).oper[2]^.typ=top_const) and
  220. (taicpu(hp1).oper[2]^.val>=-256) and
  221. (abs(taicpu(hp1).oper[2]^.val)<256)
  222. ) and
  223. { don't apply the optimization if the base register is loaded }
  224. (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
  225. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  226. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
  227. begin
  228. if taicpu(p).opcode = A_LDR then
  229. DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
  230. else
  231. DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);
  232. taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  233. if taicpu(hp1).opcode=A_ADD then
  234. taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  235. else
  236. taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  237. asml.Remove(hp1);
  238. hp1.Free;
  239. Result:=true;
  240. end;
  241. end;
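  { Folds a trailing register-to-register FMOV into the instruction p that produced
    its source, e.g. (sketch):
      fadd d0,d1,d2
      fmov d3,d0     ; d0 deallocated afterwards
    becomes
      fadd d3,d1,d2 }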
  242. function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
  243. var
  244. alloc,
  245. dealloc : tai_regalloc;
  246. hp1 : tai;
  247. begin
  248. Result:=false;
  249. if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  250. ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
  251. ) { or
  252. (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  253. (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
  254. ) and
  255. (taicpu(movp).ops=2) and
  256. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  257. { the destination register of the mov must not be used between p and movp }
  258. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  259. { Take care to only do this for instructions which REALLY load to the first register.
  260. Otherwise
  261. str reg0, [reg1]
  262. fmov reg2, reg0
  263. will be optimized to
  264. str reg2, [reg1]
  265. }
  266. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  267. begin
  268. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  269. if assigned(dealloc) then
  270. begin
  271. DebugMsg(SPeepholeOptimization + optimizer+' removed superfluous fmov', movp);
  272. result:=true;
  273. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  274. and remove it if possible }
  275. asml.Remove(dealloc);
  276. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  277. if assigned(alloc) then
  278. begin
  279. asml.Remove(alloc);
  280. alloc.free;
  281. dealloc.free;
  282. end
  283. else
  284. asml.InsertAfter(dealloc,p);
  285. { try to move the allocation of the target register }
  286. GetLastInstruction(movp,hp1);
  287. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  288. if assigned(alloc) then
  289. begin
  290. asml.Remove(alloc);
  291. asml.InsertBefore(alloc,p);
  292. { adjust used regs }
  293. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  294. end;
  295. { change
  296. vldr reg0,[reg1]
  297. vmov reg2,reg0
  298. into
  299. ldr reg2,[reg1]
  300. if reg2 is an int register
  301. if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
  302. taicpu(p).opcode:=A_LDR;
  303. }
  304. { finally get rid of the mov }
  305. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  306. asml.remove(movp);
  307. movp.free;
  308. end;
  309. end;
  310. end;
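  { LDR optimisations: the inherited generic ARM handling, folding of a following
    ADD/SUB of the base register into post-indexed addressing (see
    LookForPostindexedPattern) and removal of a superfluous MOV of the loaded value }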
  311. function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  312. var
  313. hp1: tai;
  314. begin
  315. Result := False;
  316. if inherited OptPass1LDR(p) or
  317. LookForPostindexedPattern(p) then
  318. Exit(True)
  319. else if (taicpu(p).oppostfix in [PF_B,PF_SB,PF_H,PF_SH,PF_None]) and
  320. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  321. RemoveSuperfluousMove(p, hp1, 'Ldr<Postfix>Mov2Ldr<Postfix>') then
  322. Exit(true);
  323. end;
  324. function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  325. begin
  326. Result := False;
  327. if inherited OptPass1STR(p) or
  328. LookForPostindexedPattern(p) then
  329. Exit(True);
  330. end;
  331. function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
  332. var
  333. hp1,hp2: tai;
  334. I2, I: Integer;
  335. shifterop: tshifterop;
  336. begin
  337. Result:=false;
  338. { This folds shifterops into following instructions
  339. <shiftop> r0, r1, #imm
  340. <op> r2, r3, r0
  341. to
  342. <op> r2, r3, r1, <shiftop> #imm
  343. }
  344. { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
  345. if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
  346. MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
  347. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  348. MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
  349. A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
  350. A_SUB, A_TST], [PF_None]) and
  351. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  352. (taicpu(hp1).ops >= 2) and
  353. { Currently we can't fold into another shifterop }
  354. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  355. { SP does not fully support shifted register operands and the exact rules are unclear,
  356. so we do not operate on SP }
  357. (taicpu(hp1).oper[0]^.reg<>NR_SP) and
  358. (taicpu(hp1).oper[1]^.reg<>NR_SP) and
  359. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
  360. { reg1 must not be modified in between }
  361. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  362. (
  363. { Only ONE of the two src operands is allowed to match }
  364. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  365. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  366. ) and
  367. { for SUB, the last operand must match, there is no RSB on AArch64 }
  368. ((taicpu(hp1).opcode<>A_SUB) or
  369. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
  370. begin
  371. { for the two operand instructions, start also at the second operand as they are not always commutative
  372. (depends on the flags tested later on) and thus the operands cannot be swapped }
  373. I2:=1;
  374. for I:=I2 to taicpu(hp1).ops-1 do
  375. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  376. begin
  377. { If the parameter matched on the second op from the RIGHT,
  378. we have to switch the parameters; this will not happen for CMP,
  379. where we only evaluate the rightmost parameter
  380. }
  381. shifterop_reset(shifterop);
  382. case taicpu(p).opcode of
  383. A_LSL:
  384. shifterop.shiftmode:=SM_LSL;
  385. A_ROR:
  386. shifterop.shiftmode:=SM_ROR;
  387. A_LSR:
  388. shifterop.shiftmode:=SM_LSR;
  389. A_ASR:
  390. shifterop.shiftmode:=SM_ASR;
  391. else
  392. InternalError(2019090401);
  393. end;
  394. shifterop.shiftimm:=taicpu(p).oper[2]^.val;
  395. if I <> taicpu(hp1).ops-1 then
  396. begin
  397. if taicpu(hp1).ops = 3 then
  398. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  399. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  400. taicpu(p).oper[1]^.reg, shifterop)
  401. else
  402. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  403. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  404. shifterop);
  405. end
  406. else
  407. if taicpu(hp1).ops = 3 then
  408. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  409. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  410. taicpu(p).oper[1]^.reg,shifterop)
  411. else
  412. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  413. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  414. shifterop);
  415. { Make sure the register used in the shifting is tracked all
  416. the way through, otherwise it may become deallocated while
  417. it's still live and cause incorrect optimisations later }
  418. if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
  419. begin
  420. TransferUsedRegs(TmpUsedRegs);
  421. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  422. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
  423. end;
  424. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  425. asml.insertbefore(hp2, hp1);
  426. RemoveInstruction(hp1);
  427. RemoveCurrentp(p);
  428. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  429. Result:=true;
  430. break;
  431. end;
  432. end
  433. else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
  434. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  435. RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
  436. Result:=true;
  437. end;
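  { Folds a superfluous MOV of the result of a data-processing instruction (ADD, SUB,
    AND, ORR, MUL, ...) back into that instruction, e.g. (sketch):
      add x1,x2,x3
      mov x0,x1      ; x1 not used afterwards
    becomes
      add x0,x2,x3 }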
  438. function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
  439. var
  440. hp1: tai;
  441. begin
  442. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  443. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  444. end;
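  { The floating-point counterpart of OptPass1Data: a following FMOV of the result of
    an FP data instruction (FADD, FMUL, FSQRT, ...) is folded via RemoveSuperfluousFMov }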
  445. function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
  446. var
  447. hp1: tai;
  448. begin
  449. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  450. RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
  451. end;
  452. function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
  453. var
  454. hp1, hp2, hp3, hp4: tai;
  455. begin
  456. Result:=false;
  457. {
  458. change
  459. stp x29,x30,[sp, #-16]!
  460. mov x29,sp
  461. bl abc
  462. ldp x29,x30,[sp], #16
  463. ret
  464. into
  465. b abc
  466. }
  467. if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
  468. MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
  469. (taicpu(p).oper[0]^.reg = NR_X29) and
  470. (taicpu(p).oper[1]^.reg = NR_X30) and
  471. (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  472. (taicpu(p).oper[2]^.ref^.index=NR_NO) and
  473. (taicpu(p).oper[2]^.ref^.offset=-16) and
  474. (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
  475. GetNextInstruction(p, hp1) and
  476. MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
  477. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
  478. (taicpu(hp1).oper[1]^.typ = top_reg) and
  479. (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
  480. GetNextInstruction(hp1, hp2) and
  481. SkipEntryExitMarker(hp2, hp2) and
  482. MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
  483. (taicpu(hp2).oper[0]^.typ = top_ref) and
  484. GetNextInstruction(hp2, hp3) and
  485. SkipEntryExitMarker(hp3, hp3) and
  486. MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
  487. MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
  488. (taicpu(hp3).oper[0]^.reg = NR_X29) and
  489. (taicpu(hp3).oper[1]^.reg = NR_X30) and
  490. (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  491. (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
  492. (taicpu(hp3).oper[2]^.ref^.offset=16) and
  493. (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
  494. GetNextInstruction(hp3, hp4) and
  495. MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
  496. (taicpu(hp4).ops = 0) then
  497. begin
  498. asml.Remove(p);
  499. asml.Remove(hp1);
  500. asml.Remove(hp3);
  501. asml.Remove(hp4);
  502. taicpu(hp2).opcode:=A_B;
  503. p.free;
  504. hp1.free;
  505. hp3.free;
  506. hp4.free;
  507. p:=hp2;
  508. DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
  509. Result:=true;
  510. end;
  511. end;
  512. function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
  513. var
  514. hp1: tai;
  515. so: tshifterop;
  516. begin
  517. Result:=false;
  518. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
  519. (taicpu(p).oppostfix=PF_None) then
  520. begin
  521. RemoveCurrentP(p);
  522. DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
  523. Result:=true;
  524. end
  525. else if (taicpu(p).ops=2) and
  526. (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
  527. GetNextInstruction(p, hp1) and
  528. { Faster to get it out of the way than go through MatchInstruction }
  529. (hp1.typ=ait_instruction) and
  530. (taicpu(hp1).ops=3) and
  531. MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
  532. (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
  533. (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
  534. RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
  535. begin
  536. DebugMsg(SPeepholeOptimization + 'MovOp2AddUxtw 1 done', p);
  537. shifterop_reset(so);
  538. so.shiftmode:=SM_UXTW;
  539. taicpu(hp1).ops:=4;
  540. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  541. taicpu(hp1).loadshifterop(3,so);
  542. RemoveCurrentP(p);
  543. Result:=true;
  544. exit;
  545. end
  546. {
  547. optimize
  548. mov rX, yyyy
  549. followed by instructions that make the mov superfluous or redundant
  550. }
  551. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  552. begin
  553. if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  554. Result:=true
  555. else if (taicpu(p).ops = 2) and
  556. (tai(hp1).typ = ait_instruction) and
  557. RedundantMovProcess(p,hp1) then
  558. Result:=true
  559. end;
  560. end;
  561. function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
  562. var
  563. hp1: tai;
  564. ZeroReg: TRegister;
  565. begin
  566. Result := False;
  567. hp1 := nil;
  568. if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
  569. begin
  570. if
  571. { Check next instruction first so hp1 gets set to something, then
  572. if it remains nil, we know for sure that there's no valid next
  573. instruction. }
  574. not GetNextInstruction(p, hp1) or
  575. { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
  576. not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
  577. (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[0]^.reg) then
  578. begin
  579. if (taicpu(p).oper[1]^.val = 0) then
  580. begin
  581. { Change;
  582. movz reg,#0
  583. (no movk or movn)
  584. To:
  585. mov reg,xzr (or wzr)
  586. Easier to perform other optimisations with registers
  587. }
  588. DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);
  589. { Make sure the zero register is the correct size }
  590. ZeroReg := taicpu(p).oper[0]^.reg;
  591. setsupreg(ZeroReg, RS_XZR);
  592. taicpu(p).opcode := A_MOV;
  593. taicpu(p).loadreg(1, ZeroReg);
  594. Result := True;
  595. Exit;
  596. end;
  597. end;
  598. {
  599. remove the first (dead) movz from
  600. movz reg,...
  601. movz reg,...
  602. }
  603. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  604. MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
  605. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
  606. begin
  607. DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
  608. RemoveCurrentP(p);
  609. Result:=true;
  610. exit;
  611. end;
  612. end;
  613. end;
  614. function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
  615. var
  616. hp1: tai;
  617. alloc, dealloc: tai_regalloc;
  618. begin
  619. {
  620. change
  621. fmov reg0,reg1
  622. fmov reg1,reg0
  623. into
  624. fmov reg0,reg1
  625. }
  626. Result := False;
  627. while GetNextInstruction(p, hp1) and
  628. MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  629. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  630. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
  631. begin
  632. asml.Remove(hp1);
  633. hp1.free;
  634. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
  635. Result:=true;
  636. end;
  637. { change
  638. fmov reg0,const
  639. fmov reg1,reg0
  640. dealloc reg0
  641. into
  642. fmov reg1,const
  643. }
  644. if MatchOpType(taicpu(p),top_reg,top_realconst) and
  645. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  646. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  647. MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
  648. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  649. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
  650. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  651. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))
  652. then
  653. begin
  654. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 2 done', p);
  655. taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
  656. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  657. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  658. if assigned(alloc) and assigned(dealloc) then
  659. begin
  660. asml.Remove(alloc);
  661. alloc.Free;
  662. asml.Remove(dealloc);
  663. dealloc.Free;
  664. end;
  665. { p will be removed, update used register as we continue
  666. with the next instruction after p }
  667. result:=RemoveCurrentP(p);
  668. end;
  669. { not enabled as apparently not happening
  670. if MatchOpType(taicpu(p),top_reg,top_reg) and
  671. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  672. MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
  673. (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
  674. ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
  675. ) and
  676. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  677. not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  678. begin
  679. DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
  680. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  681. if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  682. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  683. if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
  684. taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
  685. RemoveCurrentP(p);
  686. Result:=true;
  687. exit;
  688. end;
  689. }
  690. end;
  691. function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean;
  692. var
  693. hp1: tai;
  694. GetNextInstructionUsingReg_hp1: Boolean;
  695. begin
  696. Result:=false;
  697. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  698. begin
  699. {
  700. change
  701. sxtw reg2,reg1
  702. str reg2,[...]
  703. dealloc reg2
  704. to
  705. str reg1,[...]
  706. }
  707. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  708. (taicpu(p).ops=2) and
  709. MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
  710. (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  711. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  712. { the reference in the str must not use reg2 }
  713. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  714. { reg1 must not be modified in between }
  715. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  716. begin
  717. DebugMsg(SPeepholeOptimization + 'SxtwStr2Str done', p);
  718. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  719. result:=RemoveCurrentP(p);
  720. end
  721. {
  722. change
  723. sxtw reg2,reg1
  724. sxtw reg3,reg2
  725. dealloc reg2
  726. to
  727. sxtw reg3,reg1
  728. }
  729. else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
  730. (taicpu(p).ops=2) and
  731. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  732. (taicpu(hp1).ops=2) and
  733. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  734. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  735. { reg1 must not be modified in between }
  736. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  737. begin
  738. DebugMsg(SPeepholeOptimization + 'SxtwSxtw2Sxtw done', p);
  739. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  740. taicpu(hp1).opcode:=A_SXTW;
  741. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  742. result:=RemoveCurrentP(p);
  743. end
  744. else if USxtOp2Op(p,hp1,SM_SXTW) then
  745. Result:=true
  746. else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
  747. Result:=true;
  748. end;
  749. end;
  750. function TCpuAsmOptimizer.OptPass1B(var p: tai): boolean;
  751. var
  752. hp1, hp2, hp3, hp4, hp5: tai;
  753. Invert: Boolean;
  754. begin
  755. Result := False;
  756. {
  757. convert
  758. b<c> .L1
  759. movz reg,#1
  760. b .L2
  761. .L1
  762. movz reg,#0 (or mov reg,xzr)
  763. .L2
  764. into
  765. cset reg,<not(c)>
  766. Also do the same if the constants are reversed, instead converting it to:
  767. cset reg,<c>
  768. }
  769. if (taicpu(p).condition <> C_None) and
  770. (taicpu(p).oper[0]^.typ = top_ref) and
  771. GetNextInstruction(p, hp1) and
  772. { Check individually instead of using MatchInstruction in order to save time }
  773. (hp1.typ = ait_instruction) and
  774. (taicpu(hp1).condition = C_None) and
  775. (taicpu(hp1).oppostfix = PF_None) and
  776. (taicpu(hp1).ops = 2) and
  777. (
  778. (
  779. (taicpu(hp1).opcode = A_MOVZ) and
  780. (taicpu(hp1).oper[1]^.val in [0, 1])
  781. ) or
  782. (
  783. (taicpu(hp1).opcode = A_MOV) and
  784. (getsupreg(taicpu(hp1).oper[1]^.reg) = RS_XZR)
  785. )
  786. ) and
  787. GetNextInstruction(hp1, hp2) and
  788. MatchInstruction(hp2, A_B, [PF_None]) and
  789. (taicpu(hp2).condition = C_None) and
  790. (taicpu(hp2).oper[0]^.typ = top_ref) and
  791. GetNextInstruction(hp2, hp3) and
  792. (hp3.typ = ait_label) and
  793. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol) = tai_label(hp3).labsym) and
  794. GetNextInstruction(hp3, hp4) and
  795. { As before, check individually instead of using MatchInstruction in order to save time }
  796. (hp4.typ = ait_instruction) and
  797. (taicpu(hp4).condition = C_None) and
  798. (taicpu(hp4).oppostfix = PF_None) and
  799. (taicpu(hp4).ops = 2) and
  800. (taicpu(hp4).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  801. (
  802. (
  803. (taicpu(hp4).opcode = A_MOVZ) and
  804. (
  805. (
  806. { Check to confirm the following:
  807. - First mov is either "movz reg,#0" or "mov reg,xzr"
  808. - Second mov is "movz reg,#1"
  809. }
  810. (
  811. (taicpu(hp1).oper[1]^.typ = top_reg) { Will be the zero register } or
  812. (taicpu(hp1).oper[1]^.val = 0)
  813. ) and
  814. (taicpu(hp4).oper[1]^.val = 1)
  815. ) or
  816. (
  817. { Check to confirm the following:
  818. - First mov is "movz reg,#1"
  819. - Second mov is "movz reg,#0"
  820. }
  821. MatchOperand(taicpu(hp1).oper[1]^, 1) and
  822. (taicpu(hp4).oper[1]^.val = 0)
  823. )
  824. )
  825. ) or
  826. (
  827. { Check to confirm the following:
  828. - First mov is "movz reg,#1"
  829. - Second mov is "mov reg,xzr"
  830. }
  831. (taicpu(hp4).opcode = A_MOV) and
  832. (getsupreg(taicpu(hp4).oper[1]^.reg) = RS_XZR) and
  833. MatchOperand(taicpu(hp1).oper[1]^, 1)
  834. )
  835. ) and
  836. GetNextInstruction(hp4, hp5) and
  837. (hp5.typ = ait_label) and
  838. (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol) = tai_label(hp5).labsym) then
  839. begin
  840. Invert := MatchOperand(taicpu(hp1).oper[1]^, 1); { if true, hp4 will be mov reg,0 in some form }
  841. if Invert then
  842. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  843. tai_label(hp3).labsym.DecRefs;
  844. { If this isn't the only reference to the middle label, we can
  845. still make a saving - only that the first jump and everything
  846. that follows will remain. }
  847. if (tai_label(hp3).labsym.getrefs = 0) then
  848. begin
  849. if Invert then
  850. DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c)',p)
  851. else
  852. DebugMsg(SPeepholeOptimization + 'B(c)Movz0bMovZ1 -> Cset(c)',p);
  853. { remove jump, first label and second MOV (also catching any aligns) }
  854. repeat
  855. if not GetNextInstruction(hp2, hp3) then
  856. InternalError(2022070801);
  857. RemoveInstruction(hp2);
  858. hp2 := hp3;
  859. until hp2 = hp5;
  860. { Don't decrement the reference count before the removal loop
  861. above, otherwise GetNextInstruction won't stop on
  862. the label }
  863. tai_label(hp5).labsym.DecRefs;
  864. end
  865. else
  866. begin
  867. if Invert then
  868. DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c) (partial)',p)
  869. else
  870. DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c) (partial)',p);
  871. end;
  872. taicpu(hp1).opcode := A_CSET;
  873. taicpu(hp1).loadconditioncode(1, taicpu(p).condition);
  874. RemoveCurrentP(p, hp1);
  875. Result:=true;
  876. exit;
  877. end;
  878. end;
  879. function TCpuAsmOptimizer.OptPass2B(var p: tai): Boolean;
  880. var
  881. hp1: tai;
  882. LabelSym: TAsmLabel;
  883. CSELTracking: PCSELTracking;
  884. begin
  885. Result := False;
  886. if (taicpu(p).condition = C_None) and
  887. IsJumpToLabel(taicpu(p)) then
  888. begin
  889. { Check for:
  890. B @lbl
  891. ...
  892. @Lbl:
  893. RET
  894. Change to:
  895. RET (and reduce reference count on label)
  896. }
  897. LabelSym := TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol);
  898. hp1 := GetLabelWithSym(LabelSym);
  899. if Assigned(hp1) and
  900. GetNextInstruction(hp1, hp1) and
  901. (hp1.typ = ait_instruction) and
  902. (taicpu(hp1).opcode = A_RET) then
  903. begin
  904. DebugMsg(SPeepholeOptimization + 'B -> RET since a RET immediately follows the destination label (B2Ret)', p);
  905. taicpu(p).ops := 0;
  906. taicpu(p).clearop(0);
  907. taicpu(p).is_jmp := false;
  908. taicpu(p).opcode := A_RET;
  909. { Make sure the label is dereferenced now }
  910. LabelSym.decrefs;
  911. Result := True;
  912. Exit;
  913. end;
  914. end;
  915. if (taicpu(p).condition <> C_None) and
  916. IsJumpToLabel(taicpu(p)) and
  917. GetNextInstruction(p, hp1) and
  918. (hp1.typ = ait_instruction) and
  919. (taicpu(hp1).opcode = A_MOV) then
  920. begin
  921. { check for
  922. b<cond> xxx
  923. <several movs>
  924. xxx:
  925. Also spot:
  926. b<cond> xxx
  927. <several movs>
  928. b xxx
  929. Change to:
  930. <several csels with inverted condition>
  931. b xxx (only for the 2nd case)
  932. }
  933. CSELTracking := New(PCSELTracking, Init(Self, p, hp1, TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol)));
  934. if CSELTracking^.State <> tsInvalid then
  935. begin
  936. CSELTracking^.Process(p);
  937. Result := True;
  938. end;
  939. CSELTracking^.Done;
  940. end;
  941. end;
  942. function TCpuAsmOptimizer.OptPass2CSEL(var p: tai): Boolean;
  943. begin
  944. Result := False;
  945. { Csel r0,r1,r1,cond -> mov r0,r1 }
  946. if (taicpu(p).oper[1]^.reg = taicpu(p).oper[2]^.reg) then
  947. begin
  948. DebugMsg(SPeepholeOptimization + 'CSel2Mov (identical true/false registers)', p);
  949. taicpu(p).opcode := A_MOV;
  950. taicpu(p).ops := 2;
  951. Result := True;
  952. Exit;
  953. end;
  954. end;
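  { Combines two LDR or two STR instructions that access adjacent memory slots
    into a single LDP/STP, e.g. (sketch, 64-bit registers):
      ldr x0,[sp, #16]
      ldr x1,[sp, #24]
    becomes
      ldp x0,x1,[sp, #16] }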
  955. function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
  956. var
  957. hp1, hp1_last: tai;
  958. ThisRegister: TRegister;
  959. OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
  960. TargetOpcode: TAsmOp;
  961. begin
  962. Result := False;
  963. ThisRegister := taicpu(p).oper[0]^.reg;
  964. case taicpu(p).opcode of
  965. A_LDR:
  966. TargetOpcode := A_LDP;
  967. A_STR:
  968. TargetOpcode := A_STP;
  969. else
  970. InternalError(2020081501);
  971. end;
  972. { reg appearing in ref invalidates these optimisations }
  973. if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
  974. begin
  975. { LDP/STP has a smaller permitted offset range than LDR/STR.
  976. TODO: For a group of out-of-range LDR/STR instructions, can
  977. we declare a temporary register equal to the offset base
  978. address, modify the STR instructions to use that register
  979. and then convert them to STP instructions? Note that STR
  980. generally takes 2 cycles (on top of the memory latency),
  981. while LDP/STP takes 3.
  982. }
  983. if (getsubreg(ThisRegister) = R_SUBQ) then
  984. begin
  985. ValidOffset := 8;
  986. MinOffset := -512;
  987. MaxOffset := 504;
  988. end
  989. else
  990. begin
  991. ValidOffset := 4;
  992. MinOffset := -256;
  993. MaxOffset := 252;
  994. end;
  995. hp1_last := p;
  996. { Look for nearby LDR/STR instructions }
  997. if (taicpu(p).oppostfix = PF_NONE) and
  998. (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
  999. { If SkipGetNext is True, GetNextInstruction isn't called }
  1000. while GetNextInstruction(hp1_last, hp1) do
  1001. begin
  1002. if (hp1.typ <> ait_instruction) then
  1003. Break;
  1004. if (taicpu(hp1).opcode = taicpu(p).opcode) then
  1005. begin
  1006. if (taicpu(hp1).oppostfix = PF_NONE) and
  1007. { Registers need to be the same size }
  1008. (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
  1009. (
  1010. (TargetOpcode = A_STP) or
  1011. { LDP x0, x0, [sp, #imm] is undefined behaviour, even
  1012. though such an LDR pair should have been optimised
  1013. out by now. STP is okay }
  1014. (ThisRegister <> taicpu(hp1).oper[0]^.reg)
  1015. ) and
  1016. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1017. (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
  1018. (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
  1019. { Make sure the address registers haven't changed }
  1020. not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
  1021. (
  1022. (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
  1023. not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
  1024. ) and
  1025. { Don't need to check "RegInRef" because the base registers are identical,
  1026. and the first one was checked already. [Kit] }
  1027. (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
  1028. ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
  1029. begin
  1030. { Can we convert these two LDR/STR instructions into a
  1031. single LDR/STP? }
  1032. OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
  1033. if (OffsetVal = ValidOffset) then
  1034. begin
  1035. if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
  1036. begin
  1037. { Convert:
  1038. LDR/STR reg0, [reg2, #ofs]
  1039. ...
  1040. LDR/STR reg1, [reg2, #ofs + 8] // 4 if registers are 32-bit
  1041. To:
  1042. LDP/STP reg0, reg1, [reg2, #ofs]
  1043. }
  1044. taicpu(p).opcode := TargetOpcode;
  1045. if TargetOpcode = A_STP then
  1046. DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
  1047. else
  1048. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
  1049. taicpu(p).ops := 3;
  1050. taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
  1051. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  1052. asml.Remove(hp1);
  1053. hp1.Free;
  1054. Result := True;
  1055. Exit;
  1056. end;
  1057. end
  1058. else if (OffsetVal = -ValidOffset) then
  1059. begin
  1060. if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
  1061. begin
  1062. { Convert:
  1063. LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
  1064. ...
  1065. LDR/STR reg1, [reg2, #ofs]
  1066. To:
  1067. LDP/STP reg1, reg0, [reg2, #ofs]
  1068. }
  1069. taicpu(p).opcode := TargetOpcode;
  1070. if TargetOpcode = A_STP then
  1071. DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
  1072. else
  1073. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
  1074. taicpu(p).ops := 3;
  1075. taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
  1076. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  1077. taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
  1078. asml.Remove(hp1);
  1079. hp1.Free;
  1080. Result := True;
  1081. Exit;
  1082. end;
  1083. end;
  1084. end;
  1085. end
  1086. else
  1087. Break;
  1088. { Don't continue looking for LDR/STR pairs if the address register
  1089. gets modified }
  1090. if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
  1091. Break;
  1092. hp1_last := hp1;
  1093. end;
  1094. end;
  1095. end;
  1096. function TCpuAsmOptimizer.OptPass2MOV(var p: tai): Boolean;
  1097. var
  1098. hp1: tai;
  1099. X: Integer;
  1100. begin
  1101. Result := False;
  1102. { Merge MOV and CSEL instructions left behind by OptPass2B - that is,
  1103. change:
  1104. mov r0,r1
  1105. csel r0,r2,r0,cond
  1106. To:
  1107. csel r0,r2,r1,cond
  1108. (Also if r0 is the second operand)
  1109. }
  1110. if (taicpu(p).oper[1]^.typ = top_reg) and
  1111. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1112. (hp1.typ = ait_instruction) and
  1113. (taicpu(hp1).opcode = A_CSEL) and
  1114. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1115. not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1) then
  1116. begin
  1117. { Use "Result" to note if a change was made so we only have to do
  1118. expensive register allocation once }
  1119. for X := 1 to 2 do
  1120. if (taicpu(hp1).oper[X]^.reg = taicpu(p).oper[0]^.reg) then
  1121. begin
  1122. taicpu(hp1).oper[X]^.reg := taicpu(p).oper[1]^.reg;
  1123. Result := True;
  1124. end;
  1125. if Result then
  1126. begin
  1127. DebugMsg(SPeepholeOptimization + 'MovCSel2CSel', p);
  1128. { Don't need to allocate the zero register - so save time by
  1129. skipping it in this case }
  1130. if getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR then
  1131. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  1132. RemoveCurrentP(p);
  1133. Exit;
  1134. end;
  1135. end;
  1136. end;
  1137. function TCpuAsmOptimizer.PostPeepholeOptAND(var p: tai): Boolean;
  1138. var
  1139. hp1, hp2: tai;
  1140. hp3: taicpu;
  1141. bitval : cardinal;
  1142. begin
  1143. Result:=false;
  1144. {
  1145. and reg1,reg0,<const = power of 2>
  1146. cmp reg1,#0
  1147. <reg1 end of life>
  1148. b.eq/b.ne label
  1149. into
  1150. tb(n)z reg0,<bit index of the constant>,label
  1151. }
  1152. if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
  1153. (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
  1154. GetNextInstruction(p,hp1) and
  1155. MatchInstruction(hp1,A_CMP,[PF_None]) and
  1156. MatchOpType(taicpu(hp1),top_reg,top_const) and
  1157. (taicpu(hp1).oper[1]^.val=0) and
  1158. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
  1159. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1160. GetNextInstruction(hp1,hp2) and
  1161. MatchInstruction(hp2,A_B,[PF_None]) and
  1162. (taicpu(hp2).condition in [C_EQ,C_NE]) then
  1163. begin
  1164. bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
  1165. case taicpu(hp2).condition of
  1166. C_NE:
  1167. hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
  1168. C_EQ:
  1169. hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
  1170. else
  1171. Internalerror(2021100201);
  1172. end;
  1173. taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
  1174. asml.insertbefore(hp3, hp1);
  1175. RemoveInstruction(hp1);
  1176. RemoveInstruction(hp2);
  1177. RemoveCurrentP(p);
  1178. DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
  1179. Result:=true;
  1180. end;
  1181. end;
  1182. function TCpuAsmOptimizer.PostPeepholeOptCMP(var p : tai): boolean;
  1183. var
  1184. hp1,hp2: tai;
  1185. begin
  1186. Result:=false;
  1187. {
  1188. cmp reg0,#0
  1189. b.eq/b.ne label
  1190. into
  1191. cb(n)z reg0,label
  1192. }
  1193. if MatchOpType(taicpu(p),top_reg,top_const) and
  1194. (taicpu(p).oper[0]^.reg<>NR_SP) and
  1195. (taicpu(p).oper[1]^.val=0) and
  1196. GetNextInstruction(p,hp1) and
  1197. MatchInstruction(hp1,A_B,[PF_None]) and
  1198. (taicpu(hp1).condition in [C_EQ,C_NE]) then
  1199. begin
  1200. case taicpu(hp1).condition of
  1201. C_NE:
  1202. hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1203. C_EQ:
  1204. hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1205. else
  1206. Internalerror(2019090801);
  1207. end;
  1208. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  1209. asml.insertbefore(hp2, hp1);
  1210. asml.remove(p);
  1211. asml.remove(hp1);
  1212. p.free;
  1213. hp1.free;
  1214. p:=hp2;
  1215. DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
  1216. Result:=true;
  1217. end;
  1218. end;
  1219. function TCpuAsmOptimizer.PostPeepholeOptTST(var p : tai): boolean;
  1220. var
  1221. hp1: tai;
  1222. hp3: taicpu;
  1223. bitval : cardinal;
  1224. begin
  1225. Result:=false;
  1226. {
  1227. tst reg0,<const = power of 2>
  1228. b.eq/b.ne label
  1229. into
  1230. tb(n)z reg0,<bit index of the constant>,label
  1231. }
  1232. if MatchOpType(taicpu(p),top_reg,top_const) and
  1233. (PopCnt(QWord(taicpu(p).oper[1]^.val))=1) and
  1234. GetNextInstruction(p,hp1) and
  1235. MatchInstruction(hp1,A_B,[C_EQ,C_NE],[PF_None]) then
  1236. begin
  1237. bitval:=BsfQWord(qword(taicpu(p).oper[1]^.val));
  1238. case taicpu(hp1).condition of
  1239. C_NE:
  1240. hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
  1241. C_EQ:
  1242. hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
  1243. else
  1244. Internalerror(2021100210);
  1245. end;
  1246. taicpu(hp3).fileinfo:=taicpu(p).fileinfo;
  1247. asml.insertafter(hp3, p);
  1248. RemoveInstruction(hp1);
  1249. RemoveCurrentP(p, hp3);
  1250. DebugMsg(SPeepholeOptimization + 'TST; B(E/NE) -> TB(Z/NZ) done', p);
  1251. Result:=true;
  1252. end;
  1253. end;
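  { The remaining methods dispatch the individual optimisations by opcode for the
    pre-, pass-1, pass-2 and post-peephole stages }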
  1254. function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
  1255. begin
  1256. result := false;
  1257. if p.typ=ait_instruction then
  1258. begin
  1259. case taicpu(p).opcode of
  1260. A_SBFX,
  1261. A_UBFX:
  1262. Result:=OptPreSBFXUBFX(p);
  1263. else
  1264. ;
  1265. end;
  1266. end;
  1267. end;
  1268. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1269. begin
  1270. result := false;
  1271. if p.typ=ait_instruction then
  1272. begin
  1273. case taicpu(p).opcode of
  1274. A_B:
  1275. Result:=OptPass1B(p);
  1276. A_LDR:
  1277. Result:=OptPass1LDR(p);
  1278. A_STR:
  1279. Result:=OptPass1STR(p);
  1280. A_MOV:
  1281. Result:=OptPass1Mov(p);
  1282. A_MOVZ:
  1283. Result:=OptPass1MOVZ(p);
  1284. A_STP:
  1285. Result:=OptPass1STP(p);
  1286. A_LSR,
  1287. A_ROR,
  1288. A_ASR,
  1289. A_LSL:
  1290. Result:=OptPass1Shift(p);
  1291. A_AND:
  1292. Result:=OptPass1And(p);
  1293. A_NEG,
  1294. A_CSEL,
  1295. A_ADD,
  1296. A_ADC,
  1297. A_SUB,
  1298. A_SBC,
  1299. A_BIC,
  1300. A_EOR,
  1301. A_ORR,
  1302. A_MUL:
  1303. Result:=OptPass1Data(p);
  1304. A_UXTB:
  1305. Result:=OptPass1UXTB(p);
  1306. A_UXTH:
  1307. Result:=OptPass1UXTH(p);
  1308. A_SXTB:
  1309. Result:=OptPass1SXTB(p);
  1310. A_SXTH:
  1311. Result:=OptPass1SXTH(p);
  1312. A_SXTW:
  1313. Result:=OptPass1SXTW(p);
  1314. // A_VLDR,
  1315. A_FMADD,
  1316. A_FMSUB,
  1317. A_FNMADD,
  1318. A_FNMSUB,
  1319. A_FNMUL,
  1320. A_FADD,
  1321. A_FMUL,
  1322. A_FDIV,
  1323. A_FSUB,
  1324. A_FSQRT,
  1325. A_FNEG,
  1326. A_FCVT,
  1327. A_FABS:
  1328. Result:=OptPass1FData(p);
  1329. A_FMOV:
  1330. Result:=OptPass1FMov(p);
  1331. else
  1332. ;
  1333. end;
  1334. end;
  1335. end;

  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_AND,
            A_BIC:
              Result := OptPass2Bitwise(p);
            A_B:
              Result := OptPass2B(p);
            A_CSEL:
              Result := OptPass2CSEL(p);
            A_MOV:
              Result := OptPass2MOV(p);
            A_LDR,
            A_STR:
              Result := OptPass2LDRSTR(p);
            A_TST:
              Result := OptPass2TST(p);
            else
              ;
          end;
        end;
    end;

  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_CMP:
              Result:=PostPeepholeOptCMP(p);
            A_AND:
              Result:=PostPeepholeOptAND(p);
            A_TST:
              Result:=PostPeepholeOptTST(p);
            else
              ;
          end;
        end;
    end;

  class procedure TCpuAsmOptimizer.UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai);
    begin
      { Update integer registers, ignoring deallocations }
      repeat
        while assigned(p) and
          ((p.typ in (SkipInstr - [ait_RegAlloc])) or
           (p.typ = ait_label) or
           ((p.typ = ait_marker) and
            (tai_Marker(p).Kind in [mark_AsmBlockEnd,mark_NoLineInfoStart,mark_NoLineInfoEnd]))) do
          p := tai(p.next);
        while assigned(p) and
          (p.typ=ait_RegAlloc) Do
          begin
            if (getregtype(tai_regalloc(p).reg) = R_INTREGISTER) then
              begin
                case tai_regalloc(p).ratype of
                  ra_alloc :
                    IncludeRegInUsedRegs(tai_regalloc(p).reg, AUsedRegs);
                  else
                    ;
                end;
              end;
            p := tai(p.next);
          end;
      until not(assigned(p)) or
        (not(p.typ in SkipInstr) and
         not((p.typ = ait_label) and
             labelCanBeSkipped(tai_label(p))));
    end;

  { Attempts to allocate a volatile integer register for use between p and hp,
    using AUsedRegs for the current register usage information. Returns NR_NO
    if no free register could be found }
  function TCpuAsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
    var
      RegSet: TCPURegisterSet;
      CurrentSuperReg: Integer;
      CurrentReg: TRegister;
      Currentp: tai;
      Breakout: Boolean;
    begin
      Result := NR_NO;
      RegSet :=
        paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
        current_procinfo.saved_regs_int;
(*
      { Don't use the frame register unless explicitly allowed (fixes i40111) }
      if ([cs_useebp, cs_userbp] * current_settings.optimizerswitches) = [] then
        Exclude(RegSet, RS_FRAME_POINTER_REG);
*)
      for CurrentSuperReg in RegSet do
        begin
          CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
          if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg) then
            begin
              Currentp := p;
              Breakout := False;
              while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
                begin
                  case Currentp.typ of
                    ait_instruction:
                      begin
                        if RegInInstruction(CurrentReg, Currentp) then
                          begin
                            Breakout := True;
                            Break;
                          end;
                        { Cannot allocate across an unconditional jump }
                        if is_calljmpmaybeuncondret(taicpu(Currentp).opcode) and (taicpu(Currentp).condition = C_None) then
                          Exit;
                      end;
                    ait_marker:
                      { Don't try anything more if a marker is hit }
                      Exit;
                    ait_regalloc:
                      if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                    else
                      ;
                  end;
                end;
              if Breakout then
                { Try the next register }
                Continue;
              { We have a free register available }
              Result := CurrentReg;
              if not DontAlloc then
                AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
              Exit;
            end;
        end;
    end;

  function TCSELTracking.InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
    begin
      Result := False;
      EndJump := nil;
      BlockStop := nil;
      while (BlockStart <> fOptimizer.BlockEnd) and
        { stop on labels }
        (BlockStart.typ <> ait_label) do
        begin
          { Keep track of all integer registers that are used }
          fOptimizer.UpdateIntRegsNoDealloc(RegisterTracking, tai(OneBeforeBlock.Next));
          if BlockStart.typ = ait_instruction then
            begin
              if MatchInstruction(BlockStart, A_B, [C_None], []) then
                begin
                  if not IsJumpToLabel(taicpu(BlockStart)) or
                    (JumpTargetOp(taicpu(BlockStart))^.ref^.index <> NR_NO) then
                    Exit;
                  EndJump := BlockStart;
                  Break;
                end
              { Check to see if we have a valid MOV instruction instead }
              else if (taicpu(BlockStart).opcode <> A_MOV) or
                { Can't include the stack pointer in CSEL }
                fOptimizer.RegInInstruction(NR_SP, BlockStart) then
                begin
                  Exit;
                end
              else
                { This will be a valid MOV }
                fAllocationRange := BlockStart;
            end;
          OneBeforeBlock := BlockStart;
          fOptimizer.GetNextInstruction(BlockStart, BlockStart);
        end;
      if (BlockStart = fOptimizer.BlockEnd) then
        Exit;
      BlockStop := BlockStart;
      Result := True;
    end;

  function TCSELTracking.AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
    var
      hp1: tai;
      RefModified: Boolean;
    begin
      Result := 0;
      hp1 := BlockStart;
      RefModified := False; { As long as the condition is inverted, this can be reset }
      while assigned(hp1) and
        (hp1 <> BlockStop) do
        begin
          case hp1.typ of
            ait_instruction:
              if MatchInstruction(hp1, A_MOV, []) then
                begin
                  Inc(Result);
                  if taicpu(hp1).oper[1]^.typ = top_reg then
                    begin
                      Inc(Result);
                    end
                  else if not (cs_opt_size in current_settings.optimizerswitches) and
                    { CSEL with constants grows the code size }
                    TryCSELConst(hp1, SearchStart, BlockStop, Result) then
                    begin
                      { Register was reserved by TryCSELConst and
                        stored on ConstRegs }
                    end
                  else
                    begin
                      Result := -1;
                      Exit;
                    end;
                end
              else
                begin
                  Result := -1;
                  Exit;
                end;
            else
              { Most likely an align };
          end;
          fOptimizer.GetNextInstruction(hp1, hp1);
        end;
    end;

  constructor TCSELTracking.Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);

    { For the tsBranching type, increase the weighting score to account for the new conditional jump
      (this is done as a separate stage because the double types are extensions of the branching type,
      but we can't discount the conditional jump until the last step) }
    procedure EvaluateBranchingType;
      begin
        Inc(CSELScore);
        if (CSELScore > MAX_CSEL_INSTRUCTIONS) then
          { Too many instructions to be worthwhile }
          fState := tsInvalid;
      end;

    var
      hp1: tai;
      Count: Integer;
    begin
      { Table of valid CSEL block types

        Block type               2nd Jump    Mid-label   2nd MOVs   3rd Jump   End-label
        -----------------------  ----------  ----------  ---------  ---------  ---------
        tsSimple                 X           Yes         X          X          X
        tsDetour                 = 1st       X           X          X          X
        tsBranching              <> Mid      Yes         X          X          X
        tsDouble                 End-label   Yes *       Yes        X          Yes
        tsDoubleBranchSame       <> Mid      Yes *       Yes        = 2nd      X
        tsDoubleBranchDifferent  <> Mid      Yes *       Yes        <> 2nd     X
        tsDoubleSecondBranching  End-label   Yes *       Yes        <> 2nd     Yes

        * Only one reference allowed
      }
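      { Illustrative example of the simplest (tsSimple) shape handled here; the
        registers and label are made up for this comment only:

            b.eq .L1        // fInitialJump
            mov  x0,x1      // fFirstMovBlock
          .L1:              // fMidLabel = AFirstLabel

        Process later rewrites the MOV as "csel x0,x1,x0,ne" (the inverted
        condition) and removes the branch }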
      hp1 := nil; { To prevent compiler warnings }
      Optimizer.CopyUsedRegs(RegisterTracking);
      fOptimizer := Optimizer;
      fLabel := AFirstLabel;
      CSELScore := 0;
      ConstCount := 0;
      { Initialise RegWrites, ConstRegs, ConstVals, ConstSizes, ConstWriteSizes and ConstMovs }
      FillChar(RegWrites[0], MAX_CSEL_INSTRUCTIONS * 2 * SizeOf(TRegister), 0);
      FillChar(ConstRegs[0], MAX_CSEL_REGISTERS * SizeOf(TRegister), 0);
      FillChar(ConstVals[0], MAX_CSEL_REGISTERS * SizeOf(TCGInt), 0);
      FillChar(ConstSizes[0], MAX_CSEL_REGISTERS * SizeOf(TSubRegister), 0);
      FillChar(ConstWriteSizes[0], first_int_imreg * SizeOf(TOpSize), 0);
      FillChar(ConstMovs[0], MAX_CSEL_REGISTERS * SizeOf(taicpu), 0);
      fInsertionPoint := p_initialjump;
      fCondition := nil;
      fInitialJump := p_initialjump;
      fFirstMovBlock := p_initialmov;
      fFirstMovBlockStop := nil;
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fSecondMovBlockStop := nil;
      fMidLabel := nil;
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fEndLabel := nil;
      fAllocationRange := nil;
      { Assume it all goes horribly wrong! }
      fState := tsInvalid;
      { Look backwards at the comparisons to get an accurate picture of register usage and a better position for any MOV const,reg insertions }
      if Optimizer.GetLastInstruction(p_initialjump, fCondition) and
        (
          MatchInstruction(fCondition, [A_CMP, A_CMN, A_TST], []) or
          (
            (fCondition.typ = ait_instruction) and
            (taicpu(fCondition).opcode = A_AND) and
            (taicpu(fCondition).oppostfix = PF_S)
          )
        ) then
        begin
          { Mark all the registers in the comparison as 'in use', even if they've just been deallocated }
          for Count := 0 to taicpu(fCondition).ops - 1 do
            with taicpu(fCondition).oper[Count]^ do
              case typ of
                top_reg:
                  if getregtype(reg) = R_INTREGISTER then
                    Optimizer.IncludeRegInUsedRegs(reg, RegisterTracking);
                top_ref:
                  begin
                    if (ref^.base <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.base, RegisterTracking);
                    if (ref^.index <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.index, RegisterTracking);
                  end
                else
                  ;
              end;
          { When inserting instructions before hp_prev, try to insert them
            before the allocation of the FLAGS register }
          if not SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(fCondition.Previous)), fInsertionPoint) or
            (tai_regalloc(fInsertionPoint).ratype = ra_dealloc) then
            { If not found, set it equal to the condition so it's something sensible }
            fInsertionPoint := fCondition;
        end
      else
        fCondition := nil;
      { When inserting instructions, try to insert them before the allocation of the FLAGS register }
      if SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p_initialjump.Previous)), hp1) and
        (tai_regalloc(hp1).ratype <> ra_dealloc) then
        { If a FLAGS allocation is found, insert before it instead }
        fInsertionPoint := hp1;
      hp1 := p_initialmov;
      if not InitialiseBlock(p_initialmov, p_initialjump, fFirstMovBlockStop, fSecondJump) then
        Exit;
      hp1 := fFirstMovBlockStop; { Will either be on a label or a jump }
      if (hp1.typ <> ait_label) then { should be on a jump }
        begin
          if not Optimizer.GetNextInstruction(hp1, fMidLabel) or (fMidLabel.typ <> ait_label) then
            { Need a label afterwards }
            Exit;
        end
      else
        fMidLabel := hp1;
      if tai_label(fMidLabel).labsym <> AFirstLabel then
        { Not the correct label }
        fMidLabel := nil;
      if not Assigned(fSecondJump) and not Assigned(fMidLabel) then
        { If there's neither a 2nd jump nor correct label, then it's invalid
          (see above table) }
        Exit;
      { Analyse the first block of MOVs more closely }
      CSELScore := AnalyseMOVBlock(fFirstMovBlock, fFirstMovBlockStop, fInsertionPoint);
      if Assigned(fSecondJump) then
        begin
          if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = AFirstLabel) then
            begin
              fState := tsDetour
            end
          else
            begin
              { Need the correct mid-label for this one }
              if not Assigned(fMidLabel) then
                Exit;
              fState := tsBranching;
            end;
        end
      else
        { No jump, but mid-label is present }
        fState := tsSimple;
      if (CSELScore > MAX_CSEL_INSTRUCTIONS) or (CSELScore <= 0) then
        begin
          { Invalid or too many instructions to be worthwhile }
          fState := tsInvalid;
          Exit;
        end;
      { check further for
          b xxx
          <several movs 1>
          b yyy
        xxx:
          <several movs 2>
        yyy:
        etc.
      }
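      { In source terms this is the shape a typical if/else statement produces;
        when the second block also qualifies, the state ends up as one of the
        tsDouble* entries in the table above }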
      if (fState = tsBranching) and
        { Estimate for required savings for extra jump }
        (CSELScore <= MAX_CSEL_INSTRUCTIONS - 1) and
        { Only one reference is allowed for double blocks }
        (AFirstLabel.getrefs = 1) then
        begin
          Optimizer.GetNextInstruction(fMidLabel, hp1);
          fSecondMovBlock := hp1;
          if not InitialiseBlock(fSecondMovBlock, fMidLabel, fSecondMovBlockStop, fThirdJump) then
            begin
              EvaluateBranchingType;
              Exit;
            end;
          hp1 := fSecondMovBlockStop; { Will either be on a label or a jump }
          if (hp1.typ <> ait_label) then { should be on a jump }
            begin
              if not Optimizer.GetNextInstruction(hp1, fEndLabel) or (fEndLabel.typ <> ait_label) then
                begin
                  { Need a label afterwards }
                  EvaluateBranchingType;
                  Exit;
                end;
            end
          else
            fEndLabel := hp1;
          if tai_label(fEndLabel).labsym <> JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol then
            { Second jump doesn't go to the end }
            fEndLabel := nil;
          if not Assigned(fThirdJump) and not Assigned(fEndLabel) then
            begin
              { If there's neither a 3rd jump nor correct end label, then it's
                not a valid double block, but it is a valid single branching
                block (see above table) }
              EvaluateBranchingType;
              Exit;
            end;
          Count := AnalyseMOVBlock(fSecondMovBlock, fSecondMovBlockStop, fMidLabel);
          if (Count > MAX_CSEL_INSTRUCTIONS) or (Count <= 0) then
            { Invalid or too many instructions to be worthwhile }
            Exit;
          Inc(CSELScore, Count);
          if Assigned(fThirdJump) then
            begin
              if not Assigned(fSecondJump) then
                fState := tsDoubleSecondBranching
              else if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = JumpTargetOp(taicpu(fThirdJump))^.ref^.symbol) then
                fState := tsDoubleBranchSame
              else
                fState := tsDoubleBranchDifferent;
            end
          else
            fState := tsDouble;
        end;
      if fState = tsBranching then
        EvaluateBranchingType;
    end;

  { Tries to convert a mov const,%reg instruction into a CSEL by reserving a
    new register to store the constant }
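  { For example (illustrative only, register numbers made up): a "mov x0,#1"
    inside a candidate block can become "mov x2,#1" hoisted in front of the
    comparison plus "csel x0,x2,x0,<cond>" inside the block, provided a free
    volatile register such as x2 is available between the two points }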
  function TCSELTracking.TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
    var
      RegSize: TSubRegister;
      CurrentVal: TCGInt;
      ANewReg: TRegister;
      X: ShortInt;
    begin
      Result := False;
      if not MatchOpType(taicpu(p), top_reg, top_const) then
        Exit;
      if ConstCount >= MAX_CSEL_REGISTERS then
        { Arrays are full }
        Exit;
      { See if the value has already been reserved for another CSEL instruction }
      CurrentVal := taicpu(p).oper[1]^.val;
      RegSize := getsubreg(taicpu(p).oper[0]^.reg);
      for X := 0 to ConstCount - 1 do
        if ConstVals[X] = CurrentVal then
          begin
            ConstRegs[ConstCount] := ConstRegs[X];
            ConstSizes[ConstCount] := RegSize;
            ConstVals[ConstCount] := CurrentVal;
            Inc(ConstCount);
            Inc(Count);
            Result := True;
            Exit;
          end;
      ANewReg := fOptimizer.GetIntRegisterBetween(R_SUBWHOLE, RegisterTracking, start, stop, True);
      if ANewReg = NR_NO then
        { No free registers }
        Exit;
      { Reserve the register so subsequent TryCSELConst calls don't all end
        up vying for the same register }
      fOptimizer.IncludeRegInUsedRegs(ANewReg, RegisterTracking);
      ConstRegs[ConstCount] := ANewReg;
      ConstSizes[ConstCount] := RegSize;
      ConstVals[ConstCount] := CurrentVal;
      Inc(ConstCount);
      Inc(Count);
      Result := True;
    end;

  destructor TCSELTracking.Done;
    begin
      TAOptObj.ReleaseUsedRegs(RegisterTracking);
    end;

  procedure TCSELTracking.Process(out new_p: tai);
    var
      Count, Writes: LongInt;
      RegMatch: Boolean;
      hp1, hp_new: tai;
      inverted_condition, condition: TAsmCond;
    begin
      if (fState in [tsInvalid, tsProcessed]) then
        InternalError(2023110702);
      { Repurpose RegisterTracking to mark registers that we've defined }
      RegisterTracking[R_INTREGISTER].Clear;
      Count := 0;
      Writes := 0;
      condition := taicpu(fInitialJump).condition;
      inverted_condition := inverse_cond(condition);
      { Exclude tsDoubleBranchDifferent from this check, as the second block
        doesn't get CSELs in this case }
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleSecondBranching]) then
        begin
          { Include the jump in the flag tracking }
          if Assigned(fThirdJump) then
            begin
              if (fState = tsDoubleBranchSame) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fThirdJump, hp1) then
                    InternalError(2023110712);
                end
              else
                hp1 := fThirdJump;
            end
          else
            hp1 := fSecondMovBlockStop;
        end
      else
        begin
          { Include a conditional jump in the flag tracking }
          if Assigned(fSecondJump) then
            begin
              if (fState = tsDetour) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fSecondJump, hp1) then
                    InternalError(2023110713);
                end
              else
                hp1 := fSecondJump;
            end
          else
            hp1 := fFirstMovBlockStop;
        end;
      fOptimizer.AllocRegBetween(NR_DEFAULTFLAGS, fInitialJump, hp1, fOptimizer.UsedRegs);
      { Process the second set of MOVs first, because if a destination
        register is shared between the first and second MOV sets, it is more
        efficient to turn the first one into a MOV instruction and place it
        before the CMP if possible, but we won't know which registers are
        shared until we've processed at least one list, so we might as well
        make it the second one since that won't be modified again. }
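      { Illustrative example (register made up for this comment): if both
        blocks assign to x0, the first block's MOV is kept as an unconditional
        MOV and only the second block's assignment becomes a CSEL, which then
        conditionally overwrites the earlier value }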
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching]) then
        begin
          hp1 := fSecondMovBlock;
          repeat
            if not Assigned(hp1) then
              InternalError(2018062902);
            if (hp1.typ = ait_instruction) then
              begin
                { Extra safeguard }
                if (taicpu(hp1).opcode <> A_MOV) then
                  InternalError(2018062903);
                { Note: tsDoubleBranchDifferent is essentially identical to
                  tsBranching and the 2nd block is best left largely
                  untouched, but we need to evaluate which registers the MOVs
                  write to in order to track what would be complementary CSEL
                  pairs that can be further optimised. [Kit] }
                if fState <> tsDoubleBranchDifferent then
                  begin
                    if taicpu(hp1).oper[1]^.typ = top_const then
                      begin
                        RegMatch := False;
                        for Count := 0 to ConstCount - 1 do
                          if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                            (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                            begin
                              RegMatch := True;
                              { If it's in RegisterTracking, then this register
                                is being used more than once and hence has
                                already had its value defined (it gets added to
                                UsedRegs through AllocRegBetween below) }
                              if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                                begin
                                  hp_new := tai(hp1.getcopy);
                                  taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                                  taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
                                  fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                                  fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
                                  ConstMovs[Count] := hp_new;
                                end
                              else
                                { We just need an instruction between hp_prev and hp1
                                  where we know the register is marked as in use }
                                hp_new := fSecondMovBlock;
                              { Keep track of largest write for this register so it can be optimised later }
                              if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                                ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
                              fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                              taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                              Break;
                            end;
                        if not RegMatch then
                          InternalError(2021100413);
                      end;
                    taicpu(hp1).opcode := A_CSEL;
                    taicpu(hp1).ops := 4;
                    taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
                    taicpu(hp1).loadconditioncode(3, condition);
                  end;
                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;
            fOptimizer.GetNextInstruction(hp1, hp1);
          until (hp1 = fSecondMovBlockStop);
        end;
      { Now do the first set of MOVs }
      hp1 := fFirstMovBlock;
      repeat
        if not Assigned(hp1) then
          InternalError(2018062904);
        if (hp1.typ = ait_instruction) then
          begin
            RegMatch := False;
            { Extra safeguard }
            if (taicpu(hp1).opcode <> A_MOV) then
              InternalError(2018062905);
            { Search through the RegWrites list to see if there are any
              opposing CSEL pairs that write to the same register }
            for Count := 0 to Writes - 1 do
              if (RegWrites[Count] = taicpu(hp1).oper[0]^.reg) then
                begin
                  { We have a match. Keep this as a MOV }
                  { Move ahead in preparation }
                  fOptimizer.GetNextInstruction(hp1, hp1);
                  RegMatch := True;
                  Break;
                end;
            if RegMatch then
              Continue;
            if taicpu(hp1).oper[1]^.typ = top_const then
              begin
                for Count := 0 to ConstCount - 1 do
                  if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                    (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                    begin
                      RegMatch := True;
                      { If it's in RegisterTracking, then this register is
                        being used more than once and hence has already had
                        its value defined (it gets added to UsedRegs through
                        AllocRegBetween below) }
                      if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                        begin
                          hp_new := tai(hp1.getcopy);
                          taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                          taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
                          fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                          fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
                          ConstMovs[Count] := hp_new;
                        end
                      else
                        { We just need an instruction between hp_prev and hp1
                          where we know the register is marked as in use }
                        hp_new := fFirstMovBlock;
                      { Keep track of largest write for this register so it can be optimised later }
                      if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                        ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
                      fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                      taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                      Break;
                    end;
                if not RegMatch then
                  InternalError(2021100412);
              end;
            taicpu(hp1).opcode := A_CSEL;
            taicpu(hp1).ops := 4;
            taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).loadconditioncode(3, inverted_condition);
            if (fState = tsDoubleBranchDifferent) then
              begin
                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;
          end;
        fOptimizer.GetNextInstruction(hp1, hp1);
      until (hp1 = fFirstMovBlockStop);
      { Update initialisation MOVs to the smallest possible size }
      for Count := 0 to ConstCount - 1 do
        if Assigned(ConstMovs[Count]) then
          setsubreg(taicpu(ConstMovs[Count]).oper[0]^.reg, ConstWriteSizes[Word(ConstRegs[Count])]);
      case fState of
        tsSimple:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Simple type)', fInitialJump);
            { No branch to delete }
          end;
        tsDetour:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Detour type)', fInitialJump);
            { Preserve jump }
          end;
        tsBranching, tsDoubleBranchDifferent:
          begin
            if (fState = tsBranching) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Branching type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (different) type)', fInitialJump);
            taicpu(fSecondJump).condition := inverted_condition;
          end;
        tsDouble, tsDoubleBranchSame:
          begin
            if (fState = tsDouble) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (same) type)', fInitialJump);
            { Delete second jump }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
          end;
        tsDoubleSecondBranching:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double, second branching type)', fInitialJump);
            { Delete second jump, preserve third jump as conditional }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
            taicpu(fThirdJump).condition := condition;
          end;
        else
          InternalError(2023110721);
      end;
      { Now we can safely decrement the reference count }
      tasmlabel(fLabel).decrefs;
      fOptimizer.UpdateUsedRegs(tai(fInitialJump.next));
      { Remove the original jump }
      fOptimizer.RemoveInstruction(fInitialJump); { Note, the choice to not use RemoveCurrentp is deliberate }
      new_p := fFirstMovBlock; { Appears immediately after the initial jump }
      fState := tsProcessed;
    end;

begin
  casmoptimizer:=TCpuAsmOptimizer;
End.