aoptcpu.pas 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. globtype, globals,
  26. cutils,
  27. cgbase, cpubase, aasmtai, aasmcpu,
  28. aopt, aoptcpub, aoptarm, aoptobj;
  29. Type
  { AArch64-specific peephole optimizer.  Inherits the optimisations shared
    by all ARM targets from TARMAsmOptimizer and adds AArch64-only passes. }
  30. TCpuAsmOptimizer = class(TARMAsmOptimizer)
  31. { uses the same constructor as TAopObj }
  { Per-pass dispatch entry points invoked by the generic optimizer driver }
  32. function PrePeepHoleOptsCpu(var p: tai): boolean; override;
  33. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  34. function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
  35. function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  { Register-tracking queries used by the generic framework }
  36. function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
  37. function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
  { Folds a following add/sub of the base register into a post-indexed
    ldr/str addressing mode }
  38. function LookForPostindexedPattern(var p : tai) : boolean;
  39. public
  40. { With these routines, there's optimisation code that's general for all ARM platforms }
  41. function OptPass1LDR(var p: tai): Boolean; override;
  42. function OptPass1STR(var p: tai): Boolean; override;
  43. private
  { Removes an FMOV that merely copies the result of p to another register }
  44. function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
  { Per-opcode handlers for the individual peephole passes }
  45. function OptPass1Shift(var p: tai): boolean;
  46. function OptPass1Data(var p: tai): boolean;
  47. function OptPass1FData(var p: tai): Boolean;
  48. function OptPass1STP(var p: tai): boolean;
  49. function OptPass1Mov(var p: tai): boolean;
  50. function OptPass1MOVZ(var p: tai): boolean;
  51. function OptPass1FMov(var p: tai): Boolean;
  52. function OptPass1B(var p: tai): boolean;
  53. function OptPass1SXTW(var p: tai): Boolean;
  54. function OptPass2CSEL(var p: tai): Boolean;
  55. function OptPass2B(var p: tai): Boolean;
  56. function OptPass2LDRSTR(var p: tai): boolean;
  57. function OptPass2MOV(var p: tai): Boolean;
  58. function PostPeepholeOptAND(var p: tai): Boolean;
  59. function PostPeepholeOptCMP(var p: tai): boolean;
  60. function PostPeepholeOptTST(var p: tai): Boolean;
  61. protected
  62. { Like UpdateUsedRegs, but ignores deallocations }
  63. class procedure UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai); static;
  64. { Attempts to allocate a volatile integer register for use between p and hp,
  65. using AUsedRegs for the current register usage information. Returns NR_NO
  66. if no free register could be found }
  67. function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  68. End;
  69. Implementation
  70. uses
  71. aasmbase,
  72. aoptbase,
  73. aoptutils,
  74. cgutils,
  75. procinfo,
  76. paramgr,
  77. verbose;
  78. {$ifdef DEBUG_AOPTCPU}
  { Prefix attached to every DebugMsg emitted by this unit (debug builds only) }
  79. const
  80. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  81. {$else DEBUG_AOPTCPU}
  82. { Empty strings help the optimizer to remove string concatenations that won't
  83. ever appear to the user on release builds. [Kit] }
  84. const
  85. SPeepholeOptimization = '';
  86. {$endif DEBUG_AOPTCPU}
  { Capacity limits for the CSEL-conversion bookkeeping: these size the
    RegWrites / ConstRegs / ConstVals / ConstSizes / ConstMovs arrays in
    TCSELTracking below }
  87. MAX_CSEL_INSTRUCTIONS = 8;
  88. MAX_CSEL_REGISTERS = 30;
  89. type
  { State machine values describing the shape of the conditional-branch
    pattern currently being tracked for CSEL conversion }
  90. TCSELTrackingState = (tsInvalid, tsSimple, tsDetour, tsBranching,
  91. tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching,
  92. tsProcessed);
  93. { For OptPass2Jcc }
  { Helper object that analyses a conditional jump followed by MOV blocks
    and, when profitable, rewrites them into CSEL instructions }
  94. TCSELTracking = object
  95. private
  { Running score of the candidate block and count of constants hoisted }
  96. CSELScore, ConstCount: LongInt;
  97. RegWrites: array[0..MAX_CSEL_INSTRUCTIONS*2 - 1] of TRegister;
  { Registers holding hoisted constants and their values }
  98. ConstRegs: array[0..MAX_CSEL_REGISTERS - 1] of TRegister;
  99. ConstVals: array[0..MAX_CSEL_REGISTERS - 1] of TCGInt;
  100. ConstSizes: array[0..MAX_CSEL_REGISTERS - 1] of TSubRegister; { May not match ConstRegs if one is shared over multiple CSELs. }
  101. ConstMovs: array[0..MAX_CSEL_REGISTERS - 1] of tai; { Location of initialisation instruction }
  102. ConstWriteSizes: array[0..first_int_imreg - 1] of TSubRegister; { Largest size of register written. }
  103. fOptimizer: TCpuAsmOptimizer;
  104. fLabel: TAsmSymbol;
  { Landmark instructions of the pattern being tracked (jumps, MOV blocks,
    labels and the range over which registers must stay allocated) }
  105. fInsertionPoint,
  106. fCondition,
  107. fInitialJump,
  108. fFirstMovBlock,
  109. fFirstMovBlockStop,
  110. fSecondJump,
  111. fThirdJump,
  112. fSecondMovBlock,
  113. fSecondMovBlockStop,
  114. fMidLabel,
  115. fEndLabel,
  116. fAllocationRange: tai;
  117. fState: TCSELTrackingState;
  118. function TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
  119. function InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
  120. function AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
  121. public
  122. RegisterTracking: TAllUsedRegs;
  123. constructor Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
  124. destructor Done;
  125. procedure Process(out new_p: tai);
  126. property State: TCSELTrackingState read fState;
  127. end;
  128. PCSELTracking = ^TCSELTracking;
  function CanBeCond(p : tai) : boolean;
    begin
      { Only a real instruction that is not already conditional can have a
        condition attached to it }
      if p.typ <> ait_instruction then
        exit(false);
      result := (taicpu(p).condition = C_None);
    end;
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      { True if hp writes a genuinely new value into reg (or write-backs the
        base register of a pre-/post-indexed reference in operand 0). }
      Result := False;
      if not assigned(hp) or (hp.typ <> ait_instruction) then
        exit;
      instr := taicpu(hp);
      case instr.opcode of
        { These operations do not write into a register at all.
          LDR/STR with post-/pre-indexed operations do not need special
          treatment because indexing does not mean that a register is loaded
          with a new value, it is only modified }
        A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
          exit;
        else
          ;
      end;
      if instr.ops = 0 then
        exit;
      { Operand 0 is the destination; for a reference destination, a
        pre-/post-indexed mode modifies its base register }
      case instr.oper[0]^.typ of
        top_reg:
          Result := SuperRegistersEqual(instr.oper[0]^.reg, reg);
        top_ref:
          Result :=
            (instr.oper[0]^.ref^.addressmode in [AM_PREINDEXED, AM_POSTINDEXED]) and
            (instr.oper[0]^.ref^.base = reg);
        else
          ;
      end;
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx, firstop: longint;
    begin
      { True if hp reads reg, either directly or as the base/index register
        of a memory reference. }
      Result := False;
      if not assigned(hp) or (hp.typ <> ait_instruction) then
        exit;
      instr := taicpu(hp);
      { Operand 0 is normally the destination; include it in the scan only
        when the instruction also reads it }
      if instr.spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
        firstop := 0
      else
        firstop := 1;
      for opidx := firstop to instr.ops - 1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            if instr.oper[opidx]^.reg = reg then
              exit(true);
          top_ref:
            if (instr.oper[opidx]^.ref^.base = reg) or
               (instr.oper[opidx]^.ref^.index = reg) then
              exit(true);
          else
            ;
        end;
    end;
  195. {
  196. optimize
  197. ldr/str regX,[reg1]
  198. ...
  199. add/sub reg1,reg1,regY/const
  200. into
  201. ldr/str regX,[reg1], regY/const
  202. }
  { Returns True when the pattern above was matched: the add/sub is deleted
    and its constant offset is folded into p's reference as a post-indexed
    write-back. }
  203. function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
  204. var
  205. hp1 : tai;
  206. begin
  207. Result:=false;
  208. if (taicpu(p).oper[1]^.typ = top_ref) and
  209. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  210. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  211. (taicpu(p).oper[1]^.ref^.offset=0) and
  212. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
  213. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  214. MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
  215. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
  216. (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
  217. (
  218. { valid offset? }
  { the immediate must lie in [-256,255] to be encodable post-indexed }
  219. (taicpu(hp1).oper[2]^.typ=top_const) and
  220. (taicpu(hp1).oper[2]^.val>=-256) and
  221. (abs(taicpu(hp1).oper[2]^.val)<256)
  222. ) and
  223. { don't apply the optimization if the base register is loaded }
  224. (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
  225. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  { NOTE(review): oper[2] is guaranteed top_const by the check above, so
    reading oper[2]^.reg here accesses the operand union's register field of
    a constant operand - presumably a leftover from a variant that allowed a
    register offset (regY); confirm this is harmless }
  226. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
  227. begin
  228. if taicpu(p).opcode = A_LDR then
  229. DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
  230. else
  231. DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);
  232. taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  { a SUB of the base is expressed as a negative post-index offset }
  233. if taicpu(hp1).opcode=A_ADD then
  234. taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  235. else
  236. taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  237. asml.Remove(hp1);
  238. hp1.Free;
  239. Result:=true;
  240. end;
  241. end;
  { If movp is an FMOV (same condition and postfix as p) that merely copies
    p's result register into another MM register, and p's result register is
    deallocated right after movp, retarget p to write the FMOV's destination
    directly and delete the FMOV.  The register allocation/deallocation
    markers are moved so the tracking information stays consistent.
    Returns True when the move was removed. }
  242. function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
  243. var
  244. alloc,
  245. dealloc : tai_regalloc;
  246. hp1 : tai;
  247. begin
  248. Result:=false;
  249. if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  250. ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
  251. ) { or
  252. (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  253. (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
  254. ) and
  255. (taicpu(movp).ops=2) and
  256. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  257. { the destination register of the mov might not be used beween p and movp }
  258. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  259. { Take care to only do this for instructions which REALLY load to the first register.
  260. Otherwise
  261. str reg0, [reg1]
  262. fmov reg2, reg0
  263. will be optimized to
  264. str reg2, [reg1]
  265. }
  266. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  267. begin
  { only act when p's destination really dies after movp }
  268. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  269. if assigned(dealloc) then
  270. begin
  271. DebugMsg(SPeepholeOptimization + optimizer+' removed superfluous vmov', movp);
  272. result:=true;
  273. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  274. and remove it if possible }
  275. asml.Remove(dealloc);
  276. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  277. if assigned(alloc) then
  278. begin
  279. asml.Remove(alloc);
  280. alloc.free;
  281. dealloc.free;
  282. end
  283. else
  { no matching allocation found: keep the register live only up to p }
  284. asml.InsertAfter(dealloc,p);
  285. { try to move the allocation of the target register }
  286. GetLastInstruction(movp,hp1);
  287. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  288. if assigned(alloc) then
  289. begin
  290. asml.Remove(alloc);
  291. asml.InsertBefore(alloc,p);
  292. { adjust used regs }
  293. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  294. end;
  295. { change
  296. vldr reg0,[reg1]
  297. vmov reg2,reg0
  298. into
  299. ldr reg2,[reg1]
  300. if reg2 is an int register
  301. if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
  302. taicpu(p).opcode:=A_LDR;
  303. }
  304. { finally get rid of the mov }
  305. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  306. asml.remove(movp);
  307. movp.free;
  308. end;
  309. end;
  310. end;
  function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      { Try the ARM-generic load optimisations first, then post-index
        folding, and finally removal of a superfluous register-to-register
        move of the loaded value. }
      Result := inherited OptPass1LDR(p) or LookForPostindexedPattern(p);
      if not Result and
        (taicpu(p).oppostfix in [PF_B, PF_SB, PF_H, PF_SH, PF_None]) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'Ldr<Postfix>Mov2Ldr<Postfix>') then
        Result := True;
    end;
  function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
    begin
      { ARM-generic store optimisations first; if none applied, try to fold
        a following add/sub of the base register into a post-indexed store. }
      if inherited OptPass1STR(p) then
        Result := True
      else
        Result := LookForPostindexedPattern(p);
    end;
  { Folds a constant shift into the shifter operand of a following data
    instruction that consumes the shifted register (see pattern below).
    Falls back to removing a superfluous move of the shift result. }
  331. function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
  332. var
  333. hp1,hp2: tai;
  334. I2, I: Integer;
  335. shifterop: tshifterop;
  336. begin
  337. Result:=false;
  338. { This folds shifterops into following instructions
  339. <shiftop> r0, r1, #imm
  340. <op> r2, r3, r0
  341. to
  342. <op> r2, r3, r1, <shiftop> #imm
  343. }
  344. { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
  345. if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
  346. MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
  347. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  348. MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
  349. A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
  350. A_SUB, A_TST], [PF_None]) and
  351. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  352. (taicpu(hp1).ops >= 2) and
  353. { Currently we can't fold into another shifterop }
  354. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  355. { SP does not work completely with shifted registers, as I didn't find the exact rules,
  356. we do not operate on SP }
  357. (taicpu(hp1).oper[0]^.reg<>NR_SP) and
  358. (taicpu(hp1).oper[1]^.reg<>NR_SP) and
  359. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
  360. { reg1 might not be modified inbetween }
  361. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  362. (
  363. { Only ONE of the two src operands is allowed to match }
  364. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  365. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  366. ) and
  367. { for SUB, the last operand must match, there is no RSB on AArch64 }
  368. ((taicpu(hp1).opcode<>A_SUB) or
  369. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
  370. begin
  371. { for the two operand instructions, start also at the second operand as they are not always commutative
  372. (depends on the flags tested laster on) and thus the operands cannot swapped }
  373. I2:=1;
  374. for I:=I2 to taicpu(hp1).ops-1 do
  { find which source operand of hp1 is the shift's destination }
  375. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  376. begin
  377. { If the parameter matched on the second op from the RIGHT
  378. we have to switch the parameters, this will not happen for CMP
  379. were we're only evaluating the most right parameter
  380. }
  381. shifterop_reset(shifterop);
  { translate the standalone shift opcode into a shifter-operand mode }
  382. case taicpu(p).opcode of
  383. A_LSL:
  384. shifterop.shiftmode:=SM_LSL;
  385. A_ROR:
  386. shifterop.shiftmode:=SM_ROR;
  387. A_LSR:
  388. shifterop.shiftmode:=SM_LSR;
  389. A_ASR:
  390. shifterop.shiftmode:=SM_ASR;
  391. else
  392. InternalError(2019090401);
  393. end;
  394. shifterop.shiftimm:=taicpu(p).oper[2]^.val;
  { build the replacement for hp1 with the shift's source register and
    the folded shifter operand in place of the shifted register }
  395. if I <> taicpu(hp1).ops-1 then
  396. begin
  397. if taicpu(hp1).ops = 3 then
  398. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  399. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  400. taicpu(p).oper[1]^.reg, shifterop)
  401. else
  402. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  403. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  404. shifterop);
  405. end
  406. else
  407. if taicpu(hp1).ops = 3 then
  408. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  409. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  410. taicpu(p).oper[1]^.reg,shifterop)
  411. else
  412. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  413. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  414. shifterop);
  415. { Make sure the register used in the shifting is tracked all
  416. the way through, otherwise it may become deallocated while
  417. it's still live and cause incorrect optimisations later }
  418. if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
  419. begin
  420. TransferUsedRegs(TmpUsedRegs);
  421. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  422. ALlocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
  423. end;
  424. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  425. asml.insertbefore(hp2, hp1);
  426. RemoveInstruction(hp1);
  427. RemoveCurrentp(p);
  428. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  429. Result:=true;
  430. break;
  431. end;
  432. end
  433. else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
  434. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  435. RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
  436. Result:=true;
  437. end;
  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
    var
      hp1: tai;
    begin
      { If the destination register of this data instruction is merely
        copied to another register afterwards, fold that MOV away. }
      Result := False;
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        Result := RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
    end;
  function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      { If the destination register of this FP data instruction is merely
        copied to another MM register afterwards, fold that FMOV away. }
      Result := False;
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        Result := RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
    end;
  { Collapses a full frame setup + single BL + frame teardown + RET sequence
    into a direct tail-call branch (pattern shown below). }
  452. function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
  453. var
  454. hp1, hp2, hp3, hp4: tai;
  455. begin
  456. Result:=false;
  457. {
  458. change
  459. stp x29,x30,[sp, #-16]!
  460. mov x29,sp
  461. bl abc
  462. ldp x29,x30,[sp], #16
  463. ret
  464. into
  465. b abc
  466. }
  467. if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
  468. MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
  { the STP must be exactly the standard FP/LR frame save }
  469. (taicpu(p).oper[0]^.reg = NR_X29) and
  470. (taicpu(p).oper[1]^.reg = NR_X30) and
  471. (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  472. (taicpu(p).oper[2]^.ref^.index=NR_NO) and
  473. (taicpu(p).oper[2]^.ref^.offset=-16) and
  474. (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
  475. GetNextInstruction(p, hp1) and
  { followed by the frame pointer setup mov x29,sp }
  476. MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
  477. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
  478. (taicpu(hp1).oper[1]^.typ = top_reg) and
  479. (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
  480. GetNextInstruction(hp1, hp2) and
  481. SkipEntryExitMarker(hp2, hp2) and
  { exactly one call between prologue and epilogue }
  482. MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
  483. (taicpu(hp2).oper[0]^.typ = top_ref) and
  484. GetNextInstruction(hp2, hp3) and
  485. SkipEntryExitMarker(hp3, hp3) and
  { the LDP must be the matching FP/LR frame restore }
  486. MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
  487. MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
  488. (taicpu(hp3).oper[0]^.reg = NR_X29) and
  489. (taicpu(hp3).oper[1]^.reg = NR_X30) and
  490. (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  491. (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
  492. (taicpu(hp3).oper[2]^.ref^.offset=16) and
  493. (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
  494. GetNextInstruction(hp3, hp4) and
  495. MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
  496. (taicpu(hp4).ops = 0) then
  497. begin
  { delete prologue, epilogue and ret; turn the BL into a tail-call B }
  498. asml.Remove(p);
  499. asml.Remove(hp1);
  500. asml.Remove(hp3);
  501. asml.Remove(hp4);
  502. taicpu(hp2).opcode:=A_B;
  503. p.free;
  504. hp1.free;
  505. hp3.free;
  506. hp4.free;
  507. p:=hp2;
  508. DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
  509. Result:=true;
  510. end;
  511. end;
  { MOV optimisations: removes identity moves, folds a 32-bit MOV feeding a
    64-bit ADD/SUB into a UXTW-extended operand, and removes redundant or
    superfluous moves via the generic helpers. }
  512. function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
  513. var
  514. hp1: tai;
  515. so: tshifterop;
  516. begin
  517. Result:=false;
  { mov reg,reg with no postfix is a no-op and can be removed outright }
  518. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
  519. (taicpu(p).oppostfix=PF_None) then
  520. begin
  521. RemoveCurrentP(p);
  522. DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
  523. Result:=true;
  524. end
  { mov wN,wM ; add/sub xD,xS,xN  ->  add/sub xD,xS,wM,UXTW #0
    (valid when xN dies at the add/sub) }
  525. else if (taicpu(p).ops=2) and
  526. (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
  527. GetNextInstruction(p, hp1) and
  528. { Faster to get it out of the way than go through MatchInstruction }
  529. (hp1.typ=ait_instruction) and
  530. (taicpu(hp1).ops=3) and
  531. MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
  { NOTE(review): oper[2]^.reg is read before checking oper[2]^.typ=top_reg;
    for a constant third operand this reads the operand union's register
    field - presumably the subsequent getsupreg comparison fails to match in
    that case, but verify }
  532. (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
  533. (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
  534. RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
  535. begin
  536. DebugMsg(SPeepholeOptimization + 'MovOp2AddUtxw 1 done', p);
  537. shifterop_reset(so);
  538. so.shiftmode:=SM_UXTW;
  539. taicpu(hp1).ops:=4;
  540. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  541. taicpu(hp1).loadshifterop(3,so);
  542. RemoveCurrentP(p);
  543. Result:=true;
  544. exit;
  545. end
  546. {
  547. optimize
  548. mov rX, yyyy
  549. ....
  550. }
  551. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  552. begin
  553. if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  554. Result:=true
  555. else if (taicpu(p).ops = 2) and
  556. (tai(hp1).typ = ait_instruction) and
  557. RedundantMovProcess(p,hp1) then
  558. Result:=true
  559. end;
  560. end;
  { MOVZ optimisations: rewrites movz reg,#0 as a zero-register MOV (unless
    a fusable MOVK/MOVN follows) and removes a MOVZ that is immediately
    overwritten by another MOVZ to the same register. }
  561. function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
  562. var
  563. hp1: tai;
  564. ZeroReg: TRegister;
  565. begin
  566. Result := False;
  567. hp1 := nil;
  568. if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
  569. begin
  570. if
  571. { Check next instruction first so hp1 gets set to something, then
  572. if it remains nil, we know for sure that there's no valid next
  573. instruction. }
  574. not GetNextInstruction(p, hp1) or
  575. { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
  576. not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
  577. (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[0]^.reg) then
  578. begin
  579. if (taicpu(p).oper[1]^.val = 0) then
  580. begin
  581. { Change;
  582. movz reg,#0
  583. (no movk or movn)
  584. To:
  585. mov reg,xzr (or wzr)
  586. Easier to perform other optimisations with registers
  587. }
  588. DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);
  589. { Make sure the zero register is the correct size }
  { keep the destination's subregister size, swap only the super-register }
  590. ZeroReg := taicpu(p).oper[0]^.reg;
  591. setsupreg(ZeroReg, RS_XZR);
  592. taicpu(p).opcode := A_MOV;
  593. taicpu(p).loadreg(1, ZeroReg);
  594. Result := True;
  595. Exit;
  596. end;
  597. end;
  598. {
  599. remove the second Movz from
  600. movz reg,...
  601. movz reg,...
  602. }
  { safe because GetNextInstructionUsingReg returns the next instruction
    touching the register: any intermediate read would surface first and
    fail the MOVZ match - confirm that helper's contract }
  603. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  604. MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
  605. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
  606. begin
  607. DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
  608. RemoveCurrentP(p);
  609. Result:=true;
  610. exit;
  611. end;
  612. end;
  613. end;
  614. function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
  615. var
  616. hp1: tai;
  617. alloc, dealloc: tai_regalloc;
  618. begin
  619. {
  620. change
  621. fmov reg0,reg1
  622. fmov reg1,reg0
  623. into
  624. fmov reg0,reg1
  625. }
  626. Result := False;
  627. while GetNextInstruction(p, hp1) and
  628. MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  629. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  630. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
  631. begin
  632. asml.Remove(hp1);
  633. hp1.free;
  634. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
  635. Result:=true;
  636. end;
  637. { change
  638. fmov reg0,const
  639. fmov reg1,reg0
  640. dealloc reg0
  641. into
  642. fmov reg1,const
  643. }
  644. if MatchOpType(taicpu(p),top_reg,top_realconst) and
  645. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  646. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  647. MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
  648. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  649. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
  650. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  651. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))
  652. then
  653. begin
  654. DebugMsg('Peephole FMovFMov2FMov 2 done', p);
  655. taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
  656. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  657. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  658. if assigned(alloc) and assigned(dealloc) then
  659. begin
  660. asml.Remove(alloc);
  661. alloc.Free;
  662. asml.Remove(dealloc);
  663. dealloc.Free;
  664. end;
  665. { p will be removed, update used register as we continue
  666. with the next instruction after p }
  667. result:=RemoveCurrentP(p);
  668. end;
  669. { not enabled as apparently not happening
  670. if MatchOpType(taicpu(p),top_reg,top_reg) and
  671. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  672. MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
  673. (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
  674. ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
  675. ) and
  676. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  677. not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  678. begin
  679. DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
  680. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  681. if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  682. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  683. if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
  684. taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
  685. RemoveCurrentP(p);
  686. Result:=true;
  687. exit;
  688. end;
  689. }
  690. end;
  691. function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean;
  692. var
  693. hp1: tai;
  694. GetNextInstructionUsingReg_hp1: Boolean;
  695. begin
  696. Result:=false;
  697. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  698. begin
  699. {
  700. change
  701. sxtw reg2,reg1
  702. str reg2,[...]
  703. dealloc reg2
  704. to
  705. str reg1,[...]
  706. }
  707. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  708. (taicpu(p).ops=2) and
  709. MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
  710. (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  711. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  712. { the reference in strb might not use reg2 }
  713. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  714. { reg1 might not be modified inbetween }
  715. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  716. begin
  717. DebugMsg('Peephole SXTHStr2Str done', p);
  718. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  719. result:=RemoveCurrentP(p);
  720. end
  721. {
  722. change
  723. sxtw reg2,reg1
  724. sxtw reg3,reg2
  725. dealloc reg2
  726. to
  727. sxtw reg3,reg1
  728. }
  729. else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
  730. (taicpu(p).ops=2) and
  731. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  732. (taicpu(hp1).ops=2) and
  733. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  734. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  735. { reg1 might not be modified inbetween }
  736. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  737. begin
  738. DebugMsg('Peephole SxtwSxtw2Sxtw done', p);
  739. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  740. taicpu(hp1).opcode:=A_SXTW;
  741. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  742. result:=RemoveCurrentP(p);
  743. end
  744. else if USxtOp2Op(p,hp1,SM_SXTW) then
  745. Result:=true
  746. else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
  747. Result:=true;
  748. end;
  749. end;
function TCpuAsmOptimizer.OptPass1B(var p: tai): boolean;
  { Converts the classic "branchy Boolean materialisation" pattern into a
    single CSET.  The five-instruction window is:
      p   = b<c> .L1
      hp1 = first MOV/MOVZ of 0 or 1
      hp2 = b .L2
      hp3 = .L1
      hp4 = second MOV/MOVZ of the opposite constant
      hp5 = .L2 }
  var
    hp1, hp2, hp3, hp4, hp5: tai;
    Invert: Boolean;
  begin
    Result := False;
    {
      convert
      b<c> .L1
      movz reg,#1
      b .L2
      .L1
      movz reg,#0 (or mov reg,xzr)
      .L2

      into
      cset reg,<not(c)>

      Also do the same if the constants are reversed, instead converting it to:
      cset reg,<c>
    }
    if (taicpu(p).condition <> C_None) and
      (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p, hp1) and
      { Check individually instead of using MatchInstruction in order to save time }
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).condition = C_None) and
      (taicpu(hp1).oppostfix = PF_None) and
      (taicpu(hp1).ops = 2) and
      (
        (
          (taicpu(hp1).opcode = A_MOVZ) and
          (taicpu(hp1).oper[1]^.val in [0, 1])
        ) or
        (
          (taicpu(hp1).opcode = A_MOV) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = RS_XZR)
        )
      ) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_B, [PF_None]) and
      (taicpu(hp2).condition = C_None) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      GetNextInstruction(hp2, hp3) and
      { The first branch must target the label immediately after the
        unconditional jump }
      (hp3.typ = ait_label) and
      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol) = tai_label(hp3).labsym) and
      GetNextInstruction(hp3, hp4) and
      { As before, check individually instead of using MatchInstruction in order to save time }
      (hp4.typ = ait_instruction) and
      (taicpu(hp4).condition = C_None) and
      (taicpu(hp4).oppostfix = PF_None) and
      (taicpu(hp4).ops = 2) and
      { Both MOVs must write the same register }
      (taicpu(hp4).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
      (
        (
          (taicpu(hp4).opcode = A_MOVZ) and
          (
            (
              { Check to confirm the following:
                - First mov is either "movz reg,#0" or "mov reg,xzr"
                - Second mov is "movz reg,#1"
              }
              (
                (taicpu(hp1).oper[1]^.typ = top_reg) { Will be the zero register } or
                (taicpu(hp1).oper[1]^.val = 0)
              ) and
              (taicpu(hp4).oper[1]^.val = 1)
            ) or
            (
              { Check to confirm the following:
                - First mov is "movz reg,#1"
                - Second mov is "movz reg,#0"
              }
              MatchOperand(taicpu(hp1).oper[1]^, 1) and
              (taicpu(hp4).oper[1]^.val = 0)
            )
          )
        ) or
        (
          { Check to confirm the following:
            - First mov is "movz reg,#1"
            - Second mov is "mov reg,xzr"
          }
          (taicpu(hp4).opcode = A_MOV) and
          (getsupreg(taicpu(hp4).oper[1]^.reg) = RS_XZR) and
          MatchOperand(taicpu(hp1).oper[1]^, 1)
        )
      ) and
      GetNextInstruction(hp4, hp5) and
      { The unconditional jump must target the label right after the second MOV }
      (hp5.typ = ait_label) and
      (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol) = tai_label(hp5).labsym) then
      begin
        Invert := MatchOperand(taicpu(hp1).oper[1]^, 1); { if true, hp4 will be mov reg,0 in some form }
        if Invert then
          taicpu(p).condition := inverse_cond(taicpu(p).condition);
        tai_label(hp3).labsym.DecRefs;
        { If this isn't the only reference to the middle label, we can
          still make a saving - only that the first jump and everything
          that follows will remain. }
        if (tai_label(hp3).labsym.getrefs = 0) then
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0bMovZ1 -> Cset(c)',p);
            { remove jump, first label and second MOV (also catching any aligns) }
            repeat
              if not GetNextInstruction(hp2, hp3) then
                InternalError(2022070801);
              RemoveInstruction(hp2);
              hp2 := hp3;
            until hp2 = hp5;
            { Don't decrement reference count before the removal loop
              above, otherwise GetNextInstruction won't stop on the
              the label }
            tai_label(hp5).labsym.DecRefs;
          end
        else
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c) (partial)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c) (partial)',p);
          end;
        { Turn the first MOV into the CSET and drop the conditional branch }
        taicpu(hp1).opcode := A_CSET;
        taicpu(hp1).loadconditioncode(1, taicpu(p).condition);
        RemoveCurrentP(p, hp1);
        Result:=true;
        exit;
      end;
  end;
  879. function TCpuAsmOptimizer.OptPass2B(var p: tai): Boolean;
  880. var
  881. hp1: tai;
  882. CSELTracking: PCSELTracking;
  883. begin
  884. Result := False;
  885. if (taicpu(p).condition <> C_None) and
  886. IsJumpToLabel(taicpu(p)) and
  887. GetNextInstruction(p, hp1) and
  888. (hp1.typ = ait_instruction) and
  889. (taicpu(hp1).opcode = A_MOV) then
  890. begin
  891. { check for
  892. jCC xxx
  893. <several movs>
  894. xxx:
  895. Also spot:
  896. Jcc xxx
  897. <several movs>
  898. jmp xxx
  899. Change to:
  900. <several csets with inverted condition>
  901. jmp xxx (only for the 2nd case)
  902. }
  903. CSELTracking := New(PCSELTracking, Init(Self, p, hp1, TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol)));
  904. if CSELTracking^.State <> tsInvalid then
  905. begin
  906. CSELTracking^.Process(p);
  907. Result := True;
  908. end;
  909. CSELTracking^.Done;
  910. end;
  911. end;
  912. function TCpuAsmOptimizer.OptPass2CSEL(var p: tai): Boolean;
  913. begin
  914. Result := False;
  915. { Csel r0,r1,r1,cond -> mov r0,r1 }
  916. if (taicpu(p).oper[1]^.reg = taicpu(p).oper[2]^.reg) then
  917. begin
  918. DebugMsg(SPeepholeOptimization + 'CSel2Mov (identical true/false registers)', p);
  919. taicpu(p).opcode := A_MOV;
  920. taicpu(p).ops := 2;
  921. Result := True;
  922. Exit;
  923. end;
  924. end;
function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
  { Merges two adjacent (in address terms) LDR or STR instructions into a
    single LDP/STP when their offsets differ by exactly one register width
    and the combined offset fits the narrower LDP/STP immediate range. }
  var
    hp1, hp1_last: tai;
    ThisRegister: TRegister;
    OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
    TargetOpcode: TAsmOp;
  begin
    Result := False;
    ThisRegister := taicpu(p).oper[0]^.reg;

    case taicpu(p).opcode of
      A_LDR:
        TargetOpcode := A_LDP;
      A_STR:
        TargetOpcode := A_STP;
      else
        InternalError(2020081501);
    end;

    { reg appearing in ref invalidates these optimisations (only relevant
      for loads; a store never modifies the address register) }
    if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
      begin
        { LDP/STP has a smaller permitted offset range than LDR/STR.

          TODO: For a group of out-of-range LDR/STR instructions, can
          we declare a temporary register equal to the offset base
          address, modify the STR instructions to use that register
          and then convert them to STP instructions?  Note that STR
          generally takes 2 cycles (on top of the memory latency),
          while LDP/STP takes 3.
        }
        if (getsubreg(ThisRegister) = R_SUBQ) then
          begin
            { 64-bit registers: pair stride 8, signed 7-bit scaled offset }
            ValidOffset := 8;
            MinOffset := -512;
            MaxOffset := 504
          end
        else
          begin
            { 32-bit registers: pair stride 4 }
            ValidOffset := 4;
            MinOffset := -256;
            MaxOffset := 252;
          end;

        hp1_last := p;

        { Look for nearby LDR/STR instructions }
        if (taicpu(p).oppostfix = PF_NONE) and
          (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
          { Walk forward over instructions with the same opcode until one
            pairs up, something other than an LDR/STR appears, or the base
            register is modified }
          while GetNextInstruction(hp1_last, hp1) do
            begin
              if (hp1.typ <> ait_instruction) then
                Break;

              if (taicpu(hp1).opcode = taicpu(p).opcode) then
                begin
                  if (taicpu(hp1).oppostfix = PF_NONE) and
                    { Registers need to be the same size }
                    (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
                    (
                      (TargetOpcode = A_STP) or
                      { LDP x0, x0, [sp, #imm] is undefined behaviour, even
                        though such an LDR pair should have been optimised
                        out by now.  STP is okay }
                      (ThisRegister <> taicpu(hp1).oper[0]^.reg)
                    ) and
                    (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
                    (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
                    (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
                    { Make sure the address registers haven't changed }
                    not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
                    (
                      (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
                      not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
                    ) and
                    { Don't need to check "RegInRef" because the base registers are identical,
                      and the first one was checked already. [Kit] }
                    (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
                     ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
                    begin
                      { Can we convert these two LDR/STR instructions into a
                        single LDR/STP? }

                      OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
                      if (OffsetVal = ValidOffset) then
                        begin
                          if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs]
                                  ...
                                  LDR/STR reg1. [reg2, #ofs + 8] // 4 if registers are 32-bit
                                To:
                                  LDP/STP reg0, reg1, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end
                      else if (OffsetVal = -ValidOffset) then
                        begin
                          if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                  ...
                                  LDR/STR reg1. [reg2, #ofs]
                                To:
                                  LDP/STP reg1, reg0, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
                              taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end;
                    end;
                end
              else
                Break;

              { Don't continue looking for LDR/STR pairs if the address register
                gets modified }
              if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
                Break;

              hp1_last := hp1;
            end;
      end;
  end;
function TCpuAsmOptimizer.OptPass2MOV(var p: tai): Boolean;
  var
    hp1: tai;
    X: Integer;
  begin
    Result := False;

    { Merge MOV and CSEL instructions left behind by OptPass2B - that is,
      change:
        mov  r0,r1
        csel r0,r2,r0,cond
      To:
        csel r0,r2,r1,cond

      (Also if r0 is the second operand)
    }
    if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).opcode = A_CSEL) and
      (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
      { The replacement source must still hold the same value at hp1 }
      not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1) then
      begin
        { Use "Result" to note if a change was made so we only have to do
          expensive register allocation once.  Both CSEL sources (operands
          1 and 2) are checked; if neither matched, nothing was mutated and
          Result stays False. }
        for X := 1 to 2 do
          if (taicpu(hp1).oper[X]^.reg = taicpu(p).oper[0]^.reg) then
            begin
              taicpu(hp1).oper[X]^.reg := taicpu(p).oper[1]^.reg;
              Result := True;
            end;

        if Result then
          begin
            DebugMSg(SPeepholeOptimization + 'MovCSel2CSel', p);

            { Don't need to allocate the zero register - so save time by
              skipping it in this case }
            if getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR then
              AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);

            RemoveCurrentP(p);
            Exit;
          end;
      end;
  end;
function TCpuAsmOptimizer.PostPeepholeOptAND(var p: tai): Boolean;
  { Folds a single-bit test implemented as AND+CMP+conditional branch into
    one TBZ/TBNZ instruction. }
  var
    hp1, hp2: tai;
    hp3: taicpu;
    bitval : cardinal;
  begin
    Result:=false;
    {
      and reg1,reg0,<const=power of 2>
      cmp reg1,#0
      <reg1 end of life>
      b.e/b.ne label
      into
      tb(n)z reg0,<bit number>,label
    }
    if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
      { The mask must be exactly one bit }
      (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_CMP,[PF_None]) and
      MatchOpType(taicpu(hp1),top_reg,top_const) and
      (taicpu(hp1).oper[1]^.val=0) and
      { The CMP must test the AND's destination... }
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      { ...and that register must not be needed afterwards }
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_B,[PF_None]) and
      (taicpu(hp2).condition in [C_EQ,C_NE]) then
      begin
        { Index of the single set bit in the mask }
        bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
        case taicpu(hp2).condition of
          C_NE:
            hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          C_EQ:
            hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          else
            Internalerror(2021100201);
        end;
        taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
        asml.insertbefore(hp3, hp1);
        { Remove CMP, branch and the AND itself; p advances to the TB(N)Z }
        RemoveInstruction(hp1);
        RemoveInstruction(hp2);
        RemoveCurrentP(p);
        DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
        Result:=true;
      end;
  end;
  1152. function TCpuAsmOptimizer.PostPeepholeOptCMP(var p : tai): boolean;
  1153. var
  1154. hp1,hp2: tai;
  1155. begin
  1156. Result:=false;
  1157. {
  1158. cmp reg0,#0
  1159. b.e/b.ne label
  1160. into
  1161. cb(n)z reg0,label
  1162. }
  1163. if MatchOpType(taicpu(p),top_reg,top_const) and
  1164. (taicpu(p).oper[0]^.reg<>NR_SP) and
  1165. (taicpu(p).oper[1]^.val=0) and
  1166. GetNextInstruction(p,hp1) and
  1167. MatchInstruction(hp1,A_B,[PF_None]) and
  1168. (taicpu(hp1).condition in [C_EQ,C_NE]) then
  1169. begin
  1170. case taicpu(hp1).condition of
  1171. C_NE:
  1172. hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1173. C_EQ:
  1174. hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1175. else
  1176. Internalerror(2019090801);
  1177. end;
  1178. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  1179. asml.insertbefore(hp2, hp1);
  1180. asml.remove(p);
  1181. asml.remove(hp1);
  1182. p.free;
  1183. hp1.free;
  1184. p:=hp2;
  1185. DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
  1186. Result:=true;
  1187. end;
  1188. end;
function TCpuAsmOptimizer.PostPeepholeOptTST(var p : tai): boolean;
  { Folds a single-bit TST followed by a conditional branch into one
    TBZ/TBNZ instruction. }
  var
    hp1: tai;
    hp3: taicpu;
    bitval : cardinal;
  begin
    Result:=false;
    {
      tst reg1,<const=power of 2>
      b.e/b.ne label
      into
      tb(n)z reg1,<bit number>,label
    }
    if MatchOpType(taicpu(p),top_reg,top_const) and
      { The mask must be exactly one bit }
      (PopCnt(QWord(taicpu(p).oper[1]^.val))=1) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_B,[C_EQ,C_NE],[PF_None]) then
      begin
        { Index of the single set bit in the mask }
        bitval:=BsfQWord(qword(taicpu(p).oper[1]^.val));
        case taicpu(hp1).condition of
          C_NE:
            hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
          C_EQ:
            hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
          else
            Internalerror(2021100210);
        end;
        taicpu(hp3).fileinfo:=taicpu(p).fileinfo;
        asml.insertafter(hp3, p);
        { Remove the branch and the TST, leaving p on the new TB(N)Z }
        RemoveInstruction(hp1);
        RemoveCurrentP(p, hp3);
        DebugMsg(SPeepholeOptimization + 'TST; B(E/NE) -> TB(Z/NZ) done', p);
        Result:=true;
      end;
  end;
  1224. function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
  1225. begin
  1226. result := false;
  1227. if p.typ=ait_instruction then
  1228. begin
  1229. case taicpu(p).opcode of
  1230. A_SBFX,
  1231. A_UBFX:
  1232. Result:=OptPreSBFXUBFX(p);
  1233. else
  1234. ;
  1235. end;
  1236. end;
  1237. end;
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  { Pass-1 dispatcher: routes each instruction to its opcode-specific
    optimisation routine.  Returns True when a change was made so the
    peephole driver re-examines the current position. }
  begin
    result := false;
    if p.typ=ait_instruction then
      begin
        case taicpu(p).opcode of
          A_B:
            Result:=OptPass1B(p);
          A_LDR:
            Result:=OptPass1LDR(p);
          A_STR:
            Result:=OptPass1STR(p);
          A_MOV:
            Result:=OptPass1Mov(p);
          A_MOVZ:
            Result:=OptPass1MOVZ(p);
          A_STP:
            Result:=OptPass1STP(p);
          { All shift/rotate forms share one handler }
          A_LSR,
          A_ROR,
          A_ASR,
          A_LSL:
            Result:=OptPass1Shift(p);
          A_AND:
            Result:=OptPass1And(p);
          { Generic integer data-processing instructions }
          A_NEG,
          A_CSEL,
          A_ADD,
          A_ADC,
          A_SUB,
          A_SBC,
          A_BIC,
          A_EOR,
          A_ORR,
          A_MUL:
            Result:=OptPass1Data(p);
          A_UXTB:
            Result:=OptPass1UXTB(p);
          A_UXTH:
            Result:=OptPass1UXTH(p);
          A_SXTB:
            Result:=OptPass1SXTB(p);
          A_SXTH:
            Result:=OptPass1SXTH(p);
          A_SXTW:
            Result:=OptPass1SXTW(p);
          { Floating-point data-processing instructions }
//          A_VLDR,
          A_FMADD,
          A_FMSUB,
          A_FNMADD,
          A_FNMSUB,
          A_FNMUL,
          A_FADD,
          A_FMUL,
          A_FDIV,
          A_FSUB,
          A_FSQRT,
          A_FNEG,
          A_FCVT,
          A_FABS:
            Result:=OptPass1FData(p);
          A_FMOV:
            Result:=OptPass1FMov(p);
          else
            ;
        end;
      end;
  end;
  1306. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  1307. begin
  1308. result := false;
  1309. if p.typ=ait_instruction then
  1310. begin
  1311. case taicpu(p).opcode of
  1312. A_AND:
  1313. Result := OptPass2AND(p);
  1314. A_B:
  1315. Result := OptPass2B(p);
  1316. A_CSEL:
  1317. Result := OptPass2CSEL(p);
  1318. A_MOV:
  1319. Result := OptPass2MOV(p);
  1320. A_LDR,
  1321. A_STR:
  1322. Result := OptPass2LDRSTR(p);
  1323. A_TST:
  1324. Result := OptPass2TST(p);
  1325. else
  1326. ;
  1327. end;
  1328. end;
  1329. end;
  1330. function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
  1331. begin
  1332. result := false;
  1333. if p.typ=ait_instruction then
  1334. begin
  1335. case taicpu(p).opcode of
  1336. A_CMP:
  1337. Result:=PostPeepholeOptCMP(p);
  1338. A_AND:
  1339. Result:=PostPeepholeOptAND(p);
  1340. A_TST:
  1341. Result:=PostPeepholeOptTST(p);
  1342. else
  1343. ;
  1344. end;
  1345. end;
  1346. end;
class procedure TCpuAsmOptimizer.UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai);
  begin
    { Update integer registers, ignoring deallocations.  This walks the
      regalloc markers starting at p and records every integer register
      allocation in AUsedRegs; dealloc markers are skipped so registers
      stay marked as live. }
    repeat
      { Skip over non-regalloc bookkeeping items (labels, skippable markers) }
      while assigned(p) and
            ((p.typ in (SkipInstr - [ait_RegAlloc])) or
             (p.typ = ait_label) or
             ((p.typ = ait_marker) and
              (tai_Marker(p).Kind in [mark_AsmBlockEnd,mark_NoLineInfoStart,mark_NoLineInfoEnd]))) do
           p := tai(p.next);

      while assigned(p) and
            (p.typ=ait_RegAlloc) Do
        begin
          if (getregtype(tai_regalloc(p).reg) = R_INTREGISTER) then
            begin
              case tai_regalloc(p).ratype of
                ra_alloc :
                  IncludeRegInUsedRegs(tai_regalloc(p).reg, AUsedRegs);
                else
                  { ra_dealloc and other types deliberately ignored }
                  ;
              end;
            end;
          p := tai(p.next);
        end;
    until not(assigned(p)) or
          { Stop once a real instruction (or an unskippable label) is reached }
          (not(p.typ in SkipInstr) and
           not((p.typ = ait_label) and
              labelCanBeSkipped(tai_label(p))));
  end;
  1376. { Attempts to allocate a volatile integer register for use between p and hp,
  1377. using AUsedRegs for the current register usage information. Returns NR_NO
  1378. if no free register could be found }
function TCpuAsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  { Scans the candidate register set for one that is neither marked used in
    AUsedRegs nor touched by any instruction or allocation between p and hp.
    On success the register is (unless DontAlloc) allocated over that range;
    NR_NO is returned when no register qualifies. }
  var
    RegSet: TCPURegisterSet;
    CurrentSuperReg: Integer;
    CurrentReg: TRegister;
    Currentp: tai;
    Breakout: Boolean;
  begin
    Result := NR_NO;
    { Candidates: volatile registers of the current calling convention plus
      callee-saved registers this procedure already preserves }
    RegSet :=
      paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
      current_procinfo.saved_regs_int;
(*
    { Don't use the frame register unless explicitly allowed (fixes i40111) }
    if ([cs_useebp, cs_userbp] * current_settings.optimizerswitches) = [] then
      Exclude(RegSet, RS_FRAME_POINTER_REG);
*)
    for CurrentSuperReg in RegSet do
      begin
        CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
        if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg)
          then
          begin
            Currentp := p;
            Breakout := False;
            { Verify the register is untouched over the whole (p, hp) range }
            while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
              begin
                case Currentp.typ of
                  ait_instruction:
                    begin
                      if RegInInstruction(CurrentReg, Currentp) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                      { Cannot allocate across an unconditional jump }
                      if is_calljmpmaybeuncondret(taicpu(Currentp).opcode) and (taicpu(Currentp).condition = C_None) then
                        Exit;
                    end;
                  ait_marker:
                    { Don't try anything more if a marker is hit }
                    Exit;
                  ait_regalloc:
                    { A pending allocation of this super-register means it is
                      claimed further on - try another register }
                    if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                      begin
                        Breakout := True;
                        Break;
                      end;
                  else
                    ;
                end;
              end;

            if Breakout then
              { Try the next register }
              Continue;

            { We have a free register available }
            Result := CurrentReg;
            if not DontAlloc then
              AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
            Exit;
          end;
      end;
  end;
function TCSELTracking.InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
  { Scans forward from BlockStart, accepting only MOV instructions, until a
    label or an unconditional branch terminates the block.  On success
    BlockStop is the terminating item, EndJump the branch (nil if the block
    ended on a label), and fAllocationRange tracks the last valid MOV.
    Returns False if anything other than valid MOVs is found. }
  begin
    Result := False;
    EndJump := nil;
    BlockStop := nil;
    while (BlockStart <> fOptimizer.BlockEnd) and
      { stop on labels }
      (BlockStart.typ <> ait_label) do
      begin
        { Keep track of all integer registers that are used }
        fOptimizer.UpdateIntRegsNoDealloc(RegisterTracking, tai(OneBeforeBlock.Next));
        if BlockStart.typ = ait_instruction then
          begin
            if MatchInstruction(BlockStart, A_B, [C_None], []) then
              begin
                { Only a direct branch to a plain label can end the block }
                if not IsJumpToLabel(taicpu(BlockStart)) or
                  (JumpTargetOp(taicpu(BlockStart))^.ref^.index <> NR_NO) then
                  Exit;

                EndJump := BlockStart;
                Break;
              end
            { Check to see if we have a valid MOV instruction instead }
            else if (taicpu(BlockStart).opcode <> A_MOV) or
              { Can't include the stack pointer in CSEL }
              fOptimizer.RegInInstruction(NR_SP, BlockStart) then
              begin
                Exit;
              end
            else
              { This will be a valid MOV }
              fAllocationRange := BlockStart;
          end;
        OneBeforeBlock := BlockStart;
        fOptimizer.GetNextInstruction(BlockStart, BlockStart);
      end;

    { Running into the end of the block without a terminator is a failure }
    if (BlockStart = fOptimizer.BlockEnd) then
      Exit;

    BlockStop := BlockStart;
    Result := True;
  end;
function TCSELTracking.AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
  { Computes a weighting score for converting the MOVs between BlockStart and
    BlockStop into CSELs.  Register-to-register MOVs count 2, constant MOVs
    count 1 plus a register reservation via TryCSELConst; -1 means the block
    cannot be converted. }
  var
    hp1: tai;
    RefModified: Boolean;
  begin
    Result := 0;
    hp1 := BlockStart;
    RefModified := False; { As long as the condition is inverted, this can be reset }
    while assigned(hp1) and
      (hp1 <> BlockStop) do
      begin
        case hp1.typ of
          ait_instruction:
            if MatchInstruction(hp1, A_MOV, []) then
              begin
                Inc(Result);
                if taicpu(hp1).oper[1]^.typ = top_reg then
                  begin
                    { reg-to-reg MOV: weighted 2 in total }
                    Inc(Result);
                  end
                else if not (cs_opt_size in current_settings.optimizerswitches) and
                  { CSEL with constants grows the code size }
                  TryCSELConst(hp1, SearchStart, BlockStop, Result) then
                  begin
                    { Register was reserved by TryCSELConst and
                      stored on ConstRegs }
                  end
                else
                  begin
                    { Constant cannot be materialised - block is unusable }
                    Result := -1;
                    Exit;
                  end;
              end
            else
              begin
                { Any non-MOV instruction invalidates the block }
                Result := -1;
                Exit;
              end;
          else
            { Most likely an align };
        end;
        fOptimizer.GetNextInstruction(hp1, hp1);
      end;
  end;
  { Analyses the conditional-jump / MOV-block construct starting at
    p_initialjump (with first MOV at p_initialmov, jumping to AFirstLabel)
    and classifies it into one of the TCSELTracking states (see the table
    below), computing CSELScore as the instruction-count weighting.
    On exit, fState = tsInvalid means the construct is unsuitable for
    CSEL conversion; otherwise Process can be called to perform it. }
  constructor TCSELTracking.Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);

    { For the tsBranching type, increase the weighting score to account for the new conditional jump
      (this is done as a separate stage because the double types are extensions of the branching type,
      but we can't discount the conditional jump until the last step) }
    procedure EvaluateBranchingType;
      begin
        Inc(CSELScore);
        if (CSELScore > MAX_CSEL_INSTRUCTIONS) then
          { Too many instructions to be worthwhile }
          fState := tsInvalid;
      end;

    var
      hp1: tai;
      Count: Integer;
    begin
      { Table of valid CSEL block types
        Block type              2nd Jump    Mid-label  2nd MOVs   3rd Jump   End-label
        ----------              ---------   ---------  ---------  ---------  ---------
        tsSimple                X           Yes        X          X          X
        tsDetour                = 1st       X          X          X          X
        tsBranching             <> Mid      Yes        X          X          X
        tsDouble                End-label   Yes *      Yes        X          Yes
        tsDoubleBranchSame      <> Mid      Yes *      Yes        = 2nd      X
        tsDoubleBranchDifferent <> Mid      Yes *      Yes        <> 2nd     X
        tsDoubleSecondBranching End-label   Yes *      Yes        <> 2nd     Yes
        * Only one reference allowed
      }
      hp1 := nil; { To prevent compiler warnings }
      { Snapshot the current register usage; released again in Done }
      Optimizer.CopyUsedRegs(RegisterTracking);
      fOptimizer := Optimizer;
      fLabel := AFirstLabel;
      CSELScore := 0;
      ConstCount := 0;
      { Initialise RegWrites, ConstRegs, ConstVals, ConstSizes, ConstWriteSizes and ConstMovs }
      FillChar(RegWrites[0], MAX_CSEL_INSTRUCTIONS * 2 * SizeOf(TRegister), 0);
      FillChar(ConstRegs[0], MAX_CSEL_REGISTERS * SizeOf(TRegister), 0);
      FillChar(ConstVals[0], MAX_CSEL_REGISTERS * SizeOf(TCGInt), 0);
      FillChar(ConstSizes[0], MAX_CSEL_REGISTERS * SizeOf(TSubRegister), 0);
      FillChar(ConstWriteSizes[0], first_int_imreg * SizeOf(TOpSize), 0);
      FillChar(ConstMovs[0], MAX_CSEL_REGISTERS * SizeOf(taicpu), 0);
      fInsertionPoint := p_initialjump;
      fCondition := nil;
      fInitialJump := p_initialjump;
      fFirstMovBlock := p_initialmov;
      fFirstMovBlockStop := nil;
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fSecondMovBlockStop := nil;
      fMidLabel := nil;
      { NOTE(review): the next two assignments duplicate the ones a few
        lines above; harmless, but one pair could be removed }
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fEndLabel := nil;
      fAllocationRange := nil;
      { Assume it all goes horribly wrong! }
      fState := tsInvalid;
      { Look backwards at the comparisons to get an accurate picture of register usage and a better position for any MOV const,reg insertions }
      if Optimizer.GetLastInstruction(p_initialjump, fCondition) and
        (
          MatchInstruction(fCondition, [A_CMP, A_CMN, A_TST], []) or
          { AND with the S postfix (i.e. ANDS) also sets the flags }
          (
            (fCondition.typ = ait_instruction) and
            (taicpu(fCondition).opcode = A_AND) and
            (taicpu(fCondition).oppostfix = PF_S)
          )
        ) then
        begin
          { Mark all the registers in the comparison as 'in use', even if they've just been deallocated }
          for Count := 0 to taicpu(fCondition).ops - 1 do
            with taicpu(fCondition).oper[Count]^ do
              case typ of
                top_reg:
                  if getregtype(reg) = R_INTREGISTER then
                    Optimizer.IncludeRegInUsedRegs(reg, RegisterTracking);
                top_ref:
                  begin
                    if (ref^.base <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.base, RegisterTracking);
                    if (ref^.index <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.index, RegisterTracking);
                  end
                else
                  ;
              end;
          { When inserting instructions before hp_prev, try to insert them
            before the allocation of the FLAGS register }
          if not SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(fCondition.Previous)), fInsertionPoint) or
            (tai_regalloc(fInsertionPoint).ratype = ra_dealloc) then
            { If not found, set it equal to the condition so it's something sensible }
            fInsertionPoint := fCondition;
        end
      else
        fCondition := nil;
      { When inserting instructions, try to insert them before the allocation of the FLAGS register }
      if SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p_initialjump.Previous)), hp1) and
        (tai_regalloc(hp1).ratype <> ra_dealloc) then
        { If not found, set it equal to p so it's something sensible }
        fInsertionPoint := hp1;
      hp1 := p_initialmov;
      { Scan the first MOV block; also detects a trailing (2nd) jump }
      if not InitialiseBlock(p_initialmov, p_initialjump, fFirstMovBlockStop, fSecondJump) then
        Exit;
      hp1 := fFirstMovBlockStop; { Will either be on a label or a jump }
      if (hp1.typ <> ait_label) then { should be on a jump }
        begin
          if not Optimizer.GetNextInstruction(hp1, fMidLabel) or (fMidLabel.typ <> ait_label) then
            { Need a label afterwards }
            Exit;
        end
      else
        fMidLabel := hp1;
      if tai_label(fMidLabel).labsym <> AFirstLabel then
        { Not the correct label }
        fMidLabel := nil;
      if not Assigned(fSecondJump) and not Assigned(fMidLabel) then
        { If there's neither a 2nd jump nor correct label, then it's invalid
          (see above table) }
        Exit;
      { Analyse the first block of MOVs more closely }
      CSELScore := AnalyseMOVBlock(fFirstMovBlock, fFirstMovBlockStop, fInsertionPoint);
      if Assigned(fSecondJump) then
        begin
          if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = AFirstLabel) then
            begin
              fState := tsDetour
            end
          else
            begin
              { Need the correct mid-label for this one }
              if not Assigned(fMidLabel) then
                Exit;
              fState := tsBranching;
            end;
        end
      else
        { No jump, but mid-label is present }
        fState := tsSimple;
      if (CSELScore > MAX_CSEL_INSTRUCTIONS) or (CSELScore <= 0) then
        begin
          { Invalid or too many instructions to be worthwhile }
          fState := tsInvalid;
          Exit;
        end;
      { check further for
          b xxx
          <several movs 1>
          bl yyy
        xxx:
          <several movs 2>
        yyy:
        etc.
      }
      if (fState = tsBranching) and
        { Estimate for required savings for extra jump }
        (CSELScore <= MAX_CSEL_INSTRUCTIONS - 1) and
        { Only one reference is allowed for double blocks }
        (AFirstLabel.getrefs = 1) then
        begin
          Optimizer.GetNextInstruction(fMidLabel, hp1);
          fSecondMovBlock := hp1;
          if not InitialiseBlock(fSecondMovBlock, fMidLabel, fSecondMovBlockStop, fThirdJump) then
            begin
              { Not a double block after all; settle for tsBranching }
              EvaluateBranchingType;
              Exit;
            end;
          hp1 := fSecondMovBlockStop; { Will either be on a label or a jump }
          if (hp1.typ <> ait_label) then { should be on a jump }
            begin
              if not Optimizer.GetNextInstruction(hp1, fEndLabel) or (fEndLabel.typ <> ait_label) then
                begin
                  { Need a label afterwards }
                  EvaluateBranchingType;
                  Exit;
                end;
            end
          else
            fEndLabel := hp1;
          if tai_label(fEndLabel).labsym <> JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol then
            { Second jump doesn't go to the end }
            fEndLabel := nil;
          if not Assigned(fThirdJump) and not Assigned(fEndLabel) then
            begin
              { If there's neither a 3rd jump nor correct end label, then it's
                not an invalid double block, but is a valid single branching
                block (see above table) }
              EvaluateBranchingType;
              Exit;
            end;
          { Weigh the second MOV block as well }
          Count := AnalyseMOVBlock(fSecondMovBlock, fSecondMovBlockStop, fMidLabel);
          if (Count > MAX_CSEL_INSTRUCTIONS) or (Count <= 0) then
            { Invalid or too many instructions to be worthwhile }
            Exit;
          Inc(CSELScore, Count);
          if Assigned(fThirdJump) then
            begin
              if not Assigned(fSecondJump) then
                fState := tsDoubleSecondBranching
              else if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = JumpTargetOp(taicpu(fThirdJump))^.ref^.symbol) then
                fState := tsDoubleBranchSame
              else
                fState := tsDoubleBranchDifferent;
            end
          else
            fState := tsDouble;
        end;
      if fState = tsBranching then
        EvaluateBranchingType;
    end;
  { Tries to convert a mov const,%reg instruction into a CSEL by reserving a
    new register to store the constant }
  function TCSELTracking.TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
    var
      ValueSize: TSubRegister;
      MovValue: TCGInt;
      SpareReg: TRegister;
      Index: ShortInt;
    begin
      Result := False;
      { Only "mov reg,const" is eligible }
      if not MatchOpType(taicpu(p), top_reg, top_const) then
        Exit;
      { Bail out if the tracking arrays are already full }
      if ConstCount >= MAX_CSEL_REGISTERS then
        Exit;
      MovValue := taicpu(p).oper[1]^.val;
      ValueSize := getsubreg(taicpu(p).oper[0]^.reg);
      { See if the value has already been reserved for another CSEL
        instruction; if so, share that register }
      SpareReg := NR_NO;
      for Index := 0 to ConstCount - 1 do
        if ConstVals[Index] = MovValue then
          begin
            SpareReg := ConstRegs[Index];
            Break;
          end;
      if SpareReg = NR_NO then
        begin
          { Value not seen before; try to grab a fresh register }
          SpareReg := fOptimizer.GetIntRegisterBetween(R_SUBWHOLE, RegisterTracking, start, stop, True);
          if SpareReg = NR_NO then
            { No free registers }
            Exit;
          { Reserve the register so subsequent TryCSELConst calls don't all
            end up vying for the same register }
          fOptimizer.IncludeRegInUsedRegs(SpareReg, RegisterTracking);
        end;
      { Record the reservation for Process to consume later }
      ConstRegs[ConstCount] := SpareReg;
      ConstSizes[ConstCount] := ValueSize;
      ConstVals[ConstCount] := MovValue;
      Inc(ConstCount);
      Inc(Count);
      Result := True;
    end;
  { Releases the register-usage snapshot taken via CopyUsedRegs in Init }
  destructor TCSELTracking.Done;
    begin
      TAOptObj.ReleaseUsedRegs(RegisterTracking);
    end;
  { Performs the actual conversion of the analysed construct into CSEL
    instructions, based on the state that Init determined.  Raises an
    internal error if called on an invalid or already-processed object.
    On return, new_p points at the first instruction of the rewritten
    block (immediately after where the initial jump used to be).

    Consistency fix: ConstWriteSizes is indexed with getsupreg() here,
    matching the two lookups in the MOV loops below (the previous
    Word() cast relied on the superregister occupying the low word of
    TRegister - equivalent, but fragile and inconsistent). }
  procedure TCSELTracking.Process(out new_p: tai);
    var
      Count, Writes: LongInt;
      RegMatch: Boolean;
      hp1, hp_new: tai;
      inverted_condition, condition: TAsmCond;
    begin
      if (fState in [tsInvalid, tsProcessed]) then
        InternalError(2023110702);
      { Repurpose RegisterTracking to mark registers that we've defined }
      RegisterTracking[R_INTREGISTER].Clear;
      Count := 0;
      Writes := 0;
      condition := taicpu(fInitialJump).condition;
      inverted_condition := inverse_cond(condition);
      { Exclude tsDoubleBranchDifferent from this check, as the second block
        doesn't get CSELs in this case }
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleSecondBranching]) then
        begin
          { Include the jump in the flag tracking }
          if Assigned(fThirdJump) then
            begin
              if (fState = tsDoubleBranchSame) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fThirdJump, hp1) then
                    InternalError(2023110712);
                end
              else
                hp1 := fThirdJump;
            end
          else
            hp1 := fSecondMovBlockStop;
        end
      else
        begin
          { Include a conditional jump in the flag tracking }
          if Assigned(fSecondJump) then
            begin
              if (fState = tsDetour) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fSecondJump, hp1) then
                    InternalError(2023110713);
                end
              else
                hp1 := fSecondJump;
            end
          else
            hp1 := fFirstMovBlockStop;
        end;
      { Keep the flags register live over the whole range the CSELs will occupy }
      fOptimizer.AllocRegBetween(NR_DEFAULTFLAGS, fInitialJump, hp1, fOptimizer.UsedRegs);
      { Process the second set of MOVs first, because if a destination
        register is shared between the first and second MOV sets, it is more
        efficient to turn the first one into a MOV instruction and place it
        before the CMP if possible, but we won't know which registers are
        shared until we've processed at least one list, so we might as well
        make it the second one since that won't be modified again. }
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching]) then
        begin
          hp1 := fSecondMovBlock;
          repeat
            if not Assigned(hp1) then
              InternalError(2018062902);
            if (hp1.typ = ait_instruction) then
              begin
                { Extra safeguard }
                if (taicpu(hp1).opcode <> A_MOV) then
                  InternalError(2018062903);
                { Note: tsDoubleBranchDifferent is essentially identical to
                  tsBranching and the 2nd block is best left largely
                  untouched, but we need to evaluate which registers the MOVs
                  write to in order to track what would be complementary CSEL
                  pairs that can be further optimised. [Kit] }
                if fState <> tsDoubleBranchDifferent then
                  begin
                    if taicpu(hp1).oper[1]^.typ = top_const then
                      begin
                        RegMatch := False;
                        for Count := 0 to ConstCount - 1 do
                          if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                            (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                            begin
                              RegMatch := True;
                              { If it's in RegisterTracking, then this register
                                is being used more than once and hence has
                                already had its value defined (it gets added to
                                UsedRegs through AllocRegBetween below) }
                              if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                                begin
                                  hp_new := tai(hp1.getcopy);
                                  taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                                  taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
                                  fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                                  fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
                                  ConstMovs[Count] := hp_new;
                                end
                              else
                                { We just need an instruction between hp_prev and hp1
                                  where we know the register is marked as in use }
                                hp_new := fSecondMovBlock;
                              { Keep track of largest write for this register so it can be optimised later }
                              if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                                ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
                              fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                              taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                              Break;
                            end;
                        if not RegMatch then
                          InternalError(2021100413);
                      end;
                    { Convert "mov reg,src" to "csel reg,src,reg,cond" }
                    taicpu(hp1).opcode := A_CSEL;
                    taicpu(hp1).ops := 4;
                    taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
                    taicpu(hp1).loadconditioncode(3, condition);
                  end;
                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;
            fOptimizer.GetNextInstruction(hp1, hp1);
          until (hp1 = fSecondMovBlockStop);
        end;
      { Now do the first set of MOVs }
      hp1 := fFirstMovBlock;
      repeat
        if not Assigned(hp1) then
          InternalError(2018062904);
        if (hp1.typ = ait_instruction) then
          begin
            RegMatch := False;
            { Extra safeguard }
            if (taicpu(hp1).opcode <> A_MOV) then
              InternalError(2018062905);
            { Search through the RegWrites list to see if there are any
              opposing CSEL pairs that write to the same register }
            for Count := 0 to Writes - 1 do
              if (RegWrites[Count] = taicpu(hp1).oper[0]^.reg) then
                begin
                  { We have a match. Keep this as a MOV }
                  { Move ahead in preparation }
                  fOptimizer.GetNextInstruction(hp1, hp1);
                  RegMatch := True;
                  Break;
                end;
            if RegMatch then
              Continue;
            if taicpu(hp1).oper[1]^.typ = top_const then
              begin
                for Count := 0 to ConstCount - 1 do
                  if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                    (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                    begin
                      RegMatch := True;
                      { If it's in RegisterTracking, then this register is
                        being used more than once and hence has already had
                        its value defined (it gets added to UsedRegs through
                        AllocRegBetween below) }
                      if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                        begin
                          hp_new := tai(hp1.getcopy);
                          taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                          taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
                          fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                          fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
                          ConstMovs[Count] := hp_new;
                        end
                      else
                        { We just need an instruction between hp_prev and hp1
                          where we know the register is marked as in use }
                        hp_new := fFirstMovBlock;
                      { Keep track of largest write for this register so it can be optimised later }
                      if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                        ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
                      fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                      taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                      Break;
                    end;
                if not RegMatch then
                  InternalError(2021100412);
              end;
            { The first block takes the inverted condition, since these MOVs
              execute when the initial jump is NOT taken }
            taicpu(hp1).opcode := A_CSEL;
            taicpu(hp1).ops := 4;
            taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).loadconditioncode(3, inverted_condition);
            if (fState = tsDoubleBranchDifferent) then
              begin
                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;
          end;
        fOptimizer.GetNextInstruction(hp1, hp1);
      until (hp1 = fFirstMovBlockStop);
      { Update initialisation MOVs to the smallest possible size }
      for Count := 0 to ConstCount - 1 do
        if Assigned(ConstMovs[Count]) then
          setsubreg(taicpu(ConstMovs[Count]).oper[0]^.reg, ConstWriteSizes[getsupreg(ConstRegs[Count])]);
      { Per-state cleanup of the jumps that the CSELs have made redundant }
      case fState of
        tsSimple:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Simple type)', fInitialJump);
            { No branch to delete }
          end;
        tsDetour:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Detour type)', fInitialJump);
            { Preserve jump }
          end;
        tsBranching, tsDoubleBranchDifferent:
          begin
            if (fState = tsBranching) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Branching type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (different) type)', fInitialJump);
            taicpu(fSecondJump).condition := inverted_condition;
          end;
        tsDouble, tsDoubleBranchSame:
          begin
            if (fState = tsDouble) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (same) type)', fInitialJump);
            { Delete second jump }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
          end;
        tsDoubleSecondBranching:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double, second branching type)', fInitialJump);
            { Delete second jump, preserve third jump as conditional }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
            taicpu(fThirdJump).condition := condition;
          end;
        else
          InternalError(2023110721);
      end;
      { Now we can safely decrement the reference count }
      tasmlabel(fLabel).decrefs;
      fOptimizer.UpdateUsedRegs(tai(fInitialJump.next));
      { Remove the original jump }
      fOptimizer.RemoveInstruction(fInitialJump); { Note, the choice to not use RemoveCurrentp is deliberate }
      new_p := fFirstMovBlock; { Appears immediately after the initial jump }
      fState := tsProcessed;
    end;
begin
  { Unit initialisation: publish this CPU-specific optimizer class.
    NOTE(review): casmoptimizer is consumed outside this unit - presumably
    by the generic assembler-optimizer driver; confirm against aoptobj. }
  casmoptimizer:=TCpuAsmOptimizer;
End.