aoptcpu.pas 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. globtype, globals,
  26. cutils,
  27. cgbase, cpubase, aasmtai, aasmcpu,
  28. aopt, aoptcpub, aoptarm, aoptobj;
Type
  { AArch64-specific peephole optimizer.  The *Cpu methods are the entry
    points called once per instruction for each optimizer pass; the
    OptPass1*/OptPass2*/PostPeepholeOpt* helpers each handle one opcode
    (or opcode family). }
  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PrePeepHoleOptsCpu(var p: tai): boolean; override;
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    { True if instruction hp writes a new value to reg (writeback of a
      pre-/post-indexed base register also counts) }
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
    { True if instruction hp reads reg through any of its operands }
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
    { Folds "ldr/str regX,[base]; ...; add/sub base,base,#imm" into a
      post-indexed ldr/str }
    function LookForPostindexedPattern(var p : tai) : boolean;
  public
    { With these routines, there's optimisation code that's general for all ARM platforms }
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  private
    { Analogue of RemoveSuperfluousMove for FMOV / floating-point registers }
    function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
    function OptPass1Shift(var p: tai): boolean;
    function OptPass1Data(var p: tai): boolean;
    function OptPass1FData(var p: tai): Boolean;
    function OptPass1STP(var p: tai): boolean;
    function OptPass1Mov(var p: tai): boolean;
    function OptPass1MOVZ(var p: tai): boolean;
    function OptPass1FMov(var p: tai): Boolean;
    function OptPass1B(var p: tai): boolean;
    function OptPass1SXTW(var p: tai): Boolean;
    function OptPass2B(var p: tai): Boolean;
    function OptPass2LDRSTR(var p: tai): boolean;
    function PostPeepholeOptAND(var p: tai): Boolean;
    function PostPeepholeOptCMP(var p: tai): boolean;
    function PostPeepholeOptTST(var p: tai): Boolean;
  protected
    { Like UpdateUsedRegs, but ignores deallocations }
    class procedure UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai); static;
    { Attempts to allocate a volatile integer register for use between p and hp,
      using AUsedRegs for the current register usage information. Returns NR_NO
      if no free register could be found }
    function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  End;
  67. Implementation
  68. uses
  69. aasmbase,
  70. aoptbase,
  71. aoptutils,
  72. cgutils,
  73. procinfo,
  74. paramgr,
  75. verbose;
{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}
    { Upper bounds for the CSEL-conversion bookkeeping in TCSELTracking below }
    MAX_CSEL_INSTRUCTIONS = 8;
    MAX_CSEL_REGISTERS = 30;
  type
    { States of the CSEL conversion state machine; tsInvalid aborts the
      conversion, tsProcessed marks completion }
    TCSELTrackingState = (tsInvalid, tsSimple, tsDetour, tsBranching,
      tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching,
      tsProcessed);
    { For OptPass2Jcc }
    TCSELTracking = object
      private
        CSELScore, ConstCount: LongInt;
        RegWrites: array[0..MAX_CSEL_INSTRUCTIONS*2 - 1] of TRegister;
        { Registers holding constants that were materialised for the conversion,
          with their values in the parallel ConstVals array }
        ConstRegs: array[0..MAX_CSEL_REGISTERS - 1] of TRegister;
        ConstVals: array[0..MAX_CSEL_REGISTERS - 1] of TCGInt;
        ConstSizes: array[0..MAX_CSEL_REGISTERS - 1] of TSubRegister; { May not match ConstRegs if one is shared over multiple CSELs. }
        ConstMovs: array[0..MAX_CSEL_REGISTERS - 1] of tai; { Location of initialisation instruction }
        ConstWriteSizes: array[0..first_int_imreg - 1] of TSubRegister; { Largest size of register written. }
        fOptimizer: TCpuAsmOptimizer;
        fLabel: TAsmSymbol;
        { Significant instruction/label positions within the conditional
          block(s) being converted }
        fInsertionPoint,
        fCondition,
        fInitialJump,
        fFirstMovBlock,
        fFirstMovBlockStop,
        fSecondJump,
        fThirdJump,
        fSecondMovBlock,
        fSecondMovBlockStop,
        fMidLabel,
        fEndLabel,
        fAllocationRange: tai;
        fState: TCSELTrackingState;
        function TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
        function InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
        function AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
      public
        RegisterTracking: TAllUsedRegs;
        constructor Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
        destructor Done;
        procedure Process(out new_p: tai);
        property State: TCSELTrackingState read fState;
    end;
    PCSELTracking = ^TCSELTracking;
  { Returns whether p could have a condition attached: it must be an
    instruction and must not already carry one. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { True if instruction hp writes a new value into reg, either as a plain
    destination register or through pre-/post-indexed writeback. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      { Assume "no" until a writing destination operand is found }
      Result:=false;
      if (hp=nil) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { These operations do not write into a register at all.
        LDR/STR with post/pre-indexed operations do not need special treatment
        because post-/preindexed does not mean that a register
        is loaded with a new value, it is only modified }
      case instr.opcode of
        A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
          exit;
        else
          ;
      end;
      if instr.ops=0 then
        exit;
      case instr.oper[0]^.typ of
        top_reg:
          { compare super-registers so the W and X views of the same
            register match }
          Result:=SuperRegistersEqual(instr.oper[0]^.reg,reg);
        top_ref:
          { a pre-/post-indexed reference writes back to its base register }
          Result:=
            (instr.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (instr.oper[0]^.ref^.base=reg);
        else
          ;
      end;
    end;
  { True if instruction hp reads reg, either directly as a register operand
    or as the base/index of a memory reference. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx, firstop: longint;
    begin
      Result:=false;
      if (hp=nil) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { Operand 0 is usually the destination; include it only when this
        instruction (also) reads it }
      if instr.spilling_get_operation_type(0) in [operand_read,operand_readwrite] then
        firstop:=0
      else
        firstop:=1;
      for opidx:=firstop to instr.ops-1 do
        case instr.oper[opidx]^.typ of
          top_reg:
            if instr.oper[opidx]^.reg=reg then
              begin
                Result:=true;
                exit;
              end;
          top_ref:
            { references read their base and index registers }
            if (instr.oper[opidx]^.ref^.base=reg) or
               (instr.oper[opidx]^.ref^.index=reg) then
              begin
                Result:=true;
                exit;
              end;
          else
            ;
        end;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,const
    into
      ldr/str regX,[reg1], const
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.typ = top_ref) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
        (taicpu(p).oper[1]^.ref^.index=NR_NO) and
        (taicpu(p).oper[1]^.ref^.offset=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
        MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
        (
          { valid offset? (post-indexed loads/stores take a 9 bit signed
            immediate) }
          (taicpu(hp1).oper[2]^.typ=top_const) and
          (taicpu(hp1).oper[2]^.val>=-256) and
          (abs(taicpu(hp1).oper[2]^.val)<256)
        ) and
        { don't apply the optimization if the base register is loaded }
        (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
        { the base register may not change between the memory access and the
          add/sub, otherwise folding the adjustment would use a stale value.
          Note: no such check is needed (or meaningful) for oper[2]: the
          offset check above guarantees it is a constant, and reading
          oper[2]^.reg of a top_const operand would merely reinterpret the
          constant's bits as a register number via the toper variant record. }
        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          if taicpu(p).opcode = A_LDR then
            DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
          else
            DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);
          { fold the add/sub into the addressing mode: SUB negates the
            immediate }
          taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).opcode=A_ADD then
            taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
          else
            taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  { Removes a following "fmov reg2,reg1" (movp) when instruction p really
    loads reg1 with a new value and reg2 is unused in between, by making p
    write to reg2 directly.  Also migrates the register (de)allocation
    markers so liveness information stays consistent. }
  function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      Result:=false;
      if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
          ) { or
            (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
            (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
         ) and
        (taicpu(movp).ops=2) and
        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
        { the destination register of the mov might not be used beween p and movp }
        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
        { Take care to only do this for instructions which REALLY load to the first register.
          Otherwise
            str reg0, [reg1]
            fmov reg2, reg0
          will be optimized to
            str reg2, [reg1]
        }
        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          { only proceed if reg1 dies right after the fmov }
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg(SPeepholeOptimization + optimizer+' removed superfluous vmov', movp);
              result:=true;
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
              if assigned(alloc) then
                begin
                  { drop the alloc/dealloc pair entirely }
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { no matching allocation found: keep the deallocation, but
                  move it to directly after p }
                asml.InsertAfter(dealloc,p);
              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;
              { change
                vldr reg0,[reg1]
                vmov reg2,reg0
                into
                ldr reg2,[reg1]
                if reg2 is an int register
                if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
                  taicpu(p).opcode:=A_LDR;
              }
              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  { Pass-1 optimisations for LDR: shared ARM optimisations, post-index
    folding, then removal of a following superfluous MOV. }
  function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
    var
      hpNext: tai;
    begin
      { generic ARM handling and the post-indexed pattern come first }
      Result:=inherited OptPass1LDR(p);
      if not Result then
        Result:=LookForPostindexedPattern(p);
      if Result then
        exit;
      { ldr<postfix> reg0,...; mov reg1,reg0 -> ldr<postfix> reg1,... }
      if (taicpu(p).oppostfix in [PF_B,PF_SB,PF_H,PF_SH,PF_None]) and
        GetNextInstructionUsingReg(p,hpNext,taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p,hpNext,'Ldr<Postfix>Mov2Ldr<Postfix>') then
        Result:=true;
    end;
  { Pass-1 optimisations for STR: shared ARM optimisations, then folding a
    following add/sub of the base register into a post-indexed store. }
  function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
    begin
      Result:=inherited OptPass1STR(p);
      if not Result then
        Result:=LookForPostindexedPattern(p);
    end;
  { Folds a shift-by-immediate into the shifter operand of a following data
    instruction, or removes a superfluous MOV after the shift. }
  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
    var
      hp1,hp2: tai;
      I2, I: Integer;
      shifterop: tshifterop;
    begin
      Result:=false;
      { This folds shifterops into following instructions
        <shiftop> r0, r1, #imm
        <op> r2, r3, r0
        to
        <op> r2, r3, r1, <shiftop> #imm
      }
      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
        MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
          A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
          A_SUB, A_TST], [PF_None]) and
        { the shifted register must die at hp1, otherwise its value is
          still needed afterwards }
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
        (taicpu(hp1).ops >= 2) and
        { Currently we can't fold into another shifterop }
        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
        { SP does not work completely with shifted registers, as I didn't find the exact rules,
          we do not operate on SP }
        (taicpu(hp1).oper[0]^.reg<>NR_SP) and
        (taicpu(hp1).oper[1]^.reg<>NR_SP) and
        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        (
          { Only ONE of the two src operands is allowed to match }
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
        ) and
        { for SUB, the last operand must match, there is no RSB on AArch64 }
        ((taicpu(hp1).opcode<>A_SUB) or
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
        begin
          { for the two operand instructions, start also at the second operand as they are not always commutative
            (depends on the flags tested laster on) and thus the operands cannot swapped }
          I2:=1;
          { find which source operand of hp1 is the shift's destination }
          for I:=I2 to taicpu(hp1).ops-1 do
            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
              begin
                { If the parameter matched on the second op from the RIGHT
                  we have to switch the parameters, this will not happen for CMP
                  were we're only evaluating the most right parameter
                }
                shifterop_reset(shifterop);
                { translate the shift opcode into the equivalent
                  shifter-operand mode }
                case taicpu(p).opcode of
                  A_LSL:
                    shifterop.shiftmode:=SM_LSL;
                  A_ROR:
                    shifterop.shiftmode:=SM_ROR;
                  A_LSR:
                    shifterop.shiftmode:=SM_LSR;
                  A_ASR:
                    shifterop.shiftmode:=SM_ASR;
                  else
                    InternalError(2019090401);
                end;
                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
                if I <> taicpu(hp1).ops-1 then
                  begin
                    { matched on the second-to-last operand: rebuild hp1 with
                      the source operands swapped so the shifted register
                      ends up in the last (shifter-operand) position }
                    if taicpu(hp1).ops = 3 then
                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                        taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
                        taicpu(p).oper[1]^.reg, shifterop)
                    else
                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                        shifterop);
                  end
                else
                  { matched on the last operand: keep the operand order }
                  if taicpu(hp1).ops = 3 then
                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
                      taicpu(p).oper[1]^.reg,shifterop)
                  else
                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                      shifterop);
                { Make sure the register used in the shifting is tracked all
                  the way through, otherwise it may become deallocated while
                  it's still live and cause incorrect optimisations later }
                if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
                    ALlocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
                  end;
                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
                asml.insertbefore(hp2, hp1);
                RemoveInstruction(hp1);
                RemoveCurrentp(p);
                DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
                Result:=true;
                break;
              end;
        end
      { shift reg0,...; mov reg1,reg0 -> shift reg1,... }
      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
        Result:=true;
    end;
  { Pass-1 optimisation for data-processing instructions:
    <op> reg0,...; mov reg1,reg0 -> <op> reg1,... where legal. }
  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
    var
      hpMov: tai;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hpMov,taicpu(p).oper[0]^.reg) then
        Result:=RemoveSuperfluousMove(p,hpMov,'DataMov2Data');
    end;
  { Pass-1 optimisation for floating-point data instructions:
    <fop> reg0,...; fmov reg1,reg0 -> <fop> reg1,... where legal. }
  function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
    var
      hpMov: tai;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hpMov,taicpu(p).oper[0]^.reg) then
        Result:=RemoveSuperfluousFMov(p,hpMov,'FOpFMov2FOp');
    end;
  { Turns a "push frame; call; pop frame; ret" sequence into a tail call. }
  function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
    var
      hp1, hp2, hp3, hp4: tai;
    begin
      Result:=false;
      {
        change
        stp x29,x30,[sp, #-16]!
        mov x29,sp
        bl abc
        ldp x29,x30,[sp], #16
        ret
        into
        b abc
      }
      { p: stp x29,x30,[sp,#-16]!  (frame record push) }
      if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
        MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
        (taicpu(p).oper[0]^.reg = NR_X29) and
        (taicpu(p).oper[1]^.reg = NR_X30) and
        (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[2]^.ref^.index=NR_NO) and
        (taicpu(p).oper[2]^.ref^.offset=-16) and
        (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
        { hp1: mov x29,sp }
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
        { hp2: bl <symbol> }
        GetNextInstruction(hp1, hp2) and
        SkipEntryExitMarker(hp2, hp2) and
        MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
        (taicpu(hp2).oper[0]^.typ = top_ref) and
        { hp3: ldp x29,x30,[sp],#16  (frame record pop) }
        GetNextInstruction(hp2, hp3) and
        SkipEntryExitMarker(hp3, hp3) and
        MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
        MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
        (taicpu(hp3).oper[0]^.reg = NR_X29) and
        (taicpu(hp3).oper[1]^.reg = NR_X30) and
        (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
        (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
        (taicpu(hp3).oper[2]^.ref^.offset=16) and
        (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
        { hp4: ret (with no explicit operand) }
        GetNextInstruction(hp3, hp4) and
        MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
        (taicpu(hp4).ops = 0) then
        begin
          { the call is the last real action: drop prologue/epilogue and
            turn the bl into a tail-call branch }
          asml.Remove(p);
          asml.Remove(hp1);
          asml.Remove(hp3);
          asml.Remove(hp4);
          taicpu(hp2).opcode:=A_B;
          p.free;
          hp1.free;
          hp3.free;
          hp4.free;
          p:=hp2;
          DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
          Result:=true;
        end;
    end;
  { Pass-1 optimisations for MOV: identity-move removal, folding a 32-bit
    move into a following add/sub via the UXTW extended-register form, and
    generic redundant-move elimination. }
  function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
    var
      hp1: tai;
      so: tshifterop;
    begin
      Result:=false;
      { mov reg,reg (no postfix) does nothing -> remove }
      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        (taicpu(p).oppostfix=PF_None) then
        begin
          RemoveCurrentP(p);
          DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
          Result:=true;
        end
      { mov regWd,regWs; add/sub regXd,regXn,regXm (regXm aliasing regWd,
        dying at hp1) -> add/sub regXd,regXn,regWs,UXTW: fold the implicit
        32->64 bit zero extension into the arithmetic instruction }
      else if (taicpu(p).ops=2) and
        (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
        GetNextInstruction(p, hp1) and
        { Faster to get it out of the way than go through MatchInstruction }
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).ops=3) and
        MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
        (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
        (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
        RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
        begin
          DebugMsg(SPeepholeOptimization + 'MovOp2AddUtxw 1 done', p);
          shifterop_reset(so);
          so.shiftmode:=SM_UXTW;
          taicpu(hp1).ops:=4;
          taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadshifterop(3,so);
          RemoveCurrentP(p);
          Result:=true;
          exit;
        end
      {
        optimize
        mov rX, yyyy
        ....
      }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
            Result:=true
          else if (taicpu(p).ops = 2) and
            (tai(hp1).typ = ait_instruction) and
            RedundantMovProcess(p,hp1) then
            Result:=true
        end;
    end;
  { Pass-1 optimisations for MOVZ: rewrite "movz reg,#0" as a zero-register
    move, and drop a MOVZ that is overwritten by a later MOVZ to the same
    register. }
  function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
    var
      hp1: tai;
      ZeroReg: TRegister;
    begin
      Result := False;
      hp1 := nil;
      { only handle plain, unconditional MOVZ }
      if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
        begin
          if
            { Check next instruction first so hp1 gets set to something, then
              if it remains nil, we know for sure that there's no valid next
              instruction. }
            not GetNextInstruction(p, hp1) or
            { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
            not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
            (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[0]^.reg) then
            begin
              if (taicpu(p).oper[1]^.val = 0) then
                begin
                  { Change;
                    movz reg,#0
                    (no movk or movn)
                    To:
                    mov reg,xzr (or wzr)
                    Easier to perform other optimisations with registers
                  }
                  DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);
                  { Make sure the zero register is the correct size }
                  ZeroReg := taicpu(p).oper[0]^.reg;
                  setsupreg(ZeroReg, RS_XZR);
                  taicpu(p).opcode := A_MOV;
                  taicpu(p).loadreg(1, ZeroReg);
                  Result := True;
                  Exit;
                end;
            end;
          {
            remove the first Movz from
            movz reg,...
            movz reg,...
            (p is removed: the second MOVZ fully overwrites the register
            and, as hp1 is the next instruction referencing it, nothing
            reads it in between)
          }
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
            begin
              DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
              RemoveCurrentP(p);
              Result:=true;
              exit;
            end;
        end;
    end;
  612. function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
  613. var
  614. hp1: tai;
  615. alloc, dealloc: tai_regalloc;
  616. begin
  617. {
  618. change
  619. fmov reg0,reg1
  620. fmov reg1,reg0
  621. into
  622. fmov reg0,reg1
  623. }
  624. Result := False;
  625. while GetNextInstruction(p, hp1) and
  626. MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  627. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  628. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
  629. begin
  630. asml.Remove(hp1);
  631. hp1.free;
  632. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
  633. Result:=true;
  634. end;
  635. { change
  636. fmov reg0,const
  637. fmov reg1,reg0
  638. dealloc reg0
  639. into
  640. fmov reg1,const
  641. }
  642. if MatchOpType(taicpu(p),top_reg,top_realconst) and
  643. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  644. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  645. MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
  646. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  647. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
  648. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  649. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))
  650. then
  651. begin
  652. DebugMsg('Peephole FMovFMov2FMov 2 done', p);
  653. taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
  654. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  655. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  656. if assigned(alloc) and assigned(dealloc) then
  657. begin
  658. asml.Remove(alloc);
  659. alloc.Free;
  660. asml.Remove(dealloc);
  661. dealloc.Free;
  662. end;
  663. { p will be removed, update used register as we continue
  664. with the next instruction after p }
  665. result:=RemoveCurrentP(p);
  666. end;
  667. { not enabled as apparently not happening
  668. if MatchOpType(taicpu(p),top_reg,top_reg) and
  669. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  670. MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
  671. (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
  672. ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
  673. ) and
  674. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  675. not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  676. begin
  677. DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
  678. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  679. if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  680. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  681. if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
  682. taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
  683. RemoveCurrentP(p);
  684. Result:=true;
  685. exit;
  686. end;
  687. }
  688. end;
  689. function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean;
  690. var
  691. hp1: tai;
  692. GetNextInstructionUsingReg_hp1: Boolean;
  693. begin
  694. Result:=false;
  695. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  696. begin
  697. {
  698. change
  699. sxtw reg2,reg1
  700. str reg2,[...]
  701. dealloc reg2
  702. to
  703. str reg1,[...]
  704. }
  705. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  706. (taicpu(p).ops=2) and
  707. MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
  708. (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  709. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  710. { the reference in strb might not use reg2 }
  711. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  712. { reg1 might not be modified inbetween }
  713. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  714. begin
  715. DebugMsg('Peephole SXTHStr2Str done', p);
  716. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  717. result:=RemoveCurrentP(p);
  718. end
  719. {
  720. change
  721. sxtw reg2,reg1
  722. sxtw reg3,reg2
  723. dealloc reg2
  724. to
  725. sxtw reg3,reg1
  726. }
  727. else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
  728. (taicpu(p).ops=2) and
  729. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  730. (taicpu(hp1).ops=2) and
  731. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  732. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  733. { reg1 might not be modified inbetween }
  734. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  735. begin
  736. DebugMsg('Peephole SxtwSxtw2Sxtw done', p);
  737. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  738. taicpu(hp1).opcode:=A_SXTW;
  739. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  740. result:=RemoveCurrentP(p);
  741. end
  742. else if USxtOp2Op(p,hp1,SM_SXTW) then
  743. Result:=true
  744. else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
  745. Result:=true;
  746. end;
  747. end;
{ Pass-1 optimisation for conditional branches: recognises the classic
  "branch over two constant moves" diamond that materialises a boolean and
  replaces it with a single CSET.  Works for both orderings of the 0/1
  constants (inverting the condition as needed) and still makes a partial
  saving when the middle label has other references. }
function TCpuAsmOptimizer.OptPass1B(var p: tai): boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
    Invert: Boolean;
  begin
    Result := False;
    {
      convert
        b<c>  .L1
        movz  reg,#1`
        b     .L2
      .L1
        movz  reg,#0 (or mov reg,xzr)
      .L2
      into
        cset  reg,<not(c)>
      Also do the same if the constants are reversed, instead converting it to:
        cset  reg,<c>
    }
    if (taicpu(p).condition <> C_None) and
      (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p, hp1) and
      { Check individually instead of using MatchInstruction in order to save time }
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).condition = C_None) and
      (taicpu(hp1).oppostfix = PF_None) and
      (taicpu(hp1).ops = 2) and
      (
        { hp1 must be "movz reg,#0/#1" or "mov reg,xzr" }
        (
          (taicpu(hp1).opcode = A_MOVZ) and
          (taicpu(hp1).oper[1]^.val in [0, 1])
        ) or
        (
          (taicpu(hp1).opcode = A_MOV) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = RS_XZR)
        )
      ) and
      { hp2 must be the unconditional jump over the second move }
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_B, [PF_None]) and
      (taicpu(hp2).condition = C_None) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      { hp3 must be the label targeted by the initial conditional branch }
      GetNextInstruction(hp2, hp3) and
      (hp3.typ = ait_label) and
      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol) = tai_label(hp3).labsym) and
      GetNextInstruction(hp3, hp4) and
      { As before, check individually instead of using MatchInstruction in order to save time }
      (hp4.typ = ait_instruction) and
      (taicpu(hp4).condition = C_None) and
      (taicpu(hp4).oppostfix = PF_None) and
      (taicpu(hp4).ops = 2) and
      { both moves must target the same register }
      (taicpu(hp4).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
      (
        (
          (taicpu(hp4).opcode = A_MOVZ) and
          (
            (
              { Check to confirm the following:
                - First mov is either "movz reg,#0" or "mov reg,xzr"
                - Second mov is "movz reg,#1"
              }
              (
                (taicpu(hp1).oper[1]^.typ = top_reg) { Will be the zero register } or
                (taicpu(hp1).oper[1]^.val = 0)
              ) and
              (taicpu(hp4).oper[1]^.val = 1)
            ) or
            (
              { Check to confirm the following:
                - First mov is "movz reg,#1"
                - Second mov is "movz reg,#0"
              }
              MatchOperand(taicpu(hp1).oper[1]^, 1) and
              (taicpu(hp4).oper[1]^.val = 0)
            )
          )
        ) or
        (
          { Check to confirm the following:
            - First mov is "movz reg,#1"
            - Second mov is "mov reg,xzr"
          }
          (taicpu(hp4).opcode = A_MOV) and
          (getsupreg(taicpu(hp4).oper[1]^.reg) = RS_XZR) and
          MatchOperand(taicpu(hp1).oper[1]^, 1)
        )
      ) and
      { hp5 must be the end label targeted by the unconditional jump }
      GetNextInstruction(hp4, hp5) and
      (hp5.typ = ait_label) and
      (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol) = tai_label(hp5).labsym) then
      begin
        Invert := MatchOperand(taicpu(hp1).oper[1]^, 1); { if true, hp4 will be mov reg,0 in some form }
        if Invert then
          taicpu(p).condition := inverse_cond(taicpu(p).condition);
        tai_label(hp3).labsym.DecRefs;
        { If this isn't the only reference to the middle label, we can
          still make a saving - only that the first jump and everything
          that follows will remain. }
        if (tai_label(hp3).labsym.getrefs = 0) then
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0bMovZ1 -> Cset(c)',p);
            { remove jump, first label and second MOV (also catching any aligns) }
            repeat
              if not GetNextInstruction(hp2, hp3) then
                InternalError(2022070801);
              RemoveInstruction(hp2);
              hp2 := hp3;
            until hp2 = hp5;
            { Don't decrement reference count before the removal loop
              above, otherwise GetNextInstruction won't stop on the
              the label }
            tai_label(hp5).labsym.DecRefs;
          end
        else
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c) (partial)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c) (partial)',p);
          end;
        { Reuse the first move as the CSET, then delete the conditional branch }
        taicpu(hp1).opcode := A_CSET;
        taicpu(hp1).loadconditioncode(1, taicpu(p).condition);
        RemoveCurrentP(p, hp1);
        Result:=true;
        exit;
      end;
  end;
  877. function TCpuAsmOptimizer.OptPass2B(var p: tai): Boolean;
  878. var
  879. hp1: tai;
  880. CSELTracking: PCSELTracking;
  881. begin
  882. Result := False;
  883. if (taicpu(p).condition <> C_None) and
  884. IsJumpToLabel(taicpu(p)) and
  885. GetNextInstruction(p, hp1) and
  886. (hp1.typ = ait_instruction) and
  887. (taicpu(hp1).opcode = A_MOV) then
  888. begin
  889. { check for
  890. jCC xxx
  891. <several movs>
  892. xxx:
  893. Also spot:
  894. Jcc xxx
  895. <several movs>
  896. jmp xxx
  897. Change to:
  898. <several csets with inverted condition>
  899. jmp xxx (only for the 2nd case)
  900. }
  901. CSELTracking := New(PCSELTracking, Init(Self, p, hp1, TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol)));
  902. if CSELTracking^.State <> tsInvalid then
  903. begin
  904. CSELTracking^.Process(p);
  905. Result := True;
  906. end;
  907. CSELTracking^.Done;
  908. end;
  909. end;
{ Pass-2 optimisation for LDR/STR: merges two nearby loads or stores with
  adjacent offsets from the same base/index into a single LDP/STP pair.
  Handles both orderings (second access 8/4 bytes above or below the first)
  and respects the narrower immediate-offset range of LDP/STP. }
function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
  var
    hp1, hp1_last: tai;
    ThisRegister: TRegister;
    OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
    TargetOpcode: TAsmOp;
  begin
    Result := False;
    ThisRegister := taicpu(p).oper[0]^.reg;
    case taicpu(p).opcode of
      A_LDR:
        TargetOpcode := A_LDP;
      A_STR:
        TargetOpcode := A_STP;
      else
        InternalError(2020081501);
    end;
    { reg appearing in ref invalidates these optimisations }
    if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
      begin
        { LDP/STP has a smaller permitted offset range than LDR/STR.
          TODO: For a group of out-of-range LDR/STR instructions, can
          we declare a temporary register equal to the offset base
          address, modify the STR instructions to use that register
          and then convert them to STP instructions?  Note that STR
          generally takes 2 cycles (on top of the memory latency),
          while LDP/STP takes 3.
        }
        if (getsubreg(ThisRegister) = R_SUBQ) then
          begin
            { 64-bit registers: pair mate sits 8 bytes away; LDP/STP signed
              7-bit immediate scaled by 8 }
            ValidOffset := 8;
            MinOffset := -512;
            MaxOffset := 504;
          end
        else
          begin
            { 32-bit registers: pair mate sits 4 bytes away; immediate scaled by 4 }
            ValidOffset := 4;
            MinOffset := -256;
            MaxOffset := 252;
          end;
        hp1_last := p;
        { Look for nearby LDR/STR instructions }
        if (taicpu(p).oppostfix = PF_NONE) and
          (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
          { If SkipGetNext is True, GextNextInstruction isn't called }
          while GetNextInstruction(hp1_last, hp1) do
            begin
              if (hp1.typ <> ait_instruction) then
                Break;
              if (taicpu(hp1).opcode = taicpu(p).opcode) then
                begin
                  if (taicpu(hp1).oppostfix = PF_NONE) and
                    { Registers need to be the same size }
                    (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
                    (
                      (TargetOpcode = A_STP) or
                      { LDP x0, x0, [sp, #imm] is undefined behaviour, even
                        though such an LDR pair should have been optimised
                        out by now. STP is okay }
                      (ThisRegister <> taicpu(hp1).oper[0]^.reg)
                    ) and
                    (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
                    (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
                    (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
                    { Make sure the address registers haven't changed }
                    not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
                    (
                      (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
                      not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
                    ) and
                    { Don't need to check "RegInRef" because the base registers are identical,
                      and the first one was checked already. [Kit] }
                    (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
                     ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
                    begin
                      { Can we convert these two LDR/STR instructions into a
                        single LDR/STP? }
                      OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
                      if (OffsetVal = ValidOffset) then
                        begin
                          if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs]
                                  ...
                                  LDR/STR reg1. [reg2, #ofs + 8] // 4 if registers are 32-bit
                                To:
                                  LDP/STP reg0, reg1, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end
                      else if (OffsetVal = -ValidOffset) then
                        begin
                          if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                  ...
                                  LDR/STR reg1. [reg2, #ofs]
                                To:
                                  LDP/STP reg1, reg0, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
                              taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end;
                    end;
                end
              else
                Break;
              { Don't continue looking for LDR/STR pairs if the address register
                gets modified }
              if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
                Break;
              hp1_last := hp1;
            end;
      end;
  end;
{ Post-peephole optimisation for AND: collapses an AND with a power-of-two
  mask, a compare against zero and a conditional branch into a single
  test-bit-and-branch (TBZ/TBNZ) instruction. }
function TCpuAsmOptimizer.PostPeepholeOptAND(var p: tai): Boolean;
  var
    hp1, hp2: tai;
    hp3: taicpu;
    bitval : cardinal;
  begin
    Result:=false;
    {
      and  reg1,reg0,<const=power of 2>
      cmp  reg1,#0
      <reg1 end of life>
      b.e/b.ne label
      into
      tb(n)z reg0,<power of 2>,label
    }
    if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
      { exactly one bit set -> a bit test }
      (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_CMP,[PF_None]) and
      MatchOpType(taicpu(hp1),top_reg,top_const) and
      (taicpu(hp1).oper[1]^.val=0) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      { the masked result must not be needed afterwards }
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_B,[PF_None]) and
      (taicpu(hp2).condition in [C_EQ,C_NE]) then
      begin
        { index of the single set bit = bit number to test }
        bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
        case taicpu(hp2).condition of
          C_NE:
            hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          C_EQ:
            hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          else
            Internalerror(2021100201);
        end;
        taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
        asml.insertbefore(hp3, hp1);
        { drop the CMP, the branch and the AND itself }
        RemoveInstruction(hp1);
        RemoveInstruction(hp2);
        RemoveCurrentP(p);
        DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
        Result:=true;
      end;
  end;
{ Post-peephole optimisation for CMP: fuses a compare against zero followed
  by b.eq/b.ne into a single CBZ/CBNZ. }
function TCpuAsmOptimizer.PostPeepholeOptCMP(var p : tai): boolean;
  var
    hp1,hp2: tai;
  begin
    Result:=false;
    {
      cmp reg0,#0
      b.e/b.ne label
      into
      cb(n)z reg0,label
    }
    if MatchOpType(taicpu(p),top_reg,top_const) and
      { CBZ/CBNZ cannot encode the stack pointer }
      (taicpu(p).oper[0]^.reg<>NR_SP) and
      (taicpu(p).oper[1]^.val=0) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_B,[PF_None]) and
      (taicpu(hp1).condition in [C_EQ,C_NE]) then
      begin
        case taicpu(hp1).condition of
          C_NE:
            hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
          C_EQ:
            hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
          else
            Internalerror(2019090801);
        end;
        taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
        asml.insertbefore(hp2, hp1);
        { remove both original instructions manually and make the new
          CBZ/CBNZ the current instruction for the caller }
        asml.remove(p);
        asml.remove(hp1);
        p.free;
        hp1.free;
        p:=hp2;
        DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
        Result:=true;
      end;
  end;
{ Post-peephole optimisation for TST: fuses a single-bit test followed by
  b.eq/b.ne into a single TBZ/TBNZ. }
function TCpuAsmOptimizer.PostPeepholeOptTST(var p : tai): boolean;
  var
    hp1: tai;
    hp3: taicpu;
    bitval : cardinal;
  begin
    Result:=false;
    {
      tst reg1,<const=power of 2>
      b.e/b.ne label
      into
      tb(n)z reg0,<power of 2>,label
    }
    if MatchOpType(taicpu(p),top_reg,top_const) and
      { exactly one bit set -> a bit test }
      (PopCnt(QWord(taicpu(p).oper[1]^.val))=1) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_B,[C_EQ,C_NE],[PF_None]) then
      begin
        { index of the single set bit = bit number to test }
        bitval:=BsfQWord(qword(taicpu(p).oper[1]^.val));
        case taicpu(hp1).condition of
          C_NE:
            hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
          C_EQ:
            hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
          else
            Internalerror(2021100210);
        end;
        taicpu(hp3).fileinfo:=taicpu(p).fileinfo;
        asml.insertafter(hp3, p);
        { remove the branch and the TST, leaving p on the new TBZ/TBNZ }
        RemoveInstruction(hp1);
        RemoveCurrentP(p, hp3);
        DebugMsg(SPeepholeOptimization + 'TST; B(E/NE) -> TB(Z/NZ) done', p);
        Result:=true;
      end;
  end;
  1168. function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
  1169. begin
  1170. result := false;
  1171. if p.typ=ait_instruction then
  1172. begin
  1173. case taicpu(p).opcode of
  1174. A_SBFX,
  1175. A_UBFX:
  1176. Result:=OptPreSBFXUBFX(p);
  1177. else
  1178. ;
  1179. end;
  1180. end;
  1181. end;
  1182. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1183. begin
  1184. result := false;
  1185. if p.typ=ait_instruction then
  1186. begin
  1187. case taicpu(p).opcode of
  1188. A_B:
  1189. Result:=OptPass1B(p);
  1190. A_LDR:
  1191. Result:=OptPass1LDR(p);
  1192. A_STR:
  1193. Result:=OptPass1STR(p);
  1194. A_MOV:
  1195. Result:=OptPass1Mov(p);
  1196. A_MOVZ:
  1197. Result:=OptPass1MOVZ(p);
  1198. A_STP:
  1199. Result:=OptPass1STP(p);
  1200. A_LSR,
  1201. A_ROR,
  1202. A_ASR,
  1203. A_LSL:
  1204. Result:=OptPass1Shift(p);
  1205. A_AND:
  1206. Result:=OptPass1And(p);
  1207. A_NEG,
  1208. A_CSEL,
  1209. A_ADD,
  1210. A_ADC,
  1211. A_SUB,
  1212. A_SBC,
  1213. A_BIC,
  1214. A_EOR,
  1215. A_ORR,
  1216. A_MUL:
  1217. Result:=OptPass1Data(p);
  1218. A_UXTB:
  1219. Result:=OptPass1UXTB(p);
  1220. A_UXTH:
  1221. Result:=OptPass1UXTH(p);
  1222. A_SXTB:
  1223. Result:=OptPass1SXTB(p);
  1224. A_SXTH:
  1225. Result:=OptPass1SXTH(p);
  1226. A_SXTW:
  1227. Result:=OptPass1SXTW(p);
  1228. // A_VLDR,
  1229. A_FMADD,
  1230. A_FMSUB,
  1231. A_FNMADD,
  1232. A_FNMSUB,
  1233. A_FNMUL,
  1234. A_FADD,
  1235. A_FMUL,
  1236. A_FDIV,
  1237. A_FSUB,
  1238. A_FSQRT,
  1239. A_FNEG,
  1240. A_FCVT,
  1241. A_FABS:
  1242. Result:=OptPass1FData(p);
  1243. A_FMOV:
  1244. Result:=OptPass1FMov(p);
  1245. else
  1246. ;
  1247. end;
  1248. end;
  1249. end;
  1250. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  1251. begin
  1252. result := false;
  1253. if p.typ=ait_instruction then
  1254. begin
  1255. case taicpu(p).opcode of
  1256. A_AND:
  1257. Result := OptPass2AND(p);
  1258. A_B:
  1259. Result:=OptPass2B(p);
  1260. A_LDR,
  1261. A_STR:
  1262. Result:=OptPass2LDRSTR(p);
  1263. A_TST:
  1264. Result := OptPass2TST(p);
  1265. else
  1266. ;
  1267. end;
  1268. end;
  1269. end;
  1270. function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
  1271. begin
  1272. result := false;
  1273. if p.typ=ait_instruction then
  1274. begin
  1275. case taicpu(p).opcode of
  1276. A_CMP:
  1277. Result:=PostPeepholeOptCMP(p);
  1278. A_AND:
  1279. Result:=PostPeepholeOptAND(p);
  1280. A_TST:
  1281. Result:=PostPeepholeOptTST(p);
  1282. else
  1283. ;
  1284. end;
  1285. end;
  1286. end;
{ Walks the regalloc/label/marker entries starting at p and marks every
  integer register that gets allocated as "in use" in AUsedRegs, while
  deliberately ignoring deallocations (so a register stays flagged once
  seen).  Stops at the first "real" entry (instruction, or a label that
  cannot be skipped). }
class procedure TCpuAsmOptimizer.UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai);
  begin
    { Update integer registers, ignoring deallocations }
    repeat
      { skip non-regalloc bookkeeping entries (labels, certain markers, etc.) }
      while assigned(p) and
        ((p.typ in (SkipInstr - [ait_RegAlloc])) or
         (p.typ = ait_label) or
         ((p.typ = ait_marker) and
          (tai_Marker(p).Kind in [mark_AsmBlockEnd,mark_NoLineInfoStart,mark_NoLineInfoEnd]))) do
        p := tai(p.next);
      { process a run of regalloc entries }
      while assigned(p) and
        (p.typ=ait_RegAlloc) Do
        begin
          if (getregtype(tai_regalloc(p).reg) = R_INTREGISTER) then
            begin
              case tai_regalloc(p).ratype of
                ra_alloc :
                  IncludeRegInUsedRegs(tai_regalloc(p).reg, AUsedRegs);
                else
                  { ra_dealloc and friends are intentionally ignored };
              end;
            end;
          p := tai(p.next);
        end;
    until not(assigned(p)) or
      (not(p.typ in SkipInstr) and
       not((p.typ = ait_label) and
           labelCanBeSkipped(tai_label(p))));
  end;
{ Attempts to allocate a volatile integer register for use between p and hp,
  using AUsedRegs for the current register usage information.  Returns NR_NO
  if no free register could be found.  When DontAlloc is False (the default)
  the chosen register is also marked allocated over the p..hp range. }
function TCpuAsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  var
    RegSet: TCPURegisterSet;
    CurrentSuperReg: Integer;
    CurrentReg: TRegister;
    Currentp: tai;
    Breakout: Boolean;
  begin
    Result := NR_NO;
    { Candidates: volatile registers of the current calling convention plus
      the callee-saved registers this procedure already saves }
    RegSet :=
      paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
      current_procinfo.saved_regs_int;
    (*
    { Don't use the frame register unless explicitly allowed (fixes i40111) }
    if ([cs_useebp, cs_userbp] * current_settings.optimizerswitches) = [] then
      Exclude(RegSet, RS_FRAME_POINTER_REG);
    *)
    for CurrentSuperReg in RegSet do
      begin
        CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
        if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg)
          then
          begin
            { Scan the instructions between p and hp to verify the register
              really is free over the whole range }
            Currentp := p;
            Breakout := False;
            while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
              begin
                case Currentp.typ of
                  ait_instruction:
                    begin
                      if RegInInstruction(CurrentReg, Currentp) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                      { Cannot allocate across an unconditional jump }
                      if is_calljmpmaybeuncondret(taicpu(Currentp).opcode) and (taicpu(Currentp).condition = C_None) then
                        Exit;
                    end;
                  ait_marker:
                    { Don't try anything more if a marker is hit }
                    Exit;
                  ait_regalloc:
                    { an (re)allocation of the same super-register inside the
                      range means it is not free }
                    if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                      begin
                        Breakout := True;
                        Break;
                      end;
                  else
                    ;
                end;
              end;
            if Breakout then
              { Try the next register }
              Continue;
            { We have a free register available }
            Result := CurrentReg;
            if not DontAlloc then
              AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
            Exit;
          end;
      end;
  end;
{ Scans a candidate block of MOV instructions for CSEL conversion, starting
  at BlockStart.  On success (Result=True) BlockStop is set to the entry the
  scan stopped on (a label, or the unconditional branch returned in EndJump);
  EndJump is nil when the block ends directly on a label.  Fails when the
  block contains anything other than plain MOVs (or touches SP), or runs
  into the end of the code list. }
function TCSELTracking.InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
  begin
    Result := False;
    EndJump := nil;
    BlockStop := nil;
    while (BlockStart <> fOptimizer.BlockEnd) and
      { stop on labels }
      (BlockStart.typ <> ait_label) do
      begin
        { Keep track of all integer registers that are used }
        fOptimizer.UpdateIntRegsNoDealloc(RegisterTracking, tai(OneBeforeBlock.Next));
        if BlockStart.typ = ait_instruction then
          begin
            if MatchInstruction(BlockStart, A_B, [C_None], []) then
              begin
                { only a direct jump to a plain label is acceptable here }
                if not IsJumpToLabel(taicpu(BlockStart)) or
                  (JumpTargetOp(taicpu(BlockStart))^.ref^.index <> NR_NO) then
                  Exit;
                EndJump := BlockStart;
                Break;
              end
            { Check to see if we have a valid MOV instruction instead }
            else if (taicpu(BlockStart).opcode <> A_MOV) or
              { Can't include the stack pointer in CSEL }
              fOptimizer.RegInInstruction(NR_SP, BlockStart) then
              begin
                Exit;
              end
            else
              { This will be a valid MOV }
              fAllocationRange := BlockStart;
          end;
        OneBeforeBlock := BlockStart;
        fOptimizer.GetNextInstruction(BlockStart, BlockStart);
      end;
    { reaching the end of the list without a label/jump is a failure }
    if (BlockStart = fOptimizer.BlockEnd) then
      Exit;
    BlockStop := BlockStart;
    Result := True;
  end;
{ Scores a block of MOVs (BlockStart..BlockStop) for CSEL conversion.
  Returns the weighted instruction count, or -1 when the block contains a
  MOV that can be neither converted directly (reg,reg) nor handled via a
  reserved constant register (TryCSELConst).  SearchStart is passed through
  to TryCSELConst as the insertion point for any MOV const,reg it creates. }
function TCSELTracking.AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
  var
    hp1: tai;
    RefModified: Boolean;
  begin
    Result := 0;
    hp1 := BlockStart;
    RefModified := False; { As long as the condition is inverted, this can be reset }
    { NOTE(review): RefModified is not read within this routine's visible
      body - presumably used by code outside this chunk or vestigial }
    while assigned(hp1) and
      (hp1 <> BlockStop) do
      begin
        case hp1.typ of
          ait_instruction:
            if MatchInstruction(hp1, A_MOV, []) then
              begin
                Inc(Result);
                if taicpu(hp1).oper[1]^.typ = top_reg then
                  begin
                    { reg,reg MOV: counts double but is always convertible }
                    Inc(Result);
                  end
                else if not (cs_opt_size in current_settings.optimizerswitches) and
                  { CSEL with constants grows the code size }
                  TryCSELConst(hp1, SearchStart, BlockStop, Result) then
                  begin
                    { Register was reserved by TryCSELConst and
                      stored on ConstRegs }
                  end
                else
                  begin
                    Result := -1;
                    Exit;
                  end;
              end
            else
              begin
                { any non-MOV instruction disqualifies the block }
                Result := -1;
                Exit;
              end;
          else
            { Most likely an align };
        end;
        fOptimizer.GetNextInstruction(hp1, hp1);
      end;
  end;
  1466. constructor TCSELTracking.Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
  1467. { For the tsBranching type, increase the weighting score to account for the new conditional jump
  1468. (this is done as a separate stage because the double types are extensions of the branching type,
  1469. but we can't discount the conditional jump until the last step) }
  1470. procedure EvaluateBranchingType;
  1471. begin
  1472. Inc(CSELScore);
  1473. if (CSELScore > MAX_CSEL_INSTRUCTIONS) then
  1474. { Too many instructions to be worthwhile }
  1475. fState := tsInvalid;
  1476. end;
  1477. var
  1478. hp1: tai;
  1479. Count: Integer;
  1480. begin
  1481. { Table of valid CSEL block types
  1482. Block type 2nd Jump Mid-label 2nd MOVs 3rd Jump End-label
  1483. ---------- --------- --------- --------- --------- ---------
  1484. tsSimple X Yes X X X
  1485. tsDetour = 1st X X X X
  1486. tsBranching <> Mid Yes X X X
  1487. tsDouble End-label Yes * Yes X Yes
  1488. tsDoubleBranchSame <> Mid Yes * Yes = 2nd X
  1489. tsDoubleBranchDifferent <> Mid Yes * Yes <> 2nd X
  1490. tsDoubleSecondBranching End-label Yes * Yes <> 2nd Yes
  1491. * Only one reference allowed
  1492. }
  1493. hp1 := nil; { To prevent compiler warnings }
  1494. Optimizer.CopyUsedRegs(RegisterTracking);
  1495. fOptimizer := Optimizer;
  1496. fLabel := AFirstLabel;
  1497. CSELScore := 0;
  1498. ConstCount := 0;
  1499. { Initialise RegWrites, ConstRegs, ConstVals, ConstSizes, ConstWriteSizes and ConstMovs }
  1500. FillChar(RegWrites[0], MAX_CSEL_INSTRUCTIONS * 2 * SizeOf(TRegister), 0);
  1501. FillChar(ConstRegs[0], MAX_CSEL_REGISTERS * SizeOf(TRegister), 0);
  1502. FillChar(ConstVals[0], MAX_CSEL_REGISTERS * SizeOf(TCGInt), 0);
  1503. FillChar(ConstSizes[0], MAX_CSEL_REGISTERS * SizeOf(TSubRegister), 0);
  1504. FillChar(ConstWriteSizes[0], first_int_imreg * SizeOf(TOpSize), 0);
  1505. FillChar(ConstMovs[0], MAX_CSEL_REGISTERS * SizeOf(taicpu), 0);
  1506. fInsertionPoint := p_initialjump;
  1507. fCondition := nil;
  1508. fInitialJump := p_initialjump;
  1509. fFirstMovBlock := p_initialmov;
  1510. fFirstMovBlockStop := nil;
  1511. fSecondJump := nil;
  1512. fSecondMovBlock := nil;
  1513. fSecondMovBlockStop := nil;
  1514. fMidLabel := nil;
  1515. fSecondJump := nil;
  1516. fSecondMovBlock := nil;
  1517. fEndLabel := nil;
  1518. fAllocationRange := nil;
  1519. { Assume it all goes horribly wrong! }
  1520. fState := tsInvalid;
  1521. { Look backwards at the comparisons to get an accurate picture of register usage and a better position for any MOV const,reg insertions }
  1522. if Optimizer.GetLastInstruction(p_initialjump, fCondition) and
  1523. (
  1524. MatchInstruction(fCondition, [A_CMP, A_CMN, A_TST], []) or
  1525. (
  1526. (fCondition.typ = ait_instruction) and
  1527. (taicpu(fCondition).opcode = A_AND) and
  1528. (taicpu(fCondition).oppostfix = PF_S)
  1529. )
  1530. ) then
  1531. begin
  1532. { Mark all the registers in the comparison as 'in use', even if they've just been deallocated }
  1533. for Count := 0 to taicpu(fCondition).ops - 1 do
  1534. with taicpu(fCondition).oper[Count]^ do
  1535. case typ of
  1536. top_reg:
  1537. if getregtype(reg) = R_INTREGISTER then
  1538. Optimizer.IncludeRegInUsedRegs(reg, RegisterTracking);
  1539. top_ref:
  1540. begin
  1541. if
  1542. (ref^.base <> NR_NO) then
  1543. Optimizer.IncludeRegInUsedRegs(ref^.base, RegisterTracking);
  1544. if (ref^.index <> NR_NO) then
  1545. Optimizer.IncludeRegInUsedRegs(ref^.index, RegisterTracking);
  1546. end
  1547. else
  1548. ;
  1549. end;
  1550. { When inserting instructions before hp_prev, try to insert them
  1551. before the allocation of the FLAGS register }
  1552. if not SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(fCondition.Previous)), fInsertionPoint) or
  1553. (tai_regalloc(fInsertionPoint).ratype = ra_dealloc) then
  1554. { If not found, set it equal to the condition so it's something sensible }
  1555. fInsertionPoint := fCondition;
  1556. end
  1557. else
  1558. fCondition := nil;
  1559. { When inserting instructions, try to insert them before the allocation of the FLAGS register }
  1560. if SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p_initialjump.Previous)), hp1) and
  1561. (tai_regalloc(hp1).ratype <> ra_dealloc) then
  1562. { If not found, set it equal to p so it's something sensible }
  1563. fInsertionPoint := hp1;
  1564. hp1 := p_initialmov;
  1565. if not InitialiseBlock(p_initialmov, p_initialjump, fFirstMovBlockStop, fSecondJump) then
  1566. Exit;
  1567. hp1 := fFirstMovBlockStop; { Will either be on a label or a jump }
  1568. if (hp1.typ <> ait_label) then { should be on a jump }
  1569. begin
  1570. if not Optimizer.GetNextInstruction(hp1, fMidLabel) or (fMidLabel.typ <> ait_label) then
  1571. { Need a label afterwards }
  1572. Exit;
  1573. end
  1574. else
  1575. fMidLabel := hp1;
  1576. if tai_label(fMidLabel).labsym <> AFirstLabel then
  1577. { Not the correct label }
  1578. fMidLabel := nil;
  1579. if not Assigned(fSecondJump) and not Assigned(fMidLabel) then
  1580. { If there's neither a 2nd jump nor correct label, then it's invalid
  1581. (see above table) }
  1582. Exit;
  1583. { Analyse the first block of MOVs more closely }
  1584. CSELScore := AnalyseMOVBlock(fFirstMovBlock, fFirstMovBlockStop, fInsertionPoint);
  1585. if Assigned(fSecondJump) then
  1586. begin
  1587. if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = AFirstLabel) then
  1588. begin
  1589. fState := tsDetour
  1590. end
  1591. else
  1592. begin
  1593. { Need the correct mid-label for this one }
  1594. if not Assigned(fMidLabel) then
  1595. Exit;
  1596. fState := tsBranching;
  1597. end;
  1598. end
  1599. else
  1600. { No jump. but mid-label is present }
  1601. fState := tsSimple;
  1602. if (CSELScore > MAX_CSEL_INSTRUCTIONS) or (CSELScore <= 0) then
  1603. begin
  1604. { Invalid or too many instructions to be worthwhile }
  1605. fState := tsInvalid;
  1606. Exit;
  1607. end;
  1608. { check further for
  1609. b xxx
  1610. <several movs 1>
  1611. bl yyy
  1612. xxx:
  1613. <several movs 2>
  1614. yyy:
  1615. etc.
  1616. }
  1617. if (fState = tsBranching) and
  1618. { Estimate for required savings for extra jump }
  1619. (CSELScore <= MAX_CSEL_INSTRUCTIONS - 1) and
  1620. { Only one reference is allowed for double blocks }
  1621. (AFirstLabel.getrefs = 1) then
  1622. begin
  1623. Optimizer.GetNextInstruction(fMidLabel, hp1);
  1624. fSecondMovBlock := hp1;
  1625. if not InitialiseBlock(fSecondMovBlock, fMidLabel, fSecondMovBlockStop, fThirdJump) then
  1626. begin
  1627. EvaluateBranchingType;
  1628. Exit;
  1629. end;
  1630. hp1 := fSecondMovBlockStop; { Will either be on a label or a jump }
  1631. if (hp1.typ <> ait_label) then { should be on a jump }
  1632. begin
  1633. if not Optimizer.GetNextInstruction(hp1, fEndLabel) or (fEndLabel.typ <> ait_label) then
  1634. begin
  1635. { Need a label afterwards }
  1636. EvaluateBranchingType;
  1637. Exit;
  1638. end;
  1639. end
  1640. else
  1641. fEndLabel := hp1;
  1642. if tai_label(fEndLabel).labsym <> JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol then
  1643. { Second jump doesn't go to the end }
  1644. fEndLabel := nil;
  1645. if not Assigned(fThirdJump) and not Assigned(fEndLabel) then
  1646. begin
  1647. { If there's neither a 3rd jump nor correct end label, then it's
  1648. not a invalid double block, but is a valid single branching
  1649. block (see above table) }
  1650. EvaluateBranchingType;
  1651. Exit;
  1652. end;
  1653. Count := AnalyseMOVBlock(fSecondMovBlock, fSecondMovBlockStop, fMidLabel);
  1654. if (Count > MAX_CSEL_INSTRUCTIONS) or (Count <= 0) then
  1655. { Invalid or too many instructions to be worthwhile }
  1656. Exit;
  1657. Inc(CSELScore, Count);
  1658. if Assigned(fThirdJump) then
  1659. begin
  1660. if not Assigned(fSecondJump) then
  1661. fState := tsDoubleSecondBranching
  1662. else if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = JumpTargetOp(taicpu(fThirdJump))^.ref^.symbol) then
  1663. fState := tsDoubleBranchSame
  1664. else
  1665. fState := tsDoubleBranchDifferent;
  1666. end
  1667. else
  1668. fState := tsDouble;
  1669. end;
  1670. if fState = tsBranching then
  1671. EvaluateBranchingType;
  1672. end;
  1673. { Tries to convert a mov const,%reg instruction into a CSEL by reserving a
  1674. new register to store the constant }
  1675. function TCSELTracking.TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
  1676. var
  1677. RegSize: TSubRegister;
  1678. CurrentVal: TCGInt;
  1679. ANewReg: TRegister;
  1680. X: ShortInt;
  1681. begin
  1682. Result := False;
  1683. if not MatchOpType(taicpu(p), top_reg, top_const) then
  1684. Exit;
  1685. if ConstCount >= MAX_CSEL_REGISTERS then
  1686. { Arrays are full }
  1687. Exit;
  1688. { See if the value has already been reserved for another CSEL instruction }
  1689. CurrentVal := taicpu(p).oper[1]^.val;
  1690. RegSize := getsubreg(taicpu(p).oper[0]^.reg);
  1691. for X := 0 to ConstCount - 1 do
  1692. if ConstVals[X] = CurrentVal then
  1693. begin
  1694. ConstRegs[ConstCount] := ConstRegs[X];
  1695. ConstSizes[ConstCount] := RegSize;
  1696. ConstVals[ConstCount] := CurrentVal;
  1697. Inc(ConstCount);
  1698. Inc(Count);
  1699. Result := True;
  1700. Exit;
  1701. end;
  1702. ANewReg := fOptimizer.GetIntRegisterBetween(R_SUBWHOLE, RegisterTracking, start, stop, True);
  1703. if ANewReg = NR_NO then
  1704. { No free registers }
  1705. Exit;
  1706. { Reserve the register so subsequent TryCSELConst calls don't all end
  1707. up vying for the same register }
  1708. fOptimizer.IncludeRegInUsedRegs(ANewReg, RegisterTracking);
  1709. ConstRegs[ConstCount] := ANewReg;
  1710. ConstSizes[ConstCount] := RegSize;
  1711. ConstVals[ConstCount] := CurrentVal;
  1712. Inc(ConstCount);
  1713. Inc(Count);
  1714. Result := True;
  1715. end;
    { Tear-down: releases the register-usage snapshot that was allocated
      for tracking during analysis.  No other state needs freeing here. }
    destructor TCSELTracking.Done;
      begin
        TAOptObj.ReleaseUsedRegs(RegisterTracking);
      end;
  1720. procedure TCSELTracking.Process(out new_p: tai);
  1721. var
  1722. Count, Writes: LongInt;
  1723. RegMatch: Boolean;
  1724. hp1, hp_new: tai;
  1725. inverted_condition, condition: TAsmCond;
  1726. begin
  1727. if (fState in [tsInvalid, tsProcessed]) then
  1728. InternalError(2023110702);
  1729. { Repurpose RegisterTracking to mark registers that we've defined }
  1730. RegisterTracking[R_INTREGISTER].Clear;
  1731. Count := 0;
  1732. Writes := 0;
  1733. condition := taicpu(fInitialJump).condition;
  1734. inverted_condition := inverse_cond(condition);
  1735. { Exclude tsDoubleBranchDifferent from this check, as the second block
  1736. doesn't get CSELs in this case }
  1737. if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleSecondBranching]) then
  1738. begin
  1739. { Include the jump in the flag tracking }
  1740. if Assigned(fThirdJump) then
  1741. begin
  1742. if (fState = tsDoubleBranchSame) then
  1743. begin
  1744. { Will be an unconditional jump, so track to the instruction before it }
  1745. if not fOptimizer.GetLastInstruction(fThirdJump, hp1) then
  1746. InternalError(2023110712);
  1747. end
  1748. else
  1749. hp1 := fThirdJump;
  1750. end
  1751. else
  1752. hp1 := fSecondMovBlockStop;
  1753. end
  1754. else
  1755. begin
  1756. { Include a conditional jump in the flag tracking }
  1757. if Assigned(fSecondJump) then
  1758. begin
  1759. if (fState = tsDetour) then
  1760. begin
  1761. { Will be an unconditional jump, so track to the instruction before it }
  1762. if not fOptimizer.GetLastInstruction(fSecondJump, hp1) then
  1763. InternalError(2023110713);
  1764. end
  1765. else
  1766. hp1 := fSecondJump;
  1767. end
  1768. else
  1769. hp1 := fFirstMovBlockStop;
  1770. end;
  1771. fOptimizer.AllocRegBetween(NR_DEFAULTFLAGS, fInitialJump, hp1, fOptimizer.UsedRegs);
  1772. { Process the second set of MOVs first, because if a destination
  1773. register is shared between the first and second MOV sets, it is more
  1774. efficient to turn the first one into a MOV instruction and place it
  1775. before the CMP if possible, but we won't know which registers are
  1776. shared until we've processed at least one list, so we might as well
  1777. make it the second one since that won't be modified again. }
  1778. if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching]) then
  1779. begin
  1780. hp1 := fSecondMovBlock;
  1781. repeat
  1782. if not Assigned(hp1) then
  1783. InternalError(2018062902);
  1784. if (hp1.typ = ait_instruction) then
  1785. begin
  1786. { Extra safeguard }
  1787. if (taicpu(hp1).opcode <> A_MOV) then
  1788. InternalError(2018062903);
  1789. { Note: tsDoubleBranchDifferent is essentially identical to
  1790. tsBranching and the 2nd block is best left largely
  1791. untouched, but we need to evaluate which registers the MOVs
  1792. write to in order to track what would be complementary CSEL
  1793. pairs that can be further optimised. [Kit] }
  1794. if fState <> tsDoubleBranchDifferent then
  1795. begin
  1796. if taicpu(hp1).oper[1]^.typ = top_const then
  1797. begin
  1798. RegMatch := False;
  1799. for Count := 0 to ConstCount - 1 do
  1800. if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
  1801. (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
  1802. begin
  1803. RegMatch := True;
  1804. { If it's in RegisterTracking, then this register
  1805. is being used more than once and hence has
  1806. already had its value defined (it gets added to
  1807. UsedRegs through AllocRegBetween below) }
  1808. if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
  1809. begin
  1810. hp_new := tai(hp1.getcopy);
  1811. taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
  1812. taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
  1813. fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
  1814. fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
  1815. ConstMovs[Count] := hp_new;
  1816. end
  1817. else
  1818. { We just need an instruction between hp_prev and hp1
  1819. where we know the register is marked as in use }
  1820. hp_new := fSecondMovBlock;
  1821. { Keep track of largest write for this register so it can be optimised later }
  1822. if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
  1823. ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
  1824. fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
  1825. taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
  1826. Break;
  1827. end;
  1828. if not RegMatch then
  1829. InternalError(2021100413);
  1830. end;
  1831. taicpu(hp1).opcode := A_CSEL;
  1832. taicpu(hp1).ops := 4;
  1833. taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
  1834. taicpu(hp1).loadconditioncode(3, condition);
  1835. end;
  1836. { Store these writes to search for duplicates later on }
  1837. RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
  1838. Inc(Writes);
  1839. end;
  1840. fOptimizer.GetNextInstruction(hp1, hp1);
  1841. until (hp1 = fSecondMovBlockStop);
  1842. end;
  1843. { Now do the first set of MOVs }
  1844. hp1 := fFirstMovBlock;
  1845. repeat
  1846. if not Assigned(hp1) then
  1847. InternalError(2018062904);
  1848. if (hp1.typ = ait_instruction) then
  1849. begin
  1850. RegMatch := False;
  1851. { Extra safeguard }
  1852. if (taicpu(hp1).opcode <> A_MOV) then
  1853. InternalError(2018062905);
  1854. { Search through the RegWrites list to see if there are any
  1855. opposing CSEL pairs that write to the same register }
  1856. for Count := 0 to Writes - 1 do
  1857. if (RegWrites[Count] = taicpu(hp1).oper[0]^.reg) then
  1858. begin
  1859. { We have a match. Keep this as a MOV }
  1860. { Move ahead in preparation }
  1861. fOptimizer.GetNextInstruction(hp1, hp1);
  1862. RegMatch := True;
  1863. Break;
  1864. end;
  1865. if RegMatch then
  1866. Continue;
  1867. if taicpu(hp1).oper[1]^.typ = top_const then
  1868. begin
  1869. for Count := 0 to ConstCount - 1 do
  1870. if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
  1871. (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
  1872. begin
  1873. RegMatch := True;
  1874. { If it's in RegisterTracking, then this register is
  1875. being used more than once and hence has already had
  1876. its value defined (it gets added to UsedRegs through
  1877. AllocRegBetween below) }
  1878. if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
  1879. begin
  1880. hp_new := tai(hp1.getcopy);
  1881. taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
  1882. taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;
  1883. fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
  1884. fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);
  1885. ConstMovs[Count] := hp_new;
  1886. end
  1887. else
  1888. { We just need an instruction between hp_prev and hp1
  1889. where we know the register is marked as in use }
  1890. hp_new := fFirstMovBlock;
  1891. { Keep track of largest write for this register so it can be optimised later }
  1892. if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
  1893. ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);
  1894. fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
  1895. taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
  1896. Break;
  1897. end;
  1898. if not RegMatch then
  1899. InternalError(2021100412);
  1900. end;
  1901. taicpu(hp1).opcode := A_CSEL;
  1902. taicpu(hp1).ops := 4;
  1903. taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
  1904. taicpu(hp1).loadconditioncode(3, inverted_condition);
  1905. if (fState = tsDoubleBranchDifferent) then
  1906. begin
  1907. { Store these writes to search for duplicates later on }
  1908. RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
  1909. Inc(Writes);
  1910. end;
  1911. end;
  1912. fOptimizer.GetNextInstruction(hp1, hp1);
  1913. until (hp1 = fFirstMovBlockStop);
  1914. { Update initialisation MOVs to the smallest possible size }
  1915. for Count := 0 to ConstCount - 1 do
  1916. if Assigned(ConstMovs[Count]) then
  1917. setsubreg(taicpu(ConstMovs[Count]).oper[0]^.reg, ConstWriteSizes[Word(ConstRegs[Count])]);
  1918. case fState of
  1919. tsSimple:
  1920. begin
  1921. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Simple type)', fInitialJump);
  1922. { No branch to delete }
  1923. end;
  1924. tsDetour:
  1925. begin
  1926. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Detour type)', fInitialJump);
  1927. { Preserve jump }
  1928. end;
  1929. tsBranching, tsDoubleBranchDifferent:
  1930. begin
  1931. if (fState = tsBranching) then
  1932. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Branching type)', fInitialJump)
  1933. else
  1934. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (different) type)', fInitialJump);
  1935. taicpu(fSecondJump).condition := inverted_condition;
  1936. end;
  1937. tsDouble, tsDoubleBranchSame:
  1938. begin
  1939. if (fState = tsDouble) then
  1940. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double type)', fInitialJump)
  1941. else
  1942. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (same) type)', fInitialJump);
  1943. { Delete second jump }
  1944. JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
  1945. fOptimizer.RemoveInstruction(fSecondJump);
  1946. end;
  1947. tsDoubleSecondBranching:
  1948. begin
  1949. fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double, second branching type)', fInitialJump);
  1950. { Delete second jump, preserve third jump as conditional }
  1951. JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
  1952. fOptimizer.RemoveInstruction(fSecondJump);
  1953. taicpu(fThirdJump).condition := condition;
  1954. end;
  1955. else
  1956. InternalError(2023110721);
  1957. end;
  1958. { Now we can safely decrement the reference count }
  1959. tasmlabel(fLabel).decrefs;
  1960. fOptimizer.UpdateUsedRegs(tai(fInitialJump.next));
  1961. { Remove the original jump }
  1962. fOptimizer.RemoveInstruction(fInitialJump); { Note, the choice to not use RemoveCurrentp is deliberate }
  1963. new_p := fFirstMovBlock; { Appears immediately after the initial jump }
  1964. fState := tsProcessed;
  1965. end;
begin
  { Unit initialisation: register the CPU-specific optimizer class so the
    generic assembler-optimizer framework instantiates it for this target }
  casmoptimizer:=TCpuAsmOptimizer;
End.