aoptcpu.pas

  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM64 optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. globtype, globals,
  26. cutils,
  27. cgbase, cpubase, aasmtai, aasmcpu,
  28. aopt, aoptcpub, aoptarm, aoptobj;
  29. Type
  30. TCpuAsmOptimizer = class(TARMAsmOptimizer)
  31. { uses the same constructor as TAOptObj }
  32. function PrePeepHoleOptsCpu(var p: tai): boolean; override;
  33. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  34. function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
  35. function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  36. function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
  37. function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
  38. function LookForPostindexedPattern(var p : tai) : boolean;
  39. public
  40. { These routines are used by optimisation code that is shared by all ARM platforms }
  41. function OptPass1LDR(var p: tai): Boolean; override;
  42. function OptPass1STR(var p: tai): Boolean; override;
  43. private
  44. function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
  45. function OptPass1Shift(var p: tai): boolean;
  46. function OptPass1Data(var p: tai): boolean;
  47. function OptPass1FData(var p: tai): Boolean;
  48. function OptPass1STP(var p: tai): boolean;
  49. function OptPass1Mov(var p: tai): boolean;
  50. function OptPass1MOVZ(var p: tai): boolean;
  51. function OptPass1FMov(var p: tai): Boolean;
  52. function OptPass1B(var p: tai): boolean;
  53. function OptPass1SXTW(var p: tai): Boolean;
  54. function OptPass2CSEL(var p: tai): Boolean;
  55. function OptPass2B(var p: tai): Boolean;
  56. function OptPass2LDRSTR(var p: tai): boolean;
  57. function OptPass2MOV(var p: tai): Boolean;
  58. function PostPeepholeOptAND(var p: tai): Boolean;
  59. function PostPeepholeOptCMP(var p: tai): boolean;
  60. function PostPeepholeOptTST(var p: tai): Boolean;
  61. protected
  62. { Like UpdateUsedRegs, but ignores deallocations }
  63. class procedure UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai); static;
  64. { Attempts to allocate a volatile integer register for use between p and hp,
  65. using AUsedRegs for the current register usage information. Returns NR_NO
  66. if no free register could be found }
  67. function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
  68. End;
  69. Implementation
  70. uses
  71. aasmbase,
  72. aoptbase,
  73. aoptutils,
  74. cgutils,
  75. procinfo,
  76. paramgr,
  77. verbose;
  78. {$ifdef DEBUG_AOPTCPU}
  79. const
  80. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  81. {$else DEBUG_AOPTCPU}
  82. { Empty strings help the optimizer to remove string concatenations that won't
  83. ever appear to the user on release builds. [Kit] }
  84. const
  85. SPeepholeOptimization = '';
  86. {$endif DEBUG_AOPTCPU}
  87. MAX_CSEL_INSTRUCTIONS = 8;
  88. MAX_CSEL_REGISTERS = 30;
  89. type
  90. TCSELTrackingState = (tsInvalid, tsSimple, tsDetour, tsBranching,
  91. tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching,
  92. tsProcessed);
  93. { For OptPass2B }
  94. TCSELTracking = object
  95. private
  96. CSELScore, ConstCount: LongInt;
  97. RegWrites: array[0..MAX_CSEL_INSTRUCTIONS*2 - 1] of TRegister;
  98. ConstRegs: array[0..MAX_CSEL_REGISTERS - 1] of TRegister;
  99. ConstVals: array[0..MAX_CSEL_REGISTERS - 1] of TCGInt;
  100. ConstSizes: array[0..MAX_CSEL_REGISTERS - 1] of TSubRegister; { May not match ConstRegs if one is shared over multiple CSELs. }
  101. ConstMovs: array[0..MAX_CSEL_REGISTERS - 1] of tai; { Location of initialisation instruction }
  102. ConstWriteSizes: array[0..first_int_imreg - 1] of TSubRegister; { Largest size of register written. }
  103. fOptimizer: TCpuAsmOptimizer;
  104. fLabel: TAsmSymbol;
  105. fInsertionPoint,
  106. fCondition,
  107. fInitialJump,
  108. fFirstMovBlock,
  109. fFirstMovBlockStop,
  110. fSecondJump,
  111. fThirdJump,
  112. fSecondMovBlock,
  113. fSecondMovBlockStop,
  114. fMidLabel,
  115. fEndLabel,
  116. fAllocationRange: tai;
  117. fState: TCSELTrackingState;
  118. function TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
  119. function InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
  120. function AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
  121. public
  122. RegisterTracking: TAllUsedRegs;
  123. constructor Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
  124. destructor Done;
  125. procedure Process(out new_p: tai);
  126. property State: TCSELTrackingState read fState;
  127. end;
  128. PCSELTracking = ^TCSELTracking;
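  { Rough summary: TCSELTracking follows the MOV instructions guarded by a
    conditional branch and, when the block is small enough (MAX_CSEL_INSTRUCTIONS)
    and any constants can be kept in spare registers (ConstRegs/ConstVals),
    rewrites the block as CSEL instructions; OptPass2B is the entry point. }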
  129. function CanBeCond(p : tai) : boolean;
  130. begin
  131. result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
  132. end;
  133. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  134. var
  135. p: taicpu;
  136. begin
  137. Result := false;
  138. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  139. exit;
  140. p := taicpu(hp);
  141. case p.opcode of
  142. { These operations do not write into a register at all
  143. LDR/STR with post/pre-indexed operations do not need special treatment
  144. because post-/pre-indexing does not load the register with a new value,
  145. it only modifies it }
  146. A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
  147. exit;
  148. else
  149. ;
  150. end;
  151. if p.ops=0 then
  152. exit;
  153. case p.oper[0]^.typ of
  154. top_reg:
  155. Result := SuperRegistersEqual(p.oper[0]^.reg,reg);
  156. top_ref:
  157. Result :=
  158. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  159. (taicpu(p).oper[0]^.ref^.base = reg);
  160. else
  161. ;
  162. end;
  163. end;
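  { Illustrative example for InstructionLoadsFromReg below:
      str x2,[x0,x1]
    reads x0, x1 and x2 - the first operand of a store is a read as well, which is
    why the scan may start at oper[0]. }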
  164. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  165. var
  166. p: taicpu;
  167. i: longint;
  168. begin
  169. instructionLoadsFromReg := false;
  170. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  171. exit;
  172. p:=taicpu(hp);
  173. i:=1;
  174. { Start on oper[0]? }
  175. if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
  176. i:=0;
  177. while(i<p.ops) do
  178. begin
  179. case p.oper[I]^.typ of
  180. top_reg:
  181. Result := (p.oper[I]^.reg = reg);
  182. top_ref:
  183. Result :=
  184. (p.oper[I]^.ref^.base = reg) or
  185. (p.oper[I]^.ref^.index = reg);
  186. else
  187. ;
  188. end;
  189. { Bail out if we found something }
  190. if Result then
  191. exit;
  192. Inc(I);
  193. end;
  194. end;
  195. {
  196. optimize
  197. ldr/str regX,[reg1]
  198. ...
  199. add/sub reg1,reg1,regY/const
  200. into
  201. ldr/str regX,[reg1], regY/const
  202. }
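  { e.g. (illustrative):
      ldr x0,[x1]
      ...
      add x1,x1,#8
    becomes
      ldr x0,[x1], #8 }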
  203. function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
  204. var
  205. hp1 : tai;
  206. begin
  207. Result:=false;
  208. if (taicpu(p).oper[1]^.typ = top_ref) and
  209. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  210. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  211. (taicpu(p).oper[1]^.ref^.offset=0) and
  212. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
  213. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  214. MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
  215. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
  216. (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
  217. (
  218. { valid offset? }
  219. (taicpu(hp1).oper[2]^.typ=top_const) and
  220. (taicpu(hp1).oper[2]^.val>=-256) and
  221. (abs(taicpu(hp1).oper[2]^.val)<256)
  222. ) and
  223. { don't apply the optimization if the base register is loaded }
  224. (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
  225. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  226. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
  227. begin
  228. if taicpu(p).opcode = A_LDR then
  229. DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
  230. else
  231. DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);
  232. taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  233. if taicpu(hp1).opcode=A_ADD then
  234. taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  235. else
  236. taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  237. asml.Remove(hp1);
  238. hp1.Free;
  239. Result:=true;
  240. end;
  241. end;
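  { RemoveSuperfluousFMov folds a trailing FMOV into the preceding instruction when
    the intermediate register dies afterwards, e.g. (illustrative sketch):
      fadd d0,d1,d2
      fmov d3,d0
      <d0 no longer used>
    becomes
      fadd d3,d1,d2 }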
  242. function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
  243. var
  244. alloc,
  245. dealloc : tai_regalloc;
  246. hp1 : tai;
  247. begin
  248. Result:=false;
  249. if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  250. ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
  251. ) { or
  252. (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  253. (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
  254. ) and
  255. (taicpu(movp).ops=2) and
  256. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  257. { the destination register of the mov must not be used between p and movp }
  258. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  259. { Take care to only do this for instructions which REALLY load to the first register.
  260. Otherwise
  261. str reg0, [reg1]
  262. fmov reg2, reg0
  263. will be optimized to
  264. str reg2, [reg1]
  265. }
  266. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  267. begin
  268. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  269. if assigned(dealloc) then
  270. begin
  271. DebugMsg(SPeepholeOptimization + optimizer+' removed superfluous fmov', movp);
  272. result:=true;
  273. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  274. and remove it if possible }
  275. asml.Remove(dealloc);
  276. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  277. if assigned(alloc) then
  278. begin
  279. asml.Remove(alloc);
  280. alloc.free;
  281. dealloc.free;
  282. end
  283. else
  284. asml.InsertAfter(dealloc,p);
  285. { try to move the allocation of the target register }
  286. GetLastInstruction(movp,hp1);
  287. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  288. if assigned(alloc) then
  289. begin
  290. asml.Remove(alloc);
  291. asml.InsertBefore(alloc,p);
  292. { adjust used regs }
  293. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  294. end;
  295. { change
  296. vldr reg0,[reg1]
  297. vmov reg2,reg0
  298. into
  299. ldr reg2,[reg1]
  300. if reg2 is an int register
  301. if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
  302. taicpu(p).opcode:=A_LDR;
  303. }
  304. { finally get rid of the mov }
  305. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  306. asml.remove(movp);
  307. movp.free;
  308. end;
  309. end;
  310. end;
  311. function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  312. var
  313. hp1: tai;
  314. begin
  315. Result := False;
  316. if inherited OptPass1LDR(p) or
  317. LookForPostindexedPattern(p) then
  318. Exit(True)
  319. else if (taicpu(p).oppostfix in [PF_B,PF_SB,PF_H,PF_SH,PF_None]) and
  320. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  321. RemoveSuperfluousMove(p, hp1, 'Ldr<Postfix>Mov2Ldr<Postfix>') then
  322. Exit(true);
  323. end;
  324. function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  325. begin
  326. Result := False;
  327. if inherited OptPass1STR(p) or
  328. LookForPostindexedPattern(p) then
  329. Exit(True);
  330. if getsupreg(taicpu(p).oper[0]^.reg) = RS_WZR then
  331. Result := TryConstMerge(p, nil);
  332. end;
  333. function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
  334. var
  335. hp1,hp2: tai;
  336. I2, I: Integer;
  337. shifterop: tshifterop;
  338. begin
  339. Result:=false;
  340. { This folds shifterops into following instructions
  341. <shiftop> r0, r1, #imm
  342. <op> r2, r3, r0
  343. to
  344. <op> r2, r3, r1, <shiftop> #imm
  345. }
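  { e.g. (illustrative, assuming x1 is not used afterwards):
      lsl x1,x2,#2
      add x0,x3,x1
    becomes
      add x0,x3,x2,lsl #2 }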
  346. { do not handle ROR yet, as only some of the instructions below support ROR as shifter operand }
  347. if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
  348. MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
  349. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  350. MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
  351. A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
  352. A_SUB, A_TST], [PF_None]) and
  353. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  354. (taicpu(hp1).ops >= 2) and
  355. { Currently we can't fold into another shifterop }
  356. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  357. { SP does not fully work with shifted registers; as the exact rules are unclear,
  358. we do not operate on SP }
  359. (taicpu(hp1).oper[0]^.reg<>NR_SP) and
  360. (taicpu(hp1).oper[1]^.reg<>NR_SP) and
  361. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
  362. { reg1 must not be modified in between }
  363. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  364. (
  365. { Only ONE of the two src operands is allowed to match }
  366. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  367. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  368. ) and
  369. { for SUB, the last operand must match, there is no RSB on AArch64 }
  370. ((taicpu(hp1).opcode<>A_SUB) or
  371. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
  372. begin
  373. { for the two-operand instructions, also start at the second operand, as they are not always commutative
  374. (depends on the flags tested later on) and thus the operands cannot be swapped }
  375. I2:=1;
  376. for I:=I2 to taicpu(hp1).ops-1 do
  377. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  378. begin
  379. { If the parameter matched on the second op from the RIGHT,
  380. we have to switch the parameters; this will not happen for CMP,
  381. where we're only evaluating the rightmost parameter
  382. }
  383. shifterop_reset(shifterop);
  384. case taicpu(p).opcode of
  385. A_LSL:
  386. shifterop.shiftmode:=SM_LSL;
  387. A_ROR:
  388. shifterop.shiftmode:=SM_ROR;
  389. A_LSR:
  390. shifterop.shiftmode:=SM_LSR;
  391. A_ASR:
  392. shifterop.shiftmode:=SM_ASR;
  393. else
  394. InternalError(2019090401);
  395. end;
  396. shifterop.shiftimm:=taicpu(p).oper[2]^.val;
  397. if I <> taicpu(hp1).ops-1 then
  398. begin
  399. if taicpu(hp1).ops = 3 then
  400. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  401. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  402. taicpu(p).oper[1]^.reg, shifterop)
  403. else
  404. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  405. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  406. shifterop);
  407. end
  408. else
  409. if taicpu(hp1).ops = 3 then
  410. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  411. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  412. taicpu(p).oper[1]^.reg,shifterop)
  413. else
  414. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  415. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  416. shifterop);
  417. { Make sure the register used in the shifting is tracked all
  418. the way through, otherwise it may become deallocated while
  419. it's still live and cause incorrect optimisations later }
  420. if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
  421. begin
  422. TransferUsedRegs(TmpUsedRegs);
  423. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  424. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
  425. end;
  426. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  427. asml.insertbefore(hp2, hp1);
  428. RemoveInstruction(hp1);
  429. RemoveCurrentp(p);
  430. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  431. Result:=true;
  432. break;
  433. end;
  434. end
  435. else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
  436. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  437. RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
  438. Result:=true;
  439. end;
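  { OptPass1Data: the generic "fold a following MOV of the result" rule for integer
    data instructions, e.g. (illustrative):
      add x1,x2,x3
      mov x0,x1
      <x1 no longer used>
    becomes
      add x0,x2,x3 }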
  440. function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
  441. var
  442. hp1: tai;
  443. begin
  444. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  445. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  446. end;
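  { OptPass1FData: the same idea for floating-point instructions, via
    RemoveSuperfluousFMov, e.g. (illustrative):
      fmul d1,d2,d3
      fmov d0,d1
      <d1 no longer used>
    becomes
      fmul d0,d2,d3 }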
  447. function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
  448. var
  449. hp1: tai;
  450. begin
  451. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  452. RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
  453. end;
  454. function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
  455. var
  456. hp1, hp2, hp3, hp4, tmp1 : tai;
  457. begin
  458. Result:=false;
  459. {
  460. change
  461. stp x29,x30,[sp, #-16]!
  462. mov x29,sp
  463. bl abc
  464. ldp x29,x30,[sp], #16
  465. ret
  466. into
  467. b abc
  468. }
  469. if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
  470. MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
  471. (taicpu(p).oper[0]^.reg = NR_X29) and
  472. (taicpu(p).oper[1]^.reg = NR_X30) and
  473. (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  474. (taicpu(p).oper[2]^.ref^.index=NR_NO) and
  475. (taicpu(p).oper[2]^.ref^.offset=-16) and
  476. (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
  477. GetNextInstruction(p, hp1) and
  478. MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
  479. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
  480. (taicpu(hp1).oper[1]^.typ = top_reg) and
  481. (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
  482. GetNextInstruction(hp1, hp2) and
  483. SkipEntryExitMarker(hp2, hp2) and
  484. MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
  485. (taicpu(hp2).oper[0]^.typ = top_ref) and
  486. GetNextInstruction(hp2, hp3) and
  487. SkipEntryExitMarker(hp3, hp3) and
  488. MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
  489. MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
  490. (taicpu(hp3).oper[0]^.reg = NR_X29) and
  491. (taicpu(hp3).oper[1]^.reg = NR_X30) and
  492. (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
  493. (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
  494. (taicpu(hp3).oper[2]^.ref^.offset=16) and
  495. (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
  496. GetNextInstruction(hp3, hp4) and
  497. MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
  498. (taicpu(hp4).ops = 0) then
  499. begin
  500. { remove the SEH instruction for the STP FP,LR }
  501. if GetNextInstruction(p,tmp1,[ait_seh_directive]) and
  502. (tmp1.typ=ait_seh_directive) and
  503. (tai_seh_directive(tmp1).kind=ash_savefplr_x) then
  504. begin
  505. asml.Remove(tmp1);
  506. tmp1.free;
  507. end;
  508. { remove the SEH instruction for the MOV FP,SP }
  509. if GetNextInstruction(hp1,tmp1,[ait_seh_directive]) and
  510. (tmp1.typ=ait_seh_directive) and
  511. (tai_seh_directive(tmp1).kind=ash_setfp) then
  512. begin
  513. asml.Remove(tmp1);
  514. tmp1.free;
  515. end;
  516. asml.Remove(p);
  517. asml.Remove(hp1);
  518. asml.Remove(hp3);
  519. asml.Remove(hp4);
  520. taicpu(hp2).opcode:=A_B;
  521. p.free;
  522. hp1.free;
  523. hp3.free;
  524. hp4.free;
  525. p:=hp2;
  526. DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
  527. Result:=true;
  528. end;
  529. end;
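  { OptPass1Mov handles, among others (illustrative examples):
      mov x0,x0                 -> instruction removed (Mov2None)
      mov w0,w1
      add x2,x3,x0              -> add x2,x3,w1,uxtw   (when x0 ends its life there) }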
  530. function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
  531. var
  532. hp1: tai;
  533. so: tshifterop;
  534. begin
  535. Result:=false;
  536. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
  537. (taicpu(p).oppostfix=PF_None) then
  538. begin
  539. RemoveCurrentP(p);
  540. DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
  541. Result:=true;
  542. end
  543. else if (taicpu(p).ops=2) and
  544. (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
  545. GetNextInstruction(p, hp1) and
  546. { Faster to get it out of the way than go through MatchInstruction }
  547. (hp1.typ=ait_instruction) and
  548. (taicpu(hp1).ops=3) and
  549. MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
  550. (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
  551. (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
  552. RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
  553. begin
  554. DebugMsg(SPeepholeOptimization + 'MovOp2AddUxtw 1 done', p);
  555. shifterop_reset(so);
  556. so.shiftmode:=SM_UXTW;
  557. taicpu(hp1).ops:=4;
  558. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  559. taicpu(hp1).loadshifterop(3,so);
  560. RemoveCurrentP(p);
  561. Result:=true;
  562. exit;
  563. end
  564. {
  565. optimize
  566. mov rX, yyyy
  567. ....
  568. }
  569. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  570. begin
  571. if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  572. Result:=true
  573. else if (taicpu(p).ops = 2) and
  574. (tai(hp1).typ = ait_instruction) and
  575. RedundantMovProcess(p,hp1) then
  576. Result:=true
  577. end;
  578. end;
  579. function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
  580. var
  581. hp1: tai;
  582. TargetReg: TRegister;
  583. begin
  584. Result := False;
  585. hp1 := nil;
  586. TargetReg := taicpu(p).oper[0]^.reg;
  587. if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
  588. begin
  589. if
  590. { Check next instruction first so hp1 gets set to something, then
  591. if it remains nil, we know for sure that there's no valid next
  592. instruction. }
  593. not GetNextInstruction(p, hp1) or
  594. { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
  595. not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
  596. (taicpu(hp1).oper[0]^.reg <> TargetReg) then
  597. begin
  598. if (taicpu(p).oper[1]^.val = 0) then
  599. begin
  600. { Change:
  601. movz reg,#0
  602. (no movk or movn)
  603. To:
  604. mov reg,xzr (or wzr)
  605. Easier to perform other optimisations with registers
  606. }
  607. DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);
  608. { Convert TargetReg to the correctly-sized zero register }
  609. setsupreg(TargetReg, RS_XZR);
  610. taicpu(p).opcode := A_MOV;
  611. taicpu(p).loadreg(1, TargetReg);
  612. Result := True;
  613. Exit;
  614. end;
  615. end;
  616. {
  617. remove the second Movz from
  618. movz reg,...
  619. movz reg,...
  620. }
  621. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  622. MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
  623. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
  624. begin
  625. DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
  626. RemoveCurrentP(p);
  627. Result:=true;
  628. exit;
  629. end;
  630. end;
  631. if (getsupreg(TargetReg) <= RS_X30) and { Mostly to play safe }
  632. GetNextInstructionUsingReg(p, hp1, TargetReg) and
  633. (hp1.typ = ait_instruction) then
  634. begin
  635. case taicpu(hp1).opcode of
  636. {$ifdef AARCH64}
  637. A_MOVK:
  638. { Try to avoid too much unnecessary processing by checking to see
  639. if the register is 32-bit }
  640. if (getsubreg(TargetReg) = R_SUBD) and
  641. (taicpu(hp1).oper[0]^.reg = TargetReg) and
  642. TryConstMerge(p, hp1) then
  643. begin
  644. Result := True;
  645. Exit;
  646. end;
  647. {$endif AARCH64}
  648. A_STR:
  649. {
  650. With sequences such as:
  651. movz w0,x
  652. strb w0,[sp, #ofs]
  653. movz w0,y
  654. strb w0,[sp, #ofs+1]
  655. Merge the constants to:
  656. movz w0,x + (y shl 8)
  657. strh w0,[sp, #ofs]
  658. Only use the stack pointer or frame pointer as base and an even offset, though,
  659. to guarantee alignment
  660. }
  661. if TryConstMerge(p, hp1) then
  662. begin
  663. Result := True;
  664. Exit;
  665. end;
  666. else
  667. ;
  668. end;
  669. end;
  670. end;
  671. function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
  672. var
  673. hp1: tai;
  674. alloc, dealloc: tai_regalloc;
  675. begin
  676. {
  677. change
  678. fmov reg0,reg1
  679. fmov reg1,reg0
  680. into
  681. fmov reg0,reg1
  682. }
  683. Result := False;
  684. while GetNextInstruction(p, hp1) and
  685. MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  686. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  687. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
  688. begin
  689. asml.Remove(hp1);
  690. hp1.free;
  691. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
  692. Result:=true;
  693. end;
  694. { change
  695. fmov reg0,const
  696. fmov reg1,reg0
  697. dealloc reg0
  698. into
  699. fmov reg1,const
  700. }
  701. if MatchOpType(taicpu(p),top_reg,top_realconst) and
  702. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  703. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  704. MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
  705. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  706. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
  707. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  708. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))
  709. then
  710. begin
  711. DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 2 done', p);
  712. taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
  713. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  714. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  715. if assigned(alloc) and assigned(dealloc) then
  716. begin
  717. asml.Remove(alloc);
  718. alloc.Free;
  719. asml.Remove(dealloc);
  720. dealloc.Free;
  721. end;
  722. { p will be removed, update used register as we continue
  723. with the next instruction after p }
  724. result:=RemoveCurrentP(p);
  725. end;
  726. { not enabled as apparently not happening
  727. if MatchOpType(taicpu(p),top_reg,top_reg) and
  728. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  729. MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
  730. (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
  731. ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
  732. ) and
  733. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  734. not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  735. begin
  736. DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
  737. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  738. if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  739. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  740. if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
  741. taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
  742. RemoveCurrentP(p);
  743. Result:=true;
  744. exit;
  745. end;
  746. }
  747. end;
  748. function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean;
  749. var
  750. hp1: tai;
  751. GetNextInstructionUsingReg_hp1: Boolean;
  752. begin
  753. Result:=false;
  754. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  755. begin
  756. {
  757. change
  758. sxtw reg2,reg1
  759. str reg2,[...]
  760. dealloc reg2
  761. to
  762. str reg1,[...]
  763. }
  764. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  765. (taicpu(p).ops=2) and
  766. MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
  767. (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
  768. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  769. { the reference in the str must not use reg2 }
  770. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  771. { reg1 must not be modified in between }
  772. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  773. begin
  774. DebugMsg(SPeepholeOptimization + 'SxtwStr2Str done', p);
  775. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  776. result:=RemoveCurrentP(p);
  777. end
  778. {
  779. change
  780. sxtw reg2,reg1
  781. sxtw reg3,reg2
  782. dealloc reg2
  783. to
  784. sxtw reg3,reg1
  785. }
  786. else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
  787. (taicpu(p).ops=2) and
  788. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  789. (taicpu(hp1).ops=2) and
  790. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  791. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  792. { reg1 must not be modified in between }
  793. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  794. begin
  795. DebugMsg(SPeepholeOptimization + 'SxtwSxtw2Sxtw done', p);
  796. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  797. taicpu(hp1).opcode:=A_SXTW;
  798. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  799. result:=RemoveCurrentP(p);
  800. end
  801. else if USxtOp2Op(p,hp1,SM_SXTW) then
  802. Result:=true
  803. else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
  804. Result:=true;
  805. end;
  806. end;
  807. function TCpuAsmOptimizer.OptPass1B(var p: tai): boolean;
  808. var
  809. hp1, hp2, hp3, hp4, hp5: tai;
  810. Invert: Boolean;
  811. begin
  812. Result := False;
  813. {
  814. convert
  815. b<c> .L1
  816. movz reg,#1
  817. b .L2
  818. .L1
  819. movz reg,#0 (or mov reg,xzr)
  820. .L2
  821. into
  822. cset reg,<not(c)>
  823. Also do the same if the constants are reversed, instead converting it to:
  824. cset reg,<c>
  825. }
  826. if (taicpu(p).condition <> C_None) and
  827. (taicpu(p).oper[0]^.typ = top_ref) and
  828. GetNextInstruction(p, hp1) and
  829. { Check individually instead of using MatchInstruction in order to save time }
  830. (hp1.typ = ait_instruction) and
  831. (taicpu(hp1).condition = C_None) and
  832. (taicpu(hp1).oppostfix = PF_None) and
  833. (taicpu(hp1).ops = 2) and
  834. (
  835. (
  836. (taicpu(hp1).opcode = A_MOVZ) and
  837. (taicpu(hp1).oper[1]^.val in [0, 1])
  838. ) or
  839. (
  840. (taicpu(hp1).opcode = A_MOV) and
  841. (getsupreg(taicpu(hp1).oper[1]^.reg) = RS_XZR)
  842. )
  843. ) and
  844. GetNextInstruction(hp1, hp2) and
  845. MatchInstruction(hp2, A_B, [PF_None]) and
  846. (taicpu(hp2).condition = C_None) and
  847. (taicpu(hp2).oper[0]^.typ = top_ref) and
  848. GetNextInstruction(hp2, hp3) and
  849. (hp3.typ = ait_label) and
  850. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol) = tai_label(hp3).labsym) and
  851. GetNextInstruction(hp3, hp4) and
  852. { As before, check individually instead of using MatchInstruction in order to save time }
  853. (hp4.typ = ait_instruction) and
  854. (taicpu(hp4).condition = C_None) and
  855. (taicpu(hp4).oppostfix = PF_None) and
  856. (taicpu(hp4).ops = 2) and
  857. (taicpu(hp4).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  858. (
  859. (
  860. (taicpu(hp4).opcode = A_MOVZ) and
  861. (
  862. (
  863. { Check to confirm the following:
  864. - First mov is either "movz reg,#0" or "mov reg,xzr"
  865. - Second mov is "movz reg,#1"
  866. }
  867. (
  868. (taicpu(hp1).oper[1]^.typ = top_reg) { Will be the zero register } or
  869. (taicpu(hp1).oper[1]^.val = 0)
  870. ) and
  871. (taicpu(hp4).oper[1]^.val = 1)
  872. ) or
  873. (
  874. { Check to confirm the following:
  875. - First mov is "movz reg,#1"
  876. - Second mov is "movz reg,#0"
  877. }
  878. MatchOperand(taicpu(hp1).oper[1]^, 1) and
  879. (taicpu(hp4).oper[1]^.val = 0)
  880. )
  881. )
  882. ) or
  883. (
  884. { Check to confirm the following:
  885. - First mov is "movz reg,#1"
  886. - Second mov is "mov reg,xzr"
  887. }
  888. (taicpu(hp4).opcode = A_MOV) and
  889. (getsupreg(taicpu(hp4).oper[1]^.reg) = RS_XZR) and
  890. MatchOperand(taicpu(hp1).oper[1]^, 1)
  891. )
  892. ) and
  893. GetNextInstruction(hp4, hp5) and
  894. (hp5.typ = ait_label) and
  895. (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol) = tai_label(hp5).labsym) then
  896. begin
  897. Invert := MatchOperand(taicpu(hp1).oper[1]^, 1); { if true, hp4 will be mov reg,0 in some form }
  898. if Invert then
  899. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  900. tai_label(hp3).labsym.DecRefs;
  901. { If this isn't the only reference to the middle label, we can
  902. still make a saving - it's just that the first jump and everything
  903. that follows will remain. }
  904. if (tai_label(hp3).labsym.getrefs = 0) then
  905. begin
  906. if Invert then
  907. DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c)',p)
  908. else
  909. DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c)',p);
  910. { remove jump, first label and second MOV (also catching any aligns) }
  911. repeat
  912. if not GetNextInstruction(hp2, hp3) then
  913. InternalError(2022070801);
  914. RemoveInstruction(hp2);
  915. hp2 := hp3;
  916. until hp2 = hp5;
  917. { Don't decrement reference count before the removal loop
  918. above, otherwise GetNextInstruction won't stop on
  919. the label }
  920. tai_label(hp5).labsym.DecRefs;
  921. end
  922. else
  923. begin
  924. if Invert then
  925. DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c) (partial)',p)
  926. else
  927. DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c) (partial)',p);
  928. end;
  929. taicpu(hp1).opcode := A_CSET;
  930. taicpu(hp1).loadconditioncode(1, taicpu(p).condition);
  931. RemoveCurrentP(p, hp1);
  932. Result:=true;
  933. exit;
  934. end;
  935. end;
  936. function TCpuAsmOptimizer.OptPass2B(var p: tai): Boolean;
  937. var
  938. hp1: tai;
  939. LabelSym: TAsmLabel;
  940. CSELTracking: PCSELTracking;
  941. begin
  942. Result := False;
  943. if (taicpu(p).condition = C_None) and
  944. IsJumpToLabel(taicpu(p)) then
  945. begin
  946. { Check for:
  947. B @lbl
  948. ...
  949. @Lbl:
  950. RET
  951. Change to:
  952. RET (and reduce reference count on label)
  953. }
  954. LabelSym := TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol);
  955. hp1 := GetLabelWithSym(LabelSym);
  956. if Assigned(hp1) and
  957. GetNextInstruction(hp1, hp1) and
  958. (hp1.typ = ait_instruction) and
  959. (taicpu(hp1).opcode = A_RET) then
  960. begin
  961. DebugMsg(SPeepholeOptimization + 'B -> RET since a RET immediately follows the destination label (B2Ret)', p);
  962. taicpu(p).ops := 0;
  963. taicpu(p).clearop(0);
  964. taicpu(p).is_jmp := false;
  965. taicpu(p).opcode := A_RET;
  966. { Make sure the label is dereferenced now }
  967. LabelSym.decrefs;
  968. Result := True;
  969. Exit;
  970. end;
  971. end;
  972. if (taicpu(p).condition <> C_None) and
  973. IsJumpToLabel(taicpu(p)) and
  974. GetNextInstruction(p, hp1) and
  975. (hp1.typ = ait_instruction) and
  976. (taicpu(hp1).opcode = A_MOV) then
  977. begin
  978. { check for
  979. b.<cond> xxx
  980. <several movs>
  981. xxx:
  982. Also spot:
  983. b.<cond> xxx
  984. <several movs>
  985. b xxx
  986. Change to:
  987. <several csel instructions with inverted condition>
  988. b xxx (only for the 2nd case)
  989. }
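  { e.g. (illustrative sketch):
      b.eq .L1
      mov w0,w1
    .L1:
    can become
      csel w0,w1,w0,ne }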
  990. CSELTracking := New(PCSELTracking, Init(Self, p, hp1, TAsmLabel(JumpTargetOp(taicpu(p))^.ref^.symbol)));
  991. if CSELTracking^.State <> tsInvalid then
  992. begin
  993. CSELTracking^.Process(p);
  994. Result := True;
  995. end;
  996. CSELTracking^.Done;
  997. end;
  998. end;
  999. function TCpuAsmOptimizer.OptPass2CSEL(var p: tai): Boolean;
  1000. begin
  1001. Result := False;
  1002. { Csel r0,r1,r1,cond -> mov r0,r1 }
  1003. if (taicpu(p).oper[1]^.reg = taicpu(p).oper[2]^.reg) then
  1004. begin
  1005. DebugMsg(SPeepholeOptimization + 'CSel2Mov (identical true/false registers)', p);
  1006. taicpu(p).opcode := A_MOV;
  1007. taicpu(p).ops := 2;
  1008. Result := True;
  1009. Exit;
  1010. end;
  1011. end;
  1012. function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
  1013. var
  1014. hp1, hp1_last: tai;
  1015. ThisRegister: TRegister;
  1016. OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
  1017. TargetOpcode: TAsmOp;
  1018. begin
  1019. Result := False;
  1020. ThisRegister := taicpu(p).oper[0]^.reg;
  1021. case taicpu(p).opcode of
  1022. A_LDR:
  1023. TargetOpcode := A_LDP;
  1024. A_STR:
  1025. TargetOpcode := A_STP;
  1026. else
  1027. InternalError(2020081501);
  1028. end;
  1029. { reg appearing in ref invalidates these optimisations }
  1030. if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
  1031. begin
  1032. { LDP/STP has a smaller permitted offset range than LDR/STR.
  1033. TODO: For a group of out-of-range LDR/STR instructions, can
  1034. we declare a temporary register equal to the offset base
  1035. address, modify the STR instructions to use that register
  1036. and then convert them to STP instructions? Note that STR
  1037. generally takes 2 cycles (on top of the memory latency),
  1038. while LDP/STP takes 3.
  1039. }
  1040. if (getsubreg(ThisRegister) = R_SUBQ) then
  1041. begin
  1042. ValidOffset := 8;
  1043. MinOffset := -512;
  1044. MaxOffset := 504;
  1045. end
  1046. else
  1047. begin
  1048. ValidOffset := 4;
  1049. MinOffset := -256;
  1050. MaxOffset := 252;
  1051. end;
  1052. hp1_last := p;
  1053. { Look for nearby LDR/STR instructions }
  1054. if (taicpu(p).oppostfix = PF_NONE) and
  1055. (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
  1056. { If SkipGetNext is True, GetNextInstruction isn't called }
  1057. while GetNextInstruction(hp1_last, hp1) do
  1058. begin
  1059. if (hp1.typ <> ait_instruction) then
  1060. Break;
  1061. if (taicpu(hp1).opcode = taicpu(p).opcode) then
  1062. begin
  1063. if (taicpu(hp1).oppostfix = PF_NONE) and
  1064. { Registers need to be the same size }
  1065. (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
  1066. (
  1067. (TargetOpcode = A_STP) or
  1068. { LDP x0, x0, [sp, #imm] is undefined behaviour, even
  1069. though such an LDR pair should have been optimised
  1070. out by now. STP is okay }
  1071. (ThisRegister <> taicpu(hp1).oper[0]^.reg)
  1072. ) and
  1073. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1074. (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
  1075. (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
  1076. { Make sure the address registers haven't changed }
  1077. not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
  1078. (
  1079. (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
  1080. not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
  1081. ) and
  1082. { Don't need to check "RegInRef" because the base registers are identical,
  1083. and the first one was checked already. [Kit] }
  1084. (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
  1085. ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
  1086. begin
  1087. { Can we convert these two LDR/STR instructions into a
  1088. single LDP/STP? }
  1089. OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
  1090. if (OffsetVal = ValidOffset) then
  1091. begin
  1092. if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
  1093. begin
  1094. { Convert:
  1095. LDR/STR reg0, [reg2, #ofs]
  1096. ...
  1097. LDR/STR reg1, [reg2, #ofs + 8] // 4 if registers are 32-bit
  1098. To:
  1099. LDP/STP reg0, reg1, [reg2, #ofs]
  1100. }
  1101. taicpu(p).opcode := TargetOpcode;
  1102. if TargetOpcode = A_STP then
  1103. DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
  1104. else
  1105. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
  1106. taicpu(p).ops := 3;
  1107. taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
  1108. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  1109. asml.Remove(hp1);
  1110. hp1.Free;
  1111. Result := True;
  1112. Exit;
  1113. end;
  1114. end
  1115. else if (OffsetVal = -ValidOffset) then
  1116. begin
  1117. if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
  1118. begin
  1119. { Convert:
  1120. LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
  1121. ...
  1122. LDR/STR reg1, [reg2, #ofs]
  1123. To:
  1124. LDP/STP reg1, reg0, [reg2, #ofs]
  1125. }
  1126. taicpu(p).opcode := TargetOpcode;
  1127. if TargetOpcode = A_STP then
  1128. DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
  1129. else
  1130. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
  1131. taicpu(p).ops := 3;
  1132. taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
  1133. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  1134. taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
  1135. asml.Remove(hp1);
  1136. hp1.Free;
  1137. Result := True;
  1138. Exit;
  1139. end;
  1140. end;
  1141. end;
  1142. end
  1143. else
  1144. Break;
  1145. { Don't continue looking for LDR/STR pairs if the address register
  1146. gets modified }
  1147. if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
  1148. Break;
  1149. hp1_last := hp1;
  1150. end;
  1151. end;
  1152. end;
  1153. function TCpuAsmOptimizer.OptPass2MOV(var p: tai): Boolean;
  1154. var
  1155. hp1: tai;
  1156. X: Integer;
  1157. begin
  1158. Result := False;
  1159. { Merge MOV and CSEL instructions left behind by OptPass2B - that is,
  1160. change:
  1161. mov r0,r1
  1162. csel r0,r2,r0,cond
  1163. To:
  1164. csel r0,r2,r1,cond
  1165. (Also if r0 is the second operand)
  1166. }
  1167. if (taicpu(p).oper[1]^.typ = top_reg) and
  1168. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1169. (hp1.typ = ait_instruction) and
  1170. (taicpu(hp1).opcode = A_CSEL) and
  1171. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1172. not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1) then
  1173. begin
  1174. { Use "Result" to note if a change was made so we only have to do
  1175. expensive register allocation once }
  1176. for X := 1 to 2 do
  1177. if (taicpu(hp1).oper[X]^.reg = taicpu(p).oper[0]^.reg) then
  1178. begin
  1179. taicpu(hp1).oper[X]^.reg := taicpu(p).oper[1]^.reg;
  1180. Result := True;
  1181. end;
  1182. if Result then
  1183. begin
  1184. DebugMsg(SPeepholeOptimization + 'MovCSel2CSel', p);
  1185. { Don't need to allocate the zero register - so save time by
  1186. skipping it in this case }
  1187. if getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR then
  1188. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  1189. RemoveCurrentP(p);
  1190. Exit;
  1191. end;
  1192. end;
  1193. end;
  1194. function TCpuAsmOptimizer.PostPeepholeOptAND(var p: tai): Boolean;
  1195. var
  1196. hp1, hp2: tai;
  1197. hp3: taicpu;
  1198. bitval : cardinal;
  1199. begin
  1200. Result:=false;
  1201. {
  1202. and reg1,reg0,<const=power of 2>
  1203. cmp reg1,#0
  1204. <reg1 end of life>
  1205. b.eq/b.ne label
  1206. into
  1207. tb(n)z reg0,<bit index>,label
  1208. }
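  { e.g. (illustrative, mask 8 = bit 3):
      and w1,w0,#8
      cmp w1,#0
      b.ne .L1
    becomes
      tbnz w0,#3,.L1 }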
  1209. if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
  1210. (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
  1211. GetNextInstruction(p,hp1) and
  1212. MatchInstruction(hp1,A_CMP,[PF_None]) and
  1213. MatchOpType(taicpu(hp1),top_reg,top_const) and
  1214. (taicpu(hp1).oper[1]^.val=0) and
  1215. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
  1216. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1217. GetNextInstruction(hp1,hp2) and
  1218. MatchInstruction(hp2,A_B,[PF_None]) and
  1219. (taicpu(hp2).condition in [C_EQ,C_NE]) then
  1220. begin
  1221. bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
  1222. case taicpu(hp2).condition of
  1223. C_NE:
  1224. hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
  1225. C_EQ:
  1226. hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
  1227. else
  1228. Internalerror(2021100201);
  1229. end;
  1230. taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
  1231. asml.insertbefore(hp3, hp1);
  1232. RemoveInstruction(hp1);
  1233. RemoveInstruction(hp2);
  1234. RemoveCurrentP(p);
  1235. DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
  1236. Result:=true;
  1237. end;
  1238. end;
  1239. function TCpuAsmOptimizer.PostPeepholeOptCMP(var p : tai): boolean;
  1240. var
  1241. hp1,hp2: tai;
  1242. begin
  1243. Result:=false;
  1244. {
  1245. cmp reg0,#0
  1246. b.eq/b.ne label
  1247. into
  1248. cb(n)z reg0,label
  1249. }
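  { e.g. (illustrative):
      cmp x0,#0
      b.eq .L1
    becomes
      cbz x0,.L1 }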
  1250. if MatchOpType(taicpu(p),top_reg,top_const) and
  1251. (taicpu(p).oper[0]^.reg<>NR_SP) and
  1252. (taicpu(p).oper[1]^.val=0) and
  1253. GetNextInstruction(p,hp1) and
  1254. MatchInstruction(hp1,A_B,[PF_None]) and
  1255. (taicpu(hp1).condition in [C_EQ,C_NE]) then
  1256. begin
  1257. case taicpu(hp1).condition of
  1258. C_NE:
  1259. hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1260. C_EQ:
  1261. hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
  1262. else
  1263. Internalerror(2019090801);
  1264. end;
  1265. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  1266. asml.insertbefore(hp2, hp1);
  1267. asml.remove(p);
  1268. asml.remove(hp1);
  1269. p.free;
  1270. hp1.free;
  1271. p:=hp2;
  1272. DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
  1273. Result:=true;
  1274. end;
  1275. end;
  1276. function TCpuAsmOptimizer.PostPeepholeOptTST(var p : tai): boolean;
  1277. var
  1278. hp1: tai;
  1279. hp3: taicpu;
  1280. bitval : cardinal;
  1281. begin
  1282. Result:=false;
  1283. {
  1284. tst reg0,<const=power of 2>
  1285. b.eq/b.ne label
  1286. into
  1287. tb(n)z reg0,<bit index>,label
  1288. }
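  { e.g. (illustrative, mask 4 = bit 2):
      tst w0,#4
      b.eq .L1
    becomes
      tbz w0,#2,.L1 }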
      if MatchOpType(taicpu(p),top_reg,top_const) and
        (PopCnt(QWord(taicpu(p).oper[1]^.val))=1) and
        GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_B,[C_EQ,C_NE],[PF_None]) then
        begin
          bitval:=BsfQWord(qword(taicpu(p).oper[1]^.val));
          case taicpu(hp1).condition of
            C_NE:
              hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
            C_EQ:
              hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[0]^.reg,bitval,taicpu(hp1).oper[0]^.ref^);
            else
              Internalerror(2021100210);
          end;
          taicpu(hp3).fileinfo:=taicpu(p).fileinfo;
          asml.insertafter(hp3, p);
          RemoveInstruction(hp1);
          RemoveCurrentP(p, hp3);
          DebugMsg(SPeepholeOptimization + 'TST; B(E/NE) -> TB(Z/NZ) done', p);
          Result:=true;
        end;
    end;
  function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_SBFX,
            A_UBFX:
              Result:=OptPreSBFXUBFX(p);
            else
              ;
          end;
        end;
    end;


  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_B:
              Result:=OptPass1B(p);
            A_LDR:
              Result:=OptPass1LDR(p);
            A_STR:
              Result:=OptPass1STR(p);
            A_MOV:
              Result:=OptPass1Mov(p);
            A_MOVZ:
              Result:=OptPass1MOVZ(p);
            A_STP:
              Result:=OptPass1STP(p);
            A_LSR,
            A_ROR,
            A_ASR,
            A_LSL:
              Result:=OptPass1Shift(p);
            A_AND:
              Result:=OptPass1And(p);
            A_LSRV,
            A_RORV,
            A_ASRV,
            A_LSLV,
            A_UDIV,
            A_SDIV,
            A_NEG,
            A_CSEL,
            A_ADD,
            A_ADC,
            A_SUB,
            A_SBC,
            A_BIC,
            A_EOR,
            A_ORR,
            A_MUL:
              Result:=OptPass1Data(p);
            A_UXTB:
              Result:=OptPass1UXTB(p);
            A_UXTH:
              Result:=OptPass1UXTH(p);
            A_SXTB:
              Result:=OptPass1SXTB(p);
            A_SXTH:
              Result:=OptPass1SXTH(p);
            A_SXTW:
              Result:=OptPass1SXTW(p);
            // A_VLDR,
            A_FRINTA,
            A_FRINTI,
            A_FRINTM,
            A_FRINTN,
            A_FRINTP,
            A_FRINTX,
            A_FRINTZ,
            A_FCSEL,
            A_FMADD,
            A_FMSUB,
            A_FNMADD,
            A_FNMSUB,
            A_FNMUL,
            A_FADD,
            A_FMUL,
            A_FDIV,
            A_FSUB,
            A_FSQRT,
            A_FNEG,
            A_FCVT,
            A_FABS:
              Result:=OptPass1FData(p);
            A_FMOV:
              Result:=OptPass1FMov(p);
            else
              ;
          end;
        end;
    end;


  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_AND,
            A_BIC:
              Result := OptPass2Bitwise(p);
            A_B:
              Result := OptPass2B(p);
            A_CSEL:
              Result := OptPass2CSEL(p);
            A_MOV:
              Result := OptPass2MOV(p);
            A_LDR,
            A_STR:
              Result := OptPass2LDRSTR(p);
            A_TST:
              Result := OptPass2TST(p);
            else
              ;
          end;
        end;
    end;


  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_CMP:
              Result:=PostPeepholeOptCMP(p);
            A_AND:
              Result:=PostPeepholeOptAND(p);
            A_TST:
              Result:=PostPeepholeOptTST(p);
            else
              ;
          end;
        end;
    end;
  class procedure TCpuAsmOptimizer.UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai);
    begin
      { Update integer registers, ignoring deallocations }
      repeat
        while assigned(p) and
          ((p.typ in (SkipInstr - [ait_RegAlloc])) or
           (p.typ = ait_label) or
           ((p.typ = ait_marker) and
            (tai_Marker(p).Kind in [mark_AsmBlockEnd,mark_NoLineInfoStart,mark_NoLineInfoEnd]))) do
          p := tai(p.next);
        while assigned(p) and
          (p.typ=ait_RegAlloc) Do
          begin
            if (getregtype(tai_regalloc(p).reg) = R_INTREGISTER) then
              begin
                case tai_regalloc(p).ratype of
                  ra_alloc :
                    IncludeRegInUsedRegs(tai_regalloc(p).reg, AUsedRegs);
                  else
                    ;
                end;
              end;
            p := tai(p.next);
          end;
      until not(assigned(p)) or
        (not(p.typ in SkipInstr) and
         not((p.typ = ait_label) and
             labelCanBeSkipped(tai_label(p))));
    end;


  { Attempts to allocate a volatile integer register for use between p and hp,
    using AUsedRegs for the current register usage information. Returns NR_NO
    if no free register could be found }
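  { A typical call pattern, as a sketch only (the surrounding variable names are
    assumed for illustration and do not come from this unit):

        NewReg := GetIntRegisterBetween(R_SUBWHOLE, TmpUsedRegs, p, hp1);
        if NewReg <> NR_NO then
          // NewReg is free between p and hp1 (and, unless DontAlloc was True,
          // has already been allocated over that range) }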
  function TCpuAsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
    var
      RegSet: TCPURegisterSet;
      CurrentSuperReg: Integer;
      CurrentReg: TRegister;
      Currentp: tai;
      Breakout: Boolean;
    begin
      Result := NR_NO;
      RegSet :=
        paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
        current_procinfo.saved_regs_int;
(*
      { Don't use the frame register unless explicitly allowed (fixes i40111) }
      if ([cs_useebp, cs_userbp] * current_settings.optimizerswitches) = [] then
        Exclude(RegSet, RS_FRAME_POINTER_REG);
*)
      for CurrentSuperReg in RegSet do
        begin
          CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
          if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg) then
            begin
              Currentp := p;
              Breakout := False;
              while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
                begin
                  case Currentp.typ of
                    ait_instruction:
                      begin
                        if RegInInstruction(CurrentReg, Currentp) then
                          begin
                            Breakout := True;
                            Break;
                          end;
                        { Cannot allocate across an unconditional jump }
                        if is_calljmpmaybeuncondret(taicpu(Currentp).opcode) and (taicpu(Currentp).condition = C_None) then
                          Exit;
                      end;
                    ait_marker:
                      { Don't try anything more if a marker is hit }
                      Exit;
                    ait_regalloc:
                      if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                    else
                      ;
                  end;
                end;

              if Breakout then
                { Try the next register }
                Continue;

              { We have a free register available }
              Result := CurrentReg;
              if not DontAlloc then
                AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
              Exit;
            end;
        end;
    end;
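
  { Scans forward from BlockStart (skipping non-instruction entries and keeping
    RegisterTracking up to date), accepting only MOV instructions that do not
    involve the stack pointer. The scan stops at a label or at an unconditional
    B to a plain label; the jump (if any) is returned in EndJump and the
    stopping tai in BlockStop. Returns False if anything else is encountered. }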
  function TCSELTracking.InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
    begin
      Result := False;
      EndJump := nil;
      BlockStop := nil;
      while (BlockStart <> fOptimizer.BlockEnd) and
        { stop on labels }
        (BlockStart.typ <> ait_label) do
        begin
          { Keep track of all integer registers that are used }
          fOptimizer.UpdateIntRegsNoDealloc(RegisterTracking, tai(OneBeforeBlock.Next));
          if BlockStart.typ = ait_instruction then
            begin
              if MatchInstruction(BlockStart, A_B, [C_None], []) then
                begin
                  if not IsJumpToLabel(taicpu(BlockStart)) or
                    (JumpTargetOp(taicpu(BlockStart))^.ref^.index <> NR_NO) then
                    Exit;
                  EndJump := BlockStart;
                  Break;
                end
              { Check to see if we have a valid MOV instruction instead }
              else if (taicpu(BlockStart).opcode <> A_MOV) or
                { Can't include the stack pointer in CSEL }
                fOptimizer.RegInInstruction(NR_SP, BlockStart) then
                begin
                  Exit;
                end
              else
                { This will be a valid MOV }
                fAllocationRange := BlockStart;
            end;
          OneBeforeBlock := BlockStart;
          fOptimizer.GetNextInstruction(BlockStart, BlockStart);
        end;

      if (BlockStart = fOptimizer.BlockEnd) then
        Exit;

      BlockStop := BlockStart;
      Result := True;
    end;
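
  { Returns a weighted instruction count for the MOV block between BlockStart
    and BlockStop, or -1 if the block cannot be converted to CSELs. MOVs with
    constant sources are only accepted when TryCSELConst can reserve a register
    for the value (and not when optimising for size). }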
  function TCSELTracking.AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
    var
      hp1: tai;
      RefModified: Boolean;
    begin
      Result := 0;
      hp1 := BlockStart;
      RefModified := False; { As long as the condition is inverted, this can be reset }
      while assigned(hp1) and
        (hp1 <> BlockStop) do
        begin
          case hp1.typ of
            ait_instruction:
              if MatchInstruction(hp1, A_MOV, []) then
                begin
                  Inc(Result);
                  if taicpu(hp1).oper[1]^.typ = top_reg then
                    begin
                      Inc(Result);
                    end
                  else if not (cs_opt_size in current_settings.optimizerswitches) and
                    { CSEL with constants grows the code size }
                    TryCSELConst(hp1, SearchStart, BlockStop, Result) then
                    begin
                      { Register was reserved by TryCSELConst and
                        stored on ConstRegs }
                    end
                  else
                    begin
                      Result := -1;
                      Exit;
                    end;
                end
              else
                begin
                  Result := -1;
                  Exit;
                end;
            else
              { Most likely an align };
          end;
          fOptimizer.GetNextInstruction(hp1, hp1);
        end;
    end;
  constructor TCSELTracking.Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);

    { For the tsBranching type, increase the weighting score to account for the new conditional jump
      (this is done as a separate stage because the double types are extensions of the branching type,
      but we can't discount the conditional jump until the last step) }
    procedure EvaluateBranchingType;
      begin
        Inc(CSELScore);
        if (CSELScore > MAX_CSEL_INSTRUCTIONS) then
          { Too many instructions to be worthwhile }
          fState := tsInvalid;
      end;

    var
      hp1: tai;
      Count: Integer;
    begin
      { Table of valid CSEL block types

        Block type                2nd Jump    Mid-label   2nd MOVs   3rd Jump   End-label
        -----------------------   ---------   ---------   --------   --------   ---------
        tsSimple                  X           Yes         X          X          X
        tsDetour                  = 1st       X           X          X          X
        tsBranching               <> Mid      Yes         X          X          X
        tsDouble                  End-label   Yes *       Yes        X          Yes
        tsDoubleBranchSame        <> Mid      Yes *       Yes        = 2nd      X
        tsDoubleBranchDifferent   <> Mid      Yes *       Yes        <> 2nd     X
        tsDoubleSecondBranching   End-label   Yes *       Yes        <> 2nd     Yes

        * Only one reference allowed
      }
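
      { As an illustration of the simplest (tsSimple) shape, a sketch only
        (registers, label and condition are invented, not real compiler output):

            cmp   x2,#0
            b.eq  .Lmid              cmp  x2,#0
            mov   x0,x1        =>    csel x0,x1,x0,ne
          .Lmid:

        The actual rewriting is performed later in TCSELTracking.Process. }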
      hp1 := nil; { To prevent compiler warnings }

      Optimizer.CopyUsedRegs(RegisterTracking);
      fOptimizer := Optimizer;
      fLabel := AFirstLabel;
      CSELScore := 0;
      ConstCount := 0;

      { Initialise RegWrites, ConstRegs, ConstVals, ConstSizes, ConstWriteSizes and ConstMovs }
      FillChar(RegWrites[0], MAX_CSEL_INSTRUCTIONS * 2 * SizeOf(TRegister), 0);
      FillChar(ConstRegs[0], MAX_CSEL_REGISTERS * SizeOf(TRegister), 0);
      FillChar(ConstVals[0], MAX_CSEL_REGISTERS * SizeOf(TCGInt), 0);
      FillChar(ConstSizes[0], MAX_CSEL_REGISTERS * SizeOf(TSubRegister), 0);
      FillChar(ConstWriteSizes[0], first_int_imreg * SizeOf(TOpSize), 0);
      FillChar(ConstMovs[0], MAX_CSEL_REGISTERS * SizeOf(taicpu), 0);

      fInsertionPoint := p_initialjump;
      fCondition := nil;
      fInitialJump := p_initialjump;
      fFirstMovBlock := p_initialmov;
      fFirstMovBlockStop := nil;
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fSecondMovBlockStop := nil;
      fMidLabel := nil;
      fSecondJump := nil;
      fSecondMovBlock := nil;
      fEndLabel := nil;
      fAllocationRange := nil;

      { Assume it all goes horribly wrong! }
      fState := tsInvalid;

      { Look backwards at the comparisons to get an accurate picture of register usage and a better position for any MOV const,reg insertions }
      if Optimizer.GetLastInstruction(p_initialjump, fCondition) and
        (
          MatchInstruction(fCondition, [A_CMP, A_CMN, A_TST], []) or
          (
            (fCondition.typ = ait_instruction) and
            (taicpu(fCondition).opcode = A_AND) and
            (taicpu(fCondition).oppostfix = PF_S)
          )
        ) then
        begin
          { Mark all the registers in the comparison as 'in use', even if they've just been deallocated }
          for Count := 0 to taicpu(fCondition).ops - 1 do
            with taicpu(fCondition).oper[Count]^ do
              case typ of
                top_reg:
                  if getregtype(reg) = R_INTREGISTER then
                    Optimizer.IncludeRegInUsedRegs(reg, RegisterTracking);
                top_ref:
                  begin
                    if (ref^.base <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.base, RegisterTracking);
                    if (ref^.index <> NR_NO) then
                      Optimizer.IncludeRegInUsedRegs(ref^.index, RegisterTracking);
                  end
                else
                  ;
              end;
          { When inserting instructions before the condition, try to insert them
            before the allocation of the FLAGS register }
          if not SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(fCondition.Previous)), fInsertionPoint) or
            (tai_regalloc(fInsertionPoint).ratype = ra_dealloc) then
            { If not found, set it equal to the condition so it's something sensible }
            fInsertionPoint := fCondition;
        end
      else
        fCondition := nil;
      { When inserting instructions, try to insert them before the allocation of the FLAGS register }
      if SetAndTest(Optimizer.FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p_initialjump.Previous)), hp1) and
        (tai_regalloc(hp1).ratype <> ra_dealloc) then
        { If found (and not a deallocation), move the insertion point there;
          otherwise it keeps its earlier value }
        fInsertionPoint := hp1;
      hp1 := p_initialmov;
      if not InitialiseBlock(p_initialmov, p_initialjump, fFirstMovBlockStop, fSecondJump) then
        Exit;

      hp1 := fFirstMovBlockStop; { Will either be on a label or a jump }
      if (hp1.typ <> ait_label) then { should be on a jump }
        begin
          if not Optimizer.GetNextInstruction(hp1, fMidLabel) or (fMidLabel.typ <> ait_label) then
            { Need a label afterwards }
            Exit;
        end
      else
        fMidLabel := hp1;

      if tai_label(fMidLabel).labsym <> AFirstLabel then
        { Not the correct label }
        fMidLabel := nil;

      if not Assigned(fSecondJump) and not Assigned(fMidLabel) then
        { If there's neither a 2nd jump nor correct label, then it's invalid
          (see above table) }
        Exit;

      { Analyse the first block of MOVs more closely }
      CSELScore := AnalyseMOVBlock(fFirstMovBlock, fFirstMovBlockStop, fInsertionPoint);

      if Assigned(fSecondJump) then
        begin
          if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = AFirstLabel) then
            begin
              fState := tsDetour
            end
          else
            begin
              { Need the correct mid-label for this one }
              if not Assigned(fMidLabel) then
                Exit;
              fState := tsBranching;
            end;
        end
      else
        { No jump, but mid-label is present }
        fState := tsSimple;

      if (CSELScore > MAX_CSEL_INSTRUCTIONS) or (CSELScore <= 0) then
        begin
          { Invalid or too many instructions to be worthwhile }
          fState := tsInvalid;
          Exit;
        end;
      { check further for
          b.cond xxx
          <several movs 1>
          b      yyy
        xxx:
          <several movs 2>
        yyy:
        etc.
      }
      if (fState = tsBranching) and
        { Estimate for required savings for extra jump }
        (CSELScore <= MAX_CSEL_INSTRUCTIONS - 1) and
        { Only one reference is allowed for double blocks }
        (AFirstLabel.getrefs = 1) then
        begin
          Optimizer.GetNextInstruction(fMidLabel, hp1);
          fSecondMovBlock := hp1;
          if not InitialiseBlock(fSecondMovBlock, fMidLabel, fSecondMovBlockStop, fThirdJump) then
            begin
              EvaluateBranchingType;
              Exit;
            end;

          hp1 := fSecondMovBlockStop; { Will either be on a label or a jump }
          if (hp1.typ <> ait_label) then { should be on a jump }
            begin
              if not Optimizer.GetNextInstruction(hp1, fEndLabel) or (fEndLabel.typ <> ait_label) then
                begin
                  { Need a label afterwards }
                  EvaluateBranchingType;
                  Exit;
                end;
            end
          else
            fEndLabel := hp1;

          if tai_label(fEndLabel).labsym <> JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol then
            { Second jump doesn't go to the end }
            fEndLabel := nil;

          if not Assigned(fThirdJump) and not Assigned(fEndLabel) then
            begin
              { If there's neither a 3rd jump nor a correct end label, then it's
                not a valid double block, but it is still a valid single
                branching block (see above table) }
              EvaluateBranchingType;
              Exit;
            end;

          Count := AnalyseMOVBlock(fSecondMovBlock, fSecondMovBlockStop, fMidLabel);
          if (Count > MAX_CSEL_INSTRUCTIONS) or (Count <= 0) then
            { Invalid or too many instructions to be worthwhile }
            Exit;

          Inc(CSELScore, Count);

          if Assigned(fThirdJump) then
            begin
              if not Assigned(fSecondJump) then
                fState := tsDoubleSecondBranching
              else if (JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol = JumpTargetOp(taicpu(fThirdJump))^.ref^.symbol) then
                fState := tsDoubleBranchSame
              else
                fState := tsDoubleBranchDifferent;
            end
          else
            fState := tsDouble;
        end;

      if fState = tsBranching then
        EvaluateBranchingType;
    end;

  { Tries to convert a "mov reg,#const" instruction into a CSEL by reserving a
    new register to store the constant }
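  { Illustrative sketch only (registers, value and condition are invented): a
    constant move inside a conditional block such as
        mov  w0,#10
    can be handled by hoisting the constant into a spare register, e.g. w16,
        mov  w16,#10        (inserted before the comparison)
    so that the block's MOV can later become
        csel w0,w16,w0,<cond>
    Only the register reservation and bookkeeping happen here; the actual
    rewriting is done in TCSELTracking.Process. }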
  function TCSELTracking.TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
    var
      RegSize: TSubRegister;
      CurrentVal: TCGInt;
      ANewReg: TRegister;
      X: ShortInt;
    begin
      Result := False;
      if not MatchOpType(taicpu(p), top_reg, top_const) then
        Exit;

      if ConstCount >= MAX_CSEL_REGISTERS then
        { Arrays are full }
        Exit;

      { See if the value has already been reserved for another CSEL instruction }
      CurrentVal := taicpu(p).oper[1]^.val;
      RegSize := getsubreg(taicpu(p).oper[0]^.reg);
      for X := 0 to ConstCount - 1 do
        if ConstVals[X] = CurrentVal then
          begin
            ConstRegs[ConstCount] := ConstRegs[X];
            ConstSizes[ConstCount] := RegSize;
            ConstVals[ConstCount] := CurrentVal;
            Inc(ConstCount);
            Inc(Count);
            Result := True;
            Exit;
          end;

      ANewReg := fOptimizer.GetIntRegisterBetween(R_SUBWHOLE, RegisterTracking, start, stop, True);
      if ANewReg = NR_NO then
        { No free registers }
        Exit;

      { Reserve the register so subsequent TryCSELConst calls don't all end
        up vying for the same register }
      fOptimizer.IncludeRegInUsedRegs(ANewReg, RegisterTracking);
      ConstRegs[ConstCount] := ANewReg;
      ConstSizes[ConstCount] := RegSize;
      ConstVals[ConstCount] := CurrentVal;
      Inc(ConstCount);
      Inc(Count);
      Result := True;
    end;


  destructor TCSELTracking.Done;
    begin
      TAOptObj.ReleaseUsedRegs(RegisterTracking);
    end;
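
  { Rewrites the recorded MOV blocks as CSEL instructions: the second block (if
    any) is processed first using the jump's condition, then the first block
    using the inverted condition, after which the now-redundant jumps and label
    references are cleaned up according to fState. }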
  procedure TCSELTracking.Process(out new_p: tai);
    var
      Count, Writes: LongInt;
      RegMatch: Boolean;
      hp1, hp_new: tai;
      inverted_condition, condition: TAsmCond;
    begin
      if (fState in [tsInvalid, tsProcessed]) then
        InternalError(2023110702);

      { Repurpose RegisterTracking to mark registers that we've defined }
      RegisterTracking[R_INTREGISTER].Clear;
      Count := 0;
      Writes := 0;

      condition := taicpu(fInitialJump).condition;
      inverted_condition := inverse_cond(condition);

      { Exclude tsDoubleBranchDifferent from this check, as the second block
        doesn't get CSELs in this case }
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleSecondBranching]) then
        begin
          { Include the jump in the flag tracking }
          if Assigned(fThirdJump) then
            begin
              if (fState = tsDoubleBranchSame) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fThirdJump, hp1) then
                    InternalError(2023110712);
                end
              else
                hp1 := fThirdJump;
            end
          else
            hp1 := fSecondMovBlockStop;
        end
      else
        begin
          { Include a conditional jump in the flag tracking }
          if Assigned(fSecondJump) then
            begin
              if (fState = tsDetour) then
                begin
                  { Will be an unconditional jump, so track to the instruction before it }
                  if not fOptimizer.GetLastInstruction(fSecondJump, hp1) then
                    InternalError(2023110713);
                end
              else
                hp1 := fSecondJump;
            end
          else
            hp1 := fFirstMovBlockStop;
        end;

      fOptimizer.AllocRegBetween(NR_DEFAULTFLAGS, fInitialJump, hp1, fOptimizer.UsedRegs);

      { Process the second set of MOVs first, because if a destination
        register is shared between the first and second MOV sets, it is more
        efficient to turn the first one into a MOV instruction and place it
        before the CMP if possible, but we won't know which registers are
        shared until we've processed at least one list, so we might as well
        make it the second one since that won't be modified again. }
      if (fState in [tsDouble, tsDoubleBranchSame, tsDoubleBranchDifferent, tsDoubleSecondBranching]) then
        begin
          hp1 := fSecondMovBlock;
          repeat
            if not Assigned(hp1) then
              InternalError(2018062902);

            if (hp1.typ = ait_instruction) then
              begin
                { Extra safeguard }
                if (taicpu(hp1).opcode <> A_MOV) then
                  InternalError(2018062903);

                { Note: tsDoubleBranchDifferent is essentially identical to
                  tsBranching and the 2nd block is best left largely
                  untouched, but we need to evaluate which registers the MOVs
                  write to in order to track what would be complementary CSEL
                  pairs that can be further optimised. [Kit] }
                if fState <> tsDoubleBranchDifferent then
                  begin
                    if taicpu(hp1).oper[1]^.typ = top_const then
                      begin
                        RegMatch := False;

                        for Count := 0 to ConstCount - 1 do
                          if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                            (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                            begin
                              RegMatch := True;

                              { If it's in RegisterTracking, then this register
                                is being used more than once and hence has
                                already had its value defined (it gets added to
                                UsedRegs through AllocRegBetween below) }
                              if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                                begin
                                  hp_new := tai(hp1.getcopy);
                                  taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                                  taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;

                                  fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                                  fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);

                                  ConstMovs[Count] := hp_new;
                                end
                              else
                                { We just need an instruction between the insertion
                                  point and hp1 where we know the register is marked
                                  as in use }
                                hp_new := fSecondMovBlock;

                              { Keep track of largest write for this register so it can be optimised later }
                              if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                                ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);

                              fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                              taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                              Break;
                            end;

                        if not RegMatch then
                          InternalError(2021100413);
                      end;

                    taicpu(hp1).opcode := A_CSEL;
                    taicpu(hp1).ops := 4;
                    taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
                    taicpu(hp1).loadconditioncode(3, condition);
                  end;

                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;

            fOptimizer.GetNextInstruction(hp1, hp1);
          until (hp1 = fSecondMovBlockStop);
        end;

      { Now do the first set of MOVs }
      hp1 := fFirstMovBlock;
      repeat
        if not Assigned(hp1) then
          InternalError(2018062904);

        if (hp1.typ = ait_instruction) then
          begin
            RegMatch := False;

            { Extra safeguard }
            if (taicpu(hp1).opcode <> A_MOV) then
              InternalError(2018062905);

            { Search through the RegWrites list to see if there are any
              opposing CSEL pairs that write to the same register }
            for Count := 0 to Writes - 1 do
              if (RegWrites[Count] = taicpu(hp1).oper[0]^.reg) then
                begin
                  { We have a match. Keep this as a MOV }
                  { Move ahead in preparation }
                  fOptimizer.GetNextInstruction(hp1, hp1);
                  RegMatch := True;
                  Break;
                end;

            if RegMatch then
              Continue;

            if taicpu(hp1).oper[1]^.typ = top_const then
              begin
                for Count := 0 to ConstCount - 1 do
                  if (ConstVals[Count] = taicpu(hp1).oper[1]^.val) and
                    (getsubreg(taicpu(hp1).oper[0]^.reg) = ConstSizes[Count]) then
                    begin
                      RegMatch := True;

                      { If it's in RegisterTracking, then this register is
                        being used more than once and hence has already had
                        its value defined (it gets added to UsedRegs through
                        AllocRegBetween below) }
                      if not RegisterTracking[R_INTREGISTER].IsUsed(ConstRegs[Count]) then
                        begin
                          hp_new := tai(hp1.getcopy);
                          taicpu(hp_new).oper[0]^.reg := ConstRegs[Count];
                          taicpu(hp_new).fileinfo := taicpu(fInitialJump).fileinfo;

                          fOptimizer.asml.InsertBefore(hp_new, fInsertionPoint);
                          fOptimizer.IncludeRegInUsedRegs(ConstRegs[Count], RegisterTracking);

                          ConstMovs[Count] := hp_new;
                        end
                      else
                        { We just need an instruction between the insertion point
                          and hp1 where we know the register is marked as in use }
                        hp_new := fFirstMovBlock;

                      { Keep track of largest write for this register so it can be optimised later }
                      if (getsubreg(taicpu(hp1).oper[0]^.reg) > ConstWriteSizes[getsupreg(ConstRegs[Count])]) then
                        ConstWriteSizes[getsupreg(ConstRegs[Count])] := getsubreg(taicpu(hp1).oper[0]^.reg);

                      fOptimizer.AllocRegBetween(ConstRegs[Count], hp_new, hp1, fOptimizer.UsedRegs);
                      taicpu(hp1).loadreg(1, newreg(R_INTREGISTER, getsupreg(ConstRegs[Count]), ConstSizes[Count]));
                      Break;
                    end;

                if not RegMatch then
                  InternalError(2021100412);
              end;

            taicpu(hp1).opcode := A_CSEL;
            taicpu(hp1).ops := 4;
            taicpu(hp1).loadreg(2, taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).loadconditioncode(3, inverted_condition);

            if (fState = tsDoubleBranchDifferent) then
              begin
                { Store these writes to search for duplicates later on }
                RegWrites[Writes] := taicpu(hp1).oper[0]^.reg;
                Inc(Writes);
              end;
          end;

        fOptimizer.GetNextInstruction(hp1, hp1);
      until (hp1 = fFirstMovBlockStop);

      { Update initialisation MOVs to the smallest possible size }
      for Count := 0 to ConstCount - 1 do
        if Assigned(ConstMovs[Count]) then
          setsubreg(taicpu(ConstMovs[Count]).oper[0]^.reg, ConstWriteSizes[Word(ConstRegs[Count])]);

      case fState of
        tsSimple:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Simple type)', fInitialJump);
            { No branch to delete }
          end;
        tsDetour:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Detour type)', fInitialJump);
            { Preserve jump }
          end;
        tsBranching, tsDoubleBranchDifferent:
          begin
            if (fState = tsBranching) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Branching type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (different) type)', fInitialJump);
            taicpu(fSecondJump).condition := inverted_condition;
          end;
        tsDouble, tsDoubleBranchSame:
          begin
            if (fState = tsDouble) then
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double type)', fInitialJump)
            else
              fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double branching (same) type)', fInitialJump);
            { Delete second jump }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
          end;
        tsDoubleSecondBranching:
          begin
            fOptimizer.DebugMsg(SPeepholeOptimization + 'CSEL Block (Double, second branching type)', fInitialJump);
            { Delete second jump, preserve third jump as conditional }
            JumpTargetOp(taicpu(fSecondJump))^.ref^.symbol.decrefs;
            fOptimizer.RemoveInstruction(fSecondJump);
            taicpu(fThirdJump).condition := condition;
          end;
        else
          InternalError(2023110721);
      end;

      { Now we can safely decrement the reference count }
      tasmlabel(fLabel).decrefs;

      fOptimizer.UpdateUsedRegs(tai(fInitialJump.next));

      { Remove the original jump }
      fOptimizer.RemoveInstruction(fInitialJump); { Note, the choice to not use RemoveCurrentp is deliberate }

      new_p := fFirstMovBlock; { Appears immediately after the initial jump }

      fState := tsProcessed;
    end;

begin
  casmoptimizer:=TCpuAsmOptimizer;
End.