{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the ARM64 optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
Unit aoptcpu;

{$i fpcdefs.inc}

{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$endif EXTDEBUG}

Interface

uses
  globtype, globals,
  cutils,
  cgbase, cpubase, aasmtai, aasmcpu,
  aopt, aoptcpub, aoptarm;

Type
  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PrePeepHoleOptsCpu(var p: tai): boolean; override;
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
    function LookForPostindexedPattern(var p : tai) : boolean;
  public
    { These routines contain optimisation code that is general for all ARM platforms }
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  private
    function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
    function OptPass1Shift(var p: tai): boolean;
    function OptPostCMP(var p: tai): boolean;
    function OptPostAnd(var p: tai): Boolean;
    function OptPass1Data(var p: tai): boolean;
    function OptPass1FData(var p: tai): Boolean;
    function OptPass1STP(var p: tai): boolean;
    function OptPass1Mov(var p: tai): boolean;
    function OptPass1MOVZ(var p: tai): boolean;
    function OptPass1FMov(var p: tai): Boolean;
    function OptPass1B(var p: tai): boolean;
    function OptPass1SXTW(var p: tai): Boolean;
    function OptPass2LDRSTR(var p: tai): boolean;
  End;

Implementation

uses
  aasmbase,
  aoptutils,
  cgutils,
  verbose;

{$ifdef DEBUG_AOPTCPU}
const
  SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
{ Empty strings help the optimizer to remove string concatenations that won't
  ever appear to the user on release builds. [Kit] }
const
  SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}

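{ Returns true if p is an instruction that carries no condition (C_None),
  i.e. it could still be made conditional }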
function CanBeCond(p : tai) : boolean;
  begin
    result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
  end;


function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  var
    p: taicpu;
  begin
    Result := false;
    if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
      exit;
    p := taicpu(hp);

    case p.opcode of
      { These operations do not write into a register at all

        LDR/STR with post/pre-indexed operations do not need special treatment
        because post-/preindexed does not mean that a register
        is loaded with a new value, it is only modified }
      A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
        exit;
      else
        ;
    end;

    if p.ops=0 then
      exit;

    case p.oper[0]^.typ of
      top_reg:
        Result := SuperRegistersEqual(p.oper[0]^.reg,reg);
      top_ref:
        Result :=
          (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
          (taicpu(p).oper[0]^.ref^.base = reg);
      else
        ;
    end;
  end;

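{ Returns true if the instruction hp reads from reg, either as a source
  register operand or as the base/index register of a reference }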
function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  var
    p: taicpu;
    i: longint;
  begin
    Result := false;
    if not (assigned(hp) and (hp.typ = ait_instruction)) then
      exit;
    p:=taicpu(hp);

    i:=1;
    { Start on oper[0]? }
    if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
      i:=0;

    while(i<p.ops) do
      begin
        case p.oper[I]^.typ of
          top_reg:
            Result := (p.oper[I]^.reg = reg);
          top_ref:
            Result :=
              (p.oper[I]^.ref^.base = reg) or
              (p.oper[I]^.ref^.index = reg);
          else
            ;
        end;
        { Bail out if we found something }
        if Result then
          exit;
        Inc(I);
      end;
  end;

{
  optimize
    ldr/str regX,[reg1]
    ...
    add/sub reg1,reg1,regY/const

  into

    ldr/str regX,[reg1], regY/const
}
function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      (taicpu(p).oper[1]^.ref^.index=NR_NO) and
      (taicpu(p).oper[1]^.ref^.offset=0) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
      { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
      MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
      (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
      (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
      (
        { valid offset? }
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val>=-256) and
        (abs(taicpu(hp1).oper[2]^.val)<256)
      ) and
      { don't apply the optimization if the base register is loaded }
      (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
      not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
      not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
      begin
        if taicpu(p).opcode = A_LDR then
          DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
        else
          DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);
        taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
        if taicpu(hp1).opcode=A_ADD then
          taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
        else
          taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
        asml.Remove(hp1);
        hp1.Free;
        Result:=true;
      end;
  end;

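{
  removes the superfluous fmov in

    <op>  reg0,...
    ...
    fmov  reg1,reg0
    dealloc reg0

  by making <op> write to reg1 directly, provided <op> really loads reg0 with
  a new value and reg1 is not used in between (the MM-register counterpart of
  RemoveSuperfluousMove)
}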
function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
  var
    alloc,
    dealloc : tai_regalloc;
    hp1 : tai;
  begin
    Result:=false;
    if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
         ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
        ) { or
        (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
        (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
       ) and
      (taicpu(movp).ops=2) and
      MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
      { the destination register of the mov must not be used between p and movp }
      not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          str reg0, [reg1]
          fmov reg2, reg0
        will be optimized to
          str reg2, [reg1]
      }
      RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
      begin
        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
        if assigned(dealloc) then
          begin
            DebugMsg(SPeepholeOptimization + optimizer+' removed superfluous vmov', movp);
            result:=true;

            { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
              and remove it if possible }
            asml.Remove(dealloc);
            alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
            if assigned(alloc) then
              begin
                asml.Remove(alloc);
                alloc.free;
                dealloc.free;
              end
            else
              asml.InsertAfter(dealloc,p);

            { try to move the allocation of the target register }
            GetLastInstruction(movp,hp1);
            alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
            if assigned(alloc) then
              begin
                asml.Remove(alloc);
                asml.InsertBefore(alloc,p);
                { adjust used regs }
                IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
              end;

            { change
                vldr reg0,[reg1]
                vmov reg2,reg0
              into
                ldr reg2,[reg1]
              if reg2 is an int register
            if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
              taicpu(p).opcode:=A_LDR;
            }

            { finally get rid of the mov }
            taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
            asml.remove(movp);
            movp.free;
          end;
      end;
  end;

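{
  besides the generic ARM LDR optimisations and the postindexed pattern,
  change

    ldr<postfix> reg0,[...]
    mov          reg1,reg0
    dealloc      reg0

  into

    ldr<postfix> reg1,[...]
}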
function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    Result := False;

    if inherited OptPass1LDR(p) or
      LookForPostindexedPattern(p) then
      Exit(True)
    else if (taicpu(p).oppostfix in [PF_B,PF_SB,PF_H,PF_SH,PF_None]) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'Ldr<Postfix>Mov2Ldr<Postfix>') then
      Exit(true);
  end;

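{ generic ARM STR optimisations followed by the postindexed pattern }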
function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  begin
    Result := False;

    if inherited OptPass1STR(p) or
      LookForPostindexedPattern(p) then
      Exit(True);
  end;

function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
  var
    hp1,hp2: tai;
    I2, I: Integer;
    shifterop: tshifterop;
  begin
    Result:=false;
    { This folds shifterops into following instructions
        <shiftop> r0, r1, #imm
        <op> r2, r3, r0
      to
        <op> r2, r3, r1, <shiftop> #imm
    }
    { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
    if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
      MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
                             A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
                             A_SUB, A_TST], [PF_None]) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
      (taicpu(hp1).ops >= 2) and
      { Currently we can't fold into another shifterop }
      (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
      { SP does not work in all cases with shifted registers; as the exact rules
        are unclear, we do not operate on SP }
      (taicpu(hp1).oper[0]^.reg<>NR_SP) and
      (taicpu(hp1).oper[1]^.reg<>NR_SP) and
      (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
      { reg1 must not be modified in between }
      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
      (
        { Only ONE of the two src operands is allowed to match }
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
      ) and
      { for SUB, the last operand must match, there is no RSB on AArch64 }
      ((taicpu(hp1).opcode<>A_SUB) or
       MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
      begin
        { for the two operand instructions, start also at the second operand as they are not always commutative
          (depends on the flags tested later on) and thus the operands cannot be swapped }
        I2:=1;
        for I:=I2 to taicpu(hp1).ops-1 do
          if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
            begin
              { If the parameter matched on the second op from the RIGHT
                we have to switch the parameters, this will not happen for CMP
                where we're only evaluating the rightmost parameter
              }
              shifterop_reset(shifterop);
              case taicpu(p).opcode of
                A_LSL:
                  shifterop.shiftmode:=SM_LSL;
                A_ROR:
                  shifterop.shiftmode:=SM_ROR;
                A_LSR:
                  shifterop.shiftmode:=SM_LSR;
                A_ASR:
                  shifterop.shiftmode:=SM_ASR;
                else
                  InternalError(2019090401);
              end;
              shifterop.shiftimm:=taicpu(p).oper[2]^.val;
              if I <> taicpu(hp1).ops-1 then
                begin
                  if taicpu(hp1).ops = 3 then
                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
                      taicpu(p).oper[1]^.reg, shifterop)
                  else
                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                      shifterop);
                end
              else
                if taicpu(hp1).ops = 3 then
                  hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                    taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
                    taicpu(p).oper[1]^.reg,shifterop)
                else
                  hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                    taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                    shifterop);

              { Make sure the register used in the shifting is tracked all
                the way through, otherwise it may become deallocated while
                it's still live and cause incorrect optimisations later }
              if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
                begin
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
                  AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
                end;

              taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
              asml.insertbefore(hp2, hp1);
              RemoveInstruction(hp1);
              RemoveCurrentp(p);
              DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
              Result:=true;
              break;
            end;
      end
    else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
      Result:=true;
  end;

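{
  change
    <op> reg0,...
    ...
    mov  reg1,reg0
    dealloc reg0

  into

    <op> reg1,...

  for the integer data processing instructions (see RemoveSuperfluousMove)
}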
function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
  var
    hp1: tai;
  begin
    Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  end;

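{ same as OptPass1Data, but for the floating point instructions and a
  following fmov (see RemoveSuperfluousFMov) }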
function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
  end;

function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
  var
    hp1, hp2, hp3, hp4: tai;
  begin
    Result:=false;
    {
      change

        stp x29,x30,[sp, #-16]!
        mov x29,sp
        bl abc
        ldp x29,x30,[sp], #16
        ret

      into

        b abc
    }
    if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
      MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
      (taicpu(p).oper[0]^.reg = NR_X29) and
      (taicpu(p).oper[1]^.reg = NR_X30) and
      (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[2]^.ref^.index=NR_NO) and
      (taicpu(p).oper[2]^.ref^.offset=-16) and
      (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ = top_reg) and
      (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
      GetNextInstruction(hp1, hp2) and
      SkipEntryExitMarker(hp2, hp2) and
      MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      GetNextInstruction(hp2, hp3) and
      SkipEntryExitMarker(hp3, hp3) and
      MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
      MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
      (taicpu(hp3).oper[0]^.reg = NR_X29) and
      (taicpu(hp3).oper[1]^.reg = NR_X30) and
      (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
      (taicpu(hp3).oper[2]^.ref^.offset=16) and
      (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
      GetNextInstruction(hp3, hp4) and
      MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
      (taicpu(hp4).ops = 0) then
      begin
        asml.Remove(p);
        asml.Remove(hp1);
        asml.Remove(hp3);
        asml.Remove(hp4);
        taicpu(hp2).opcode:=A_B;
        p.free;
        hp1.free;
        hp3.free;
        hp4.free;
        p:=hp2;
        DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
        Result:=true;
      end;
  end;

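{
  mov optimisations, among them:

    mov reg0,reg0          -> instruction removed

    mov w0,w1
    add/sub x2,x3,x0       -> add/sub x2,x3,w1,uxtw  (if x0 is not used afterwards)

  plus removal of superfluous/redundant movs via RemoveSuperfluousMove and
  RedundantMovProcess
}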
function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
  var
    hp1: tai;
    so: tshifterop;
  begin
    Result:=false;
    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
      (taicpu(p).oppostfix=PF_None) then
      begin
        RemoveCurrentP(p);
        DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
        Result:=true;
      end
    else if (taicpu(p).ops=2) and
      (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
      GetNextInstruction(p, hp1) and
      { Faster to get it out of the way than go through MatchInstruction }
      (hp1.typ=ait_instruction) and
      (taicpu(hp1).ops=3) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
      (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
      (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
      RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'MovOp2AddUtxw 1 done', p);
        shifterop_reset(so);
        so.shiftmode:=SM_UXTW;
        taicpu(hp1).ops:=4;
        taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
        taicpu(hp1).loadshifterop(3,so);
        RemoveCurrentP(p);
        Result:=true;
        exit;
      end
    {
      optimize
      mov rX, yyyy
      ....
    }
    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
      begin
        if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
          Result:=true
        else if (taicpu(p).ops = 2) and
          (tai(hp1).typ = ait_instruction) and
          RedundantMovProcess(p,hp1) then
          Result:=true
      end;
  end;

function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
  var
    hp1: tai;
    ZeroReg: TRegister;
  begin
    Result := False;
    hp1 := nil;
    if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
      begin
        if
          { Check next instruction first so hp1 gets set to something, then
            if it remains nil, we know for sure that there's no valid next
            instruction. }
          not GetNextInstruction(p, hp1) or
          { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
          not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
          (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[0]^.reg) then
          begin
            if (taicpu(p).oper[1]^.val = 0) then
              begin
                { Change:
                    movz reg,#0
                    (no movk or movn)
                  To:
                    mov  reg,xzr (or wzr)

                  Easier to perform other optimisations with registers
                }
                DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);

                { Make sure the zero register is the correct size }
                ZeroReg := taicpu(p).oper[0]^.reg;
                setsupreg(ZeroReg, RS_XZR);

                taicpu(p).opcode := A_MOV;
                taicpu(p).loadreg(1, ZeroReg);
                Result := True;
                Exit;
              end;
          end;

        {
          a MOVZ that is overwritten by a later MOVZ to the same register
          can be removed:
            movz reg,...
            movz reg,...
        }
        if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
          MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
          MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end;
      end;
  end;

function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
  var
    hp1: tai;
    alloc, dealloc: tai_regalloc;
  begin
    {
      change
        fmov reg0,reg1
        fmov reg1,reg0
      into
        fmov reg0,reg1
    }
    Result := False;
    while GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
      MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
      MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
      begin
        asml.Remove(hp1);
        hp1.free;
        DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
        Result:=true;
      end;

    { change
        fmov reg0,const
        fmov reg1,reg0
        dealloc reg0
      into
        fmov reg1,const
    }
    if MatchOpType(taicpu(p),top_reg,top_realconst) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
      MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
      (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
      assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))
      then
      begin
        DebugMsg('Peephole FMovFMov2FMov 2 done', p);
        taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
        if assigned(alloc) and assigned(dealloc) then
          begin
            asml.Remove(alloc);
            alloc.Free;
            asml.Remove(dealloc);
            dealloc.Free;
          end;
        { p will be removed, update used register as we continue
          with the next instruction after p }
        result:=RemoveCurrentP(p);
      end;
    { not enabled as apparently not happening
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
      (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
       ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
      ) and
      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
      not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
        AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
        if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
          taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
        if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
          taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
        RemoveCurrentP(p);
        Result:=true;
        exit;
      end;
    }
  end;

function TCpuAsmOptimizer.OptPass1SXTW(var p : tai) : Boolean;
  var
    hp1: tai;
    GetNextInstructionUsingReg_hp1: Boolean;
  begin
    Result:=false;
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
      begin
        {
          change
            sxtw reg2,reg1
            str reg2,[...]
            dealloc reg2
          to
            str reg1,[...]
        }
        if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
          (taicpu(p).ops=2) and
          MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
          (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
          RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
          { the reference in the str must not use reg2 }
          not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
          { reg1 must not be modified in between }
          not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
          begin
            DebugMsg('Peephole SXTHStr2Str done', p);
            taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
            result:=RemoveCurrentP(p);
          end
        {
          change
            sxtw reg2,reg1
            sxtw reg3,reg2
            dealloc reg2
          to
            sxtw reg3,reg1
        }
        else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
          (taicpu(p).ops=2) and
          MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
          (taicpu(hp1).ops=2) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
          RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
          { reg1 must not be modified in between }
          not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
          begin
            DebugMsg('Peephole SxtwSxtw2Sxtw done', p);
            AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
            taicpu(hp1).opcode:=A_SXTW;
            taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
            result:=RemoveCurrentP(p);
          end
        else if USxtOp2Op(p,hp1,SM_SXTW) then
          Result:=true
        else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
          Result:=true;
      end;
  end;

function TCpuAsmOptimizer.OptPass1B(var p: tai): boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
    Invert: Boolean;
  begin
    Result := False;
    {
      convert
        b<c> .L1
        movz reg,#1
        b .L2
      .L1
        movz reg,#0 (or mov reg,xzr)
      .L2

      into
        cset reg,<not(c)>

      Also do the same if the constants are reversed, instead converting it to:
        cset reg,<c>
    }
    if (taicpu(p).condition <> C_None) and
      (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p, hp1) and
      { Check individually instead of using MatchInstruction in order to save time }
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).condition = C_None) and
      (taicpu(hp1).oppostfix = PF_None) and
      (taicpu(hp1).ops = 2) and
      (
        (
          (taicpu(hp1).opcode = A_MOVZ) and
          (taicpu(hp1).oper[1]^.val in [0, 1])
        ) or
        (
          (taicpu(hp1).opcode = A_MOV) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = RS_XZR)
        )
      ) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_B, [PF_None]) and
      (taicpu(hp2).condition = C_None) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      GetNextInstruction(hp2, hp3) and
      (hp3.typ = ait_label) and
      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol) = tai_label(hp3).labsym) and
      GetNextInstruction(hp3, hp4) and
      { As before, check individually instead of using MatchInstruction in order to save time }
      (hp4.typ = ait_instruction) and
      (taicpu(hp4).condition = C_None) and
      (taicpu(hp4).oppostfix = PF_None) and
      (taicpu(hp4).ops = 2) and
      (taicpu(hp4).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
      (
        (
          (taicpu(hp4).opcode = A_MOVZ) and
          (
            (
              { Check to confirm the following:
                - First mov is either "movz reg,#0" or "mov reg,xzr"
                - Second mov is "movz reg,#1"
              }
              (
                (taicpu(hp1).oper[1]^.typ = top_reg) { Will be the zero register } or
                (taicpu(hp1).oper[1]^.val = 0)
              ) and
              (taicpu(hp4).oper[1]^.val = 1)
            ) or
            (
              { Check to confirm the following:
                - First mov is "movz reg,#1"
                - Second mov is "movz reg,#0"
              }
              MatchOperand(taicpu(hp1).oper[1]^, 1) and
              (taicpu(hp4).oper[1]^.val = 0)
            )
          )
        ) or
        (
          { Check to confirm the following:
            - First mov is "movz reg,#1"
            - Second mov is "mov reg,xzr"
          }
          (taicpu(hp4).opcode = A_MOV) and
          (getsupreg(taicpu(hp4).oper[1]^.reg) = RS_XZR) and
          MatchOperand(taicpu(hp1).oper[1]^, 1)
        )
      ) and
      GetNextInstruction(hp4, hp5) and
      (hp5.typ = ait_label) and
      (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol) = tai_label(hp5).labsym) then
      begin
        Invert := MatchOperand(taicpu(hp1).oper[1]^, 1); { if true, hp4 will be mov reg,0 in some form }
        if Invert then
          taicpu(p).condition := inverse_cond(taicpu(p).condition);

        tai_label(hp3).labsym.DecRefs;

        { If this isn't the only reference to the middle label, we can
          still make a saving - only that the first jump and everything
          that follows will remain. }
        if (tai_label(hp3).labsym.getrefs = 0) then
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0bMovZ1 -> Cset(c)',p);

            { remove jump, first label and second MOV (also catching any aligns) }
            repeat
              if not GetNextInstruction(hp2, hp3) then
                InternalError(2022070801);
              RemoveInstruction(hp2);
              hp2 := hp3;
            until hp2 = hp5;

            { Don't decrement reference count before the removal loop
              above, otherwise GetNextInstruction won't stop on the
              label }
            tai_label(hp5).labsym.DecRefs;
          end
        else
          begin
            if Invert then
              DebugMsg(SPeepholeOptimization + 'B(c)Movz1BMovz0 -> Cset(~c) (partial)',p)
            else
              DebugMsg(SPeepholeOptimization + 'B(c)Movz0BMovz1 -> Cset(c) (partial)',p);
          end;

        taicpu(hp1).opcode := A_CSET;
        taicpu(hp1).loadconditioncode(1, taicpu(p).condition);
        RemoveCurrentP(p, hp1);
        Result:=true;
        exit;
      end;
  end;

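{
  change
    ldr/str reg0,[reg2,#ofs]
    ...
    ldr/str reg1,[reg2,#ofs+8]      (+4 for 32 bit registers)

  into

    ldp/stp reg0,reg1,[reg2,#ofs]

  the pair is also handled in descending offset order, as long as both
  offsets stay within the smaller LDP/STP offset range
}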
function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
  var
    hp1, hp1_last: tai;
    ThisRegister: TRegister;
    OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
    TargetOpcode: TAsmOp;
  begin
    Result := False;
    ThisRegister := taicpu(p).oper[0]^.reg;

    case taicpu(p).opcode of
      A_LDR:
        TargetOpcode := A_LDP;
      A_STR:
        TargetOpcode := A_STP;
      else
        InternalError(2020081501);
    end;

    { reg appearing in ref invalidates these optimisations }
    if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
      begin
        { LDP/STP has a smaller permitted offset range than LDR/STR.

          TODO: For a group of out-of-range LDR/STR instructions, can
          we declare a temporary register equal to the offset base
          address, modify the STR instructions to use that register
          and then convert them to STP instructions?  Note that STR
          generally takes 2 cycles (on top of the memory latency),
          while LDP/STP takes 3.
        }
        if (getsubreg(ThisRegister) = R_SUBQ) then
          begin
            ValidOffset := 8;
            MinOffset := -512;
            MaxOffset := 504;
          end
        else
          begin
            ValidOffset := 4;
            MinOffset := -256;
            MaxOffset := 252;
          end;

        hp1_last := p;

        { Look for nearby LDR/STR instructions }
        if (taicpu(p).oppostfix = PF_NONE) and
          (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
          { If SkipGetNext is True, GetNextInstruction isn't called }
          while GetNextInstruction(hp1_last, hp1) do
            begin
              if (hp1.typ <> ait_instruction) then
                Break;

              if (taicpu(hp1).opcode = taicpu(p).opcode) then
                begin
                  if (taicpu(hp1).oppostfix = PF_NONE) and
                    { Registers need to be the same size }
                    (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
                    (
                      (TargetOpcode = A_STP) or
                      { LDP x0, x0, [sp, #imm] is undefined behaviour, even
                        though such an LDR pair should have been optimised
                        out by now. STP is okay }
                      (ThisRegister <> taicpu(hp1).oper[0]^.reg)
                    ) and
                    (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
                    (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
                    (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
                    { Make sure the address registers haven't changed }
                    not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
                    (
                      (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
                      not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
                    ) and
                    { Don't need to check "RegInRef" because the base registers are identical,
                      and the first one was checked already. [Kit] }
                    (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
                     ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
                    begin
                      { Can we convert these two LDR/STR instructions into a
                        single LDP/STP? }
                      OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
                      if (OffsetVal = ValidOffset) then
                        begin
                          if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs]
                                  ...
                                  LDR/STR reg1, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                To:
                                  LDP/STP reg0, reg1, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end
                      else if (OffsetVal = -ValidOffset) then
                        begin
                          if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
                            begin
                              { Convert:
                                  LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                  ...
                                  LDR/STR reg1, [reg2, #ofs]
                                To:
                                  LDP/STP reg1, reg0, [reg2, #ofs]
                              }
                              taicpu(p).opcode := TargetOpcode;
                              if TargetOpcode = A_STP then
                                DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
                              else
                                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
                              taicpu(p).ops := 3;
                              taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
                              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
                              taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
                              asml.Remove(hp1);
                              hp1.Free;
                              Result := True;
                              Exit;
                            end;
                        end;
                    end;
                end
              else
                Break;

              { Don't continue looking for LDR/STR pairs if the address register
                gets modified }
              if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
                Break;

              hp1_last := hp1;
            end;
      end;
  end;

function TCpuAsmOptimizer.OptPostAnd(var p: tai): Boolean;
  var
    hp1, hp2: tai;
    hp3: taicpu;
    bitval : cardinal;
  begin
    Result:=false;
    {
      and reg1,reg0,<const=power of 2>
      cmp reg1,#0
      <reg1 end of life>
      b.eq/b.ne label

      into

      tb(n)z reg0,<power of 2>,label
    }
    if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
      (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_CMP,[PF_None]) and
      MatchOpType(taicpu(hp1),top_reg,top_const) and
      (taicpu(hp1).oper[1]^.val=0) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_B,[PF_None]) and
      (taicpu(hp2).condition in [C_EQ,C_NE]) then
      begin
        bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
        case taicpu(hp2).condition of
          C_NE:
            hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          C_EQ:
            hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
          else
            Internalerror(2021100201);
        end;
        taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
        asml.insertbefore(hp3, hp1);
        RemoveInstruction(hp1);
        RemoveInstruction(hp2);
        RemoveCurrentP(p);
        DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
        Result:=true;
      end;
  end;

function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
  var
    hp1,hp2: tai;
  begin
    Result:=false;
    {
      cmp reg0,#0
      b.eq/b.ne label

      into

      cb(n)z reg0,label
    }
    if MatchOpType(taicpu(p),top_reg,top_const) and
      (taicpu(p).oper[0]^.reg<>NR_SP) and
      (taicpu(p).oper[1]^.val=0) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_B,[PF_None]) and
      (taicpu(hp1).condition in [C_EQ,C_NE]) then
      begin
        case taicpu(hp1).condition of
          C_NE:
            hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
          C_EQ:
            hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
          else
            Internalerror(2019090801);
        end;
        taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
        asml.insertbefore(hp2, hp1);
        asml.remove(p);
        asml.remove(hp1);
        p.free;
        hp1.free;
        p:=hp2;
        DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
        Result:=true;
      end;
  end;

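{ opcode dispatchers for the individual peephole optimizer passes }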
function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
  begin
    result := false;
    if p.typ=ait_instruction then
      begin
        case taicpu(p).opcode of
          A_SBFX,
          A_UBFX:
            Result:=OptPreSBFXUBFX(p);
          else
            ;
        end;
      end;
  end;


function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  begin
    result := false;
    if p.typ=ait_instruction then
      begin
        case taicpu(p).opcode of
          A_B:
            Result:=OptPass1B(p);
          A_LDR:
            Result:=OptPass1LDR(p);
          A_STR:
            Result:=OptPass1STR(p);
          A_MOV:
            Result:=OptPass1Mov(p);
          A_MOVZ:
            Result:=OptPass1MOVZ(p);
          A_STP:
            Result:=OptPass1STP(p);
          A_LSR,
          A_ROR,
          A_ASR,
          A_LSL:
            Result:=OptPass1Shift(p);
          A_AND:
            Result:=OptPass1And(p);
          A_NEG,
          A_CSEL,
          A_ADD,
          A_ADC,
          A_SUB,
          A_SBC,
          A_BIC,
          A_EOR,
          A_ORR,
          A_MUL:
            Result:=OptPass1Data(p);
          A_UXTB:
            Result:=OptPass1UXTB(p);
          A_UXTH:
            Result:=OptPass1UXTH(p);
          A_SXTB:
            Result:=OptPass1SXTB(p);
          A_SXTH:
            Result:=OptPass1SXTH(p);
          A_SXTW:
            Result:=OptPass1SXTW(p);
          // A_VLDR,
          A_FMADD,
          A_FMSUB,
          A_FNMADD,
          A_FNMSUB,
          A_FNMUL,
          A_FADD,
          A_FMUL,
          A_FDIV,
          A_FSUB,
          A_FSQRT,
          A_FNEG,
          A_FCVT,
          A_FABS:
            Result:=OptPass1FData(p);
          A_FMOV:
            Result:=OptPass1FMov(p);
          else
            ;
        end;
      end;
  end;


function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  begin
    result := false;
    if p.typ=ait_instruction then
      begin
        case taicpu(p).opcode of
          A_LDR,
          A_STR:
            Result:=OptPass2LDRSTR(p);
          else
            ;
        end;
      end;
  end;


function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
  begin
    result := false;
    if p.typ=ait_instruction then
      begin
        case taicpu(p).opcode of
          A_CMP:
            Result:=OptPostCMP(p);
          A_AND:
            Result:=OptPostAnd(p);
          else
            ;
        end;
      end;
  end;

begin
  casmoptimizer:=TCpuAsmOptimizer;
End.