
{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the ARM64 optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}

Unit aoptcpu;

{$i fpcdefs.inc}

{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$endif EXTDEBUG}

Interface

uses
  globtype, globals,
  cutils,
  cgbase, cpubase, aasmtai, aasmcpu,
  aopt, aoptcpub, aoptarm;

Type
  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PrePeepHoleOptsCpu(var p: tai): boolean; override;
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    function LookForPostindexedPattern(var p: tai): boolean;
  public
    { These routines share optimisation code that is general to all ARM platforms }
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  private
    function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
    function OptPass1Shift(var p: tai): boolean;
    function OptPostCMP(var p: tai): boolean;
    function OptPostAnd(var p: tai): Boolean;
    function OptPass1Data(var p: tai): boolean;
    function OptPass1FData(var p: tai): Boolean;
    function OptPass1STP(var p: tai): boolean;
    function OptPass1Mov(var p: tai): boolean;
    function OptPass1MOVZ(var p: tai): boolean;
    function OptPass1FMov(var p: tai): Boolean;
    function OptPass1SXTW(var p: tai): Boolean;
    function OptPass2LDRSTR(var p: tai): boolean;
  End;

Implementation

  uses
    aasmbase,
    aoptutils,
    cgutils,
    verbose;

{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}

  function CanBeCond(p : tai) : boolean;
    begin
      result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
    end;

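  { Returns True when hp overwrites reg with a new value (rather than merely
    reading or updating it); this is what allows a later copy of reg to be
    folded away by the routines below. }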
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operations do not write into a register at all.
          LDR/STR with post-/pre-indexed operations do not need special treatment,
          because post-/pre-indexing does not mean that a register is loaded
          with a new value, it is only modified. }
        A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
          exit;
        else
          ;
      end;

      if p.ops=0 then
        exit;

      case p.oper[0]^.typ of
        top_reg:
          Result := SuperRegistersEqual(p.oper[0]^.reg,reg);
        top_ref:
          Result :=
            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (taicpu(p).oper[0]^.ref^.base = reg);
        else
          ;
      end;
    end;

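  { Returns True if hp reads reg, either directly as a source operand or as
    the base/index register of a memory reference. }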
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);

      i:=1;
      { Start on oper[0]? }
      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
        i:=0;

      while(i<p.ops) do
        begin
          case p.oper[i]^.typ of
            top_reg:
              Result := (p.oper[i]^.reg = reg);
            top_ref:
              Result :=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
            else
              ;
          end;
          { Bail out if we found something }
          if Result then
            exit;
          Inc(i);
        end;
    end;

  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const

    into

      ldr/str regX,[reg1], regY/const
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai): boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.typ = top_ref) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
        (taicpu(p).oper[1]^.ref^.index=NR_NO) and
        (taicpu(p).oper[1]^.ref^.offset=0) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
        MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
        (
          { valid offset? }
          (taicpu(hp1).oper[2]^.typ=top_const) and
          (taicpu(hp1).oper[2]^.val>=-256) and
          (abs(taicpu(hp1).oper[2]^.val)<256)
        ) and
        { don't apply the optimization if the base register is loaded }
        (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
        begin
          if taicpu(p).opcode = A_LDR then
            DebugMsg(SPeepholeOptimization + 'LdrAdd/Sub2Ldr Postindex done', p)
          else
            DebugMsg(SPeepholeOptimization + 'StrAdd/Sub2Str Postindex done', p);

          taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).opcode=A_ADD then
            taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
          else
            taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;

          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;

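  { If movp is an FMOV that merely copies the result register of p into another
    MM register, and the original result register dies right after the move,
    rewrite p to write to the FMOV's destination directly and delete the FMOV. }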
  function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      Result:=false;
      if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
          ) { or
          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
         ) and
         (taicpu(movp).ops=2) and
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { Take care to only do this for instructions which REALLY load to the first register.
           Otherwise
             str reg0, [reg1]
             fmov reg2, reg0
           will be optimized to
             str reg2, [reg1]
         }
         RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg(SPeepholeOptimization + optimizer + ' removed superfluous fmov', movp);
              result:=true;

              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { change
                  vldr reg0,[reg1]
                  vmov reg2,reg0
                into
                  ldr reg2,[reg1]
                if reg2 is an int register
              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
                taicpu(p).opcode:=A_LDR;
              }

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;

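  { LDR: try the generic ARM LDR optimisations first, then fold a following
    add/sub of the base register into a post-indexed load, and finally remove
    a superfluous MOV of the loaded register. }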
  function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result := False;

      if inherited OptPass1LDR(p) or
        LookForPostindexedPattern(p) then
        Exit(True)
      else if (taicpu(p).oppostfix in [PF_B,PF_SB,PF_H,PF_SH,PF_None]) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'Ldr<Postfix>Mov2Ldr<Postfix>') then
        Exit(true);
    end;

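  { STR: try the generic ARM STR optimisations first, then fold a following
    add/sub of the base register into a post-indexed store. }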
  function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
    begin
      Result := False;

      if inherited OptPass1STR(p) or
        LookForPostindexedPattern(p) then
        Exit(True);
    end;

  function TCpuAsmOptimizer.OptPass1Shift(var p: tai): boolean;
    var
      hp1,hp2: tai;
      I2, I: Integer;
      shifterop: tshifterop;
    begin
      Result:=false;
      { This folds shifterops into following instructions
          <shiftop> r0, r1, #imm
          <op> r2, r3, r0
        to
          <op> r2, r3, r1, <shiftop> #imm
      }
      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
        MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
                               A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
                               A_SUB, A_TST], [PF_None]) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
        (taicpu(hp1).ops >= 2) and
        { Currently we can't fold into another shifterop }
        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
        { SP does not fully work with shifted registers; as I didn't find the exact rules,
          we do not operate on SP }
        (taicpu(hp1).oper[0]^.reg<>NR_SP) and
        (taicpu(hp1).oper[1]^.reg<>NR_SP) and
        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        (
          { Only ONE of the two src operands is allowed to match }
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
        ) and
        { for SUB, the last operand must match, there is no RSB on AArch64 }
        ((taicpu(hp1).opcode<>A_SUB) or
         MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
        begin
          { for the two-operand instructions, also start at the second operand, as they are
            not always commutative (this depends on the flags tested later on) and thus the
            operands cannot be swapped }
          I2:=1;
          for I:=I2 to taicpu(hp1).ops-1 do
            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
              begin
                { If the parameter matched on the second op from the RIGHT,
                  we have to switch the parameters; this will not happen for CMP,
                  where we're only evaluating the rightmost parameter }
                shifterop_reset(shifterop);
                case taicpu(p).opcode of
                  A_LSL:
                    shifterop.shiftmode:=SM_LSL;
                  A_ROR:
                    shifterop.shiftmode:=SM_ROR;
                  A_LSR:
                    shifterop.shiftmode:=SM_LSR;
                  A_ASR:
                    shifterop.shiftmode:=SM_ASR;
                  else
                    InternalError(2019090401);
                end;
                shifterop.shiftimm:=taicpu(p).oper[2]^.val;

                if I <> taicpu(hp1).ops-1 then
                  begin
                    if taicpu(hp1).ops = 3 then
                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                        taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
                        taicpu(p).oper[1]^.reg, shifterop)
                    else
                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                        shifterop);
                  end
                else
                  if taicpu(hp1).ops = 3 then
                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
                      taicpu(p).oper[1]^.reg,shifterop)
                  else
                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                      taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                      shifterop);

                { Make sure the register used in the shifting is tracked all
                  the way through, otherwise it may become deallocated while
                  it's still live and cause incorrect optimisations later }
                if (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
                    AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, TmpUsedRegs);
                  end;

                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
                asml.insertbefore(hp2, hp1);
                RemoveInstruction(hp1);
                RemoveCurrentp(p);
                DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
                Result:=true;
                break;
              end;
        end
      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
        Result:=true;
    end;

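  { Data-processing instructions (ADD, SUB, AND, ORR, MUL, ...): if the result
    is only copied to another register by a following MOV and dies there,
    RemoveSuperfluousMove retargets the instruction and drops the MOV.
    A typical case:
        add  x2,x0,x1
        mov  x3,x2          // x2 not used afterwards
    becomes
        add  x3,x0,x1 }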
  function TCpuAsmOptimizer.OptPass1Data(var p: tai): boolean;
    var
      hp1: tai;
    begin
      Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
    end;

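  { Floating-point data instructions: same idea as OptPass1Data, but the copy
    to fold is an FMOV between MM registers (see RemoveSuperfluousFMov above). }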
  function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
    end;

  function TCpuAsmOptimizer.OptPass1STP(var p: tai): boolean;
    var
      hp1, hp2, hp3, hp4: tai;
    begin
      Result:=false;
      {
        change
          stp x29,x30,[sp, #-16]!
          mov x29,sp
          bl abc
          ldp x29,x30,[sp], #16
          ret
        into
          b abc
      }
      if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
        MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
        (taicpu(p).oper[0]^.reg = NR_X29) and
        (taicpu(p).oper[1]^.reg = NR_X30) and
        (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[2]^.ref^.index=NR_NO) and
        (taicpu(p).oper[2]^.ref^.offset=-16) and
        (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
        GetNextInstruction(hp1, hp2) and
        SkipEntryExitMarker(hp2, hp2) and
        MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
        (taicpu(hp2).oper[0]^.typ = top_ref) and
        GetNextInstruction(hp2, hp3) and
        SkipEntryExitMarker(hp3, hp3) and
        MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
        MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
        (taicpu(hp3).oper[0]^.reg = NR_X29) and
        (taicpu(hp3).oper[1]^.reg = NR_X30) and
        (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
        (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
        (taicpu(hp3).oper[2]^.ref^.offset=16) and
        (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
        GetNextInstruction(hp3, hp4) and
        MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
        (taicpu(hp4).ops = 0) then
        begin
          asml.Remove(p);
          asml.Remove(hp1);
          asml.Remove(hp3);
          asml.Remove(hp4);
          taicpu(hp2).opcode:=A_B;
          p.free;
          hp1.free;
          hp3.free;
          hp4.free;
          p:=hp2;
          DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
          Result:=true;
        end;
    end;

  function TCpuAsmOptimizer.OptPass1Mov(var p: tai): boolean;
    var
      hp1: tai;
      so: tshifterop;
    begin
      Result:=false;
      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
        (taicpu(p).oppostfix=PF_None) then
        begin
          RemoveCurrentP(p);
          DebugMsg(SPeepholeOptimization + 'Mov2None done', p);
          Result:=true;
        end
      else if (taicpu(p).ops=2) and
        (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBD) and
        GetNextInstruction(p, hp1) and
        { Faster to get it out of the way than go through MatchInstruction }
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).ops=3) and
        MatchInstruction(hp1,[A_ADD,A_SUB],[taicpu(p).condition], [PF_None,PF_S]) and
        (getsubreg(taicpu(hp1).oper[2]^.reg)=R_SUBQ) and
        (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
        RegEndOfLife(taicpu(hp1).oper[2]^.reg,taicpu(hp1)) then
        begin
          DebugMsg(SPeepholeOptimization + 'MovOp2AddUtxw 1 done', p);
          shifterop_reset(so);
          so.shiftmode:=SM_UXTW;
          taicpu(hp1).ops:=4;
          taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
          taicpu(hp1).loadshifterop(3,so);
          RemoveCurrentP(p);
          Result:=true;
          exit;
        end
      {
        optimize
          mov rX, yyyy
          ....
      }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        begin
          if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
            Result:=true
          else if (taicpu(p).ops = 2) and
            (tai(hp1).typ = ait_instruction) and
            RedundantMovProcess(p,hp1) then
            Result:=true
        end;
    end;

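  { MOVZ: rewrite "movz reg,#0" as a plain move from the zero register when no
    MOVK/MOVN follows (those pairs are kept intact for macro-fusion), and drop
    a MOVZ whose result is overwritten by a later MOVZ of the same register. }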
  function TCpuAsmOptimizer.OptPass1MOVZ(var p: tai): boolean;
    var
      hp1: tai;
      ZeroReg: TRegister;
    begin
      Result := False;
      hp1 := nil;

      if (taicpu(p).oppostfix = PF_None) and (taicpu(p).condition = C_None) then
        begin
          if
            { Check next instruction first so hp1 gets set to something, then
              if it remains nil, we know for sure that there's no valid next
              instruction. }
            not GetNextInstruction(p, hp1) or
            { MOVZ and MOVK/MOVN instructions undergo macro-fusion. }
            not MatchInstruction(hp1, [A_MOVK, A_MOVN], [C_None], [PF_None]) or
            (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[0]^.reg) then
            begin
              if (taicpu(p).oper[1]^.val = 0) then
                begin
                  { Change:
                      movz reg,#0
                      (no movk or movn)
                    To:
                      mov  reg,xzr (or wzr)

                    Easier to perform other optimisations with registers
                  }
                  DebugMsg(SPeepholeOptimization + 'Movz0ToMovZeroReg', p);

                  { Make sure the zero register is the correct size }
                  ZeroReg := taicpu(p).oper[0]^.reg;
                  setsupreg(ZeroReg, RS_XZR);

                  taicpu(p).opcode := A_MOV;
                  taicpu(p).loadreg(1, ZeroReg);
                  Result := True;
                  Exit;
                end;
            end;

          {
            remove the first MOVZ in
              movz reg,...
              movz reg,...
            since its value is overwritten by the second one
          }
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1,A_MOVZ,[C_None],[PF_none]) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) then
            begin
              DebugMsg(SPeepholeOptimization + 'MovzMovz2Movz', p);
              RemoveCurrentP(p);
              Result:=true;
              exit;
            end;
        end;
    end;

  function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean;
    var
      hp1: tai;
      alloc, dealloc: tai_regalloc;
    begin
      {
        change
          fmov reg0,reg1
          fmov reg1,reg0
        into
          fmov reg0,reg1
      }
      Result := False;
      while GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
        begin
          asml.Remove(hp1);
          hp1.free;
          DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 1 done', p);
          Result:=true;
        end;

      { change
          fmov reg0,const
          fmov reg1,reg0
          dealloc reg0
        into
          fmov reg1,const
      }
      if MatchOpType(taicpu(p),top_reg,top_realconst) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
        MatchInstruction(hp1,A_FMOV,[taicpu(p).condition],[taicpu(p).oppostfix]) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^.reg) and
        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov 2 done', p);
          taicpu(hp1).loadrealconst(1,taicpu(p).oper[1]^.val_real);
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;
          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end;
      { not enabled as apparently not happening
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
         ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
        ) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
          AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
            taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
          if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
            taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
          RemoveCurrentP(p);
          Result:=true;
          exit;
        end;
      }
    end;

  function TCpuAsmOptimizer.OptPass1SXTW(var p: tai): Boolean;
    var
      hp1: tai;
      GetNextInstructionUsingReg_hp1: Boolean;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
              sxtw reg2,reg1
              str reg2,[...]
              dealloc reg2
            to
              str reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_None]) and
            (getsubreg(taicpu(hp1).oper[0]^.reg)=R_SUBD) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { the reference in the str must not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg(SPeepholeOptimization + 'SxtwStr2Str done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
              sxtw reg2,reg1
              sxtw reg3,reg2
              dealloc reg2
            to
              sxtw reg3,reg1
          }
          else if MatchInstruction(p, A_SXTW, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg(SPeepholeOptimization + 'SxtwSxtw2Sxtw done', p);
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              taicpu(hp1).opcode:=A_SXTW;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          else if USxtOp2Op(p,hp1,SM_SXTW) then
            Result:=true
          else if RemoveSuperfluousMove(p, hp1, 'SxtwMov2Data') then
            Result:=true;
        end;
    end;

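  { Pass 2: merge two LDR/STR instructions that access adjacent memory slots
    through the same base (and index) register into a single LDP/STP.
    A typical case:
        str  x0,[sp,#16]
        str  x1,[sp,#24]
    becomes
        stp  x0,x1,[sp,#16] }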
  function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean;
    var
      hp1, hp1_last: tai;
      ThisRegister: TRegister;
      OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
      TargetOpcode: TAsmOp;
    begin
      Result := False;

      ThisRegister := taicpu(p).oper[0]^.reg;

      case taicpu(p).opcode of
        A_LDR:
          TargetOpcode := A_LDP;
        A_STR:
          TargetOpcode := A_STP;
        else
          InternalError(2020081501);
      end;

      { reg appearing in ref invalidates these optimisations }
      if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
        begin
          { LDP/STP has a smaller permitted offset range than LDR/STR.

            TODO: For a group of out-of-range LDR/STR instructions, can
            we declare a temporary register equal to the offset base
            address, modify the STR instructions to use that register
            and then convert them to STP instructions?  Note that STR
            generally takes 2 cycles (on top of the memory latency),
            while LDP/STP takes 3.
          }
          if (getsubreg(ThisRegister) = R_SUBQ) then
            begin
              ValidOffset := 8;
              MinOffset := -512;
              MaxOffset := 504;
            end
          else
            begin
              ValidOffset := 4;
              MinOffset := -256;
              MaxOffset := 252;
            end;

          hp1_last := p;

          { Look for nearby LDR/STR instructions }
          if (taicpu(p).oppostfix = PF_NONE) and
            (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
            { If SkipGetNext is True, GetNextInstruction isn't called }
            while GetNextInstruction(hp1_last, hp1) do
              begin
                if (hp1.typ <> ait_instruction) then
                  Break;

                if (taicpu(hp1).opcode = taicpu(p).opcode) then
                  begin
                    if (taicpu(hp1).oppostfix = PF_NONE) and
                      { Registers need to be the same size }
                      (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
                      (
                        (TargetOpcode = A_STP) or
                        { LDP x0, x0, [sp, #imm] is undefined behaviour, even
                          though such an LDR pair should have been optimised
                          out by now. STP is okay }
                        (ThisRegister <> taicpu(hp1).oper[0]^.reg)
                      ) and
                      (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
                      (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
                      (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
                      { Make sure the address registers haven't changed }
                      not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
                      (
                        (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
                        not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
                      ) and
                      { Don't need to check "RegInRef" because the base registers are identical,
                        and the first one was checked already. [Kit] }
                      (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
                       ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
                      begin
                        { Can we convert these two LDR/STR instructions into a
                          single LDP/STP? }
                        OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
                        if (OffsetVal = ValidOffset) then
                          begin
                            if (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
                              begin
                                { Convert:
                                    LDR/STR reg0, [reg2, #ofs]
                                    ...
                                    LDR/STR reg1, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                  To:
                                    LDP/STP reg0, reg1, [reg2, #ofs]
                                }
                                taicpu(p).opcode := TargetOpcode;
                                if TargetOpcode = A_STP then
                                  DebugMsg(SPeepholeOptimization + 'StrStr2Stp', p)
                                else
                                  DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp', p);
                                taicpu(p).ops := 3;
                                taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
                                taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
                                asml.Remove(hp1);
                                hp1.Free;
                                Result := True;
                                Exit;
                              end;
                          end
                        else if (OffsetVal = -ValidOffset) then
                          begin
                            if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
                              begin
                                { Convert:
                                    LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
                                    ...
                                    LDR/STR reg1, [reg2, #ofs]
                                  To:
                                    LDP/STP reg1, reg0, [reg2, #ofs]
                                }
                                taicpu(p).opcode := TargetOpcode;
                                if TargetOpcode = A_STP then
                                  DebugMsg(SPeepholeOptimization + 'StrStr2Stp (reverse)', p)
                                else
                                  DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldp (reverse)', p);
                                taicpu(p).ops := 3;
                                taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
                                taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
                                taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
                                asml.Remove(hp1);
                                hp1.Free;
                                Result := True;
                                Exit;
                              end;
                          end;
                      end;
                  end
                else
                  Break;

                { Don't continue looking for LDR/STR pairs if the address register
                  gets modified }
                if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
                  Break;

                hp1_last := hp1;
              end;
        end;
    end;

  function TCpuAsmOptimizer.OptPostAnd(var p: tai): Boolean;
    var
      hp1, hp2: tai;
      hp3: taicpu;
      bitval : cardinal;
    begin
      Result:=false;
      {
        and  reg1,reg0,<const = power of 2>
        cmp  reg1,#0
        <reg1 end of life>
        b.eq/b.ne label
      into
        tb(n)z reg0,<log2(const)>,label
      }
      if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
        (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
        GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_CMP,[PF_None]) and
        MatchOpType(taicpu(hp1),top_reg,top_const) and
        (taicpu(hp1).oper[1]^.val=0) and
        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_B,[PF_None]) and
        (taicpu(hp2).condition in [C_EQ,C_NE]) then
        begin
          bitval:=BsfQWord(qword(taicpu(p).oper[2]^.val));
          case taicpu(hp2).condition of
            C_NE:
              hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
            C_EQ:
              hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitval,taicpu(hp2).oper[0]^.ref^);
            else
              Internalerror(2021100201);
          end;
          taicpu(hp3).fileinfo:=taicpu(hp1).fileinfo;
          asml.insertbefore(hp3, hp1);
          RemoveInstruction(hp1);
          RemoveInstruction(hp2);
          RemoveCurrentP(p);
          DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
          Result:=true;
        end;
    end;

  function TCpuAsmOptimizer.OptPostCMP(var p: tai): boolean;
    var
      hp1,hp2: tai;
    begin
      Result:=false;
      {
        cmp reg0,#0
        b.eq/b.ne label
      into
        cb(n)z reg0,label
      }
      if MatchOpType(taicpu(p),top_reg,top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_B,[PF_None]) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          case taicpu(hp1).condition of
            C_NE:
              hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
            C_EQ:
              hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
            else
              Internalerror(2019090801);
          end;
          taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
          asml.insertbefore(hp2, hp1);
          asml.remove(p);
          asml.remove(hp1);
          p.free;
          hp1.free;
          p:=hp2;
          DebugMsg(SPeepholeOptimization + 'CMPB.E/NE2CBNZ/CBZ done', p);
          Result:=true;
        end;
    end;

  function TCpuAsmOptimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_SBFX,
            A_UBFX:
              Result:=OptPreSBFXUBFX(p);
            else
              ;
          end;
        end;
    end;

  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_LDR:
              Result:=OptPass1LDR(p);
            A_STR:
              Result:=OptPass1STR(p);
            A_MOV:
              Result:=OptPass1Mov(p);
            A_MOVZ:
              Result:=OptPass1MOVZ(p);
            A_STP:
              Result:=OptPass1STP(p);
            A_LSR,
            A_ROR,
            A_ASR,
            A_LSL:
              Result:=OptPass1Shift(p);
            A_AND:
              Result:=OptPass1And(p);
            A_NEG,
            A_CSEL,
            A_ADD,
            A_ADC,
            A_SUB,
            A_SBC,
            A_BIC,
            A_EOR,
            A_ORR,
            A_MUL:
              Result:=OptPass1Data(p);
            A_UXTB:
              Result:=OptPass1UXTB(p);
            A_UXTH:
              Result:=OptPass1UXTH(p);
            A_SXTB:
              Result:=OptPass1SXTB(p);
            A_SXTH:
              Result:=OptPass1SXTH(p);
            A_SXTW:
              Result:=OptPass1SXTW(p);
            // A_VLDR,
            A_FMADD,
            A_FMSUB,
            A_FNMADD,
            A_FNMSUB,
            A_FNMUL,
            A_FADD,
            A_FMUL,
            A_FDIV,
            A_FSUB,
            A_FSQRT,
            A_FNEG,
            A_FCVT,
            A_FABS:
              Result:=OptPass1FData(p);
            A_FMOV:
              Result:=OptPass1FMov(p);
            else
              ;
          end;
        end;
    end;

  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_LDR,
            A_STR:
              Result:=OptPass2LDRSTR(p);
            else
              ;
          end;
        end;
    end;

  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ=ait_instruction then
        begin
          case taicpu(p).opcode of
            A_CMP:
              Result:=OptPostCMP(p);
            A_AND:
              Result:=OptPostAnd(p);
            else
              ;
          end;
        end;
    end;

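{ Register this optimizer class so it is used for the current CPU target }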
begin
  casmoptimizer:=TCpuAsmOptimizer;
End.