aoptarm.pas 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { while ARM and AAarch64 look not very similar at a first glance,
  27. several optimizations can be shared between both }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
  29. procedure DebugMsg(const s : string; p : tai);
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  31. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  37. function OptPass1LDR(var p: tai): Boolean; virtual;
  38. function OptPass1STR(var p: tai): Boolean; virtual;
  39. function OptPass1And(var p: tai): Boolean; virtual;
  40. End;
  41. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  42. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  43. {$ifdef AARCH64}
  44. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  45. {$endif AARCH64}
  46. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  47. function RefsEqual(const r1, r2: treference): boolean;
  48. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  49. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  50. Implementation
  51. uses
  52. cutils,verbose,globtype,globals,
  53. systems,
  54. cpuinfo,
  55. cgobj,procinfo,
  56. aasmbase,aasmdata,itcpugas;
  57. {$ifdef DEBUG_AOPTCPU}
  58. const
  59. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  60. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  61. begin
  62. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  63. end;
  64. {$else DEBUG_AOPTCPU}
  65. { Empty strings help the optimizer to remove string concatenations that won't
  66. ever appear to the user on release builds. [Kit] }
  67. const
  68. SPeepholeOptimization = '';
  69. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  70. begin
  71. end;
  72. {$endif DEBUG_AOPTCPU}
  73. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  74. begin
  75. result :=
  76. (instr.typ = ait_instruction) and
  77. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  78. ((cond = []) or (taicpu(instr).condition in cond)) and
  79. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  80. end;
  81. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  82. begin
  83. result :=
  84. (instr.typ = ait_instruction) and
  85. (taicpu(instr).opcode = op) and
  86. ((cond = []) or (taicpu(instr).condition in cond)) and
  87. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  88. end;
  89. {$ifdef AARCH64}
  90. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  91. var
  92. op : TAsmOp;
  93. begin
  94. result:=false;
  95. if instr.typ <> ait_instruction then
  96. exit;
  97. for op in ops do
  98. begin
  99. if (taicpu(instr).opcode = op) and
  100. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  101. begin
  102. result:=true;
  103. exit;
  104. end;
  105. end;
  106. end;
  107. {$endif AARCH64}
  108. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  109. begin
  110. result :=
  111. (instr.typ = ait_instruction) and
  112. (taicpu(instr).opcode = op) and
  113. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  114. end;
  115. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  116. begin
  117. result := (oper.typ = top_reg) and (oper.reg = reg);
  118. end;
  119. function RefsEqual(const r1, r2: treference): boolean;
  120. begin
  121. refsequal :=
  122. (r1.offset = r2.offset) and
  123. (r1.base = r2.base) and
  124. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  125. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  126. (r1.relsymbol = r2.relsymbol) and
  127. {$ifdef ARM}
  128. (r1.signindex = r2.signindex) and
  129. {$endif ARM}
  130. (r1.shiftimm = r2.shiftimm) and
  131. (r1.addressmode = r2.addressmode) and
  132. (r1.shiftmode = r2.shiftmode) and
  133. (r1.volatility=[]) and
  134. (r2.volatility=[]);
  135. end;
  136. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  137. begin
  138. result := oper1.typ = oper2.typ;
  139. if result then
  140. case oper1.typ of
  141. top_const:
  142. Result:=oper1.val = oper2.val;
  143. top_reg:
  144. Result:=oper1.reg = oper2.reg;
  145. top_conditioncode:
  146. Result:=oper1.cc = oper2.cc;
  147. top_realconst:
  148. Result:=oper1.val_real = oper2.val_real;
  149. top_ref:
  150. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  151. else Result:=false;
  152. end
  153. end;
  154. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  155. Out Next: tai; const reg: TRegister): Boolean;
  156. var
  157. gniResult: Boolean;
  158. begin
  159. Next:=Current;
  160. Result := False;
  161. repeat
  162. gniResult:=GetNextInstruction(Next,Next);
  163. if gniResult and RegInInstruction(reg,Next) then
  164. { Found something }
  165. Exit(True);
  166. until not gniResult or
  167. not(cs_opt_level3 in current_settings.optimizerswitches) or
  168. (Next.typ<>ait_instruction) or
  169. is_calljmp(taicpu(Next).opcode)
  170. {$ifdef ARM}
  171. or RegModifiedByInstruction(NR_PC,Next)
  172. {$endif ARM}
  173. ;
  174. end;
  175. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  176. var
  177. alloc,
  178. dealloc : tai_regalloc;
  179. hp1 : tai;
  180. begin
  181. Result:=false;
  182. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  183. { We can't optimize if there is a shiftop }
  184. (taicpu(movp).ops=2) and
  185. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  186. { don't mess with moves to fp }
  187. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  188. { the destination register of the mov might not be used beween p and movp }
  189. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  190. {$ifdef ARM}
  191. { PC should be changed only by moves }
  192. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  193. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  194. (taicpu(p).opcode<>A_CBZ) and
  195. (taicpu(p).opcode<>A_CBNZ) and
  196. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  197. not (
  198. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  199. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  200. (current_settings.cputype < cpu_armv6)
  201. ) and
  202. {$endif ARM}
  203. { Take care to only do this for instructions which REALLY load to the first register.
  204. Otherwise
  205. str reg0, [reg1]
  206. mov reg2, reg0
  207. will be optimized to
  208. str reg2, [reg1]
  209. }
  210. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  211. begin
  212. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  213. if assigned(dealloc) then
  214. begin
  215. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  216. result:=true;
  217. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  218. and remove it if possible }
  219. asml.Remove(dealloc);
  220. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  221. if assigned(alloc) then
  222. begin
  223. asml.Remove(alloc);
  224. alloc.free;
  225. dealloc.free;
  226. end
  227. else
  228. asml.InsertAfter(dealloc,p);
  229. { try to move the allocation of the target register }
  230. GetLastInstruction(movp,hp1);
  231. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  232. if assigned(alloc) then
  233. begin
  234. asml.Remove(alloc);
  235. asml.InsertBefore(alloc,p);
  236. { adjust used regs }
  237. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  238. end;
  239. { finally get rid of the mov }
  240. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  241. { Remove preindexing and postindexing for LDR in some cases.
  242. For example:
  243. ldr reg2,[reg1, xxx]!
  244. mov reg1,reg2
  245. must be translated to:
  246. ldr reg1,[reg1, xxx]
  247. Preindexing must be removed there, since the same register is used as the base and as the target.
  248. Such case is not allowed for ARM CPU and produces crash. }
  249. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  250. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  251. then
  252. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  253. asml.remove(movp);
  254. movp.free;
  255. end;
  256. end;
  257. end;
  258. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  259. var
  260. I: Integer;
  261. current_hp, next_hp: tai;
  262. LDRChange: Boolean;
  263. begin
  264. Result:=false;
  265. {
  266. change
  267. mov r1, r0
  268. add r1, r1, #1
  269. to
  270. add r1, r0, #1
  271. Todo: Make it work for mov+cmp too
  272. CAUTION! If this one is successful p might not be a mov instruction anymore!
  273. }
  274. if (taicpu(p).ops = 2) and
  275. (taicpu(p).oper[1]^.typ = top_reg) and
  276. (taicpu(p).oppostfix = PF_NONE) then
  277. begin
  278. if
  279. MatchInstruction(hp1, [A_ADD, A_ADC,
  280. {$ifdef ARM}
  281. A_RSB, A_RSC,
  282. {$endif ARM}
  283. A_SUB, A_SBC,
  284. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  285. [taicpu(p).condition], []) and
  286. { MOV and MVN might only have 2 ops }
  287. (taicpu(hp1).ops >= 2) and
  288. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  289. (taicpu(hp1).oper[1]^.typ = top_reg) and
  290. (
  291. (taicpu(hp1).ops = 2) or
  292. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  293. ) and
  294. {$ifdef AARCH64}
  295. (taicpu(p).oper[1]^.reg<>NR_SP) and
  296. {$endif AARCH64}
  297. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  298. begin
  299. { When we get here we still don't know if the registers match }
  300. for I:=1 to 2 do
  301. {
  302. If the first loop was successful p will be replaced with hp1.
  303. The checks will still be ok, because all required information
  304. will also be in hp1 then.
  305. }
  306. if (taicpu(hp1).ops > I) and
  307. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  308. {$ifdef ARM}
  309. { prevent certain combinations on thumb(2), this is only a safe approximation }
  310. and (not(GenerateThumbCode or GenerateThumb2Code) or
  311. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  312. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  313. {$endif ARM}
  314. then
  315. begin
  316. DebugMsg('Peephole RedundantMovProcess done', hp1);
  317. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  318. if p<>hp1 then
  319. begin
  320. asml.remove(p);
  321. p.free;
  322. p:=hp1;
  323. Result:=true;
  324. end;
  325. end;
  326. if Result then Exit;
  327. end
  328. { Change: Change:
  329. mov r1, r0 mov r1, r0
  330. ... ...
  331. ldr/str r2, [r1, etc.] mov r2, r1
  332. To: To:
  333. ldr/str r2, [r0, etc.] mov r2, r0
  334. }
  335. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  336. {$ifdef ARM}
  337. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  338. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  339. { Thumb does not support references with base and index one being SP }
  340. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  341. {$endif ARM}
  342. {$ifdef AARCH64}
  343. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  344. {$endif AARCH64}
  345. then
  346. begin
  347. current_hp := p;
  348. TransferUsedRegs(TmpUsedRegs);
  349. { Search local instruction block }
  350. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  351. begin
  352. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  353. LDRChange := False;
  354. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  355. {$ifdef AARCH64}
  356. { If r0 is the zero register, then this sequence of instructions will cause
  357. an access violation, but that's better than an assembler error caused by
  358. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  359. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  360. {$endif AARCH64}
  361. then
  362. begin
  363. { Change the registers from r1 to r0 }
  364. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  365. {$ifdef ARM}
  366. { This optimisation conflicts with something and raises
  367. an access violation - needs further investigation. [Kit] }
  368. (taicpu(next_hp).opcode <> A_LDR) and
  369. {$endif ARM}
  370. { Don't mess around with the base register if the
  371. reference is pre- or post-indexed }
  372. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  373. begin
  374. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  375. LDRChange := True;
  376. end;
  377. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  378. begin
  379. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  380. LDRChange := True;
  381. end;
  382. if LDRChange then
  383. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  384. { Drop out if we're dealing with pre-indexed references }
  385. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  386. (
  387. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  388. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  389. ) then
  390. begin
  391. { Remember to update register allocations }
  392. if LDRChange then
  393. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  394. Break;
  395. end;
  396. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  397. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  398. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  399. begin
  400. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  401. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  402. LDRChange := True;
  403. end;
  404. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  405. begin
  406. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  407. if (taicpu(p).oppostfix = PF_None) and
  408. (
  409. (
  410. (taicpu(next_hp).opcode = A_LDR) and
  411. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  412. ) or
  413. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  414. ) and
  415. { Double-check to see if the old registers were actually
  416. changed (e.g. if the super registers matched, but not
  417. the sizes, they won't be changed). }
  418. (
  419. (taicpu(next_hp).opcode = A_LDR) or
  420. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  421. ) and
  422. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  423. begin
  424. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  425. RemoveCurrentP(p);
  426. Result := True;
  427. Exit;
  428. end;
  429. end;
  430. end
  431. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  432. (taicpu(next_hp).ops = 2) then
  433. begin
  434. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  435. begin
  436. { Found another mov that writes entirely to the register }
  437. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  438. begin
  439. { Register was used beforehand }
  440. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  441. begin
  442. { This MOV is exactly the same as the first one.
  443. Since none of the registers have changed value
  444. at this point, we can remove it. }
  445. DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', next_hp);
  446. if (next_hp = hp1) then
  447. { Don't let hp1 become a dangling pointer }
  448. hp1 := nil;
  449. asml.Remove(next_hp);
  450. next_hp.Free;
  451. { We still have the original p, so we can continue optimising;
  452. if it was -O2 or below, this instruction appeared immediately
  453. after the first MOV, so we're technically not looking more
  454. than one instruction ahead after it's removed! [Kit] }
  455. Continue;
  456. end
  457. else
  458. { Register changes value - drop out }
  459. Break;
  460. end;
  461. { We can delete the first MOV (only if the second MOV is unconditional) }
  462. {$ifdef ARM}
  463. if (taicpu(p).oppostfix = PF_None) and
  464. (taicpu(next_hp).condition = C_None) then
  465. {$endif ARM}
  466. begin
  467. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  468. RemoveCurrentP(p);
  469. Result := True;
  470. end;
  471. Exit;
  472. end
  473. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  474. begin
  475. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  476. { Be careful - if the entire register is not used, removing this
  477. instruction will leave the unused part uninitialised }
  478. {$ifdef AARCH64}
  479. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  480. {$endif AARCH64}
  481. then
  482. begin
  483. { Instruction will become mov r1,r1 }
  484. DebugMsg('Peephole Optimization: Mov2None 2 done', next_hp);
  485. { Allocate r1 between the instructions; not doing
  486. so may cause problems when removing superfluous
  487. MOVs later (i38055) }
  488. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  489. if (next_hp = hp1) then
  490. { Don't let hp1 become a dangling pointer }
  491. hp1 := nil;
  492. asml.Remove(next_hp);
  493. next_hp.Free;
  494. Continue;
  495. end;
  496. { Change the old register (checking the first operand again
  497. forces it to be left alone if the full register is not
  498. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  499. {$ifdef AARCH64}
  500. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  501. {$endif AARCH64}
  502. begin
  503. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  504. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  505. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  506. { If this was the only reference to the old register,
  507. then we can remove the original MOV now }
  508. if (taicpu(p).oppostfix = PF_None) and
  509. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  510. remove if the register was apparently not allocated when its value is
  511. first set at the MOV command (this is especially true for the stack
  512. register). [Kit] }
  513. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  514. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  515. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  516. begin
  517. DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
  518. RemoveCurrentP(p);
  519. Result := True;
  520. Exit;
  521. end;
  522. end;
  523. end;
  524. end;
  525. { On low optimisation settions, don't search more than one instruction ahead }
  526. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  527. { Stop at procedure calls and jumps }
  528. is_calljmp(taicpu(next_hp).opcode) or
  529. { If the read register has changed value, or the MOV
  530. destination register has been used, drop out }
  531. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  532. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  533. Break;
  534. current_hp := next_hp;
  535. end;
  536. end;
  537. end;
  538. end;
  539. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  540. var
  541. hp1, hp2: tai;
  542. begin
  543. Result:=false;
  544. {
  545. change
  546. uxtb reg2,reg1
  547. strb reg2,[...]
  548. dealloc reg2
  549. to
  550. strb reg1,[...]
  551. }
  552. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  553. (taicpu(p).ops=2) and
  554. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  555. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  556. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  557. { the reference in strb might not use reg2 }
  558. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  559. { reg1 might not be modified inbetween }
  560. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  561. begin
  562. DebugMsg('Peephole UxtbStrb2Strb done', p);
  563. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  564. GetNextInstruction(p,hp2);
  565. asml.remove(p);
  566. p.free;
  567. p:=hp2;
  568. result:=true;
  569. end
  570. {
  571. change
  572. uxtb reg2,reg1
  573. uxth reg3,reg2
  574. dealloc reg2
  575. to
  576. uxtb reg3,reg1
  577. }
  578. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  579. (taicpu(p).ops=2) and
  580. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  581. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  582. (taicpu(hp1).ops = 2) and
  583. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  584. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  585. { reg1 might not be modified inbetween }
  586. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  587. begin
  588. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  589. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  590. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  591. asml.remove(hp1);
  592. hp1.free;
  593. result:=true;
  594. end
  595. {
  596. change
  597. uxtb reg2,reg1
  598. uxtb reg3,reg2
  599. dealloc reg2
  600. to
  601. uxtb reg3,reg1
  602. }
  603. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  604. (taicpu(p).ops=2) and
  605. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  606. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  607. (taicpu(hp1).ops = 2) and
  608. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  609. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  610. { reg1 might not be modified inbetween }
  611. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  612. begin
  613. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  614. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  615. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  616. asml.remove(hp1);
  617. hp1.free;
  618. result:=true;
  619. end
  620. {
  621. change
  622. uxtb reg2,reg1
  623. and reg3,reg2,#0x*FF
  624. dealloc reg2
  625. to
  626. uxtb reg3,reg1
  627. }
  628. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  629. (taicpu(p).ops=2) and
  630. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  631. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  632. (taicpu(hp1).ops=3) and
  633. (taicpu(hp1).oper[2]^.typ=top_const) and
  634. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  635. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  636. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  637. { reg1 might not be modified inbetween }
  638. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  639. begin
  640. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  641. taicpu(hp1).opcode:=A_UXTB;
  642. taicpu(hp1).ops:=2;
  643. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  644. GetNextInstruction(p,hp2);
  645. asml.remove(p);
  646. p.free;
  647. p:=hp2;
  648. result:=true;
  649. end
  650. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  651. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  652. Result:=true;
  653. end;
  654. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  655. var
  656. hp1: tai;
  657. begin
  658. Result:=false;
  659. {
  660. change
  661. uxth reg2,reg1
  662. strh reg2,[...]
  663. dealloc reg2
  664. to
  665. strh reg1,[...]
  666. }
  667. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  668. (taicpu(p).ops=2) and
  669. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  670. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  671. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  672. { the reference in strb might not use reg2 }
  673. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  674. { reg1 might not be modified inbetween }
  675. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  676. begin
  677. DebugMsg('Peephole UXTHStrh2Strh done', p);
  678. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  679. GetNextInstruction(p, hp1);
  680. asml.remove(p);
  681. p.free;
  682. p:=hp1;
  683. result:=true;
  684. end
  685. {
  686. change
  687. uxth reg2,reg1
  688. uxth reg3,reg2
  689. dealloc reg2
  690. to
  691. uxth reg3,reg1
  692. }
  693. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  694. (taicpu(p).ops=2) and
  695. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  696. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  697. (taicpu(hp1).ops=2) and
  698. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  699. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  700. { reg1 might not be modified inbetween }
  701. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  702. begin
  703. DebugMsg('Peephole UxthUxth2Uxth done', p);
  704. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  705. taicpu(hp1).opcode:=A_UXTH;
  706. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  707. GetNextInstruction(p, hp1);
  708. asml.remove(p);
  709. p.free;
  710. p:=hp1;
  711. result:=true;
  712. end
  713. {
  714. change
  715. uxth reg2,reg1
  716. and reg3,reg2,#65535
  717. dealloc reg2
  718. to
  719. uxth reg3,reg1
  720. }
  721. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  722. (taicpu(p).ops=2) and
  723. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  724. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  725. (taicpu(hp1).ops=3) and
  726. (taicpu(hp1).oper[2]^.typ=top_const) and
  727. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  728. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  729. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  730. { reg1 might not be modified inbetween }
  731. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  732. begin
  733. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  734. taicpu(hp1).opcode:=A_UXTH;
  735. taicpu(hp1).ops:=2;
  736. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  737. GetNextInstruction(p, hp1);
  738. asml.remove(p);
  739. p.free;
  740. p:=hp1;
  741. result:=true;
  742. end
  743. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  744. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  745. Result:=true;
  746. end;
  747. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  748. var
  749. hp1, hp2: tai;
  750. begin
  751. Result:=false;
  752. {
  753. change
  754. sxtb reg2,reg1
  755. strb reg2,[...]
  756. dealloc reg2
  757. to
  758. strb reg1,[...]
  759. }
  760. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  761. (taicpu(p).ops=2) and
  762. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  763. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  764. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  765. { the reference in strb might not use reg2 }
  766. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  767. { reg1 might not be modified inbetween }
  768. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  769. begin
  770. DebugMsg('Peephole SxtbStrb2Strb done', p);
  771. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  772. GetNextInstruction(p,hp2);
  773. asml.remove(p);
  774. p.free;
  775. p:=hp2;
  776. result:=true;
  777. end
  778. {
  779. change
  780. sxtb reg2,reg1
  781. sxth reg3,reg2
  782. dealloc reg2
  783. to
  784. sxtb reg3,reg1
  785. }
  786. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  787. (taicpu(p).ops=2) and
  788. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  789. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  790. (taicpu(hp1).ops = 2) and
  791. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  792. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  793. { reg1 might not be modified inbetween }
  794. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  795. begin
  796. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  797. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  798. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  799. asml.remove(hp1);
  800. hp1.free;
  801. result:=true;
  802. end
  803. {
  804. change
  805. sxtb reg2,reg1
  806. sxtb reg3,reg2
  807. dealloc reg2
  808. to
  809. uxtb reg3,reg1
  810. }
  811. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  812. (taicpu(p).ops=2) and
  813. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  814. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  815. (taicpu(hp1).ops = 2) and
  816. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  817. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  818. { reg1 might not be modified inbetween }
  819. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  820. begin
  821. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  822. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  823. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  824. asml.remove(hp1);
  825. hp1.free;
  826. result:=true;
  827. end
  828. {
  829. change
  830. sxtb reg2,reg1
  831. and reg3,reg2,#0x*FF
  832. dealloc reg2
  833. to
  834. uxtb reg3,reg1
  835. }
  836. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  837. (taicpu(p).ops=2) and
  838. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  839. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  840. (taicpu(hp1).ops=3) and
  841. (taicpu(hp1).oper[2]^.typ=top_const) and
  842. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  843. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  844. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  845. { reg1 might not be modified inbetween }
  846. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  847. begin
  848. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  849. taicpu(hp1).opcode:=A_UXTB;
  850. taicpu(hp1).ops:=2;
  851. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  852. GetNextInstruction(p,hp2);
  853. asml.remove(p);
  854. p.free;
  855. p:=hp2;
  856. result:=true;
  857. end
  858. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  859. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  860. Result:=true;
  861. end;
  862. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  863. var
  864. hp1: tai;
  865. begin
  866. Result:=false;
  867. {
  868. change
  869. sxth reg2,reg1
  870. strh reg2,[...]
  871. dealloc reg2
  872. to
  873. strh reg1,[...]
  874. }
  875. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  876. (taicpu(p).ops=2) and
  877. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  878. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  879. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  880. { the reference in strb might not use reg2 }
  881. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  882. { reg1 might not be modified inbetween }
  883. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  884. begin
  885. DebugMsg('Peephole SXTHStrh2Strh done', p);
  886. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  887. GetNextInstruction(p, hp1);
  888. asml.remove(p);
  889. p.free;
  890. p:=hp1;
  891. result:=true;
  892. end
  893. {
  894. change
  895. sxth reg2,reg1
  896. sxth reg3,reg2
  897. dealloc reg2
  898. to
  899. sxth reg3,reg1
  900. }
  901. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  902. (taicpu(p).ops=2) and
  903. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  904. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  905. (taicpu(hp1).ops=2) and
  906. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  907. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  908. { reg1 might not be modified inbetween }
  909. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  910. begin
  911. DebugMsg('Peephole SxthSxth2Sxth done', p);
  912. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  913. taicpu(hp1).opcode:=A_SXTH;
  914. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  915. GetNextInstruction(p, hp1);
  916. asml.remove(p);
  917. p.free;
  918. p:=hp1;
  919. result:=true;
  920. end
  921. {
  922. change
  923. sxth reg2,reg1
  924. and reg3,reg2,#65535
  925. dealloc reg2
  926. to
  927. uxth reg3,reg1
  928. }
  929. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  930. (taicpu(p).ops=2) and
  931. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  932. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  933. (taicpu(hp1).ops=3) and
  934. (taicpu(hp1).oper[2]^.typ=top_const) and
  935. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  936. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  937. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  938. { reg1 might not be modified inbetween }
  939. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  940. begin
  941. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  942. taicpu(hp1).opcode:=A_UXTH;
  943. taicpu(hp1).ops:=2;
  944. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  945. GetNextInstruction(p, hp1);
  946. asml.remove(p);
  947. p.free;
  948. p:=hp1;
  949. result:=true;
  950. end
  951. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  952. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  953. Result:=true;
  954. end;
  955. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  956. var
  957. hp1: tai;
  958. Reference: TReference;
  959. NewOp: TAsmOp;
  960. begin
  961. Result := False;
  962. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  963. Exit;
  964. Reference := taicpu(p).oper[1]^.ref^;
  965. if (Reference.addressmode = AM_OFFSET) and
  966. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  967. { Delay calling GetNextInstruction for as long as possible }
  968. GetNextInstruction(p, hp1) and
  969. (hp1.typ = ait_instruction) and
  970. (taicpu(hp1).condition = C_None) and
  971. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  972. begin
  973. if (taicpu(hp1).opcode = A_STR) and
  974. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  975. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  976. begin
  977. { With:
  978. ldr reg1,[ref]
  979. str reg2,[ref]
  980. If reg1 = reg2, Remove str
  981. }
  982. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  983. begin
  984. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  985. RemoveInstruction(hp1);
  986. Result := True;
  987. Exit;
  988. end;
  989. end
  990. else if (taicpu(hp1).opcode = A_LDR) and
  991. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  992. begin
  993. { With:
  994. ldr reg1,[ref]
  995. ldr reg2,[ref]
  996. If reg1 = reg2, delete the second ldr
  997. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  998. a dependency, but it will likely open up new optimisations, so
  999. do it for now and handle any new dependencies later.
  1000. }
  1001. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1002. begin
  1003. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1004. RemoveInstruction(hp1);
  1005. Result := True;
  1006. Exit;
  1007. end
  1008. else if
  1009. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1010. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1011. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1012. begin
  1013. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1014. taicpu(hp1).opcode := A_MOV;
  1015. taicpu(hp1).oppostfix := PF_None;
  1016. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1017. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1018. Result := True;
  1019. Exit;
  1020. end;
  1021. end;
  1022. end;
  1023. end;
  1024. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1025. var
  1026. hp1: tai;
  1027. Reference: TReference;
  1028. SizeMismatch: Boolean;
  1029. SrcReg: TRegister;
  1030. NewOp: TAsmOp;
  1031. begin
  1032. Result := False;
  1033. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1034. Exit;
  1035. Reference := taicpu(p).oper[1]^.ref^;
  1036. if (Reference.addressmode = AM_OFFSET) and
  1037. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1038. { Delay calling GetNextInstruction for as long as possible }
  1039. GetNextInstruction(p, hp1) and
  1040. (hp1.typ = ait_instruction) and
  1041. (taicpu(hp1).condition = C_None) and
  1042. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1043. if GetNextInstruction(p, hp1) and
  1044. (hp1.typ = ait_instruction) and
  1045. (taicpu(hp1).condition = C_None) then
  1046. begin
  1047. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1048. SrcReg := taicpu(p).oper[0]^.reg;
  1049. if (taicpu(hp1).opcode = A_LDR) and
  1050. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1051. (
  1052. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1053. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1054. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1055. {$ifdef AARCH64}
  1056. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1057. {$endif AARCH64}
  1058. ) then
  1059. begin
  1060. { With:
  1061. str reg1,[ref]
  1062. ldr reg2,[ref]
  1063. If reg1 = reg2, Remove ldr.
  1064. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1065. }
  1066. if (SrcReg = taicpu(hp1).oper[0]^.reg) and
  1067. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1068. (taicpu(p).oppostfix=PF_None) then
  1069. begin
  1070. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1071. RemoveInstruction(hp1);
  1072. Result := True;
  1073. Exit;
  1074. end
  1075. else if (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1076. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1077. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1078. begin
  1079. NewOp:=A_NONE;
  1080. if taicpu(hp1).oppostfix=PF_None then
  1081. NewOp:=A_MOV
  1082. else
  1083. {$ifndef AARCH64}
  1084. if (current_settings.cputype >= cpu_armv6) then
  1085. {$endif not AARCH64}
  1086. case taicpu(hp1).oppostfix of
  1087. PF_B:
  1088. NewOp := A_UXTB;
  1089. PF_SB:
  1090. NewOp := A_SXTB;
  1091. PF_H:
  1092. NewOp := A_UXTH;
  1093. PF_SH:
  1094. NewOp := A_SXTH;
  1095. {$ifdef AARCH64}
  1096. PF_SW:
  1097. NewOp := A_SXTW;
  1098. PF_W:
  1099. NewOp := A_MOV;
  1100. {$endif AARCH64}
  1101. else
  1102. InternalError(2021043001);
  1103. end;
  1104. if (NewOp<>A_None) then
  1105. begin
  1106. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1107. taicpu(hp1).oppostfix := PF_None;
  1108. taicpu(hp1).opcode := NewOp;
  1109. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1110. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1111. Result := True;
  1112. Exit;
  1113. end;
  1114. end
  1115. end
  1116. else if (taicpu(hp1).opcode = A_STR) and
  1117. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1118. begin
  1119. { With:
  1120. str reg1,[ref]
  1121. str reg2,[ref]
  1122. If reg1 <> reg2, delete the first str
  1123. IF reg1 = reg2, delete the second str
  1124. }
  1125. if SrcReg = taicpu(hp1).oper[0]^.reg then
  1126. begin
  1127. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1128. RemoveInstruction(hp1);
  1129. Result := True;
  1130. Exit;
  1131. end
  1132. else if
  1133. { Registers same byte size? }
  1134. (tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)] = tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]) then
  1135. begin
  1136. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1137. RemoveCurrentP(p, hp1);
  1138. Result := True;
  1139. Exit;
  1140. end;
  1141. end;
  1142. end;
  1143. end;
  1144. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1145. var
  1146. hp1, hp2: tai;
  1147. i: longint;
  1148. begin
  1149. Result:=false;
  1150. {
  1151. optimize
  1152. and reg2,reg1,const1
  1153. ...
  1154. }
  1155. if (taicpu(p).ops>2) and
  1156. (taicpu(p).oper[1]^.typ = top_reg) and
  1157. (taicpu(p).oper[2]^.typ = top_const) then
  1158. begin
  1159. {
  1160. change
  1161. and reg2,reg1,const1
  1162. ...
  1163. and reg3,reg2,const2
  1164. to
  1165. and reg3,reg1,(const1 and const2)
  1166. }
  1167. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1168. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1169. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1170. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1171. (taicpu(hp1).oper[2]^.typ = top_const)
  1172. {$ifdef AARCH64}
  1173. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1174. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1175. ) or
  1176. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1177. {$endif AARCH64}
  1178. then
  1179. begin
  1180. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1181. begin
  1182. DebugMsg('Peephole AndAnd2And done', p);
  1183. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1184. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1185. begin
  1186. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1187. taicpu(p).opcode:=A_MOV;
  1188. taicpu(p).ops:=2;
  1189. taicpu(p).loadConst(1,0);
  1190. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1191. end
  1192. else
  1193. begin
  1194. DebugMsg('Peephole AndAnd2And 1 done', p);
  1195. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1196. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1197. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1198. end;
  1199. asml.remove(hp1);
  1200. hp1.free;
  1201. Result:=true;
  1202. exit;
  1203. end
  1204. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1205. begin
  1206. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1207. begin
  1208. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1209. taicpu(hp1).opcode:=A_MOV;
  1210. taicpu(hp1).loadConst(1,0);
  1211. taicpu(hp1).ops:=2;
  1212. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1213. end
  1214. else
  1215. begin
  1216. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1217. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1218. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1219. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1220. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1221. end;
  1222. GetNextInstruction(p, hp1);
  1223. RemoveCurrentP(p);
  1224. p:=hp1;
  1225. Result:=true;
  1226. exit;
  1227. end;
  1228. end
  1229. {
  1230. change
  1231. and reg2,reg1,$xxxxxxFF
  1232. strb reg2,[...]
  1233. dealloc reg2
  1234. to
  1235. strb reg1,[...]
  1236. }
  1237. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1238. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1239. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1240. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1241. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1242. { the reference in strb might not use reg2 }
  1243. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1244. { reg1 might not be modified inbetween }
  1245. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1246. begin
  1247. DebugMsg('Peephole AndStrb2Strb done', p);
  1248. {$ifdef AARCH64}
  1249. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1250. {$else AARCH64}
  1251. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1252. {$endif AARCH64}
  1253. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1254. RemoveCurrentP(p);
  1255. result:=true;
  1256. exit;
  1257. end
  1258. {
  1259. change
  1260. and reg2,reg1,255
  1261. uxtb/uxth reg3,reg2
  1262. dealloc reg2
  1263. to
  1264. and reg3,reg1,x
  1265. }
  1266. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1267. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1268. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1269. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1270. (taicpu(hp1).ops = 2) and
  1271. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1272. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1273. { reg1 might not be modified inbetween }
  1274. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1275. begin
  1276. DebugMsg('Peephole AndUxt2And done', p);
  1277. taicpu(hp1).opcode:=A_AND;
  1278. taicpu(hp1).ops:=3;
  1279. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1280. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1281. GetNextInstruction(p,hp1);
  1282. asml.remove(p);
  1283. p.Free;
  1284. p:=hp1;
  1285. result:=true;
  1286. exit;
  1287. end
  1288. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1289. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1290. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1291. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1292. (taicpu(hp1).ops = 2) and
  1293. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1294. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1295. { reg1 might not be modified inbetween }
  1296. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1297. begin
  1298. DebugMsg('Peephole AndSxt2And done', p);
  1299. taicpu(hp1).opcode:=A_AND;
  1300. taicpu(hp1).ops:=3;
  1301. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1302. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1303. GetNextInstruction(p,hp1);
  1304. asml.remove(p);
  1305. p.Free;
  1306. p:=hp1;
  1307. result:=true;
  1308. exit;
  1309. end
  1310. {
  1311. from
  1312. and reg1,reg0,2^n-1
  1313. mov reg2,reg1, lsl imm1
  1314. (mov reg3,reg2, lsr/asr imm1)
  1315. remove either the and or the lsl/xsr sequence if possible
  1316. }
  1317. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1318. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1319. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1320. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1321. (taicpu(hp1).ops=3) and
  1322. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1323. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1324. {$ifdef ARM}
  1325. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1326. {$endif ARM}
  1327. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1328. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1329. begin
  1330. {
  1331. and reg1,reg0,2^n-1
  1332. mov reg2,reg1, lsl imm1
  1333. mov reg3,reg2, lsr/asr imm1
  1334. =>
  1335. and reg1,reg0,2^n-1
  1336. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1337. }
  1338. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1339. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1340. (taicpu(hp2).ops=3) and
  1341. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1342. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1343. {$ifdef ARM}
  1344. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1345. {$endif ARM}
  1346. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1347. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1348. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1349. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1350. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1351. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1352. begin
  1353. DebugMsg('Peephole AndLslXsr2And done', p);
  1354. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1355. asml.Remove(hp1);
  1356. asml.Remove(hp2);
  1357. hp1.free;
  1358. hp2.free;
  1359. result:=true;
  1360. exit;
  1361. end
  1362. {
  1363. and reg1,reg0,2^n-1
  1364. mov reg2,reg1, lsl imm1
  1365. =>
  1366. mov reg2,reg0, lsl imm1
  1367. if imm1>i
  1368. }
  1369. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1370. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1371. begin
  1372. DebugMsg('Peephole AndLsl2Lsl done', p);
  1373. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1374. GetNextInstruction(p, hp1);
  1375. asml.Remove(p);
  1376. p.free;
  1377. p:=hp1;
  1378. result:=true;
  1379. exit;
  1380. end
  1381. end;
  1382. end;
  1383. {
  1384. change
  1385. and reg1, ...
  1386. mov reg2, reg1
  1387. to
  1388. and reg2, ...
  1389. }
  1390. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1391. (taicpu(p).ops>=3) and
  1392. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1393. Result:=true;
  1394. end;
  1395. end.