aoptarm.pas 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. function OptPreSBFXUBFX(var p: tai): Boolean;
  36. function OptPass1UXTB(var p: tai): Boolean;
  37. function OptPass1UXTH(var p: tai): Boolean;
  38. function OptPass1SXTB(var p: tai): Boolean;
  39. function OptPass1SXTH(var p: tai): Boolean;
  40. function OptPass1LDR(var p: tai): Boolean; virtual;
  41. function OptPass1STR(var p: tai): Boolean; virtual;
  42. function OptPass1And(var p: tai): Boolean; virtual;
  43. End;
  44. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  45. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  46. {$ifdef AARCH64}
  47. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  48. {$endif AARCH64}
  49. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  50. function RefsEqual(const r1, r2: treference): boolean;
  51. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  52. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  53. Implementation
  54. uses
  55. cutils,verbose,globtype,globals,
  56. systems,
  57. cpuinfo,
  58. cgobj,procinfo,
  59. aasmbase,aasmdata,itcpugas;
  60. {$ifdef DEBUG_AOPTCPU}
  61. const
  62. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  63. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  64. begin
  65. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  66. end;
  67. {$else DEBUG_AOPTCPU}
  68. { Empty strings help the optimizer to remove string concatenations that won't
  69. ever appear to the user on release builds. [Kit] }
  70. const
  71. SPeepholeOptimization = '';
  72. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  73. begin
  74. end;
  75. {$endif DEBUG_AOPTCPU}
  76. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  77. begin
  78. result :=
  79. (instr.typ = ait_instruction) and
  80. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  81. ((cond = []) or (taicpu(instr).condition in cond)) and
  82. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  83. end;
  84. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  85. begin
  86. result :=
  87. (instr.typ = ait_instruction) and
  88. (taicpu(instr).opcode = op) and
  89. ((cond = []) or (taicpu(instr).condition in cond)) and
  90. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  91. end;
  92. {$ifdef AARCH64}
  93. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  94. var
  95. op : TAsmOp;
  96. begin
  97. result:=false;
  98. if instr.typ <> ait_instruction then
  99. exit;
  100. for op in ops do
  101. begin
  102. if (taicpu(instr).opcode = op) and
  103. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  104. begin
  105. result:=true;
  106. exit;
  107. end;
  108. end;
  109. end;
  110. {$endif AARCH64}
  111. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  112. begin
  113. result :=
  114. (instr.typ = ait_instruction) and
  115. (taicpu(instr).opcode = op) and
  116. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  117. end;
  118. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  119. begin
  120. result := (oper.typ = top_reg) and (oper.reg = reg);
  121. end;
  122. function RefsEqual(const r1, r2: treference): boolean;
  123. begin
  124. refsequal :=
  125. (r1.offset = r2.offset) and
  126. (r1.base = r2.base) and
  127. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  128. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  129. (r1.relsymbol = r2.relsymbol) and
  130. {$ifdef ARM}
  131. (r1.signindex = r2.signindex) and
  132. {$endif ARM}
  133. (r1.shiftimm = r2.shiftimm) and
  134. (r1.addressmode = r2.addressmode) and
  135. (r1.shiftmode = r2.shiftmode) and
  136. (r1.volatility=[]) and
  137. (r2.volatility=[]);
  138. end;
  139. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  140. begin
  141. result := oper1.typ = oper2.typ;
  142. if result then
  143. case oper1.typ of
  144. top_const:
  145. Result:=oper1.val = oper2.val;
  146. top_reg:
  147. Result:=oper1.reg = oper2.reg;
  148. top_conditioncode:
  149. Result:=oper1.cc = oper2.cc;
  150. top_realconst:
  151. Result:=oper1.val_real = oper2.val_real;
  152. top_ref:
  153. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  154. else Result:=false;
  155. end
  156. end;
  157. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  158. Out Next: tai; const reg: TRegister): Boolean;
  159. var
  160. gniResult: Boolean;
  161. begin
  162. Next:=Current;
  163. Result := False;
  164. repeat
  165. gniResult:=GetNextInstruction(Next,Next);
  166. if gniResult and RegInInstruction(reg,Next) then
  167. { Found something }
  168. Exit(True);
  169. until not gniResult or
  170. not(cs_opt_level3 in current_settings.optimizerswitches) or
  171. (Next.typ<>ait_instruction) or
  172. is_calljmp(taicpu(Next).opcode)
  173. {$ifdef ARM}
  174. or RegModifiedByInstruction(NR_PC,Next)
  175. {$endif ARM}
  176. ;
  177. end;
  178. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  179. var
  180. alloc,
  181. dealloc : tai_regalloc;
  182. hp1 : tai;
  183. begin
  184. Result:=false;
  185. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  186. { We can't optimize if there is a shiftop }
  187. (taicpu(movp).ops=2) and
  188. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  189. { don't mess with moves to fp }
  190. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  191. { the destination register of the mov might not be used beween p and movp }
  192. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  193. {$ifdef ARM}
  194. { PC should be changed only by moves }
  195. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  196. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  197. (taicpu(p).opcode<>A_CBZ) and
  198. (taicpu(p).opcode<>A_CBNZ) and
  199. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  200. not (
  201. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  202. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  203. (current_settings.cputype < cpu_armv6)
  204. ) and
  205. {$endif ARM}
  206. { Take care to only do this for instructions which REALLY load to the first register.
  207. Otherwise
  208. str reg0, [reg1]
  209. mov reg2, reg0
  210. will be optimized to
  211. str reg2, [reg1]
  212. }
  213. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  214. begin
  215. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  216. if assigned(dealloc) then
  217. begin
  218. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  219. result:=true;
  220. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  221. and remove it if possible }
  222. asml.Remove(dealloc);
  223. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  224. if assigned(alloc) then
  225. begin
  226. asml.Remove(alloc);
  227. alloc.free;
  228. dealloc.free;
  229. end
  230. else
  231. asml.InsertAfter(dealloc,p);
  232. { try to move the allocation of the target register }
  233. GetLastInstruction(movp,hp1);
  234. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  235. if assigned(alloc) then
  236. begin
  237. asml.Remove(alloc);
  238. asml.InsertBefore(alloc,p);
  239. { adjust used regs }
  240. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  241. end;
  242. { finally get rid of the mov }
  243. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  244. { Remove preindexing and postindexing for LDR in some cases.
  245. For example:
  246. ldr reg2,[reg1, xxx]!
  247. mov reg1,reg2
  248. must be translated to:
  249. ldr reg1,[reg1, xxx]
  250. Preindexing must be removed there, since the same register is used as the base and as the target.
  251. Such case is not allowed for ARM CPU and produces crash. }
  252. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  253. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  254. then
  255. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  256. asml.remove(movp);
  257. movp.free;
  258. end;
  259. end;
  260. end;
  261. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  262. var
  263. I: Integer;
  264. current_hp, next_hp: tai;
  265. LDRChange: Boolean;
  266. begin
  267. Result:=false;
  268. {
  269. change
  270. mov r1, r0
  271. add r1, r1, #1
  272. to
  273. add r1, r0, #1
  274. Todo: Make it work for mov+cmp too
  275. CAUTION! If this one is successful p might not be a mov instruction anymore!
  276. }
  277. if (taicpu(p).ops = 2) and
  278. (taicpu(p).oper[1]^.typ = top_reg) and
  279. (taicpu(p).oppostfix = PF_NONE) then
  280. begin
  281. if
  282. MatchInstruction(hp1, [A_ADD, A_ADC,
  283. {$ifdef ARM}
  284. A_RSB, A_RSC,
  285. {$endif ARM}
  286. A_SUB, A_SBC,
  287. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  288. [taicpu(p).condition], []) and
  289. { MOV and MVN might only have 2 ops }
  290. (taicpu(hp1).ops >= 2) and
  291. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  292. (taicpu(hp1).oper[1]^.typ = top_reg) and
  293. (
  294. (taicpu(hp1).ops = 2) or
  295. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  296. ) and
  297. {$ifdef AARCH64}
  298. (taicpu(p).oper[1]^.reg<>NR_SP) and
  299. { in this case you have to transform it to movk or the like }
  300. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  301. {$endif AARCH64}
  302. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  303. begin
  304. { When we get here we still don't know if the registers match }
  305. for I:=1 to 2 do
  306. {
  307. If the first loop was successful p will be replaced with hp1.
  308. The checks will still be ok, because all required information
  309. will also be in hp1 then.
  310. }
  311. if (taicpu(hp1).ops > I) and
  312. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  313. {$ifdef ARM}
  314. { prevent certain combinations on thumb(2), this is only a safe approximation }
  315. and (not(GenerateThumbCode or GenerateThumb2Code) or
  316. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  317. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  318. {$endif ARM}
  319. then
  320. begin
  321. DebugMsg('Peephole RedundantMovProcess done', hp1);
  322. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  323. if p<>hp1 then
  324. begin
  325. asml.remove(p);
  326. p.free;
  327. p:=hp1;
  328. Result:=true;
  329. end;
  330. end;
  331. if Result then Exit;
  332. end
  333. { Change: Change:
  334. mov r1, r0 mov r1, r0
  335. ... ...
  336. ldr/str r2, [r1, etc.] mov r2, r1
  337. To: To:
  338. ldr/str r2, [r0, etc.] mov r2, r0
  339. }
  340. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  341. {$ifdef ARM}
  342. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  343. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  344. { Thumb does not support references with base and index one being SP }
  345. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  346. {$endif ARM}
  347. {$ifdef AARCH64}
  348. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  349. {$endif AARCH64}
  350. then
  351. begin
  352. current_hp := p;
  353. TransferUsedRegs(TmpUsedRegs);
  354. { Search local instruction block }
  355. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  356. begin
  357. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  358. LDRChange := False;
  359. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  360. {$ifdef AARCH64}
  361. { If r0 is the zero register, then this sequence of instructions will cause
  362. an access violation, but that's better than an assembler error caused by
  363. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  364. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  365. {$endif AARCH64}
  366. then
  367. begin
  368. { Change the registers from r1 to r0 }
  369. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  370. {$ifdef ARM}
  371. { This optimisation conflicts with something and raises
  372. an access violation - needs further investigation. [Kit] }
  373. (taicpu(next_hp).opcode <> A_LDR) and
  374. {$endif ARM}
  375. { Don't mess around with the base register if the
  376. reference is pre- or post-indexed }
  377. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  378. begin
  379. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  380. LDRChange := True;
  381. end;
  382. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  383. begin
  384. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  385. LDRChange := True;
  386. end;
  387. if LDRChange then
  388. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  389. { Drop out if we're dealing with pre-indexed references }
  390. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  391. (
  392. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  393. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  394. ) then
  395. begin
  396. { Remember to update register allocations }
  397. if LDRChange then
  398. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  399. Break;
  400. end;
  401. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  402. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  403. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  404. begin
  405. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  406. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  407. LDRChange := True;
  408. end;
  409. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  410. begin
  411. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  412. if (taicpu(p).oppostfix = PF_None) and
  413. (
  414. (
  415. (taicpu(next_hp).opcode = A_LDR) and
  416. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  417. ) or
  418. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  419. ) and
  420. { Double-check to see if the old registers were actually
  421. changed (e.g. if the super registers matched, but not
  422. the sizes, they won't be changed). }
  423. (
  424. (taicpu(next_hp).opcode = A_LDR) or
  425. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  426. ) and
  427. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  428. begin
  429. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  430. RemoveCurrentP(p);
  431. Result := True;
  432. Exit;
  433. end;
  434. end;
  435. end
  436. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  437. (taicpu(next_hp).ops = 2) then
  438. begin
  439. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  440. begin
  441. { Found another mov that writes entirely to the register }
  442. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  443. begin
  444. { Register was used beforehand }
  445. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  446. begin
  447. { This MOV is exactly the same as the first one.
  448. Since none of the registers have changed value
  449. at this point, we can remove it. }
  450. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  451. if (next_hp = hp1) then
  452. { Don't let hp1 become a dangling pointer }
  453. hp1 := nil;
  454. asml.Remove(next_hp);
  455. next_hp.Free;
  456. { We still have the original p, so we can continue optimising;
  457. if it was -O2 or below, this instruction appeared immediately
  458. after the first MOV, so we're technically not looking more
  459. than one instruction ahead after it's removed! [Kit] }
  460. Continue;
  461. end
  462. else
  463. { Register changes value - drop out }
  464. Break;
  465. end;
  466. { We can delete the first MOV (only if the second MOV is unconditional) }
  467. {$ifdef ARM}
  468. if (taicpu(p).oppostfix = PF_None) and
  469. (taicpu(next_hp).condition = C_None) then
  470. {$endif ARM}
  471. begin
  472. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  473. RemoveCurrentP(p);
  474. Result := True;
  475. end;
  476. Exit;
  477. end
  478. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  479. begin
  480. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  481. { Be careful - if the entire register is not used, removing this
  482. instruction will leave the unused part uninitialised }
  483. {$ifdef AARCH64}
  484. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  485. {$endif AARCH64}
  486. then
  487. begin
  488. { Instruction will become mov r1,r1 }
  489. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  490. { Allocate r1 between the instructions; not doing
  491. so may cause problems when removing superfluous
  492. MOVs later (i38055) }
  493. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  494. if (next_hp = hp1) then
  495. { Don't let hp1 become a dangling pointer }
  496. hp1 := nil;
  497. asml.Remove(next_hp);
  498. next_hp.Free;
  499. Continue;
  500. end;
  501. { Change the old register (checking the first operand again
  502. forces it to be left alone if the full register is not
  503. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  504. {$ifdef AARCH64}
  505. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  506. {$endif AARCH64}
  507. begin
  508. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  509. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  510. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  511. { If this was the only reference to the old register,
  512. then we can remove the original MOV now }
  513. if (taicpu(p).oppostfix = PF_None) and
  514. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  515. remove if the register was apparently not allocated when its value is
  516. first set at the MOV command (this is especially true for the stack
  517. register). [Kit] }
  518. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  519. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  520. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  521. begin
  522. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  523. RemoveCurrentP(p);
  524. Result := True;
  525. Exit;
  526. end;
  527. end;
  528. end;
  529. end;
  530. { On low optimisation settions, don't search more than one instruction ahead }
  531. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  532. { Stop at procedure calls and jumps }
  533. is_calljmp(taicpu(next_hp).opcode) or
  534. { If the read register has changed value, or the MOV
  535. destination register has been used, drop out }
  536. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  537. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  538. Break;
  539. current_hp := next_hp;
  540. end;
  541. end;
  542. end;
  543. end;
  544. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  545. var
  546. hp1, hp2: tai;
  547. so: tshifterop;
  548. begin
  549. Result:=false;
  550. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  551. begin
  552. {
  553. change
  554. uxtb reg2,reg1
  555. strb reg2,[...]
  556. dealloc reg2
  557. to
  558. strb reg1,[...]
  559. }
  560. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  561. (taicpu(p).ops=2) and
  562. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  563. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  564. { the reference in strb might not use reg2 }
  565. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  566. { reg1 might not be modified inbetween }
  567. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  568. begin
  569. DebugMsg('Peephole UxtbStrb2Strb done', p);
  570. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  571. result:=RemoveCurrentP(p);
  572. end
  573. {
  574. change
  575. uxtb reg2,reg1
  576. uxth reg3,reg2
  577. dealloc reg2
  578. to
  579. uxtb reg3,reg1
  580. }
  581. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  582. (taicpu(p).ops=2) and
  583. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  584. (taicpu(hp1).ops = 2) and
  585. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  586. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  587. { reg1 might not be modified inbetween }
  588. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  589. begin
  590. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  591. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  592. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  593. asml.remove(hp1);
  594. hp1.free;
  595. result:=true;
  596. end
  597. {
  598. change
  599. uxtb reg2,reg1
  600. uxtb reg3,reg2
  601. dealloc reg2
  602. to
  603. uxtb reg3,reg1
  604. }
  605. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  606. (taicpu(p).ops=2) and
  607. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  608. (taicpu(hp1).ops = 2) and
  609. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  610. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  611. { reg1 might not be modified inbetween }
  612. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  613. begin
  614. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  615. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  616. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  617. asml.remove(hp1);
  618. hp1.free;
  619. result:=true;
  620. end
  621. {
  622. change
  623. uxtb reg2,reg1
  624. and reg3,reg2,#0x*FF
  625. dealloc reg2
  626. to
  627. uxtb reg3,reg1
  628. }
  629. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  630. (taicpu(p).ops=2) and
  631. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  632. (taicpu(hp1).ops=3) and
  633. (taicpu(hp1).oper[2]^.typ=top_const) and
  634. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  635. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  636. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  637. { reg1 might not be modified inbetween }
  638. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  639. begin
  640. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  641. taicpu(hp1).opcode:=A_UXTB;
  642. taicpu(hp1).ops:=2;
  643. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  644. result:=RemoveCurrentP(p);
  645. end
  646. {$ifdef AARCH64}
  647. else if MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  648. (taicpu(p).ops=2) and
  649. (taicpu(hp1).ops=3) and
  650. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  651. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)) and
  652. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  653. { reg1 might not be modified inbetween }
  654. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  655. begin
  656. DebugMsg('Peephole UxtbOp2Op done', p);
  657. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  658. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  659. taicpu(hp1).ops:=4;
  660. shifterop_reset(so);
  661. so.shiftmode:=SM_UXTB;
  662. so.shiftimm:=0;
  663. taicpu(hp1).loadshifterop(3,so);
  664. result:=RemoveCurrentP(p);
  665. end
  666. {$endif AARCH64}
  667. else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  668. Result:=true;
  669. end;
  670. end;
  671. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  672. var
  673. hp1: tai;
  674. so: tshifterop;
  675. begin
  676. Result:=false;
  677. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  678. begin
  679. {
  680. change
  681. uxth reg2,reg1
  682. strh reg2,[...]
  683. dealloc reg2
  684. to
  685. strh reg1,[...]
  686. }
  687. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  688. (taicpu(p).ops=2) and
  689. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  690. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  691. { the reference in strb might not use reg2 }
  692. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  693. { reg1 might not be modified inbetween }
  694. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  695. begin
  696. DebugMsg('Peephole UXTHStrh2Strh done', p);
  697. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  698. result:=RemoveCurrentP(p);
  699. end
  700. {
  701. change
  702. uxth reg2,reg1
  703. uxth reg3,reg2
  704. dealloc reg2
  705. to
  706. uxth reg3,reg1
  707. }
  708. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  709. (taicpu(p).ops=2) and
  710. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  711. (taicpu(hp1).ops=2) and
  712. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  713. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  714. { reg1 might not be modified inbetween }
  715. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  716. begin
  717. DebugMsg('Peephole UxthUxth2Uxth done', p);
  718. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  719. taicpu(hp1).opcode:=A_UXTH;
  720. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  721. result:=RemoveCurrentP(p);
  722. end
  723. {
  724. change
  725. uxth reg2,reg1
  726. and reg3,reg2,#65535
  727. dealloc reg2
  728. to
  729. uxth reg3,reg1
  730. }
  731. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  732. (taicpu(p).ops=2) and
  733. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  734. (taicpu(hp1).ops=3) and
  735. (taicpu(hp1).oper[2]^.typ=top_const) and
  736. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  737. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  738. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  739. { reg1 might not be modified inbetween }
  740. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  741. begin
  742. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  743. taicpu(hp1).opcode:=A_UXTH;
  744. taicpu(hp1).ops:=2;
  745. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  746. result:=RemoveCurrentP(p);
  747. end
  748. {$ifdef AARCH64}
  749. else if MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  750. (taicpu(p).ops=2) and
  751. (taicpu(hp1).ops=3) and
  752. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  753. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)) and
  754. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  755. { reg1 might not be modified inbetween }
  756. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  757. begin
  758. DebugMsg('Peephole UxthOp2Op done', p);
  759. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  760. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  761. taicpu(hp1).ops:=4;
  762. shifterop_reset(so);
  763. so.shiftmode:=SM_UXTH;
  764. so.shiftimm:=0;
  765. taicpu(hp1).loadshifterop(3,so);
  766. result:=RemoveCurrentP(p);
  767. end
  768. {$endif AARCH64}
  769. else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  770. Result:=true;
  771. end;
  772. end;
  773. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  774. var
  775. hp1, hp2: tai;
  776. so: tshifterop;
  777. begin
  778. Result:=false;
  779. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  780. begin
  781. {
  782. change
  783. sxtb reg2,reg1
  784. strb reg2,[...]
  785. dealloc reg2
  786. to
  787. strb reg1,[...]
  788. }
  789. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  790. (taicpu(p).ops=2) and
  791. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  792. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  793. { the reference in strb might not use reg2 }
  794. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  795. { reg1 might not be modified inbetween }
  796. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  797. begin
  798. DebugMsg('Peephole SxtbStrb2Strb done', p);
  799. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  800. result:=RemoveCurrentP(p);
  801. end
  802. {
  803. change
  804. sxtb reg2,reg1
  805. sxth reg3,reg2
  806. dealloc reg2
  807. to
  808. sxtb reg3,reg1
  809. }
  810. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  811. (taicpu(p).ops=2) and
  812. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  813. (taicpu(hp1).ops = 2) and
  814. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  815. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  816. { reg1 might not be modified inbetween }
  817. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  818. begin
  819. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  820. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  821. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  822. asml.remove(hp1);
  823. hp1.free;
  824. result:=true;
  825. end
  826. {
  827. change
  828. sxtb reg2,reg1
  829. sxtb reg3,reg2
  830. dealloc reg2
  831. to
  832. uxtb reg3,reg1
  833. }
  834. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  835. (taicpu(p).ops=2) and
  836. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  837. (taicpu(hp1).ops = 2) and
  838. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  839. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  840. { reg1 might not be modified inbetween }
  841. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  842. begin
  843. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  844. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  845. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  846. asml.remove(hp1);
  847. hp1.free;
  848. result:=true;
  849. end
  850. {
  851. change
  852. sxtb reg2,reg1
  853. and reg3,reg2,#0x*FF
  854. dealloc reg2
  855. to
  856. uxtb reg3,reg1
  857. }
  858. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  859. (taicpu(p).ops=2) and
  860. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  861. (taicpu(hp1).ops=3) and
  862. (taicpu(hp1).oper[2]^.typ=top_const) and
  863. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  864. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  865. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  866. { reg1 might not be modified inbetween }
  867. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  868. begin
  869. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  870. taicpu(hp1).opcode:=A_UXTB;
  871. taicpu(hp1).ops:=2;
  872. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  873. result:=RemoveCurrentP(p);
  874. end
  875. {$ifdef AARCH64}
  876. else if MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  877. (taicpu(p).ops=2) and
  878. (taicpu(hp1).ops=3) and
  879. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  880. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)) and
  881. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  882. { reg1 might not be modified inbetween }
  883. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  884. begin
  885. DebugMsg('Peephole SxtbOp2Op done', p);
  886. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  887. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  888. taicpu(hp1).ops:=4;
  889. shifterop_reset(so);
  890. so.shiftmode:=SM_SXTB;
  891. so.shiftimm:=0;
  892. taicpu(hp1).loadshifterop(3,so);
  893. result:=RemoveCurrentP(p);
  894. end
  895. {$endif AARCH64}
  896. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  897. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  898. Result:=true;
  899. end;
  900. end;
  901. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  902. var
  903. hp1: tai;
  904. so: tshifterop;
  905. begin
  906. Result:=false;
  907. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  908. begin
  909. {
  910. change
  911. sxth reg2,reg1
  912. strh reg2,[...]
  913. dealloc reg2
  914. to
  915. strh reg1,[...]
  916. }
  917. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  918. (taicpu(p).ops=2) and
  919. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  920. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  921. { the reference in strb might not use reg2 }
  922. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  923. { reg1 might not be modified inbetween }
  924. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  925. begin
  926. DebugMsg('Peephole SxthStrh2Strh done', p);
  927. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  928. result:=RemoveCurrentP(p);
  929. end
  930. {
  931. change
  932. sxth reg2,reg1
  933. sxth reg3,reg2
  934. dealloc reg2
  935. to
  936. sxth reg3,reg1
  937. }
  938. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  939. (taicpu(p).ops=2) and
  940. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  941. (taicpu(hp1).ops=2) and
  942. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  943. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  944. { reg1 might not be modified inbetween }
  945. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  946. begin
  947. DebugMsg('Peephole SxthSxth2Sxth done', p);
  948. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  949. taicpu(hp1).opcode:=A_SXTH;
  950. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  951. result:=RemoveCurrentP(p);
  952. end
  953. {$ifdef AARCH64}
  954. {
  955. change
  956. sxth reg2,reg1
  957. sxtw reg3,reg2
  958. dealloc reg2
  959. to
  960. sxth reg3,reg1
  961. }
  962. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  963. (taicpu(p).ops=2) and
  964. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  965. (taicpu(hp1).ops=2) and
  966. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  967. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  968. { reg1 might not be modified inbetween }
  969. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  970. begin
  971. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  972. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  973. taicpu(hp1).opcode:=A_SXTH;
  974. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  975. result:=RemoveCurrentP(p);
  976. end
  977. {$endif AARCH64}
  978. {
  979. change
  980. sxth reg2,reg1
  981. and reg3,reg2,#65535
  982. dealloc reg2
  983. to
  984. uxth reg3,reg1
  985. }
  986. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  987. (taicpu(p).ops=2) and
  988. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  989. (taicpu(hp1).ops=3) and
  990. (taicpu(hp1).oper[2]^.typ=top_const) and
  991. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  992. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  993. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  994. { reg1 might not be modified inbetween }
  995. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  996. begin
  997. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  998. taicpu(hp1).opcode:=A_UXTH;
  999. taicpu(hp1).ops:=2;
  1000. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1001. result:=RemoveCurrentP(p);
  1002. end
  1003. {$ifdef AARCH64}
  1004. else if MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  1005. (taicpu(p).ops=2) and
  1006. (taicpu(hp1).ops=3) and
  1007. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  1008. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)) and
  1009. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1010. { reg1 might not be modified inbetween }
  1011. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1012. begin
  1013. DebugMsg('Peephole SxthOp2Op done', p);
  1014. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1015. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1016. taicpu(hp1).ops:=4;
  1017. shifterop_reset(so);
  1018. so.shiftmode:=SM_SXTH;
  1019. so.shiftimm:=0;
  1020. taicpu(hp1).loadshifterop(3,so);
  1021. result:=RemoveCurrentP(p);
  1022. end
  1023. {$endif AARCH64}
  1024. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1025. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  1026. Result:=true;
  1027. end;
  1028. end;
  1029. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  1030. begin
  1031. Result := False;
  1032. { Convert:
  1033. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1034. To:
  1035. mov reg1,reg2
  1036. }
  1037. if (taicpu(p).oper[2]^.val = 0) and
  1038. {$ifdef AARCH64}
  1039. (
  1040. (
  1041. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1042. (taicpu(p).oper[3]^.val = 64)
  1043. ) or
  1044. (
  1045. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1046. (taicpu(p).oper[3]^.val = 32)
  1047. )
  1048. )
  1049. {$else AARCH64}
  1050. (taicpu(p).oper[3]^.val = 32)
  1051. {$endif AARCH64}
  1052. then
  1053. begin
  1054. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1055. taicpu(p).opcode := A_MOV;
  1056. taicpu(p).ops := 2;
  1057. taicpu(p).clearop(2);
  1058. taicpu(p).clearop(3);
  1059. Result := True;
  1060. Exit;
  1061. end;
  1062. end;
  1063. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1064. var
  1065. hp1: tai;
  1066. Reference: TReference;
  1067. NewOp: TAsmOp;
  1068. begin
  1069. Result := False;
  1070. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1071. Exit;
  1072. Reference := taicpu(p).oper[1]^.ref^;
  1073. if (Reference.addressmode = AM_OFFSET) and
  1074. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1075. { Delay calling GetNextInstruction for as long as possible }
  1076. GetNextInstruction(p, hp1) and
  1077. (hp1.typ = ait_instruction) and
  1078. (taicpu(hp1).condition = C_None) and
  1079. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1080. begin
  1081. if (taicpu(hp1).opcode = A_STR) and
  1082. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1083. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1084. begin
  1085. { With:
  1086. ldr reg1,[ref]
  1087. str reg2,[ref]
  1088. If reg1 = reg2, Remove str
  1089. }
  1090. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1091. begin
  1092. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1093. RemoveInstruction(hp1);
  1094. Result := True;
  1095. Exit;
  1096. end;
  1097. end
  1098. else if (taicpu(hp1).opcode = A_LDR) and
  1099. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1100. begin
  1101. { With:
  1102. ldr reg1,[ref]
  1103. ldr reg2,[ref]
  1104. If reg1 = reg2, delete the second ldr
  1105. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1106. a dependency, but it will likely open up new optimisations, so
  1107. do it for now and handle any new dependencies later.
  1108. }
  1109. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1110. begin
  1111. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1112. RemoveInstruction(hp1);
  1113. Result := True;
  1114. Exit;
  1115. end
  1116. else if
  1117. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1118. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1119. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1120. begin
  1121. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1122. taicpu(hp1).opcode := A_MOV;
  1123. taicpu(hp1).oppostfix := PF_None;
  1124. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1125. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1126. Result := True;
  1127. Exit;
  1128. end;
  1129. end;
  1130. end;
  1131. end;
  1132. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1133. var
  1134. hp1: tai;
  1135. Reference: TReference;
  1136. SizeMismatch: Boolean;
  1137. SrcReg, DstReg: TRegister;
  1138. NewOp: TAsmOp;
  1139. begin
  1140. Result := False;
  1141. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1142. Exit;
  1143. Reference := taicpu(p).oper[1]^.ref^;
  1144. if (Reference.addressmode = AM_OFFSET) and
  1145. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1146. { Delay calling GetNextInstruction for as long as possible }
  1147. GetNextInstruction(p, hp1) and
  1148. (hp1.typ = ait_instruction) and
  1149. (taicpu(hp1).condition = C_None) and
  1150. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1151. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1152. begin
  1153. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1154. SrcReg := taicpu(p).oper[0]^.reg;
  1155. DstReg := taicpu(hp1).oper[0]^.reg;
  1156. if (taicpu(hp1).opcode = A_LDR) and
  1157. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1158. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1159. (
  1160. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1161. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1162. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1163. {$ifdef AARCH64}
  1164. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1165. {$endif AARCH64}
  1166. ) then
  1167. begin
  1168. { With:
  1169. str reg1,[ref]
  1170. ldr reg2,[ref]
  1171. If reg1 = reg2, Remove ldr.
  1172. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1173. }
  1174. if (SrcReg = DstReg) and
  1175. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1176. (taicpu(p).oppostfix=PF_None) then
  1177. begin
  1178. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1179. RemoveInstruction(hp1);
  1180. Result := True;
  1181. Exit;
  1182. end
  1183. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1184. (getregtype(DstReg) = R_INTREGISTER) and
  1185. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1186. begin
  1187. NewOp:=A_NONE;
  1188. if taicpu(hp1).oppostfix=PF_None then
  1189. NewOp:=A_MOV
  1190. else
  1191. {$ifdef ARM}
  1192. if (current_settings.cputype < cpu_armv6) then
  1193. begin
  1194. { The zero- and sign-extension operations were only
  1195. introduced under ARMv6 }
  1196. case taicpu(hp1).oppostfix of
  1197. PF_B:
  1198. begin
  1199. { The if-block afterwards will set the middle operand to the correct register }
  1200. taicpu(hp1).allocate_oper(3);
  1201. taicpu(hp1).ops := 3;
  1202. taicpu(hp1).loadconst(2, $FF);
  1203. NewOp := A_AND;
  1204. end;
  1205. PF_H:
  1206. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1207. PF_SB,
  1208. PF_SH:
  1209. { Do nothing - can't easily encode sign-extensions };
  1210. else
  1211. InternalError(2021043002);
  1212. end;
  1213. end
  1214. else
  1215. {$endif ARM}
  1216. case taicpu(hp1).oppostfix of
  1217. PF_B:
  1218. NewOp := A_UXTB;
  1219. PF_SB:
  1220. NewOp := A_SXTB;
  1221. PF_H:
  1222. NewOp := A_UXTH;
  1223. PF_SH:
  1224. NewOp := A_SXTH;
  1225. {$ifdef AARCH64}
  1226. PF_SW:
  1227. NewOp := A_SXTW;
  1228. PF_W:
  1229. NewOp := A_MOV;
  1230. {$endif AARCH64}
  1231. else
  1232. InternalError(2021043001);
  1233. end;
  1234. if (NewOp<>A_None) then
  1235. begin
  1236. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1237. taicpu(hp1).oppostfix := PF_None;
  1238. taicpu(hp1).opcode := NewOp;
  1239. taicpu(hp1).loadreg(1, SrcReg);
  1240. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1241. Result := True;
  1242. Exit;
  1243. end;
  1244. end
  1245. end
  1246. else if (taicpu(hp1).opcode = A_STR) and
  1247. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1248. begin
  1249. { With:
  1250. str reg1,[ref]
  1251. str reg2,[ref]
  1252. If reg1 <> reg2, delete the first str
  1253. IF reg1 = reg2, delete the second str
  1254. }
  1255. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1256. begin
  1257. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1258. RemoveInstruction(hp1);
  1259. Result := True;
  1260. Exit;
  1261. end
  1262. else if
  1263. { Registers same byte size? }
  1264. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1265. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1266. begin
  1267. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1268. RemoveCurrentP(p, hp1);
  1269. Result := True;
  1270. Exit;
  1271. end;
  1272. end;
  1273. end;
  1274. end;
  1275. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1276. var
  1277. hp1, hp2: tai;
  1278. i: longint;
  1279. begin
  1280. Result:=false;
  1281. {
  1282. optimize
  1283. and reg2,reg1,const1
  1284. ...
  1285. }
  1286. if (taicpu(p).ops>2) and
  1287. (taicpu(p).oper[1]^.typ = top_reg) and
  1288. (taicpu(p).oper[2]^.typ = top_const) then
  1289. begin
  1290. {
  1291. change
  1292. and reg2,reg1,const1
  1293. ...
  1294. and reg3,reg2,const2
  1295. to
  1296. and reg3,reg1,(const1 and const2)
  1297. }
  1298. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1299. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1300. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1301. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1302. (taicpu(hp1).oper[2]^.typ = top_const)
  1303. {$ifdef AARCH64}
  1304. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1305. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1306. ) or
  1307. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1308. {$endif AARCH64}
  1309. then
  1310. begin
  1311. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1312. begin
  1313. DebugMsg('Peephole AndAnd2And done', p);
  1314. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1315. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1316. begin
  1317. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1318. taicpu(p).opcode:=A_MOV;
  1319. taicpu(p).ops:=2;
  1320. taicpu(p).loadConst(1,0);
  1321. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1322. end
  1323. else
  1324. begin
  1325. DebugMsg('Peephole AndAnd2And 1 done', p);
  1326. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1327. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1328. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1329. end;
  1330. asml.remove(hp1);
  1331. hp1.free;
  1332. Result:=true;
  1333. exit;
  1334. end
  1335. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1336. begin
  1337. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1338. begin
  1339. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1340. taicpu(hp1).opcode:=A_MOV;
  1341. taicpu(hp1).loadConst(1,0);
  1342. taicpu(hp1).ops:=2;
  1343. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1344. end
  1345. else
  1346. begin
  1347. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1348. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1349. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1350. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1351. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1352. end;
  1353. GetNextInstruction(p, hp1);
  1354. RemoveCurrentP(p);
  1355. p:=hp1;
  1356. Result:=true;
  1357. exit;
  1358. end;
  1359. end
  1360. {
  1361. change
  1362. and reg2,reg1,$xxxxxxFF
  1363. strb reg2,[...]
  1364. dealloc reg2
  1365. to
  1366. strb reg1,[...]
  1367. }
  1368. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1369. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1370. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1371. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1372. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1373. { the reference in strb might not use reg2 }
  1374. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1375. { reg1 might not be modified inbetween }
  1376. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1377. begin
  1378. DebugMsg('Peephole AndStrb2Strb done', p);
  1379. {$ifdef AARCH64}
  1380. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1381. {$else AARCH64}
  1382. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1383. {$endif AARCH64}
  1384. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1385. RemoveCurrentP(p);
  1386. result:=true;
  1387. exit;
  1388. end
  1389. {
  1390. change
  1391. and reg2,reg1,255
  1392. uxtb/uxth reg3,reg2
  1393. dealloc reg2
  1394. to
  1395. and reg3,reg1,x
  1396. }
  1397. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1398. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1399. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1400. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1401. (taicpu(hp1).ops = 2) and
  1402. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1403. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1404. { reg1 might not be modified inbetween }
  1405. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1406. begin
  1407. DebugMsg('Peephole AndUxt2And done', p);
  1408. taicpu(hp1).opcode:=A_AND;
  1409. taicpu(hp1).ops:=3;
  1410. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1411. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1412. GetNextInstruction(p,hp1);
  1413. asml.remove(p);
  1414. p.Free;
  1415. p:=hp1;
  1416. result:=true;
  1417. exit;
  1418. end
  1419. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1420. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1421. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1422. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1423. (taicpu(hp1).ops = 2) and
  1424. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1425. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1426. { reg1 might not be modified inbetween }
  1427. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1428. begin
  1429. DebugMsg('Peephole AndSxt2And done', p);
  1430. taicpu(hp1).opcode:=A_AND;
  1431. taicpu(hp1).ops:=3;
  1432. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1433. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1434. GetNextInstruction(p,hp1);
  1435. asml.remove(p);
  1436. p.Free;
  1437. p:=hp1;
  1438. result:=true;
  1439. exit;
  1440. end
  1441. {
  1442. from
  1443. and reg1,reg0,2^n-1
  1444. mov reg2,reg1, lsl imm1
  1445. (mov reg3,reg2, lsr/asr imm1)
  1446. remove either the and or the lsl/xsr sequence if possible
  1447. }
  1448. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1449. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1450. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1451. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1452. (taicpu(hp1).ops=3) and
  1453. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1454. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1455. {$ifdef ARM}
  1456. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1457. {$endif ARM}
  1458. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1459. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1460. begin
  1461. {
  1462. and reg1,reg0,2^n-1
  1463. mov reg2,reg1, lsl imm1
  1464. mov reg3,reg2, lsr/asr imm1
  1465. =>
  1466. and reg1,reg0,2^n-1
  1467. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1468. }
  1469. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1470. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1471. (taicpu(hp2).ops=3) and
  1472. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1473. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1474. {$ifdef ARM}
  1475. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1476. {$endif ARM}
  1477. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1478. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1479. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1480. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1481. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1482. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1483. begin
  1484. DebugMsg('Peephole AndLslXsr2And done', p);
  1485. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1486. asml.Remove(hp1);
  1487. asml.Remove(hp2);
  1488. hp1.free;
  1489. hp2.free;
  1490. result:=true;
  1491. exit;
  1492. end
  1493. {
  1494. and reg1,reg0,2^n-1
  1495. mov reg2,reg1, lsl imm1
  1496. =>
  1497. mov reg2,reg0, lsl imm1
  1498. if imm1>i
  1499. }
  1500. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1501. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1502. begin
  1503. DebugMsg('Peephole AndLsl2Lsl done', p);
  1504. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1505. GetNextInstruction(p, hp1);
  1506. asml.Remove(p);
  1507. p.free;
  1508. p:=hp1;
  1509. result:=true;
  1510. exit;
  1511. end
  1512. end;
  1513. end;
  1514. {
  1515. change
  1516. and reg1, ...
  1517. mov reg2, reg1
  1518. to
  1519. and reg2, ...
  1520. }
  1521. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1522. (taicpu(p).ops>=3) and
  1523. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1524. Result:=true;
  1525. end;
  1526. end.