aoptarm.pas 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  28. { while ARM and AAarch64 look not very similar at a first glance,
  29. several optimizations can be shared between both }
  30. TARMAsmOptimizer = class(TAsmOptimizer)
  31. procedure DebugMsg(const s : string; p : tai);
  32. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  33. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  34. function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;
  35. {$ifdef AARCH64}
  36. function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  37. {$endif AARCH64}
  38. function OptPreSBFXUBFX(var p: tai): Boolean;
  39. function OptPass1UXTB(var p: tai): Boolean;
  40. function OptPass1UXTH(var p: tai): Boolean;
  41. function OptPass1SXTB(var p: tai): Boolean;
  42. function OptPass1SXTH(var p: tai): Boolean;
  43. function OptPass1LDR(var p: tai): Boolean; virtual;
  44. function OptPass1STR(var p: tai): Boolean; virtual;
  45. function OptPass1And(var p: tai): Boolean; virtual;
  46. function OptPass2AND(var p: tai): Boolean;
  47. function OptPass2TST(var p: tai): Boolean;
  48. End;
  49. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  50. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  51. {$ifdef AARCH64}
  52. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  53. {$endif AARCH64}
  54. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  55. function RefsEqual(const r1, r2: treference): boolean;
  56. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  57. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  58. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  59. Implementation
  60. uses
  61. cutils,verbose,globals,
  62. systems,
  63. cpuinfo,
  64. cgobj,procinfo,
  65. aasmbase,aasmdata,itcpugas;
  66. {$ifdef DEBUG_AOPTCPU}
  67. const
  68. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  69. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  70. begin
  71. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  72. end;
  73. {$else DEBUG_AOPTCPU}
  74. { Empty strings help the optimizer to remove string concatenations that won't
  75. ever appear to the user on release builds. [Kit] }
  76. const
  77. SPeepholeOptimization = '';
  78. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  79. begin
  80. end;
  81. {$endif DEBUG_AOPTCPU}
  82. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  83. begin
  84. result :=
  85. (instr.typ = ait_instruction) and
  86. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  87. ((cond = []) or (taicpu(instr).condition in cond)) and
  88. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  89. end;
  90. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  91. begin
  92. result :=
  93. (instr.typ = ait_instruction) and
  94. (taicpu(instr).opcode = op) and
  95. ((cond = []) or (taicpu(instr).condition in cond)) and
  96. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  97. end;
  98. {$ifdef AARCH64}
  99. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  100. var
  101. op : TAsmOp;
  102. begin
  103. result:=false;
  104. if instr.typ <> ait_instruction then
  105. exit;
  106. for op in ops do
  107. begin
  108. if (taicpu(instr).opcode = op) and
  109. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  110. begin
  111. result:=true;
  112. exit;
  113. end;
  114. end;
  115. end;
  116. {$endif AARCH64}
  117. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  118. begin
  119. result :=
  120. (instr.typ = ait_instruction) and
  121. (taicpu(instr).opcode = op) and
  122. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  123. end;
  124. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  125. begin
  126. result := (oper.typ = top_reg) and (oper.reg = reg);
  127. end;
  128. function RefsEqual(const r1, r2: treference): boolean;
  129. begin
  130. refsequal :=
  131. (r1.offset = r2.offset) and
  132. (r1.base = r2.base) and
  133. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  134. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  135. (r1.relsymbol = r2.relsymbol) and
  136. {$ifdef ARM}
  137. (r1.signindex = r2.signindex) and
  138. {$endif ARM}
  139. (r1.shiftimm = r2.shiftimm) and
  140. (r1.addressmode = r2.addressmode) and
  141. (r1.shiftmode = r2.shiftmode) and
  142. (r1.volatility=[]) and
  143. (r2.volatility=[]);
  144. end;
  145. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  146. begin
  147. result := oper1.typ = oper2.typ;
  148. if result then
  149. case oper1.typ of
  150. top_const:
  151. Result:=oper1.val = oper2.val;
  152. top_reg:
  153. Result:=oper1.reg = oper2.reg;
  154. top_conditioncode:
  155. Result:=oper1.cc = oper2.cc;
  156. top_realconst:
  157. Result:=oper1.val_real = oper2.val_real;
  158. top_ref:
  159. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  160. else Result:=false;
  161. end
  162. end;
  163. function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  164. begin
  165. result := (oper.typ = top_const) and (oper.val = a);
  166. end;
  167. {$ifdef AARCH64}
  168. function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
  169. var
  170. so: tshifterop;
  171. opoffset: Integer;
  172. begin
  173. Result:=false;
  174. if (taicpu(p).ops=2) and
  175. ((MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
  176. (taicpu(hp1).ops=3) and
  177. MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  178. not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  179. (MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) and
  180. (taicpu(hp1).ops=2) and
  181. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))
  182. ) and
  183. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  184. { reg1 might not be modified inbetween }
  185. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  186. begin
  187. DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
  188. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  189. if MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]) then
  190. opoffset:=0
  191. else
  192. opoffset:=1;
  193. taicpu(hp1).loadReg(opoffset+1,taicpu(p).oper[1]^.reg);
  194. taicpu(hp1).ops:=opoffset+3;
  195. shifterop_reset(so);
  196. so.shiftmode:=shiftmode;
  197. so.shiftimm:=0;
  198. taicpu(hp1).loadshifterop(opoffset+2,so);
  199. result:=RemoveCurrentP(p);
  200. end;
  201. end;
  202. {$endif AARCH64}
  203. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  204. Out Next: tai; const reg: TRegister): Boolean;
  205. var
  206. gniResult: Boolean;
  207. begin
  208. Next:=Current;
  209. Result := False;
  210. repeat
  211. gniResult:=GetNextInstruction(Next,Next);
  212. if gniResult and RegInInstruction(reg,Next) then
  213. { Found something }
  214. Exit(True);
  215. until not gniResult or
  216. not(cs_opt_level3 in current_settings.optimizerswitches) or
  217. (Next.typ<>ait_instruction) or
  218. is_calljmp(taicpu(Next).opcode)
  219. {$ifdef ARM}
  220. or RegModifiedByInstruction(NR_PC,Next)
  221. {$endif ARM}
  222. ;
  223. end;
  224. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  225. var
  226. alloc,
  227. dealloc : tai_regalloc;
  228. hp1 : tai;
  229. begin
  230. Result:=false;
  231. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  232. { We can't optimize if there is a shiftop }
  233. (taicpu(movp).ops=2) and
  234. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  235. { don't mess with moves to fp }
  236. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  237. { the destination register of the mov might not be used beween p and movp }
  238. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  239. {$ifdef ARM}
  240. { PC should be changed only by moves }
  241. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  242. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  243. (taicpu(p).opcode<>A_CBZ) and
  244. (taicpu(p).opcode<>A_CBNZ) and
  245. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  246. not (
  247. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  248. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  249. (current_settings.cputype < cpu_armv6)
  250. ) and
  251. {$endif ARM}
  252. { Take care to only do this for instructions which REALLY load to the first register.
  253. Otherwise
  254. str reg0, [reg1]
  255. mov reg2, reg0
  256. will be optimized to
  257. str reg2, [reg1]
  258. }
  259. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  260. begin
  261. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  262. if assigned(dealloc) then
  263. begin
  264. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  265. result:=true;
  266. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  267. and remove it if possible }
  268. asml.Remove(dealloc);
  269. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  270. if assigned(alloc) then
  271. begin
  272. asml.Remove(alloc);
  273. alloc.free;
  274. dealloc.free;
  275. end
  276. else
  277. asml.InsertAfter(dealloc,p);
  278. AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);
  279. { finally get rid of the mov }
  280. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  281. { Remove preindexing and postindexing for LDR in some cases.
  282. For example:
  283. ldr reg2,[reg1, xxx]!
  284. mov reg1,reg2
  285. must be translated to:
  286. ldr reg1,[reg1, xxx]
  287. Preindexing must be removed there, since the same register is used as the base and as the target.
  288. Such case is not allowed for ARM CPU and produces crash. }
  289. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  290. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  291. then
  292. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  293. asml.remove(movp);
  294. movp.free;
  295. end;
  296. end;
  297. end;
  298. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  299. var
  300. I: Integer;
  301. current_hp, next_hp: tai;
  302. LDRChange: Boolean;
  303. begin
  304. Result:=false;
  305. {
  306. change
  307. mov r1, r0
  308. add r1, r1, #1
  309. to
  310. add r1, r0, #1
  311. Todo: Make it work for mov+cmp too
  312. CAUTION! If this one is successful p might not be a mov instruction anymore!
  313. }
  314. if (taicpu(p).ops = 2) and
  315. (taicpu(p).oper[1]^.typ = top_reg) and
  316. (taicpu(p).oppostfix = PF_NONE) then
  317. begin
  318. if
  319. MatchInstruction(hp1, [A_ADD, A_ADC,
  320. {$ifdef ARM}
  321. A_RSB, A_RSC,
  322. {$endif ARM}
  323. A_SUB, A_SBC,
  324. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  325. [taicpu(p).condition], []) and
  326. { MOV and MVN might only have 2 ops }
  327. (taicpu(hp1).ops >= 2) and
  328. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  329. (taicpu(hp1).oper[1]^.typ = top_reg) and
  330. (
  331. (taicpu(hp1).ops = 2) or
  332. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  333. ) and
  334. {$ifdef AARCH64}
  335. (taicpu(p).oper[1]^.reg<>NR_SP) and
  336. { in this case you have to transform it to movk or the like }
  337. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
  338. {$endif AARCH64}
  339. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  340. begin
  341. { When we get here we still don't know if the registers match }
  342. for I:=1 to 2 do
  343. {
  344. If the first loop was successful p will be replaced with hp1.
  345. The checks will still be ok, because all required information
  346. will also be in hp1 then.
  347. }
  348. if (taicpu(hp1).ops > I) and
  349. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  350. {$ifdef ARM}
  351. { prevent certain combinations on thumb(2), this is only a safe approximation }
  352. and (not(GenerateThumbCode or GenerateThumb2Code) or
  353. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  354. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  355. {$endif ARM}
  356. then
  357. begin
  358. DebugMsg('Peephole RedundantMovProcess done', hp1);
  359. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  360. if p<>hp1 then
  361. begin
  362. asml.remove(p);
  363. p.free;
  364. p:=hp1;
  365. Result:=true;
  366. end;
  367. end;
  368. if Result then Exit;
  369. end
  370. { Change: Change:
  371. mov r1, r0 mov r1, r0
  372. ... ...
  373. ldr/str r2, [r1, etc.] mov r2, r1
  374. To: To:
  375. ldr/str r2, [r0, etc.] mov r2, r0
  376. }
  377. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  378. {$ifdef ARM}
  379. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  380. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  381. { Thumb does not support references with base and index one being SP }
  382. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  383. {$endif ARM}
  384. {$ifdef AARCH64}
  385. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  386. {$endif AARCH64}
  387. then
  388. begin
  389. current_hp := p;
  390. TransferUsedRegs(TmpUsedRegs);
  391. { Search local instruction block }
  392. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  393. begin
  394. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  395. LDRChange := False;
  396. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2)
  397. {$ifdef AARCH64}
  398. { If r0 is the zero register, then this sequence of instructions will cause
  399. an access violation, but that's better than an assembler error caused by
  400. changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
  401. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
  402. {$endif AARCH64}
  403. then
  404. begin
  405. { Change the registers from r1 to r0 }
  406. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  407. {$ifdef ARM}
  408. { This optimisation conflicts with something and raises
  409. an access violation - needs further investigation. [Kit] }
  410. (taicpu(next_hp).opcode <> A_LDR) and
  411. {$endif ARM}
  412. { Don't mess around with the base register if the
  413. reference is pre- or post-indexed }
  414. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  415. begin
  416. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  417. LDRChange := True;
  418. end;
  419. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  420. begin
  421. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  422. LDRChange := True;
  423. end;
  424. if LDRChange then
  425. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  426. { Drop out if we're dealing with pre-indexed references }
  427. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  428. (
  429. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  430. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  431. ) then
  432. begin
  433. { Remember to update register allocations }
  434. if LDRChange then
  435. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  436. Break;
  437. end;
  438. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  439. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  440. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  441. begin
  442. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  443. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  444. LDRChange := True;
  445. end;
  446. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  447. begin
  448. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  449. if (taicpu(p).oppostfix = PF_None) and
  450. (
  451. (
  452. (taicpu(next_hp).opcode = A_LDR) and
  453. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  454. ) or
  455. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  456. ) and
  457. { Double-check to see if the old registers were actually
  458. changed (e.g. if the super registers matched, but not
  459. the sizes, they won't be changed). }
  460. (
  461. (taicpu(next_hp).opcode = A_LDR) or
  462. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  463. ) and
  464. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  465. begin
  466. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  467. RemoveCurrentP(p);
  468. Result := True;
  469. Exit;
  470. end;
  471. end;
  472. end
  473. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  474. (taicpu(next_hp).ops = 2) then
  475. begin
  476. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  477. begin
  478. { mov r0,r1; mov r1,r1 - remove second MOV here so
  479. so "RedundantMovProcess 2b" doesn't get erroneously
  480. applied }
  481. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(next_hp).oper[1]^.reg) then
  482. begin
  483. DebugMsg(SPeepholeOptimization + 'Mov2None 2a done', next_hp);
  484. if (next_hp = hp1) then
  485. { Don't let hp1 become a dangling pointer }
  486. hp1 := nil;
  487. asml.Remove(next_hp);
  488. next_hp.Free;
  489. Continue;
  490. end;
  491. { Found another mov that writes entirely to the register }
  492. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  493. begin
  494. { Register was used beforehand }
  495. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  496. begin
  497. { This MOV is exactly the same as the first one.
  498. Since none of the registers have changed value
  499. at this point, we can remove it. }
  500. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);
  501. if (next_hp = hp1) then
  502. { Don't let hp1 become a dangling pointer }
  503. hp1 := nil;
  504. asml.Remove(next_hp);
  505. next_hp.Free;
  506. { We still have the original p, so we can continue optimising;
  507. if it was -O2 or below, this instruction appeared immediately
  508. after the first MOV, so we're technically not looking more
  509. than one instruction ahead after it's removed! [Kit] }
  510. Continue;
  511. end
  512. else
  513. { Register changes value - drop out }
  514. Break;
  515. end;
  516. { We can delete the first MOV (only if the second MOV is unconditional) }
  517. {$ifdef ARM}
  518. if (taicpu(p).oppostfix = PF_None) and
  519. (taicpu(next_hp).condition = C_None) then
  520. {$endif ARM}
  521. begin
  522. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  523. RemoveCurrentP(p);
  524. Result := True;
  525. end;
  526. Exit;
  527. end
  528. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  529. begin
  530. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  531. { Be careful - if the entire register is not used, removing this
  532. instruction will leave the unused part uninitialised }
  533. {$ifdef AARCH64}
  534. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  535. {$endif AARCH64}
  536. then
  537. begin
  538. { Instruction will become mov r1,r1 }
  539. DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);
  540. { Allocate r1 between the instructions; not doing
  541. so may cause problems when removing superfluous
  542. MOVs later (i38055) }
  543. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  544. if (next_hp = hp1) then
  545. { Don't let hp1 become a dangling pointer }
  546. hp1 := nil;
  547. asml.Remove(next_hp);
  548. next_hp.Free;
  549. Continue;
  550. end;
  551. { Change the old register (checking the first operand again
  552. forces it to be left alone if the full register is not
  553. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  554. {$ifdef AARCH64}
  555. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  556. {$endif AARCH64}
  557. begin
  558. DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  559. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  560. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  561. { If this was the only reference to the old register,
  562. then we can remove the original MOV now }
  563. if (taicpu(p).oppostfix = PF_None) and
  564. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  565. remove if the register was apparently not allocated when its value is
  566. first set at the MOV command (this is especially true for the stack
  567. register). [Kit] }
  568. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  569. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  570. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  571. begin
  572. DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
  573. RemoveCurrentP(p);
  574. Result := True;
  575. Exit;
  576. end;
  577. end;
  578. end;
  579. end;
  580. { On low optimisation settions, don't search more than one instruction ahead }
  581. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  582. { Stop at procedure calls and jumps }
  583. is_calljmp(taicpu(next_hp).opcode) or
  584. { If the read register has changed value, or the MOV
  585. destination register has been used, drop out }
  586. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  587. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  588. Break;
  589. current_hp := next_hp;
  590. end;
  591. end;
  592. end;
  593. end;
  594. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  595. var
  596. hp1, hp2: tai;
  597. so: tshifterop;
  598. begin
  599. Result:=false;
  600. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  601. begin
  602. {
  603. change
  604. uxtb reg2,reg1
  605. strb reg2,[...]
  606. dealloc reg2
  607. to
  608. strb reg1,[...]
  609. }
  610. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  611. (taicpu(p).ops=2) and
  612. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  613. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  614. { the reference in strb might not use reg2 }
  615. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  616. { reg1 might not be modified inbetween }
  617. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  618. begin
  619. DebugMsg('Peephole UxtbStrb2Strb done', p);
  620. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  621. result:=RemoveCurrentP(p);
  622. end
  623. {
  624. change
  625. uxtb reg2,reg1
  626. uxth reg3,reg2
  627. dealloc reg2
  628. to
  629. uxtb reg3,reg1
  630. }
  631. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  632. (taicpu(p).ops=2) and
  633. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  634. (taicpu(hp1).ops = 2) and
  635. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  636. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  637. { reg1 might not be modified inbetween }
  638. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  639. begin
  640. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  641. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  642. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  643. asml.remove(hp1);
  644. hp1.free;
  645. result:=true;
  646. end
  647. {
  648. change
  649. uxtb reg2,reg1
  650. uxtb reg3,reg2
  651. dealloc reg2
  652. to
  653. uxtb reg3,reg1
  654. }
  655. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  656. (taicpu(p).ops=2) and
  657. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  658. (taicpu(hp1).ops = 2) and
  659. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  660. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  661. { reg1 might not be modified inbetween }
  662. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  663. begin
  664. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  665. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  666. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  667. asml.remove(hp1);
  668. hp1.free;
  669. result:=true;
  670. end
  671. {
  672. change
  673. uxtb reg2,reg1
  674. and reg3,reg2,#0x*FF
  675. dealloc reg2
  676. to
  677. uxtb reg3,reg1
  678. }
  679. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  680. (taicpu(p).ops=2) and
  681. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  682. (taicpu(hp1).ops=3) and
  683. (taicpu(hp1).oper[2]^.typ=top_const) and
  684. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  685. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  686. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  687. { reg1 might not be modified inbetween }
  688. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  689. begin
  690. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  691. taicpu(hp1).opcode:=A_UXTB;
  692. taicpu(hp1).ops:=2;
  693. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  694. result:=RemoveCurrentP(p);
  695. end
  696. {$ifdef AARCH64}
  697. else if USxtOp2Op(p,hp1,SM_UXTB) then
  698. Result:=true
  699. {$endif AARCH64}
  700. else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
  701. Result:=true;
  702. end;
  703. end;
  704. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  705. var
  706. hp1: tai;
  707. so: tshifterop;
  708. begin
  709. Result:=false;
  710. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  711. begin
  712. {
  713. change
  714. uxth reg2,reg1
  715. strh reg2,[...]
  716. dealloc reg2
  717. to
  718. strh reg1,[...]
  719. }
  720. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  721. (taicpu(p).ops=2) and
  722. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  723. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  724. { the reference in strb might not use reg2 }
  725. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  726. { reg1 might not be modified inbetween }
  727. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  728. begin
  729. DebugMsg('Peephole UXTHStrh2Strh done', p);
  730. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  731. result:=RemoveCurrentP(p);
  732. end
  733. {
  734. change
  735. uxth reg2,reg1
  736. uxth reg3,reg2
  737. dealloc reg2
  738. to
  739. uxth reg3,reg1
  740. }
  741. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  742. (taicpu(p).ops=2) and
  743. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  744. (taicpu(hp1).ops=2) and
  745. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  746. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  747. { reg1 might not be modified inbetween }
  748. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  749. begin
  750. DebugMsg('Peephole UxthUxth2Uxth done', p);
  751. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  752. taicpu(hp1).opcode:=A_UXTH;
  753. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  754. result:=RemoveCurrentP(p);
  755. end
  756. {
  757. change
  758. uxth reg2,reg1
  759. and reg3,reg2,#65535
  760. dealloc reg2
  761. to
  762. uxth reg3,reg1
  763. }
  764. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  765. (taicpu(p).ops=2) and
  766. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  767. (taicpu(hp1).ops=3) and
  768. (taicpu(hp1).oper[2]^.typ=top_const) and
  769. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  770. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  771. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  772. { reg1 might not be modified inbetween }
  773. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  774. begin
  775. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  776. taicpu(hp1).opcode:=A_UXTH;
  777. taicpu(hp1).ops:=2;
  778. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  779. result:=RemoveCurrentP(p);
  780. end
  781. {$ifdef AARCH64}
  782. else if USxtOp2Op(p,hp1,SM_UXTH) then
  783. Result:=true
  784. {$endif AARCH64}
  785. else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  786. Result:=true;
  787. end;
  788. end;
  789. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  790. var
  791. hp1, hp2: tai;
  792. so: tshifterop;
  793. begin
  794. Result:=false;
  795. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  796. begin
  797. {
  798. change
  799. sxtb reg2,reg1
  800. strb reg2,[...]
  801. dealloc reg2
  802. to
  803. strb reg1,[...]
  804. }
  805. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  806. (taicpu(p).ops=2) and
  807. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  808. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  809. { the reference in strb might not use reg2 }
  810. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  811. { reg1 might not be modified inbetween }
  812. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  813. begin
  814. DebugMsg('Peephole SxtbStrb2Strb done', p);
  815. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  816. result:=RemoveCurrentP(p);
  817. end
  818. {
  819. change
  820. sxtb reg2,reg1
  821. sxth reg3,reg2
  822. dealloc reg2
  823. to
  824. sxtb reg3,reg1
  825. }
  826. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  827. (taicpu(p).ops=2) and
  828. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  829. (taicpu(hp1).ops = 2) and
  830. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  831. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  832. { reg1 might not be modified inbetween }
  833. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  834. begin
  835. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  836. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  837. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  838. asml.remove(hp1);
  839. hp1.free;
  840. result:=true;
  841. end
  842. {
  843. change
  844. sxtb reg2,reg1
  845. sxtb reg3,reg2
  846. dealloc reg2
  847. to
  848. uxtb reg3,reg1
  849. }
  850. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  851. (taicpu(p).ops=2) and
  852. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  853. (taicpu(hp1).ops = 2) and
  854. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  855. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  856. { reg1 might not be modified inbetween }
  857. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  858. begin
  859. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  860. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  861. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  862. asml.remove(hp1);
  863. hp1.free;
  864. result:=true;
  865. end
  866. {
  867. change
  868. sxtb reg2,reg1
  869. and reg3,reg2,#0x*FF
  870. dealloc reg2
  871. to
  872. uxtb reg3,reg1
  873. }
  874. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  875. (taicpu(p).ops=2) and
  876. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  877. (taicpu(hp1).ops=3) and
  878. (taicpu(hp1).oper[2]^.typ=top_const) and
  879. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  880. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  881. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  882. { reg1 might not be modified inbetween }
  883. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  884. begin
  885. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  886. taicpu(hp1).opcode:=A_UXTB;
  887. taicpu(hp1).ops:=2;
  888. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  889. result:=RemoveCurrentP(p);
  890. end
  891. {$ifdef AARCH64}
  892. else if USxtOp2Op(p,hp1,SM_SXTB) then
  893. Result:=true
  894. {$endif AARCH64}
  895. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  896. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
  897. Result:=true;
  898. end;
  899. end;
  900. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  901. var
  902. hp1: tai;
  903. so: tshifterop;
  904. begin
  905. Result:=false;
  906. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
  907. begin
  908. {
  909. change
  910. sxth reg2,reg1
  911. strh reg2,[...]
  912. dealloc reg2
  913. to
  914. strh reg1,[...]
  915. }
  916. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  917. (taicpu(p).ops=2) and
  918. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  919. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  920. { the reference in strb might not use reg2 }
  921. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  922. { reg1 might not be modified inbetween }
  923. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  924. begin
  925. DebugMsg('Peephole SxthStrh2Strh done', p);
  926. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  927. result:=RemoveCurrentP(p);
  928. end
  929. {
  930. change
  931. sxth reg2,reg1
  932. sxth reg3,reg2
  933. dealloc reg2
  934. to
  935. sxth reg3,reg1
  936. }
  937. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  938. (taicpu(p).ops=2) and
  939. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  940. (taicpu(hp1).ops=2) and
  941. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  942. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  943. { reg1 might not be modified inbetween }
  944. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  945. begin
  946. DebugMsg('Peephole SxthSxth2Sxth done', p);
  947. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  948. taicpu(hp1).opcode:=A_SXTH;
  949. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  950. result:=RemoveCurrentP(p);
  951. end
  952. {$ifdef AARCH64}
  953. {
  954. change
  955. sxth reg2,reg1
  956. sxtw reg3,reg2
  957. dealloc reg2
  958. to
  959. sxth reg3,reg1
  960. }
  961. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  962. (taicpu(p).ops=2) and
  963. MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
  964. (taicpu(hp1).ops=2) and
  965. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  966. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  967. { reg1 might not be modified inbetween }
  968. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  969. begin
  970. DebugMsg('Peephole SxthSxtw2Sxth done', p);
  971. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  972. taicpu(hp1).opcode:=A_SXTH;
  973. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  974. result:=RemoveCurrentP(p);
  975. end
  976. {$endif AARCH64}
  977. {
  978. change
  979. sxth reg2,reg1
  980. and reg3,reg2,#65535
  981. dealloc reg2
  982. to
  983. uxth reg3,reg1
  984. }
  985. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  986. (taicpu(p).ops=2) and
  987. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  988. (taicpu(hp1).ops=3) and
  989. (taicpu(hp1).oper[2]^.typ=top_const) and
  990. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  991. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  992. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  993. { reg1 might not be modified inbetween }
  994. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  995. begin
  996. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  997. taicpu(hp1).opcode:=A_UXTH;
  998. taicpu(hp1).ops:=2;
  999. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1000. result:=RemoveCurrentP(p);
  1001. end
  1002. {$ifdef AARCH64}
  1003. else if USxtOp2Op(p,hp1,SM_SXTH) then
  1004. Result:=true
  1005. {$endif AARCH64}
  1006. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1007. RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
  1008. Result:=true;
  1009. end;
  1010. end;
  1011. function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
  1012. begin
  1013. Result := False;
  1014. { Convert:
  1015. s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
  1016. To:
  1017. mov reg1,reg2
  1018. }
  1019. if (taicpu(p).oper[2]^.val = 0) and
  1020. {$ifdef AARCH64}
  1021. (
  1022. (
  1023. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBQ) and
  1024. (taicpu(p).oper[3]^.val = 64)
  1025. ) or
  1026. (
  1027. (getsubreg(taicpu(p).oper[0]^.reg) = R_SUBD) and
  1028. (taicpu(p).oper[3]^.val = 32)
  1029. )
  1030. )
  1031. {$else AARCH64}
  1032. (taicpu(p).oper[3]^.val = 32)
  1033. {$endif AARCH64}
  1034. then
  1035. begin
  1036. DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
  1037. taicpu(p).opcode := A_MOV;
  1038. taicpu(p).ops := 2;
  1039. taicpu(p).clearop(2);
  1040. taicpu(p).clearop(3);
  1041. Result := True;
  1042. Exit;
  1043. end;
  1044. end;
  1045. function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
  1046. var
  1047. hp1: tai;
  1048. Reference: TReference;
  1049. NewOp: TAsmOp;
  1050. begin
  1051. Result := False;
  1052. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1053. Exit;
  1054. Reference := taicpu(p).oper[1]^.ref^;
  1055. if (Reference.addressmode = AM_OFFSET) and
  1056. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1057. { Delay calling GetNextInstruction for as long as possible }
  1058. GetNextInstruction(p, hp1) and
  1059. (hp1.typ = ait_instruction) and
  1060. (taicpu(hp1).condition = C_None) and
  1061. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
  1062. begin
  1063. if (taicpu(hp1).opcode = A_STR) and
  1064. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1065. (getregtype(taicpu(p).oper[0]^.reg) = getregtype(taicpu(hp1).oper[0]^.reg)) then
  1066. begin
  1067. { With:
  1068. ldr reg1,[ref]
  1069. str reg2,[ref]
  1070. If reg1 = reg2, Remove str
  1071. }
  1072. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1073. begin
  1074. DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
  1075. RemoveInstruction(hp1);
  1076. Result := True;
  1077. Exit;
  1078. end;
  1079. end
  1080. else if (taicpu(hp1).opcode = A_LDR) and
  1081. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1082. begin
  1083. { With:
  1084. ldr reg1,[ref]
  1085. ldr reg2,[ref]
  1086. If reg1 = reg2, delete the second ldr
  1087. If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
  1088. a dependency, but it will likely open up new optimisations, so
  1089. do it for now and handle any new dependencies later.
  1090. }
  1091. if taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg then
  1092. begin
  1093. DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
  1094. RemoveInstruction(hp1);
  1095. Result := True;
  1096. Exit;
  1097. end
  1098. else if
  1099. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
  1100. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1101. (getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
  1102. begin
  1103. DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
  1104. taicpu(hp1).opcode := A_MOV;
  1105. taicpu(hp1).oppostfix := PF_None;
  1106. taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
  1107. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1108. Result := True;
  1109. Exit;
  1110. end;
  1111. end;
  1112. end;
  1113. end;
  1114. function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
  1115. var
  1116. hp1: tai;
  1117. Reference: TReference;
  1118. SizeMismatch: Boolean;
  1119. SrcReg, DstReg: TRegister;
  1120. NewOp: TAsmOp;
  1121. begin
  1122. Result := False;
  1123. if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
  1124. Exit;
  1125. Reference := taicpu(p).oper[1]^.ref^;
  1126. if (Reference.addressmode = AM_OFFSET) and
  1127. not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
  1128. { Delay calling GetNextInstruction for as long as possible }
  1129. GetNextInstruction(p, hp1) and
  1130. (hp1.typ = ait_instruction) and
  1131. (taicpu(hp1).condition = C_None) and
  1132. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
  1133. (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
  1134. begin
  1135. { Saves constant dereferencing and makes it easier to change the size if necessary }
  1136. SrcReg := taicpu(p).oper[0]^.reg;
  1137. DstReg := taicpu(hp1).oper[0]^.reg;
  1138. if (taicpu(hp1).opcode = A_LDR) and
  1139. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
  1140. (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
  1141. (
  1142. (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
  1143. ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
  1144. ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
  1145. {$ifdef AARCH64}
  1146. or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
  1147. {$endif AARCH64}
  1148. ) then
  1149. begin
  1150. { With:
  1151. str reg1,[ref]
  1152. ldr reg2,[ref]
  1153. If reg1 = reg2, Remove ldr.
  1154. If reg1 <> reg2, replace ldr with "mov reg2,reg1"
  1155. }
  1156. if (SrcReg = DstReg) and
  1157. { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
  1158. (taicpu(p).oppostfix=PF_None) then
  1159. begin
  1160. DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
  1161. RemoveInstruction(hp1);
  1162. Result := True;
  1163. Exit;
  1164. end
  1165. else if (getregtype(SrcReg) = R_INTREGISTER) and
  1166. (getregtype(DstReg) = R_INTREGISTER) and
  1167. (getsubreg(SrcReg) = getsubreg(DstReg)) then
  1168. begin
  1169. NewOp:=A_NONE;
  1170. if taicpu(hp1).oppostfix=PF_None then
  1171. NewOp:=A_MOV
  1172. else
  1173. {$ifdef ARM}
  1174. if (current_settings.cputype < cpu_armv6) then
  1175. begin
  1176. { The zero- and sign-extension operations were only
  1177. introduced under ARMv6 }
  1178. case taicpu(hp1).oppostfix of
  1179. PF_B:
  1180. begin
  1181. { The if-block afterwards will set the middle operand to the correct register }
  1182. taicpu(hp1).allocate_oper(3);
  1183. taicpu(hp1).ops := 3;
  1184. taicpu(hp1).loadconst(2, $FF);
  1185. NewOp := A_AND;
  1186. end;
  1187. PF_H:
  1188. { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
  1189. PF_SB,
  1190. PF_SH:
  1191. { Do nothing - can't easily encode sign-extensions };
  1192. else
  1193. InternalError(2021043002);
  1194. end;
  1195. end
  1196. else
  1197. {$endif ARM}
  1198. case taicpu(hp1).oppostfix of
  1199. PF_B:
  1200. NewOp := A_UXTB;
  1201. PF_SB:
  1202. NewOp := A_SXTB;
  1203. PF_H:
  1204. NewOp := A_UXTH;
  1205. PF_SH:
  1206. NewOp := A_SXTH;
  1207. {$ifdef AARCH64}
  1208. PF_SW:
  1209. NewOp := A_SXTW;
  1210. PF_W:
  1211. NewOp := A_MOV;
  1212. {$endif AARCH64}
  1213. else
  1214. InternalError(2021043001);
  1215. end;
  1216. if (NewOp<>A_None) then
  1217. begin
  1218. DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);
  1219. taicpu(hp1).oppostfix := PF_None;
  1220. taicpu(hp1).opcode := NewOp;
  1221. taicpu(hp1).loadreg(1, SrcReg);
  1222. AllocRegBetween(SrcReg, p, hp1, UsedRegs);
  1223. Result := True;
  1224. Exit;
  1225. end;
  1226. end
  1227. end
  1228. else if (taicpu(hp1).opcode = A_STR) and
  1229. RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
  1230. begin
  1231. { With:
  1232. str reg1,[ref]
  1233. str reg2,[ref]
  1234. If reg1 <> reg2, delete the first str
  1235. IF reg1 = reg2, delete the second str
  1236. }
  1237. if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
  1238. begin
  1239. DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
  1240. RemoveInstruction(hp1);
  1241. Result := True;
  1242. Exit;
  1243. end
  1244. else if
  1245. { Registers same byte size? }
  1246. (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
  1247. (taicpu(p).oper[1]^.ref^.volatility=[]) then
  1248. begin
  1249. DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
  1250. RemoveCurrentP(p, hp1);
  1251. Result := True;
  1252. Exit;
  1253. end;
  1254. end;
  1255. end;
  1256. end;
  1257. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  1258. var
  1259. hp1, hp2: tai;
  1260. i: longint;
  1261. begin
  1262. Result:=false;
  1263. {
  1264. optimize
  1265. and reg2,reg1,const1
  1266. ...
  1267. }
  1268. if (taicpu(p).ops>2) and
  1269. (taicpu(p).oper[1]^.typ = top_reg) and
  1270. (taicpu(p).oper[2]^.typ = top_const) then
  1271. begin
  1272. {
  1273. change
  1274. and reg2,reg1,const1
  1275. ...
  1276. and reg3,reg2,const2
  1277. to
  1278. and reg3,reg1,(const1 and const2)
  1279. }
  1280. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1281. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1282. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1283. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1284. (taicpu(hp1).oper[2]^.typ = top_const)
  1285. {$ifdef AARCH64}
  1286. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  1287. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  1288. ) or
  1289. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  1290. {$endif AARCH64}
  1291. then
  1292. begin
  1293. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1294. begin
  1295. DebugMsg('Peephole AndAnd2And done', p);
  1296. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  1297. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1298. begin
  1299. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  1300. taicpu(p).opcode:=A_MOV;
  1301. taicpu(p).ops:=2;
  1302. taicpu(p).loadConst(1,0);
  1303. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1304. end
  1305. else
  1306. begin
  1307. DebugMsg('Peephole AndAnd2And 1 done', p);
  1308. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1309. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1310. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1311. end;
  1312. asml.remove(hp1);
  1313. hp1.free;
  1314. Result:=true;
  1315. exit;
  1316. end
  1317. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1318. begin
  1319. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1320. begin
  1321. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1322. taicpu(hp1).opcode:=A_MOV;
  1323. taicpu(hp1).loadConst(1,0);
  1324. taicpu(hp1).ops:=2;
  1325. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1326. end
  1327. else
  1328. begin
  1329. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1330. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1331. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1332. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1333. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1334. end;
  1335. GetNextInstruction(p, hp1);
  1336. RemoveCurrentP(p);
  1337. p:=hp1;
  1338. Result:=true;
  1339. exit;
  1340. end;
  1341. end
  1342. {
  1343. change
  1344. and reg2,reg1,$xxxxxxFF
  1345. strb reg2,[...]
  1346. dealloc reg2
  1347. to
  1348. strb reg1,[...]
  1349. }
  1350. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1351. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1352. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1353. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1354. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1355. { the reference in strb might not use reg2 }
  1356. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1357. { reg1 might not be modified inbetween }
  1358. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1359. begin
  1360. DebugMsg('Peephole AndStrb2Strb done', p);
  1361. {$ifdef AARCH64}
  1362. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1363. {$else AARCH64}
  1364. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1365. {$endif AARCH64}
  1366. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1367. RemoveCurrentP(p);
  1368. result:=true;
  1369. exit;
  1370. end
  1371. {
  1372. change
  1373. and reg2,reg1,255
  1374. uxtb/uxth reg3,reg2
  1375. dealloc reg2
  1376. to
  1377. and reg3,reg1,x
  1378. }
  1379. else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1380. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1381. ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
  1382. (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
  1383. (taicpu(hp1).ops = 2) and
  1384. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1385. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1386. { reg1 might not be modified inbetween }
  1387. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1388. begin
  1389. DebugMsg('Peephole AndUxt2And done', p);
  1390. taicpu(hp1).opcode:=A_AND;
  1391. taicpu(hp1).ops:=3;
  1392. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1393. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1394. GetNextInstruction(p,hp1);
  1395. asml.remove(p);
  1396. p.Free;
  1397. p:=hp1;
  1398. result:=true;
  1399. exit;
  1400. end
  1401. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1402. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1403. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1404. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1405. (taicpu(hp1).ops = 2) and
  1406. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1407. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1408. { reg1 might not be modified inbetween }
  1409. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1410. begin
  1411. DebugMsg('Peephole AndSxt2And done', p);
  1412. taicpu(hp1).opcode:=A_AND;
  1413. taicpu(hp1).ops:=3;
  1414. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1415. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1416. GetNextInstruction(p,hp1);
  1417. asml.remove(p);
  1418. p.Free;
  1419. p:=hp1;
  1420. result:=true;
  1421. exit;
  1422. end
  1423. {
  1424. from
  1425. and reg1,reg0,2^n-1
  1426. mov reg2,reg1, lsl imm1
  1427. (mov reg3,reg2, lsr/asr imm1)
  1428. remove either the and or the lsl/xsr sequence if possible
  1429. }
  1430. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1431. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1432. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1433. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1434. (taicpu(hp1).ops=3) and
  1435. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1436. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1437. {$ifdef ARM}
  1438. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1439. {$endif ARM}
  1440. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1441. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1442. begin
  1443. {
  1444. and reg1,reg0,2^n-1
  1445. mov reg2,reg1, lsl imm1
  1446. mov reg3,reg2, lsr/asr imm1
  1447. =>
  1448. and reg1,reg0,2^n-1
  1449. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1450. }
  1451. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1452. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1453. (taicpu(hp2).ops=3) and
  1454. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1455. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1456. {$ifdef ARM}
  1457. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1458. {$endif ARM}
  1459. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1460. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1461. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1462. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1463. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1464. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1465. begin
  1466. DebugMsg('Peephole AndLslXsr2And done', p);
  1467. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1468. asml.Remove(hp1);
  1469. asml.Remove(hp2);
  1470. hp1.free;
  1471. hp2.free;
  1472. result:=true;
  1473. exit;
  1474. end
  1475. {
  1476. and reg1,reg0,2^n-1
  1477. mov reg2,reg1, lsl imm1
  1478. =>
  1479. mov reg2,reg0, lsl imm1
  1480. if imm1>i
  1481. }
  1482. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1483. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1484. begin
  1485. DebugMsg('Peephole AndLsl2Lsl done', p);
  1486. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1487. GetNextInstruction(p, hp1);
  1488. asml.Remove(p);
  1489. p.free;
  1490. p:=hp1;
  1491. result:=true;
  1492. exit;
  1493. end
  1494. end;
  1495. end;
  1496. {
  1497. change
  1498. and reg1, ...
  1499. mov reg2, reg1
  1500. to
  1501. and reg2, ...
  1502. }
  1503. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1504. (taicpu(p).ops>=3) and
  1505. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1506. Result:=true;
  1507. end;
  1508. function TARMAsmOptimizer.OptPass2AND(var p: tai): Boolean;
  1509. var
  1510. hp1, hp2: tai;
  1511. WorkingReg: TRegister;
  1512. begin
  1513. Result := False;
  1514. {
  1515. change
  1516. and reg1, ...
  1517. ...
  1518. cmp reg1, #0
  1519. b<ne/eq> @Lbl
  1520. to
  1521. ands reg1, ...
  1522. Also:
  1523. and reg1, ...
  1524. ...
  1525. cmp reg1, #0
  1526. (reg1 end of life)
  1527. b<ne/eq> @Lbl
  1528. to
  1529. tst reg1, ...
  1530. }
  1531. if (taicpu(p).condition = C_None) and
  1532. (taicpu(p).ops>=3) and
  1533. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1534. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  1535. MatchOperand(taicpu(hp1).oper[1]^, 0) and
  1536. {$ifdef AARCH64}
  1537. (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
  1538. (
  1539. (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
  1540. or
  1541. (
  1542. (taicpu(p).oper[2]^.typ = top_const) and
  1543. (taicpu(p).oper[2]^.val >= 0) and
  1544. (taicpu(p).oper[2]^.val <= $FFFFFFFF)
  1545. )
  1546. ) and
  1547. {$else AARCH64}
  1548. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  1549. {$endif AARCH64}
  1550. not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
  1551. GetNextInstruction(hp1, hp2) then
  1552. begin
  1553. if MatchInstruction(hp2, [A_B, A_CMP, A_CMN, A_TST{$ifndef AARCH64}, A_TEQ{$endif not AARCH64}], [C_EQ, C_NE], [PF_None]) then
  1554. begin
  1555. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  1556. WorkingReg := taicpu(p).oper[0]^.reg;
  1557. if RegEndOfLife(WorkingReg, taicpu(hp1)) then
  1558. begin
  1559. taicpu(p).opcode := A_TST;
  1560. taicpu(p).oppostfix := PF_None;
  1561. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  1562. taicpu(p).loadoper(1, taicpu(p).oper[2]^);
  1563. taicpu(p).ops := 2;
  1564. DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
  1565. end
  1566. else
  1567. begin
  1568. taicpu(p).oppostfix := PF_S;
  1569. DebugMsg(SPeepholeOptimization + 'AND; CMP -> ANDS', p);
  1570. end;
  1571. RemoveInstruction(hp1);
  1572. { If a temporary register was used for and/cmp before, we might be
  1573. able to deallocate the register so it can be used for other
  1574. optimisations later }
  1575. if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
  1576. ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);
  1577. Result := True;
  1578. Exit;
  1579. end
  1580. else if
  1581. (hp2.typ = ait_label) or
  1582. { Conditional comparison instructions have already been covered }
  1583. RegModifiedByInstruction(NR_DEFAULTFLAGS, hp2) then
  1584. begin
  1585. { The comparison is a null operation }
  1586. if RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1587. begin
  1588. DebugMsg(SPeepholeOptimization + 'AND; CMP -> nop', p);
  1589. RemoveInstruction(hp1);
  1590. RemoveCurrentP(p);
  1591. end
  1592. else
  1593. begin
  1594. DebugMsg(SPeepholeOptimization + 'CMP -> nop', hp1);
  1595. RemoveInstruction(hp1);
  1596. end;
  1597. Result := True;
  1598. Exit;
  1599. end;
  1600. end;
  1601. end;
  1602. function TARMAsmOptimizer.OptPass2TST(var p: tai): Boolean;
  1603. var
  1604. hp1, hp2: tai;
  1605. begin
  1606. Result := False;
  1607. if
  1608. {$ifndef AARCH64}
  1609. (taicpu(p).condition = C_None) and
  1610. {$endif AARCH64}
  1611. GetNextInstruction(p, hp1) and
  1612. MatchInstruction(hp1, A_B, [C_EQ, C_NE], [PF_None]) and
  1613. GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[0]^.reg) then
  1614. begin
  1615. case taicpu(hp2).opcode of
  1616. A_AND:
  1617. { Change:
  1618. tst r1,##
  1619. (r2 not in use, or r2 = r1)
  1620. b.c .Lbl
  1621. ...
  1622. and r2,r1,##
  1623. Optimise to:
  1624. ands r2,r1,##
  1625. b.c .Lbl
  1626. ...
  1627. }
  1628. if (taicpu(hp2).oppostfix in [PF_None, PF_S]) and
  1629. {$ifndef AARCH64}
  1630. (taicpu(hp2).condition = C_None) and
  1631. {$endif AARCH64}
  1632. (taicpu(hp2).ops = taicpu(p).ops + 1) and
  1633. not RegInUsedRegs(taicpu(hp2).oper[0]^.reg, UsedRegs) and
  1634. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^.reg) and
  1635. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[1]^) and
  1636. (
  1637. (taicpu(hp2).ops = 3) or
  1638. MatchOperand(taicpu(hp2).oper[3]^, taicpu(p).oper[2]^)
  1639. ) and
  1640. (
  1641. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1642. (
  1643. { Make sure the target register isn't used in between }
  1644. not RegUsedBetween(taicpu(hp2).oper[0]^.reg, hp1, hp2) and
  1645. (
  1646. { If the second operand is a register, make sure it isn't modified in between }
  1647. (taicpu(p).oper[1]^.typ <> top_reg) or
  1648. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1649. )
  1650. )
  1651. ) then
  1652. begin
  1653. AllocRegBetween(taicpu(hp2).oper[0]^.reg, p, hp2, UsedRegs);
  1654. if (taicpu(hp2).oppostfix = PF_S) then
  1655. AllocRegBetween(NR_DEFAULTFLAGS, p, hp2, UsedRegs);
  1656. DebugMsg(SPeepholeOptimization + 'TST; B.c; AND -> ANDS; B.c (TstBcAnd2AndsBc)', p);
  1657. taicpu(hp2).oppostfix := PF_S;
  1658. Asml.Remove(hp2);
  1659. Asml.InsertAfter(hp2, p);
  1660. RemoveCurrentP(p, hp2);
  1661. Result := True;
  1662. Exit;
  1663. end;
  1664. A_TST:
  1665. { Change:
  1666. tst r1,##
  1667. b.c .Lbl
  1668. ... (flags not modified)
  1669. tst r1,##
  1670. Remove second tst
  1671. }
  1672. if
  1673. {$ifndef AARCH64}
  1674. (taicpu(hp2).condition = C_None) and
  1675. {$endif AARCH64}
  1676. (taicpu(hp2).ops = taicpu(p).ops) and
  1677. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) and
  1678. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) and
  1679. (
  1680. (taicpu(hp2).ops = 2) or
  1681. MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[2]^)
  1682. ) and
  1683. (
  1684. not (cs_opt_level3 in current_settings.optimizerswitches) or
  1685. (
  1686. { Make sure the flags aren't modified in between }
  1687. not RegModifiedBetween(NR_DEFAULTFLAGS, hp1, hp2) and
  1688. (
  1689. { If the second operand is a register, make sure it isn't modified in between }
  1690. (taicpu(p).oper[1]^.typ <> top_reg) or
  1691. not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
  1692. )
  1693. )
  1694. ) then
  1695. begin
  1696. DebugMsg(SPeepholeOptimization + 'TST; B.c; TST -> TST; B.c (TstBcTst2TstBc)', p);
  1697. AllocRegBetween(NR_DEFAULTFLAGS, hp1, hp2, UsedRegs);
  1698. RemoveInstruction(hp2);
  1699. Result := True;
  1700. Exit;
  1701. end;
  1702. else
  1703. ;
  1704. end;
  1705. end;
  1706. end;
  1707. end.