aoptarm.pas 50 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { while ARM and AAarch64 look not very similar at a first glance,
  27. several optimizations can be shared between both }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
  29. procedure DebugMsg(const s : string; p : tai);
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  31. function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  37. function OptPass1And(var p: tai): Boolean; virtual;
  38. End;
  39. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  40. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  41. {$ifdef AARCH64}
  42. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  43. {$endif AARCH64}
  44. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  45. function RefsEqual(const r1, r2: treference): boolean;
  46. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  47. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  48. Implementation
  49. uses
  50. cutils,verbose,globtype,globals,
  51. systems,
  52. cpuinfo,
  53. cgobj,procinfo,
  54. aasmbase,aasmdata;
  55. {$ifdef DEBUG_AOPTCPU}
  56. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  57. begin
  58. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  59. end;
  60. {$else DEBUG_AOPTCPU}
  61. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  62. begin
  63. end;
  64. {$endif DEBUG_AOPTCPU}
  65. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  66. begin
  67. result :=
  68. (instr.typ = ait_instruction) and
  69. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  70. ((cond = []) or (taicpu(instr).condition in cond)) and
  71. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  72. end;
  73. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  74. begin
  75. result :=
  76. (instr.typ = ait_instruction) and
  77. (taicpu(instr).opcode = op) and
  78. ((cond = []) or (taicpu(instr).condition in cond)) and
  79. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  80. end;
  81. {$ifdef AARCH64}
  82. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  83. var
  84. op : TAsmOp;
  85. begin
  86. result:=false;
  87. if instr.typ <> ait_instruction then
  88. exit;
  89. for op in ops do
  90. begin
  91. if (taicpu(instr).opcode = op) and
  92. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  93. begin
  94. result:=true;
  95. exit;
  96. end;
  97. end;
  98. end;
  99. {$endif AARCH64}
  100. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  101. begin
  102. result :=
  103. (instr.typ = ait_instruction) and
  104. (taicpu(instr).opcode = op) and
  105. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  106. end;
  107. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  108. begin
  109. result := (oper.typ = top_reg) and (oper.reg = reg);
  110. end;
  111. function RefsEqual(const r1, r2: treference): boolean;
  112. begin
  113. refsequal :=
  114. (r1.offset = r2.offset) and
  115. (r1.base = r2.base) and
  116. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  117. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  118. (r1.relsymbol = r2.relsymbol) and
  119. {$ifdef ARM}
  120. (r1.signindex = r2.signindex) and
  121. {$endif ARM}
  122. (r1.shiftimm = r2.shiftimm) and
  123. (r1.addressmode = r2.addressmode) and
  124. (r1.shiftmode = r2.shiftmode) and
  125. (r1.volatility=[]) and
  126. (r2.volatility=[]);
  127. end;
  128. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  129. begin
  130. result := oper1.typ = oper2.typ;
  131. if result then
  132. case oper1.typ of
  133. top_const:
  134. Result:=oper1.val = oper2.val;
  135. top_reg:
  136. Result:=oper1.reg = oper2.reg;
  137. top_conditioncode:
  138. Result:=oper1.cc = oper2.cc;
  139. top_realconst:
  140. Result:=oper1.val_real = oper2.val_real;
  141. top_ref:
  142. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  143. else Result:=false;
  144. end
  145. end;
  146. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  147. Out Next: tai; reg: TRegister): Boolean;
  148. var
  149. gniResult: Boolean;
  150. begin
  151. Next:=Current;
  152. Result := False;
  153. repeat
  154. gniResult:=GetNextInstruction(Next,Next);
  155. if gniResult and RegInInstruction(reg,Next) then
  156. { Found something }
  157. Exit(True);
  158. until not gniResult or
  159. not(cs_opt_level3 in current_settings.optimizerswitches) or
  160. (Next.typ<>ait_instruction) or
  161. is_calljmp(taicpu(Next).opcode)
  162. {$ifdef ARM}
  163. or RegModifiedByInstruction(NR_PC,Next)
  164. {$endif ARM}
  165. ;
  166. end;
  167. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  168. var
  169. alloc,
  170. dealloc : tai_regalloc;
  171. hp1 : tai;
  172. begin
  173. Result:=false;
  174. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  175. { We can't optimize if there is a shiftop }
  176. (taicpu(movp).ops=2) and
  177. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  178. { don't mess with moves to fp }
  179. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  180. { the destination register of the mov might not be used beween p and movp }
  181. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  182. {$ifdef ARM}
  183. { PC should be changed only by moves }
  184. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  185. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  186. (taicpu(p).opcode<>A_CBZ) and
  187. (taicpu(p).opcode<>A_CBNZ) and
  188. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  189. not (
  190. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  191. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  192. (current_settings.cputype < cpu_armv6)
  193. ) and
  194. {$endif ARM}
  195. { Take care to only do this for instructions which REALLY load to the first register.
  196. Otherwise
  197. str reg0, [reg1]
  198. mov reg2, reg0
  199. will be optimized to
  200. str reg2, [reg1]
  201. }
  202. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  203. begin
  204. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  205. if assigned(dealloc) then
  206. begin
  207. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  208. result:=true;
  209. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  210. and remove it if possible }
  211. asml.Remove(dealloc);
  212. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  213. if assigned(alloc) then
  214. begin
  215. asml.Remove(alloc);
  216. alloc.free;
  217. dealloc.free;
  218. end
  219. else
  220. asml.InsertAfter(dealloc,p);
  221. { try to move the allocation of the target register }
  222. GetLastInstruction(movp,hp1);
  223. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  224. if assigned(alloc) then
  225. begin
  226. asml.Remove(alloc);
  227. asml.InsertBefore(alloc,p);
  228. { adjust used regs }
  229. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  230. end;
  231. { finally get rid of the mov }
  232. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  233. { Remove preindexing and postindexing for LDR in some cases.
  234. For example:
  235. ldr reg2,[reg1, xxx]!
  236. mov reg1,reg2
  237. must be translated to:
  238. ldr reg1,[reg1, xxx]
  239. Preindexing must be removed there, since the same register is used as the base and as the target.
  240. Such case is not allowed for ARM CPU and produces crash. }
  241. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  242. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  243. then
  244. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  245. asml.remove(movp);
  246. movp.free;
  247. end;
  248. end;
  249. end;
  250. function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
  251. var
  252. I: Integer;
  253. current_hp, next_hp: tai;
  254. LDRChange: Boolean;
  255. begin
  256. Result:=false;
  257. {
  258. change
  259. mov r1, r0
  260. add r1, r1, #1
  261. to
  262. add r1, r0, #1
  263. Todo: Make it work for mov+cmp too
  264. CAUTION! If this one is successful p might not be a mov instruction anymore!
  265. }
  266. if (taicpu(p).ops = 2) and
  267. (taicpu(p).oper[1]^.typ = top_reg) and
  268. (taicpu(p).oppostfix = PF_NONE) then
  269. begin
  270. if
  271. MatchInstruction(hp1, [A_ADD, A_ADC,
  272. {$ifdef ARM}
  273. A_RSB, A_RSC,
  274. {$endif ARM}
  275. A_SUB, A_SBC,
  276. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  277. [taicpu(p).condition], []) and
  278. { MOV and MVN might only have 2 ops }
  279. (taicpu(hp1).ops >= 2) and
  280. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  281. (taicpu(hp1).oper[1]^.typ = top_reg) and
  282. (
  283. (taicpu(hp1).ops = 2) or
  284. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  285. ) and
  286. {$ifdef AARCH64}
  287. (taicpu(p).oper[1]^.reg<>NR_SP) and
  288. {$endif AARCH64}
  289. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  290. begin
  291. { When we get here we still don't know if the registers match }
  292. for I:=1 to 2 do
  293. {
  294. If the first loop was successful p will be replaced with hp1.
  295. The checks will still be ok, because all required information
  296. will also be in hp1 then.
  297. }
  298. if (taicpu(hp1).ops > I) and
  299. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  300. {$ifdef ARM}
  301. { prevent certain combinations on thumb(2), this is only a safe approximation }
  302. and (not(GenerateThumbCode or GenerateThumb2Code) or
  303. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  304. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  305. {$endif ARM}
  306. then
  307. begin
  308. DebugMsg('Peephole RedundantMovProcess done', hp1);
  309. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  310. if p<>hp1 then
  311. begin
  312. asml.remove(p);
  313. p.free;
  314. p:=hp1;
  315. Result:=true;
  316. end;
  317. end;
  318. if Result then Exit;
  319. end
  320. { Change: Change:
  321. mov r1, r0 mov r1, r0
  322. ... ...
  323. ldr/str r2, [r1, etc.] mov r2, r1
  324. To: To:
  325. ldr/str r2, [r0, etc.] mov r2, r0
  326. }
  327. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  328. {$ifdef ARM}
  329. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  330. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  331. { Thumb does not support references with base and index one being SP }
  332. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  333. {$endif ARM}
  334. {$ifdef AARCH64}
  335. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  336. {$endif AARCH64}
  337. then
  338. begin
  339. current_hp := p;
  340. TransferUsedRegs(TmpUsedRegs);
  341. { Search local instruction block }
  342. while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
  343. begin
  344. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  345. LDRChange := False;
  346. if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2) then
  347. begin
  348. { Change the registers from r1 to r0 }
  349. if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  350. {$ifdef ARM}
  351. { This optimisation conflicts with something and raises
  352. an access violation - needs further investigation. [Kit] }
  353. (taicpu(next_hp).opcode <> A_LDR) and
  354. {$endif ARM}
  355. { Don't mess around with the base register if the
  356. reference is pre- or post-indexed }
  357. (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
  358. begin
  359. taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  360. LDRChange := True;
  361. end;
  362. if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  363. begin
  364. taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  365. LDRChange := True;
  366. end;
  367. if LDRChange then
  368. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
  369. { Drop out if we're dealing with pre-indexed references }
  370. if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  371. (
  372. RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
  373. RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
  374. ) then
  375. begin
  376. { Remember to update register allocations }
  377. if LDRChange then
  378. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  379. Break;
  380. end;
  381. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  382. if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  383. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  384. begin
  385. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
  386. taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  387. LDRChange := True;
  388. end;
  389. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  390. begin
  391. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  392. if (taicpu(p).oppostfix = PF_None) and
  393. (
  394. (
  395. (taicpu(next_hp).opcode = A_LDR) and
  396. MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
  397. ) or
  398. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
  399. ) and
  400. { Double-check to see if the old registers were actually
  401. changed (e.g. if the super registers matched, but not
  402. the sizes, they won't be changed). }
  403. (
  404. (taicpu(next_hp).opcode = A_LDR) or
  405. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
  406. ) and
  407. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
  408. begin
  409. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  410. RemoveCurrentP(p);
  411. Result := True;
  412. Exit;
  413. end;
  414. end;
  415. end
  416. else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
  417. (taicpu(next_hp).ops = 2) then
  418. begin
  419. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
  420. begin
  421. { Found another mov that writes entirely to the register }
  422. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
  423. begin
  424. { Register was used beforehand }
  425. if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
  426. begin
  427. { This MOV is exactly the same as the first one.
  428. Since none of the registers have changed value
  429. at this point, we can remove it. }
  430. DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', next_hp);
  431. if (next_hp = hp1) then
  432. { Don't let hp1 become a dangling pointer }
  433. hp1 := nil;
  434. asml.Remove(next_hp);
  435. next_hp.Free;
  436. { We still have the original p, so we can continue optimising;
  437. if it was -O2 or below, this instruction appeared immediately
  438. after the first MOV, so we're technically not looking more
  439. than one instruction ahead after it's removed! [Kit] }
  440. Continue;
  441. end
  442. else
  443. { Register changes value - drop out }
  444. Break;
  445. end;
  446. { We can delete the first MOV (only if the second MOV is unconditional) }
  447. {$ifdef ARM}
  448. if (taicpu(p).oppostfix = PF_None) and
  449. (taicpu(next_hp).condition = C_None) then
  450. {$endif ARM}
  451. begin
  452. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  453. RemoveCurrentP(p);
  454. Result := True;
  455. end;
  456. Exit;
  457. end
  458. else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
  459. begin
  460. if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
  461. { Be careful - if the entire register is not used, removing this
  462. instruction will leave the unused part uninitialised }
  463. {$ifdef AARCH64}
  464. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  465. {$endif AARCH64}
  466. then
  467. begin
  468. { Instruction will become mov r1,r1 }
  469. DebugMsg('Peephole Optimization: Mov2None 2 done', next_hp);
  470. { Allocate r1 between the instructions; not doing
  471. so may cause problems when removing superfluous
  472. MOVs later (i38055) }
  473. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  474. if (next_hp = hp1) then
  475. { Don't let hp1 become a dangling pointer }
  476. hp1 := nil;
  477. asml.Remove(next_hp);
  478. next_hp.Free;
  479. Continue;
  480. end;
  481. { Change the old register (checking the first operand again
  482. forces it to be left alone if the full register is not
  483. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  484. {$ifdef AARCH64}
  485. if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
  486. {$endif AARCH64}
  487. begin
  488. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
  489. taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  490. AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
  491. { If this was the only reference to the old register,
  492. then we can remove the original MOV now }
  493. if (taicpu(p).oppostfix = PF_None) and
  494. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  495. remove if the register was apparently not allocated when its value is
  496. first set at the MOV command (this is especially true for the stack
  497. register). [Kit] }
  498. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  499. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  500. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
  501. begin
  502. DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
  503. RemoveCurrentP(p);
  504. Result := True;
  505. Exit;
  506. end;
  507. end;
  508. end;
  509. end;
  510. { On low optimisation settions, don't search more than one instruction ahead }
  511. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  512. { Stop at procedure calls and jumps }
  513. is_calljmp(taicpu(next_hp).opcode) or
  514. { If the read register has changed value, or the MOV
  515. destination register has been used, drop out }
  516. RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
  517. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
  518. Break;
  519. current_hp := next_hp;
  520. end;
  521. end;
  522. end;
  523. end;
  524. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  525. var
  526. hp1, hp2: tai;
  527. begin
  528. Result:=false;
  529. {
  530. change
  531. uxtb reg2,reg1
  532. strb reg2,[...]
  533. dealloc reg2
  534. to
  535. strb reg1,[...]
  536. }
  537. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  538. (taicpu(p).ops=2) and
  539. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  540. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  541. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  542. { the reference in strb might not use reg2 }
  543. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  544. { reg1 might not be modified inbetween }
  545. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  546. begin
  547. DebugMsg('Peephole UxtbStrb2Strb done', p);
  548. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  549. GetNextInstruction(p,hp2);
  550. asml.remove(p);
  551. p.free;
  552. p:=hp2;
  553. result:=true;
  554. end
  555. {
  556. change
  557. uxtb reg2,reg1
  558. uxth reg3,reg2
  559. dealloc reg2
  560. to
  561. uxtb reg3,reg1
  562. }
  563. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  564. (taicpu(p).ops=2) and
  565. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  566. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  567. (taicpu(hp1).ops = 2) and
  568. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  569. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  570. { reg1 might not be modified inbetween }
  571. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  572. begin
  573. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  574. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  575. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  576. asml.remove(hp1);
  577. hp1.free;
  578. result:=true;
  579. end
  580. {
  581. change
  582. uxtb reg2,reg1
  583. uxtb reg3,reg2
  584. dealloc reg2
  585. to
  586. uxtb reg3,reg1
  587. }
  588. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  589. (taicpu(p).ops=2) and
  590. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  591. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  592. (taicpu(hp1).ops = 2) and
  593. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  594. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  595. { reg1 might not be modified inbetween }
  596. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  597. begin
  598. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  599. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  600. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  601. asml.remove(hp1);
  602. hp1.free;
  603. result:=true;
  604. end
  605. {
  606. change
  607. uxtb reg2,reg1
  608. and reg3,reg2,#0x*FF
  609. dealloc reg2
  610. to
  611. uxtb reg3,reg1
  612. }
  613. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  614. (taicpu(p).ops=2) and
  615. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  616. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  617. (taicpu(hp1).ops=3) and
  618. (taicpu(hp1).oper[2]^.typ=top_const) and
  619. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  620. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  621. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  622. { reg1 might not be modified inbetween }
  623. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  624. begin
  625. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  626. taicpu(hp1).opcode:=A_UXTB;
  627. taicpu(hp1).ops:=2;
  628. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  629. GetNextInstruction(p,hp2);
  630. asml.remove(p);
  631. p.free;
  632. p:=hp2;
  633. result:=true;
  634. end
  635. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  636. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  637. Result:=true;
  638. end;
  639. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  640. var
  641. hp1: tai;
  642. begin
  643. Result:=false;
  644. {
  645. change
  646. uxth reg2,reg1
  647. strh reg2,[...]
  648. dealloc reg2
  649. to
  650. strh reg1,[...]
  651. }
  652. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  653. (taicpu(p).ops=2) and
  654. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  655. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  656. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  657. { the reference in strb might not use reg2 }
  658. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  659. { reg1 might not be modified inbetween }
  660. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  661. begin
  662. DebugMsg('Peephole UXTHStrh2Strh done', p);
  663. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  664. GetNextInstruction(p, hp1);
  665. asml.remove(p);
  666. p.free;
  667. p:=hp1;
  668. result:=true;
  669. end
  670. {
  671. change
  672. uxth reg2,reg1
  673. uxth reg3,reg2
  674. dealloc reg2
  675. to
  676. uxth reg3,reg1
  677. }
  678. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  679. (taicpu(p).ops=2) and
  680. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  681. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  682. (taicpu(hp1).ops=2) and
  683. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  684. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  685. { reg1 might not be modified inbetween }
  686. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  687. begin
  688. DebugMsg('Peephole UxthUxth2Uxth done', p);
  689. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  690. taicpu(hp1).opcode:=A_UXTH;
  691. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  692. GetNextInstruction(p, hp1);
  693. asml.remove(p);
  694. p.free;
  695. p:=hp1;
  696. result:=true;
  697. end
  698. {
  699. change
  700. uxth reg2,reg1
  701. and reg3,reg2,#65535
  702. dealloc reg2
  703. to
  704. uxth reg3,reg1
  705. }
  706. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  707. (taicpu(p).ops=2) and
  708. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  709. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  710. (taicpu(hp1).ops=3) and
  711. (taicpu(hp1).oper[2]^.typ=top_const) and
  712. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  713. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  714. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  715. { reg1 might not be modified inbetween }
  716. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  717. begin
  718. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  719. taicpu(hp1).opcode:=A_UXTH;
  720. taicpu(hp1).ops:=2;
  721. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  722. GetNextInstruction(p, hp1);
  723. asml.remove(p);
  724. p.free;
  725. p:=hp1;
  726. result:=true;
  727. end
  728. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  729. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  730. Result:=true;
  731. end;
  732. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  733. var
  734. hp1, hp2: tai;
  735. begin
  736. Result:=false;
  737. {
  738. change
  739. sxtb reg2,reg1
  740. strb reg2,[...]
  741. dealloc reg2
  742. to
  743. strb reg1,[...]
  744. }
  745. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  746. (taicpu(p).ops=2) and
  747. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  748. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  749. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  750. { the reference in strb might not use reg2 }
  751. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  752. { reg1 might not be modified inbetween }
  753. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  754. begin
  755. DebugMsg('Peephole SxtbStrb2Strb done', p);
  756. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  757. GetNextInstruction(p,hp2);
  758. asml.remove(p);
  759. p.free;
  760. p:=hp2;
  761. result:=true;
  762. end
  763. {
  764. change
  765. sxtb reg2,reg1
  766. sxth reg3,reg2
  767. dealloc reg2
  768. to
  769. sxtb reg3,reg1
  770. }
  771. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  772. (taicpu(p).ops=2) and
  773. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  774. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  775. (taicpu(hp1).ops = 2) and
  776. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  777. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  778. { reg1 might not be modified inbetween }
  779. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  780. begin
  781. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  782. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  783. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  784. asml.remove(hp1);
  785. hp1.free;
  786. result:=true;
  787. end
  788. {
  789. change
  790. sxtb reg2,reg1
  791. sxtb reg3,reg2
  792. dealloc reg2
  793. to
  794. uxtb reg3,reg1
  795. }
  796. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  797. (taicpu(p).ops=2) and
  798. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  799. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  800. (taicpu(hp1).ops = 2) and
  801. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  802. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  803. { reg1 might not be modified inbetween }
  804. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  805. begin
  806. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  807. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  808. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  809. asml.remove(hp1);
  810. hp1.free;
  811. result:=true;
  812. end
  813. {
  814. change
  815. sxtb reg2,reg1
  816. and reg3,reg2,#0x*FF
  817. dealloc reg2
  818. to
  819. uxtb reg3,reg1
  820. }
  821. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  822. (taicpu(p).ops=2) and
  823. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  824. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  825. (taicpu(hp1).ops=3) and
  826. (taicpu(hp1).oper[2]^.typ=top_const) and
  827. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  828. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  829. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  830. { reg1 might not be modified inbetween }
  831. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  832. begin
  833. DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
  834. taicpu(hp1).opcode:=A_UXTB;
  835. taicpu(hp1).ops:=2;
  836. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  837. GetNextInstruction(p,hp2);
  838. asml.remove(p);
  839. p.free;
  840. p:=hp2;
  841. result:=true;
  842. end
  843. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  844. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  845. Result:=true;
  846. end;
  847. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  848. var
  849. hp1: tai;
  850. begin
  851. Result:=false;
  852. {
  853. change
  854. sxth reg2,reg1
  855. strh reg2,[...]
  856. dealloc reg2
  857. to
  858. strh reg1,[...]
  859. }
  860. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  861. (taicpu(p).ops=2) and
  862. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  863. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  864. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  865. { the reference in strb might not use reg2 }
  866. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  867. { reg1 might not be modified inbetween }
  868. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  869. begin
  870. DebugMsg('Peephole SXTHStrh2Strh done', p);
  871. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  872. GetNextInstruction(p, hp1);
  873. asml.remove(p);
  874. p.free;
  875. p:=hp1;
  876. result:=true;
  877. end
  878. {
  879. change
  880. sxth reg2,reg1
  881. sxth reg3,reg2
  882. dealloc reg2
  883. to
  884. sxth reg3,reg1
  885. }
  886. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  887. (taicpu(p).ops=2) and
  888. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  889. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  890. (taicpu(hp1).ops=2) and
  891. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  892. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  893. { reg1 might not be modified inbetween }
  894. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  895. begin
  896. DebugMsg('Peephole SxthSxth2Sxth done', p);
  897. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  898. taicpu(hp1).opcode:=A_SXTH;
  899. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  900. GetNextInstruction(p, hp1);
  901. asml.remove(p);
  902. p.free;
  903. p:=hp1;
  904. result:=true;
  905. end
  906. {
  907. change
  908. sxth reg2,reg1
  909. and reg3,reg2,#65535
  910. dealloc reg2
  911. to
  912. uxth reg3,reg1
  913. }
  914. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  915. (taicpu(p).ops=2) and
  916. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  917. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  918. (taicpu(hp1).ops=3) and
  919. (taicpu(hp1).oper[2]^.typ=top_const) and
  920. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  921. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  922. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  923. { reg1 might not be modified inbetween }
  924. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  925. begin
  926. DebugMsg('Peephole SxthAndImm2Uxth done', p);
  927. taicpu(hp1).opcode:=A_UXTH;
  928. taicpu(hp1).ops:=2;
  929. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  930. GetNextInstruction(p, hp1);
  931. asml.remove(p);
  932. p.free;
  933. p:=hp1;
  934. result:=true;
  935. end
  936. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  937. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  938. Result:=true;
  939. end;
  940. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  941. var
  942. hp1, hp2: tai;
  943. i: longint;
  944. begin
  945. Result:=false;
  946. {
  947. optimize
  948. and reg2,reg1,const1
  949. ...
  950. }
  951. if (taicpu(p).ops>2) and
  952. (taicpu(p).oper[1]^.typ = top_reg) and
  953. (taicpu(p).oper[2]^.typ = top_const) then
  954. begin
  955. {
  956. change
  957. and reg2,reg1,const1
  958. ...
  959. and reg3,reg2,const2
  960. to
  961. and reg3,reg1,(const1 and const2)
  962. }
  963. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  964. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  965. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  966. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  967. (taicpu(hp1).oper[2]^.typ = top_const)
  968. {$ifdef AARCH64}
  969. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  970. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  971. ) or
  972. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  973. {$endif AARCH64}
  974. then
  975. begin
  976. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  977. begin
  978. DebugMsg('Peephole AndAnd2And done', p);
  979. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  980. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  981. begin
  982. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  983. taicpu(p).opcode:=A_MOV;
  984. taicpu(p).ops:=2;
  985. taicpu(p).loadConst(1,0);
  986. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  987. end
  988. else
  989. begin
  990. DebugMsg('Peephole AndAnd2And 1 done', p);
  991. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  992. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  993. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  994. end;
  995. asml.remove(hp1);
  996. hp1.free;
  997. Result:=true;
  998. exit;
  999. end
  1000. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1001. begin
  1002. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  1003. begin
  1004. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  1005. taicpu(hp1).opcode:=A_MOV;
  1006. taicpu(hp1).loadConst(1,0);
  1007. taicpu(hp1).ops:=2;
  1008. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1009. end
  1010. else
  1011. begin
  1012. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1013. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1014. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1015. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1016. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1017. end;
  1018. GetNextInstruction(p, hp1);
  1019. RemoveCurrentP(p);
  1020. p:=hp1;
  1021. Result:=true;
  1022. exit;
  1023. end;
  1024. end
  1025. {
  1026. change
  1027. and reg2,reg1,$xxxxxxFF
  1028. strb reg2,[...]
  1029. dealloc reg2
  1030. to
  1031. strb reg1,[...]
  1032. }
  1033. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1034. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1035. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1036. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1037. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1038. { the reference in strb might not use reg2 }
  1039. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1040. { reg1 might not be modified inbetween }
  1041. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1042. begin
  1043. DebugMsg('Peephole AndStrb2Strb done', p);
  1044. {$ifdef AARCH64}
  1045. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1046. {$else AARCH64}
  1047. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1048. {$endif AARCH64}
  1049. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1050. RemoveCurrentP(p);
  1051. result:=true;
  1052. exit;
  1053. end
  1054. {
  1055. change
  1056. and reg2,reg1,255
  1057. uxtb/uxth reg3,reg2
  1058. dealloc reg2
  1059. to
  1060. and reg3,reg1,x
  1061. }
  1062. else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
  1063. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1064. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1065. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1066. (taicpu(hp1).ops = 2) and
  1067. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1068. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1069. { reg1 might not be modified inbetween }
  1070. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1071. begin
  1072. DebugMsg('Peephole AndUxt2And done', p);
  1073. taicpu(hp1).opcode:=A_AND;
  1074. taicpu(hp1).ops:=3;
  1075. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1076. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1077. GetNextInstruction(p,hp1);
  1078. asml.remove(p);
  1079. p.Free;
  1080. p:=hp1;
  1081. result:=true;
  1082. exit;
  1083. end
  1084. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1085. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1086. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1087. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1088. (taicpu(hp1).ops = 2) and
  1089. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1090. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1091. { reg1 might not be modified inbetween }
  1092. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1093. begin
  1094. DebugMsg('Peephole AndSxt2And done', p);
  1095. taicpu(hp1).opcode:=A_AND;
  1096. taicpu(hp1).ops:=3;
  1097. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1098. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1099. GetNextInstruction(p,hp1);
  1100. asml.remove(p);
  1101. p.Free;
  1102. p:=hp1;
  1103. result:=true;
  1104. exit;
  1105. end
  1106. {
  1107. from
  1108. and reg1,reg0,2^n-1
  1109. mov reg2,reg1, lsl imm1
  1110. (mov reg3,reg2, lsr/asr imm1)
  1111. remove either the and or the lsl/xsr sequence if possible
  1112. }
  1113. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1114. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1115. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1116. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1117. (taicpu(hp1).ops=3) and
  1118. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1119. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1120. {$ifdef ARM}
  1121. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1122. {$endif ARM}
  1123. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1124. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1125. begin
  1126. {
  1127. and reg1,reg0,2^n-1
  1128. mov reg2,reg1, lsl imm1
  1129. mov reg3,reg2, lsr/asr imm1
  1130. =>
  1131. and reg1,reg0,2^n-1
  1132. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1133. }
  1134. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1135. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1136. (taicpu(hp2).ops=3) and
  1137. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1138. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1139. {$ifdef ARM}
  1140. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1141. {$endif ARM}
  1142. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1143. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1144. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1145. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1146. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1147. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1148. begin
  1149. DebugMsg('Peephole AndLslXsr2And done', p);
  1150. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1151. asml.Remove(hp1);
  1152. asml.Remove(hp2);
  1153. hp1.free;
  1154. hp2.free;
  1155. result:=true;
  1156. exit;
  1157. end
  1158. {
  1159. and reg1,reg0,2^n-1
  1160. mov reg2,reg1, lsl imm1
  1161. =>
  1162. mov reg2,reg0, lsl imm1
  1163. if imm1>i
  1164. }
  1165. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1166. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1167. begin
  1168. DebugMsg('Peephole AndLsl2Lsl done', p);
  1169. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1170. GetNextInstruction(p, hp1);
  1171. asml.Remove(p);
  1172. p.free;
  1173. p:=hp1;
  1174. result:=true;
  1175. exit;
  1176. end
  1177. end;
  1178. end;
  1179. {
  1180. change
  1181. and reg1, ...
  1182. mov reg2, reg1
  1183. to
  1184. and reg2, ...
  1185. }
  1186. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1187. (taicpu(p).ops>=3) and
  1188. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1189. Result:=true;
  1190. end;
  1191. end.