aoptarm.pas 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AAarch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { while ARM and AAarch64 look not very similar at a first glance,
  27. several optimizations can be shared between both }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
  29. procedure DebugMsg(const s : string; p : tai);
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  31. function RedundantMovProcess(var p: tai; hp1: tai): boolean;
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  37. function OptPass1And(var p: tai): Boolean; virtual;
  38. End;
  39. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  40. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  41. {$ifdef AARCH64}
  42. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  43. {$endif AARCH64}
  44. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  45. function RefsEqual(const r1, r2: treference): boolean;
  46. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  47. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  48. Implementation
  49. uses
  50. cutils,verbose,globtype,globals,
  51. systems,
  52. cpuinfo,
  53. cgobj,procinfo,
  54. aasmbase,aasmdata;
  55. {$ifdef DEBUG_AOPTCPU}
  56. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
  57. begin
  58. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  59. end;
  60. {$else DEBUG_AOPTCPU}
  61. procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  62. begin
  63. end;
  64. {$endif DEBUG_AOPTCPU}
  65. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  66. begin
  67. result :=
  68. (instr.typ = ait_instruction) and
  69. ((op = []) or ((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op))) and
  70. ((cond = []) or (taicpu(instr).condition in cond)) and
  71. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  72. end;
  73. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  74. begin
  75. result :=
  76. (instr.typ = ait_instruction) and
  77. (taicpu(instr).opcode = op) and
  78. ((cond = []) or (taicpu(instr).condition in cond)) and
  79. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  80. end;
  81. {$ifdef AARCH64}
  82. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  83. var
  84. op : TAsmOp;
  85. begin
  86. result:=false;
  87. if instr.typ <> ait_instruction then
  88. exit;
  89. for op in ops do
  90. begin
  91. if (taicpu(instr).opcode = op) and
  92. ((postfix = []) or (taicpu(instr).oppostfix in postfix)) then
  93. begin
  94. result:=true;
  95. exit;
  96. end;
  97. end;
  98. end;
  99. {$endif AARCH64}
  100. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  101. begin
  102. result :=
  103. (instr.typ = ait_instruction) and
  104. (taicpu(instr).opcode = op) and
  105. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  106. end;
  107. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  108. begin
  109. result := (oper.typ = top_reg) and (oper.reg = reg);
  110. end;
  111. function RefsEqual(const r1, r2: treference): boolean;
  112. begin
  113. refsequal :=
  114. (r1.offset = r2.offset) and
  115. (r1.base = r2.base) and
  116. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  117. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  118. (r1.relsymbol = r2.relsymbol) and
  119. {$ifdef ARM}
  120. (r1.signindex = r2.signindex) and
  121. {$endif ARM}
  122. (r1.shiftimm = r2.shiftimm) and
  123. (r1.addressmode = r2.addressmode) and
  124. (r1.shiftmode = r2.shiftmode) and
  125. (r1.volatility=[]) and
  126. (r2.volatility=[]);
  127. end;
  128. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  129. begin
  130. result := oper1.typ = oper2.typ;
  131. if result then
  132. case oper1.typ of
  133. top_const:
  134. Result:=oper1.val = oper2.val;
  135. top_reg:
  136. Result:=oper1.reg = oper2.reg;
  137. top_conditioncode:
  138. Result:=oper1.cc = oper2.cc;
  139. top_realconst:
  140. Result:=oper1.val_real = oper2.val_real;
  141. top_ref:
  142. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  143. else Result:=false;
  144. end
  145. end;
  146. function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  147. Out Next: tai; reg: TRegister): Boolean;
  148. var
  149. gniResult: Boolean;
  150. begin
  151. Next:=Current;
  152. Result := False;
  153. repeat
  154. gniResult:=GetNextInstruction(Next,Next);
  155. if gniResult and RegInInstruction(reg,Next) then
  156. { Found something }
  157. Exit(True);
  158. until not gniResult or
  159. not(cs_opt_level3 in current_settings.optimizerswitches) or
  160. (Next.typ<>ait_instruction) or
  161. is_calljmp(taicpu(Next).opcode)
  162. {$ifdef ARM}
  163. or RegModifiedByInstruction(NR_PC,Next)
  164. {$endif ARM}
  165. ;
  166. end;
  167. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  168. var
  169. alloc,
  170. dealloc : tai_regalloc;
  171. hp1 : tai;
  172. begin
  173. Result:=false;
  174. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  175. { We can't optimize if there is a shiftop }
  176. (taicpu(movp).ops=2) and
  177. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  178. { don't mess with moves to fp }
  179. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  180. { the destination register of the mov might not be used beween p and movp }
  181. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  182. {$ifdef ARM}
  183. { PC should be changed only by moves }
  184. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  185. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  186. (taicpu(p).opcode<>A_CBZ) and
  187. (taicpu(p).opcode<>A_CBNZ) and
  188. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  189. not (
  190. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  191. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  192. (current_settings.cputype < cpu_armv6)
  193. ) and
  194. {$endif ARM}
  195. { Take care to only do this for instructions which REALLY load to the first register.
  196. Otherwise
  197. str reg0, [reg1]
  198. mov reg2, reg0
  199. will be optimized to
  200. str reg2, [reg1]
  201. }
  202. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  203. begin
  204. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  205. if assigned(dealloc) then
  206. begin
  207. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  208. result:=true;
  209. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  210. and remove it if possible }
  211. asml.Remove(dealloc);
  212. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  213. if assigned(alloc) then
  214. begin
  215. asml.Remove(alloc);
  216. alloc.free;
  217. dealloc.free;
  218. end
  219. else
  220. asml.InsertAfter(dealloc,p);
  221. { try to move the allocation of the target register }
  222. GetLastInstruction(movp,hp1);
  223. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  224. if assigned(alloc) then
  225. begin
  226. asml.Remove(alloc);
  227. asml.InsertBefore(alloc,p);
  228. { adjust used regs }
  229. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  230. end;
  231. { finally get rid of the mov }
  232. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  233. { Remove preindexing and postindexing for LDR in some cases.
  234. For example:
  235. ldr reg2,[reg1, xxx]!
  236. mov reg1,reg2
  237. must be translated to:
  238. ldr reg1,[reg1, xxx]
  239. Preindexing must be removed there, since the same register is used as the base and as the target.
  240. Such case is not allowed for ARM CPU and produces crash. }
  241. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  242. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  243. then
  244. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  245. asml.remove(movp);
  246. movp.free;
  247. end;
  248. end;
  249. end;
  250. function TARMAsmOptimizer.RedundantMovProcess(var p: tai;hp1: tai):boolean;
  251. var
  252. I: Integer;
  253. current_hp: tai;
  254. LDRChange: Boolean;
  255. begin
  256. Result:=false;
  257. {
  258. change
  259. mov r1, r0
  260. add r1, r1, #1
  261. to
  262. add r1, r0, #1
  263. Todo: Make it work for mov+cmp too
  264. CAUTION! If this one is successful p might not be a mov instruction anymore!
  265. }
  266. if (taicpu(p).ops = 2) and
  267. (taicpu(p).oper[1]^.typ = top_reg) and
  268. (taicpu(p).oppostfix = PF_NONE) then
  269. begin
  270. if
  271. MatchInstruction(hp1, [A_ADD, A_ADC,
  272. {$ifdef ARM}
  273. A_RSB, A_RSC,
  274. {$endif ARM}
  275. A_SUB, A_SBC,
  276. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  277. [taicpu(p).condition], []) and
  278. { MOV and MVN might only have 2 ops }
  279. (taicpu(hp1).ops >= 2) and
  280. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  281. (taicpu(hp1).oper[1]^.typ = top_reg) and
  282. (
  283. (taicpu(hp1).ops = 2) or
  284. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  285. ) and
  286. {$ifdef AARCH64}
  287. (taicpu(p).oper[1]^.reg<>NR_SP) and
  288. {$endif AARCH64}
  289. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  290. begin
  291. { When we get here we still don't know if the registers match }
  292. for I:=1 to 2 do
  293. {
  294. If the first loop was successful p will be replaced with hp1.
  295. The checks will still be ok, because all required information
  296. will also be in hp1 then.
  297. }
  298. if (taicpu(hp1).ops > I) and
  299. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  300. {$ifdef ARM}
  301. { prevent certain combinations on thumb(2), this is only a safe approximation }
  302. and (not(GenerateThumbCode or GenerateThumb2Code) or
  303. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  304. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  305. {$endif ARM}
  306. then
  307. begin
  308. DebugMsg('Peephole RedundantMovProcess done', hp1);
  309. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  310. if p<>hp1 then
  311. begin
  312. asml.remove(p);
  313. p.free;
  314. p:=hp1;
  315. Result:=true;
  316. end;
  317. end;
  318. if Result then Exit;
  319. end
  320. { Change: Change:
  321. mov r1, r0 mov r1, r0
  322. ... ...
  323. ldr/str r2, [r1, etc.] mov r2, r1
  324. To: To:
  325. ldr/str r2, [r0, etc.] mov r2, r0
  326. }
  327. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  328. {$ifdef ARM}
  329. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  330. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  331. { Thumb does not support references with base and index one being SP }
  332. and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
  333. {$endif ARM}
  334. {$ifdef AARCH64}
  335. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  336. {$endif AARCH64}
  337. then
  338. begin
  339. current_hp := p;
  340. TransferUsedRegs(TmpUsedRegs);
  341. { Search local instruction block }
  342. while GetNextInstruction(current_hp, hp1) and (hp1 <> BlockEnd) and (hp1.typ = ait_instruction) do
  343. begin
  344. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  345. LDRChange := False;
  346. if (taicpu(hp1).opcode in [A_LDR,A_STR]) and (taicpu(hp1).ops = 2) then
  347. begin
  348. { Change the registers from r1 to r0 }
  349. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  350. {$ifdef ARM}
  351. { This optimisation conflicts with something and raises
  352. an access violation - needs further investigation. [Kit] }
  353. (taicpu(hp1).opcode <> A_LDR) and
  354. {$endif ARM}
  355. { Don't mess around with the base register if the
  356. reference is pre- or post-indexed }
  357. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) then
  358. begin
  359. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  360. LDRChange := True;
  361. end;
  362. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  363. begin
  364. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  365. LDRChange := True;
  366. end;
  367. if LDRChange then
  368. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', hp1);
  369. { Drop out if we're dealing with pre-indexed references }
  370. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  371. (
  372. RegInRef(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[1]^.ref^) or
  373. RegInRef(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.ref^)
  374. ) then
  375. begin
  376. { Remember to update register allocations }
  377. if LDRChange then
  378. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  379. Break;
  380. end;
  381. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  382. if (taicpu(hp1).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  383. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) then
  384. begin
  385. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', hp1);
  386. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  387. LDRChange := True;
  388. end;
  389. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  390. begin
  391. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  392. if (taicpu(p).oppostfix = PF_None) and
  393. (
  394. (
  395. (taicpu(hp1).opcode = A_LDR) and
  396. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg)
  397. ) or
  398. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs)
  399. ) and
  400. { Double-check to see if the old registers were actually
  401. changed (e.g. if the super registers matched, but not
  402. the sizes, they won't be changed). }
  403. (
  404. (taicpu(hp1).opcode = A_LDR) or
  405. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^)
  406. ) and
  407. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[1]^.ref^) then
  408. begin
  409. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  410. RemoveCurrentP(p);
  411. Result := True;
  412. Exit;
  413. end;
  414. end;
  415. end
  416. else if (taicpu(hp1).opcode = A_MOV) and (taicpu(hp1).oppostfix = PF_None) and
  417. (taicpu(hp1).ops = 2) then
  418. begin
  419. if MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) then
  420. begin
  421. { Found another mov that writes entirely to the register }
  422. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp1) then
  423. begin
  424. { Register was used beforehand }
  425. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
  426. begin
  427. { This MOV is exactly the same as the first one.
  428. Since none of the registers have changed value
  429. at this point, we can remove it. }
  430. DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', hp1);
  431. asml.Remove(hp1);
  432. hp1.Free;
  433. { We still have the original p, so we can continue optimising;
  434. if it was -O2 or below, this instruction appeared immediately
  435. after the first MOV, so we're technically not looking more
  436. than one instruction ahead after it's removed! [Kit] }
  437. Continue;
  438. end
  439. else
  440. { Register changes value - drop out }
  441. Break;
  442. end;
  443. { We can delete the first MOV (only if the second MOV is unconditional) }
  444. {$ifdef ARM}
  445. if (taicpu(p).oppostfix = PF_None) and
  446. (taicpu(hp1).condition = C_None) then
  447. {$endif ARM}
  448. begin
  449. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  450. RemoveCurrentP(p);
  451. Result := True;
  452. end;
  453. Exit;
  454. end
  455. else if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  456. begin
  457. if MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
  458. { Be careful - if the entire register is not used, removing this
  459. instruction will leave the unused part uninitialised }
  460. {$ifdef AARCH64}
  461. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  462. {$endif AARCH64}
  463. then
  464. begin
  465. { Instruction will become mov r1,r1 }
  466. DebugMsg('Peephole Optimization: Mov2None 2 done', hp1);
  467. asml.Remove(hp1);
  468. hp1.Free;
  469. Continue;
  470. end;
  471. { Change the old register (checking the first operand again
  472. forces it to be left alone if the full register is not
  473. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  474. {$ifdef AARCH64}
  475. if not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
  476. {$endif AARCH64}
  477. begin
  478. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', hp1);
  479. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  480. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  481. { If this was the only reference to the old register,
  482. then we can remove the original MOV now }
  483. if (taicpu(p).oppostfix = PF_None) and
  484. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  485. remove if the register was apparently not allocated when its value is
  486. first set at the MOV command (this is especially true for the stack
  487. register). [Kit] }
  488. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  489. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  490. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs) then
  491. begin
  492. DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
  493. RemoveCurrentP(p);
  494. Result := True;
  495. Exit;
  496. end;
  497. end;
  498. end;
  499. end;
  500. { On low optimisation settions, don't search more than one instruction ahead }
  501. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  502. { Stop at procedure calls and jumps }
  503. is_calljmp(taicpu(hp1).opcode) or
  504. { If the read register has changed value, or the MOV
  505. destination register has been used, drop out }
  506. RegInInstruction(taicpu(p).oper[0]^.reg, hp1) or
  507. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) then
  508. Break;
  509. current_hp := hp1;
  510. end;
  511. end;
  512. end;
  513. end;
  514. function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
  515. var
  516. hp1, hp2: tai;
  517. begin
  518. Result:=false;
  519. {
  520. change
  521. uxtb reg2,reg1
  522. strb reg2,[...]
  523. dealloc reg2
  524. to
  525. strb reg1,[...]
  526. }
  527. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  528. (taicpu(p).ops=2) and
  529. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  530. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  531. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  532. { the reference in strb might not use reg2 }
  533. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  534. { reg1 might not be modified inbetween }
  535. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  536. begin
  537. DebugMsg('Peephole UxtbStrb2Strb done', p);
  538. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  539. GetNextInstruction(p,hp2);
  540. asml.remove(p);
  541. p.free;
  542. p:=hp2;
  543. result:=true;
  544. end
  545. {
  546. change
  547. uxtb reg2,reg1
  548. uxth reg3,reg2
  549. dealloc reg2
  550. to
  551. uxtb reg3,reg1
  552. }
  553. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  554. (taicpu(p).ops=2) and
  555. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  556. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  557. (taicpu(hp1).ops = 2) and
  558. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  559. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  560. { reg1 might not be modified inbetween }
  561. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  562. begin
  563. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  564. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  565. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  566. asml.remove(hp1);
  567. hp1.free;
  568. result:=true;
  569. end
  570. {
  571. change
  572. uxtb reg2,reg1
  573. uxtb reg3,reg2
  574. dealloc reg2
  575. to
  576. uxtb reg3,reg1
  577. }
  578. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  579. (taicpu(p).ops=2) and
  580. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  581. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  582. (taicpu(hp1).ops = 2) and
  583. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  584. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  585. { reg1 might not be modified inbetween }
  586. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  587. begin
  588. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  589. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  590. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  591. asml.remove(hp1);
  592. hp1.free;
  593. result:=true;
  594. end
  595. {
  596. change
  597. uxtb reg2,reg1
  598. and reg3,reg2,#0x*FF
  599. dealloc reg2
  600. to
  601. uxtb reg3,reg1
  602. }
  603. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  604. (taicpu(p).ops=2) and
  605. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  606. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  607. (taicpu(hp1).ops=3) and
  608. (taicpu(hp1).oper[2]^.typ=top_const) and
  609. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  610. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  611. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  612. { reg1 might not be modified inbetween }
  613. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  614. begin
  615. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  616. taicpu(hp1).opcode:=A_UXTB;
  617. taicpu(hp1).ops:=2;
  618. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  619. GetNextInstruction(p,hp2);
  620. asml.remove(p);
  621. p.free;
  622. p:=hp2;
  623. result:=true;
  624. end
  625. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  626. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  627. Result:=true;
  628. end;
  629. function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
  630. var
  631. hp1: tai;
  632. begin
  633. Result:=false;
  634. {
  635. change
  636. uxth reg2,reg1
  637. strh reg2,[...]
  638. dealloc reg2
  639. to
  640. strh reg1,[...]
  641. }
  642. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  643. (taicpu(p).ops=2) and
  644. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  645. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  646. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  647. { the reference in strb might not use reg2 }
  648. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  649. { reg1 might not be modified inbetween }
  650. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  651. begin
  652. DebugMsg('Peephole UXTHStrh2Strh done', p);
  653. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  654. GetNextInstruction(p, hp1);
  655. asml.remove(p);
  656. p.free;
  657. p:=hp1;
  658. result:=true;
  659. end
  660. {
  661. change
  662. uxth reg2,reg1
  663. uxth reg3,reg2
  664. dealloc reg2
  665. to
  666. uxth reg3,reg1
  667. }
  668. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  669. (taicpu(p).ops=2) and
  670. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  671. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  672. (taicpu(hp1).ops=2) and
  673. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  674. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  675. { reg1 might not be modified inbetween }
  676. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  677. begin
  678. DebugMsg('Peephole UxthUxth2Uxth done', p);
  679. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  680. taicpu(hp1).opcode:=A_UXTH;
  681. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  682. GetNextInstruction(p, hp1);
  683. asml.remove(p);
  684. p.free;
  685. p:=hp1;
  686. result:=true;
  687. end
  688. {
  689. change
  690. uxth reg2,reg1
  691. and reg3,reg2,#65535
  692. dealloc reg2
  693. to
  694. uxth reg3,reg1
  695. }
  696. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  697. (taicpu(p).ops=2) and
  698. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  699. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  700. (taicpu(hp1).ops=3) and
  701. (taicpu(hp1).oper[2]^.typ=top_const) and
  702. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  703. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  704. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  705. { reg1 might not be modified inbetween }
  706. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  707. begin
  708. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  709. taicpu(hp1).opcode:=A_UXTH;
  710. taicpu(hp1).ops:=2;
  711. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  712. GetNextInstruction(p, hp1);
  713. asml.remove(p);
  714. p.free;
  715. p:=hp1;
  716. result:=true;
  717. end
  718. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  719. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  720. Result:=true;
  721. end;
  722. function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
  723. var
  724. hp1, hp2: tai;
  725. begin
  726. Result:=false;
  727. {
  728. change
  729. sxtb reg2,reg1
  730. strb reg2,[...]
  731. dealloc reg2
  732. to
  733. strb reg1,[...]
  734. }
  735. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  736. (taicpu(p).ops=2) and
  737. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  738. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  739. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  740. { the reference in strb might not use reg2 }
  741. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  742. { reg1 might not be modified inbetween }
  743. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  744. begin
  745. DebugMsg('Peephole SxtbStrb2Strb done', p);
  746. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  747. GetNextInstruction(p,hp2);
  748. asml.remove(p);
  749. p.free;
  750. p:=hp2;
  751. result:=true;
  752. end
  753. {
  754. change
  755. sxtb reg2,reg1
  756. sxth reg3,reg2
  757. dealloc reg2
  758. to
  759. sxtb reg3,reg1
  760. }
  761. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  762. (taicpu(p).ops=2) and
  763. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  764. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  765. (taicpu(hp1).ops = 2) and
  766. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  767. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  768. { reg1 might not be modified inbetween }
  769. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  770. begin
  771. DebugMsg('Peephole SxtbSxth2Sxtb done', p);
  772. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  773. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  774. asml.remove(hp1);
  775. hp1.free;
  776. result:=true;
  777. end
  778. {
  779. change
  780. sxtb reg2,reg1
  781. sxtb reg3,reg2
  782. dealloc reg2
  783. to
  784. uxtb reg3,reg1
  785. }
  786. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  787. (taicpu(p).ops=2) and
  788. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  789. MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
  790. (taicpu(hp1).ops = 2) and
  791. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  792. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  793. { reg1 might not be modified inbetween }
  794. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  795. begin
  796. DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
  797. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  798. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  799. asml.remove(hp1);
  800. hp1.free;
  801. result:=true;
  802. end
  803. {
  804. change
  805. sxtb reg2,reg1
  806. and reg3,reg2,#0x*FF
  807. dealloc reg2
  808. to
  809. uxtb reg3,reg1
  810. }
  811. else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
  812. (taicpu(p).ops=2) and
  813. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  814. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  815. (taicpu(hp1).ops=3) and
  816. (taicpu(hp1).oper[2]^.typ=top_const) and
  817. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  818. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  819. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  820. { reg1 might not be modified inbetween }
  821. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  822. begin
  823. DebugMsg('Peephole SxtbAndImm2Sxtb done', p);
  824. taicpu(hp1).opcode:=A_SXTB;
  825. taicpu(hp1).ops:=2;
  826. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  827. GetNextInstruction(p,hp2);
  828. asml.remove(p);
  829. p.free;
  830. p:=hp2;
  831. result:=true;
  832. end
  833. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  834. RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
  835. Result:=true;
  836. end;
  837. function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
  838. var
  839. hp1: tai;
  840. begin
  841. Result:=false;
  842. {
  843. change
  844. sxth reg2,reg1
  845. strh reg2,[...]
  846. dealloc reg2
  847. to
  848. strh reg1,[...]
  849. }
  850. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  851. (taicpu(p).ops=2) and
  852. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  853. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  854. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  855. { the reference in strb might not use reg2 }
  856. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  857. { reg1 might not be modified inbetween }
  858. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  859. begin
  860. DebugMsg('Peephole SXTHStrh2Strh done', p);
  861. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  862. GetNextInstruction(p, hp1);
  863. asml.remove(p);
  864. p.free;
  865. p:=hp1;
  866. result:=true;
  867. end
  868. {
  869. change
  870. sxth reg2,reg1
  871. sxth reg3,reg2
  872. dealloc reg2
  873. to
  874. sxth reg3,reg1
  875. }
  876. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  877. (taicpu(p).ops=2) and
  878. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  879. MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
  880. (taicpu(hp1).ops=2) and
  881. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  882. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  883. { reg1 might not be modified inbetween }
  884. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  885. begin
  886. DebugMsg('Peephole SxthSxth2Sxth done', p);
  887. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  888. taicpu(hp1).opcode:=A_SXTH;
  889. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  890. GetNextInstruction(p, hp1);
  891. asml.remove(p);
  892. p.free;
  893. p:=hp1;
  894. result:=true;
  895. end
  896. {
  897. change
  898. sxth reg2,reg1
  899. and reg3,reg2,#65535
  900. dealloc reg2
  901. to
  902. sxth reg3,reg1
  903. }
  904. else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
  905. (taicpu(p).ops=2) and
  906. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  907. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  908. (taicpu(hp1).ops=3) and
  909. (taicpu(hp1).oper[2]^.typ=top_const) and
  910. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  911. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  912. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  913. { reg1 might not be modified inbetween }
  914. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  915. begin
  916. DebugMsg('Peephole SxthAndImm2Sxth done', p);
  917. taicpu(hp1).opcode:=A_SXTH;
  918. taicpu(hp1).ops:=2;
  919. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  920. GetNextInstruction(p, hp1);
  921. asml.remove(p);
  922. p.free;
  923. p:=hp1;
  924. result:=true;
  925. end
  926. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  927. RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
  928. Result:=true;
  929. end;
  930. function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
  931. var
  932. hp1, hp2: tai;
  933. i: longint;
  934. begin
  935. Result:=false;
  936. {
  937. optimize
  938. and reg2,reg1,const1
  939. ...
  940. }
  941. if (taicpu(p).ops>2) and
  942. (taicpu(p).oper[1]^.typ = top_reg) and
  943. (taicpu(p).oper[2]^.typ = top_const) then
  944. begin
  945. {
  946. change
  947. and reg2,reg1,const1
  948. ...
  949. and reg3,reg2,const2
  950. to
  951. and reg3,reg1,(const1 and const2)
  952. }
  953. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  954. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  955. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  956. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  957. (taicpu(hp1).oper[2]^.typ = top_const)
  958. {$ifdef AARCH64}
  959. and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
  960. ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
  961. ) or
  962. ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
  963. {$endif AARCH64}
  964. then
  965. begin
  966. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  967. begin
  968. DebugMsg('Peephole AndAnd2And done', p);
  969. AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
  970. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  971. begin
  972. DebugMsg('Peephole AndAnd2Mov0 1 done', p);
  973. taicpu(p).opcode:=A_MOV;
  974. taicpu(p).ops:=2;
  975. taicpu(p).loadConst(1,0);
  976. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  977. end
  978. else
  979. begin
  980. DebugMsg('Peephole AndAnd2And 1 done', p);
  981. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  982. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  983. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  984. end;
  985. asml.remove(hp1);
  986. hp1.free;
  987. Result:=true;
  988. exit;
  989. end
  990. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  991. begin
  992. if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
  993. begin
  994. DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
  995. taicpu(hp1).opcode:=A_MOV;
  996. taicpu(hp1).loadConst(1,0);
  997. taicpu(hp1).ops:=2;
  998. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  999. end
  1000. else
  1001. begin
  1002. DebugMsg('Peephole AndAnd2And 2 done', hp1);
  1003. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1004. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1005. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1006. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1007. end;
  1008. GetNextInstruction(p, hp1);
  1009. RemoveCurrentP(p);
  1010. p:=hp1;
  1011. Result:=true;
  1012. exit;
  1013. end;
  1014. end
  1015. {
  1016. change
  1017. and reg2,reg1,$xxxxxxFF
  1018. strb reg2,[...]
  1019. dealloc reg2
  1020. to
  1021. strb reg1,[...]
  1022. }
  1023. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1024. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1025. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1026. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1027. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1028. { the reference in strb might not use reg2 }
  1029. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1030. { reg1 might not be modified inbetween }
  1031. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1032. begin
  1033. DebugMsg('Peephole AndStrb2Strb done', p);
  1034. {$ifdef AARCH64}
  1035. taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
  1036. {$else AARCH64}
  1037. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1038. {$endif AARCH64}
  1039. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
  1040. RemoveCurrentP(p);
  1041. result:=true;
  1042. exit;
  1043. end
  1044. {
  1045. change
  1046. and reg2,reg1,255
  1047. uxtb/uxth reg3,reg2
  1048. dealloc reg2
  1049. to
  1050. and reg3,reg1,x
  1051. }
  1052. else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
  1053. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1054. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1055. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1056. (taicpu(hp1).ops = 2) and
  1057. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1058. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1059. { reg1 might not be modified inbetween }
  1060. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1061. begin
  1062. DebugMsg('Peephole AndUxt2And done', p);
  1063. taicpu(hp1).opcode:=A_AND;
  1064. taicpu(hp1).ops:=3;
  1065. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1066. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1067. GetNextInstruction(p,hp1);
  1068. asml.remove(p);
  1069. p.Free;
  1070. p:=hp1;
  1071. result:=true;
  1072. exit;
  1073. end
  1074. else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
  1075. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1076. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1077. MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
  1078. (taicpu(hp1).ops = 2) and
  1079. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1080. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1081. { reg1 might not be modified inbetween }
  1082. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1083. begin
  1084. DebugMsg('Peephole AndSxt2And done', p);
  1085. taicpu(hp1).opcode:=A_AND;
  1086. taicpu(hp1).ops:=3;
  1087. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1088. taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
  1089. GetNextInstruction(p,hp1);
  1090. asml.remove(p);
  1091. p.Free;
  1092. p:=hp1;
  1093. result:=true;
  1094. exit;
  1095. end
  1096. {
  1097. from
  1098. and reg1,reg0,2^n-1
  1099. mov reg2,reg1, lsl imm1
  1100. (mov reg3,reg2, lsr/asr imm1)
  1101. remove either the and or the lsl/xsr sequence if possible
  1102. }
  1103. else if (taicpu(p).oper[2]^.val < high(int64)) and
  1104. cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1105. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1106. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1107. (taicpu(hp1).ops=3) and
  1108. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1109. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1110. {$ifdef ARM}
  1111. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1112. {$endif ARM}
  1113. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1114. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1115. begin
  1116. {
  1117. and reg1,reg0,2^n-1
  1118. mov reg2,reg1, lsl imm1
  1119. mov reg3,reg2, lsr/asr imm1
  1120. =>
  1121. and reg1,reg0,2^n-1
  1122. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1123. }
  1124. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1125. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1126. (taicpu(hp2).ops=3) and
  1127. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1128. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1129. {$ifdef ARM}
  1130. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1131. {$endif ARM}
  1132. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1133. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1134. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1135. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1136. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1137. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1138. begin
  1139. DebugMsg('Peephole AndLslXsr2And done', p);
  1140. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1141. asml.Remove(hp1);
  1142. asml.Remove(hp2);
  1143. hp1.free;
  1144. hp2.free;
  1145. result:=true;
  1146. exit;
  1147. end
  1148. {
  1149. and reg1,reg0,2^n-1
  1150. mov reg2,reg1, lsl imm1
  1151. =>
  1152. mov reg2,reg0, lsl imm1
  1153. if imm1>i
  1154. }
  1155. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1156. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1157. begin
  1158. DebugMsg('Peephole AndLsl2Lsl done', p);
  1159. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1160. GetNextInstruction(p, hp1);
  1161. asml.Remove(p);
  1162. p.free;
  1163. p:=hp1;
  1164. result:=true;
  1165. exit;
  1166. end
  1167. end;
  1168. end;
  1169. {
  1170. change
  1171. and reg1, ...
  1172. mov reg2, reg1
  1173. to
  1174. and reg2, ...
  1175. }
  1176. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1177. (taicpu(p).ops>=3) and
  1178. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1179. Result:=true;
  1180. end;
  1181. end.