aoptarm.pas 49 KB


  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AArch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  26. { While ARM and AArch64 do not look very similar at first glance,
  27. several optimizations can be shared between both }
  { Peephole optimizer class derived from TAsmOptimizer; it declares the
    routines implemented in this unit's implementation section. }
  28. TARMAsmOptimizer = class(TAsmOptimizer)
  { With DEBUG_AOPTCPU defined, inserts s as an assembler comment before p;
    otherwise it is an empty stub. }
  29. procedure DebugMsg(const s : string; p : tai);
  { Tries to fold the mov "movp" into p by making p write movp's destination
    directly; returns true if movp was removed. "optimizer" only appears in
    the debug message. }
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  { Tries to forward the source register of the mov p into later
    instructions so that p can be dropped; p may be replaced on success. }
  31. function RedundantMovProcess(var p: tai; hp1: tai): boolean;
  { Searches forward from Current for an instruction that references reg and
    returns it in Next; the result tells whether one was found. }
  32. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  { Pass-1 peephole patterns starting at a uxtb / uxth / sxtb / sxth
    instruction; each returns true when the code was changed. }
  33. function OptPass1UXTB(var p: tai): Boolean;
  34. function OptPass1UXTH(var p: tai): Boolean;
  35. function OptPass1SXTB(var p: tai): Boolean;
  36. function OptPass1SXTH(var p: tai): Boolean;
  { Virtual so that descendants can override it; the implementation is not
    part of the section shown here. }
  37. function OptPass1And(var p: tai): Boolean; virtual;
  38. End;
  { Instruction matchers: each returns true when instr is an instruction
    (ait_instruction) whose opcode, condition and postfix satisfy the given
    filters. An empty condition or postfix set accepts any value. }
  39. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  40. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  { The open-array overload is only compiled for AArch64 }
  41. {$ifdef AARCH64}
  42. function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  43. {$endif AARCH64}
  44. function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
  { Field-by-field comparison of two references; yields false whenever either
    reference has a non-empty volatility set. }
  45. function RefsEqual(const r1, r2: treference): boolean;
  { True when oper is a register operand holding exactly reg }
  46. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  { True when both operands have the same type and equal payload (constant,
    register, condition code, real constant or reference) }
  47. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  48. Implementation
  49. uses
  50. cutils,verbose,globtype,globals,
  51. systems,
  52. cpuinfo,
  53. cgobj,procinfo,
  54. aasmbase,aasmdata;
  { Debug aid for the peephole optimizer.
    With DEBUG_AOPTCPU defined, a comment carrying s is inserted into the
    assembler list right before p. Without it the routine has an empty body
    and is declared inline. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true when instr is an instruction whose opcode is a member of op,
    whose condition is a member of cond and whose postfix is a member of
    postfix. An empty set acts as a wildcard for the respective property. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      if instr.typ <> ait_instruction then
        Exit;
      { the range test comes first so that only opcodes up to LastCommonAsmOp
        are ever tested for set membership }
      if (op <> []) and
         not ((taicpu(instr).opcode <= LastCommonAsmOp) and (taicpu(instr).opcode in op)) then
        Exit;
      if (cond <> []) and not (taicpu(instr).condition in cond) then
        Exit;
      Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true when instr is an instruction with exactly the opcode op whose
    condition lies in cond and whose postfix lies in postfix. Empty cond or
    postfix sets accept any value. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      if instr.typ <> ait_instruction then
        Exit;
      if taicpu(instr).opcode <> op then
        Exit;
      if (cond <> []) and not (taicpu(instr).condition in cond) then
        Exit;
      Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { Returns true when instr is an instruction whose opcode equals one of the
    entries of ops and whose postfix lies in postfix (an empty postfix set
    accepts any postfix). An empty ops array never matches. }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      idx : Integer;
    begin
      Result := False;
      if instr.typ <> ait_instruction then
        Exit;
      { the postfix test does not depend on the candidate opcode, so it is
        done once up front }
      if (postfix <> []) and not (taicpu(instr).oppostfix in postfix) then
        Exit;
      for idx := Low(ops) to High(ops) do
        if taicpu(instr).opcode = ops[idx] then
          begin
            Result := True;
            Exit;
          end;
    end;
  {$endif AARCH64}
  { Returns true when instr is an instruction with exactly the opcode op and a
    postfix contained in postfix (an empty set accepts any postfix). The
    condition of the instruction is not looked at. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result := False;
      if instr.typ <> ait_instruction then
        Exit;
      if taicpu(instr).opcode <> op then
        Exit;
      Result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  { True when oper is a register operand that holds exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      { the reg field is only read when the operand really is a register }
      if oper.typ <> top_reg then
        Result := False
      else
        Result := oper.reg = reg;
    end;
  { Compares two references field by field. The result is false as soon as
    one field differs, and also whenever either reference has a non-empty
    volatility set. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := False;
      { references with a non-empty volatility set never compare equal }
      if (r1.volatility <> []) or (r2.volatility <> []) then
        Exit;
      { address components }
      if (r1.offset <> r2.offset) or
         (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        Exit;
      { symbolic part }
      if (r1.symbol <> r2.symbol) or
         (r1.refaddr <> r2.refaddr) or
         (r1.relsymbol <> r2.relsymbol) then
        Exit;
  {$ifdef ARM}
      if r1.signindex <> r2.signindex then
        Exit;
  {$endif ARM}
      { shift and addressing mode }
      Result :=
        (r1.shiftimm = r2.shiftimm) and
        (r1.addressmode = r2.addressmode) and
        (r1.shiftmode = r2.shiftmode);
    end;
  { True when both operands are of the same type and carry the same payload.
    Only constants, registers, condition codes, real constants and references
    are compared; every other operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result := False;
      if oper1.typ <> oper2.typ then
        Exit;
      { operand types not listed here keep the default result of False }
      case oper1.typ of
        top_reg:
          Result := oper1.reg = oper2.reg;
        top_const:
          Result := oper1.val = oper2.val;
        top_ref:
          Result := RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result := oper1.cc = oper2.cc;
        top_realconst:
          Result := oper1.val_real = oper2.val_real;
      end;
    end;
  { Walks forward from Current and returns true, with the instruction in Next,
    as soon as an instruction referencing reg is found.
    The walk gives up (result false) when there is no further instruction,
    when cs_opt_level3 is not enabled (so only the directly following
    instruction is examined), when a non-instruction entry or a call/jump is
    reached, or - on ARM - when the visited instruction modifies PC.
    On failure Next holds whatever the last lookup produced and must not be
    relied upon. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Result := False;
      Next := Current;
      while GetNextInstruction(Next, Next) do
        begin
          if RegInInstruction(reg, Next) then
            begin
              { Found something }
              Result := True;
              Exit;
            end;
          { decide whether it is allowed to look any further }
          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ <> ait_instruction) or
             is_calljmp(taicpu(Next).opcode)
  {$ifdef ARM}
             or RegModifiedByInstruction(NR_PC, Next)
  {$endif ARM}
          then
            Break;
        end;
    end;
  { Tries to fold the register move movp into the instruction p.
    When movp is a plain two-operand MOV (same condition as p, no postfix) that
    copies p's destination register (oper[0]) into another register, and all
    guards below hold, p is rewritten to write movp's destination directly,
    the register allocation markers are adjusted, and movp is removed from
    the list and freed.
    p         - instruction producing the value; its oper[0] is read as a
                register without a type check, so callers must pass an
                instruction whose first operand is a register
    movp      - candidate MOV following p
    optimizer - name of the calling optimization, used in the debug message only
    Returns true if movp was removed. }
  167. function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  168. var
  169. alloc,
  170. dealloc : tai_regalloc;
  171. hp1 : tai;
  172. begin
  173. Result:=false;
  174. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  175. { We can't optimize if there is a shiftop }
  176. (taicpu(movp).ops=2) and
  177. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  178. { don't mess with moves to fp }
  179. (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
  180. { the destination register of the mov must not be used between p and movp }
  181. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  182. {$ifdef ARM}
  183. { PC should be changed only by moves }
  184. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  185. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  186. (taicpu(p).opcode<>A_CBZ) and
  187. (taicpu(p).opcode<>A_CBNZ) and
  188. { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
  189. not (
  190. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  191. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  192. (current_settings.cputype < cpu_armv6)
  193. ) and
  194. {$endif ARM}
  195. { Take care to only do this for instructions which REALLY load to the first register.
  196. Otherwise
  197. str reg0, [reg1]
  198. mov reg2, reg0
  199. will be optimized to
  200. str reg2, [reg1]
  201. }
  202. RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  203. begin
  { The fold is only performed when a dealloc marker for p's old destination
    is found searching from the entry after movp (presumably meaning the
    register is dead afterwards - confirm against FindRegDeAlloc). }
  204. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  205. if assigned(dealloc) then
  206. begin
  207. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  208. result:=true;
  209. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  210. and remove it if possible }
  211. asml.Remove(dealloc);
  212. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  213. if assigned(alloc) then
  214. begin
  215. asml.Remove(alloc);
  216. alloc.free;
  217. dealloc.free;
  218. end
  219. else
  { no matching alloc marker found: keep the dealloc, but move it to right after p }
  220. asml.InsertAfter(dealloc,p);
  221. { try to move the allocation of the target register }
  222. GetLastInstruction(movp,hp1);
  223. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  224. if assigned(alloc) then
  225. begin
  226. asml.Remove(alloc);
  227. asml.InsertBefore(alloc,p);
  228. { adjust used regs }
  229. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  230. end;
  231. { finally get rid of the mov }
  232. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  233. { Remove preindexing and postindexing for LDR in some cases.
  234. For example:
  235. ldr reg2,[reg1, xxx]!
  236. mov reg1,reg2
  237. must be translated to:
  238. ldr reg1,[reg1, xxx]
  239. Preindexing must be removed there, since the same register is used as the base and as the target.
  240. Such case is not allowed for ARM CPU and produces crash. }
  241. if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
  242. and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
  243. then
  244. taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
  245. asml.remove(movp);
  246. movp.free;
  247. end;
  248. end;
  249. end;
  { Tries to make the register-to-register move p redundant by forwarding its
    source register (oper[1]) into later instructions. Nothing is done unless
    p has two operands, a register source and no postfix.
    Two strategies are tried:
    1. If hp1 is one of the listed data-processing instructions with the same
       condition that writes p's destination and reads it as operand 1 or 2,
       that operand is replaced by p's source, p is removed and freed, and p
       is set to hp1.
    2. Otherwise, for an unconditional p, the following instructions are
       scanned (only the first one unless cs_opt_level3 is enabled). Uses of
       p's destination in LDR/STR references, as the stored register of an
       STR, or as the source of a MOV are replaced by p's source. p is
       removed through RemoveCurrentP once its destination is no longer
       needed. The incoming value of hp1 is overwritten in this strategy.
    p   - the move under inspection; may point to a different instruction on return
    hp1 - instruction tested by strategy 1 (presumably the one following p -
          confirm at the call sites)
    Returns true when p was removed/replaced. }
  250. function TARMAsmOptimizer.RedundantMovProcess(var p: tai;hp1: tai):boolean;
  251. var
  252. I: Integer;
  253. current_hp: tai;
  254. LDRChange: Boolean;
  255. begin
  256. Result:=false;
  257. {
  258. change
  259. mov r1, r0
  260. add r1, r1, #1
  261. to
  262. add r1, r0, #1
  263. Todo: Make it work for mov+cmp too
  264. CAUTION! If this one is successful p might not be a mov instruction anymore!
  265. }
  266. if (taicpu(p).ops = 2) and
  267. (taicpu(p).oper[1]^.typ = top_reg) and
  268. (taicpu(p).oppostfix = PF_NONE) then
  269. begin
  270. if
  271. MatchInstruction(hp1, [A_ADD, A_ADC,
  272. {$ifdef ARM}
  273. A_RSB, A_RSC,
  274. {$endif ARM}
  275. A_SUB, A_SBC,
  276. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  277. [taicpu(p).condition], []) and
  278. { MOV and MVN might only have 2 ops }
  279. (taicpu(hp1).ops >= 2) and
  280. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  281. (taicpu(hp1).oper[1]^.typ = top_reg) and
  282. (
  283. (taicpu(hp1).ops = 2) or
  284. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  285. ) and
  286. {$ifdef AARCH64}
  287. (taicpu(p).oper[1]^.reg<>NR_SP) and
  288. {$endif AARCH64}
  289. not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  290. begin
  291. { When we get here we still don't know if the registers match }
  { Note: the body of this for loop is the single if statement below; the
    "if Result then Exit" that follows runs once, after the loop. }
  292. for I:=1 to 2 do
  293. {
  294. If the first loop was successful p will be replaced with hp1.
  295. The checks will still be ok, because all required information
  296. will also be in hp1 then.
  297. }
  298. if (taicpu(hp1).ops > I) and
  299. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
  300. {$ifdef ARM}
  301. { prevent certain combinations on thumb(2), this is only a safe approximation }
  302. and (not(GenerateThumbCode or GenerateThumb2Code) or
  303. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  304. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
  305. {$endif ARM}
  306. then
  307. begin
  308. DebugMsg('Peephole RedundantMovProcess done', hp1);
  309. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  { p equals hp1 on the second iteration if the first one already replaced p }
  310. if p<>hp1 then
  311. begin
  312. asml.remove(p);
  313. p.free;
  314. p:=hp1;
  315. Result:=true;
  316. end;
  317. end;
  318. if Result then Exit;
  319. end
  320. { Change: Change:
  321. mov r1, r0 mov r1, r0
  322. ... ...
  323. ldr/str r2, [r1, etc.] mov r2, r1
  324. To: To:
  325. ldr/str r2, [r0, etc.] mov r2, r0
  326. }
  327. else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
  328. {$ifdef ARM}
  329. and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
  330. and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
  331. {$endif ARM}
  332. {$ifdef AARCH64}
  333. and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
  334. {$endif AARCH64}
  335. then
  336. begin
  337. current_hp := p;
  338. TransferUsedRegs(TmpUsedRegs);
  339. { Search local instruction block }
  { hp1 is reassigned here; the value passed in by the caller is no longer used }
  340. while GetNextInstruction(current_hp, hp1) and (hp1 <> BlockEnd) and (hp1.typ = ait_instruction) do
  341. begin
  342. UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
  343. LDRChange := False;
  { NOTE(review): oper[1] is dereferenced as a reference below without a
    typ = top_ref check - presumably two-operand LDR/STR always carry a
    reference as second operand; confirm. }
  344. if (taicpu(hp1).opcode in [A_LDR,A_STR]) and (taicpu(hp1).ops = 2) then
  345. begin
  346. { Change the registers from r1 to r0 }
  347. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
  348. {$ifdef ARM}
  349. { This optimisation conflicts with something and raises
  350. an access violation - needs further investigation. [Kit] }
  351. (taicpu(hp1).opcode <> A_LDR) and
  352. {$endif ARM}
  353. { Don't mess around with the base register if the
  354. reference is pre- or post-indexed }
  355. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) then
  356. begin
  357. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  358. LDRChange := True;
  359. end;
  360. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  361. begin
  362. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  363. LDRChange := True;
  364. end;
  365. if LDRChange then
  366. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', hp1);
  367. { Drop out if we're dealing with pre-indexed references }
  368. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
  369. (
  370. RegInRef(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[1]^.ref^) or
  371. RegInRef(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.ref^)
  372. ) then
  373. begin
  374. { Remember to update register allocations }
  375. if LDRChange then
  376. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  377. Break;
  378. end;
  379. { The register being stored can be potentially changed (as long as it's not the stack pointer) }
  380. if (taicpu(hp1).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  381. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) then
  382. begin
  383. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', hp1);
  384. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[1]^.reg;
  385. LDRChange := True;
  386. end;
  387. if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
  388. begin
  389. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  { p can go if hp1 is an LDR that overwrites p's destination, or if the
    destination is not used after hp1 }
  390. if (taicpu(p).oppostfix = PF_None) and
  391. (
  392. (
  393. (taicpu(hp1).opcode = A_LDR) and
  394. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg)
  395. ) or
  396. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs)
  397. ) and
  398. { Double-check to see if the old registers were actually
  399. changed (e.g. if the super registers matched, but not
  400. the sizes, they won't be changed). }
  401. (
  402. (taicpu(hp1).opcode = A_LDR) or
  403. not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^)
  404. ) and
  405. not RegInRef(taicpu(p).oper[0]^.reg, taicpu(hp1).oper[1]^.ref^) then
  406. begin
  407. DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
  408. RemoveCurrentP(p);
  409. Result := True;
  410. Exit;
  411. end;
  412. end;
  413. end
  414. else if (taicpu(hp1).opcode = A_MOV) and (taicpu(hp1).oppostfix = PF_None) and
  415. (taicpu(hp1).ops = 2) then
  416. begin
  417. if MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) then
  418. begin
  419. { Found another mov that writes entirely to the register }
  420. if RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp1) then
  421. begin
  422. { Register was used beforehand }
  423. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
  424. begin
  425. { This MOV is exactly the same as the first one.
  426. Since none of the registers have changed value
  427. at this point, we can remove it. }
  428. DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', hp1);
  429. asml.Remove(hp1);
  430. hp1.Free;
  431. { We still have the original p, so we can continue optimising;
  432. if it was -O2 or below, this instruction appeared immediately
  433. after the first MOV, so we're technically not looking more
  434. than one instruction ahead after it's removed! [Kit] }
  { current_hp is left unchanged, so the next iteration fetches the
    instruction that followed the removed hp1 }
  435. Continue;
  436. end
  437. else
  438. { Register changes value - drop out }
  439. Break;
  440. end;
  441. { We can delete the first MOV (only if the second MOV is unconditional) }
  442. {$ifdef ARM}
  443. if (taicpu(p).oppostfix = PF_None) and
  444. (taicpu(hp1).condition = C_None) then
  445. {$endif ARM}
  446. begin
  447. DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
  448. RemoveCurrentP(p);
  449. Result := True;
  450. end;
  451. Exit;
  452. end
  453. else if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  454. begin
  455. if MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
  456. { Be careful - if the entire register is not used, removing this
  457. instruction will leave the unused part uninitialised }
  458. {$ifdef AARCH64}
  459. and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
  460. {$endif AARCH64}
  461. then
  462. begin
  463. { Instruction will become mov r1,r1 }
  464. DebugMsg('Peephole Optimization: Mov2None 2 done', hp1);
  465. asml.Remove(hp1);
  466. hp1.Free;
  467. Continue;
  468. end;
  469. { Change the old register (checking the first operand again
  470. forces it to be left alone if the full register is not
  471. used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
  472. {$ifdef AARCH64}
  473. if not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
  474. {$endif AARCH64}
  475. begin
  476. DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', hp1);
  477. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  478. AllocRegBetween(taicpu(p).oper[1]^.reg, p, hp1, UsedRegs);
  479. { If this was the only reference to the old register,
  480. then we can remove the original MOV now }
  481. if (taicpu(p).oppostfix = PF_None) and
  482. { A bit of a hack - sometimes registers aren't tracked properly, so do not
  483. remove if the register was apparently not allocated when its value is
  484. first set at the MOV command (this is especially true for the stack
  485. register). [Kit] }
  486. (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
  487. RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
  488. not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs) then
  489. begin
  490. DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
  491. RemoveCurrentP(p);
  492. Result := True;
  493. Exit;
  494. end;
  495. end;
  496. end;
  497. end;
  498. { On low optimisation settings, don't search more than one instruction ahead }
  499. if not(cs_opt_level3 in current_settings.optimizerswitches) or
  500. { Stop at procedure calls and jumps }
  501. is_calljmp(taicpu(hp1).opcode) or
  502. { If the read register has changed value, or the MOV
  503. destination register has been used, drop out }
  504. RegInInstruction(taicpu(p).oper[0]^.reg, hp1) or
  505. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) then
  506. Break;
  507. current_hp := hp1;
  508. end;
  509. end;
  510. end;
  511. end;
  { Pass-1 peephole optimizations that start at a uxtb instruction.
    p is the instruction under inspection; when a pattern removes p, p is
    advanced to the instruction that followed it. Returns true when the code
    was changed.

    The uxtb+uxth and uxtb+uxtb folds rewrite the SECOND instruction and
    remove p. Retargeting p instead would move the write of reg3 ahead of any
    instructions lying between p and the second instruction (possible when
    GetNextInstructionUsingReg looks further than one instruction), which
    could clobber or expose reg3 too early. Rewriting the second instruction
    only needs reg1 to be unmodified in between, and that is exactly what the
    existing guard checks. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
          uxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb must not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxth reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxtb reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          and  reg3,reg2,#0x*FF
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        { any mask keeping all of the low eight bits leaves a zero-extended byte untouched }
        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      { no pattern applied: try to fold a following mov into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
        Result:=true;
    end;
  { Pass-1 peephole optimizations that start at a uxth instruction.
    p is the instruction under inspection. Three folds are tried in order:
      uxth reg2,reg1 + strh reg2,[...]          ->  strh reg1,[...]
      uxth reg2,reg1 + uxth reg3,reg2           ->  uxth reg3,reg1
      uxth reg2,reg1 + and reg3,reg2,#0x*FFFF   ->  uxth reg3,reg1
    Each requires reg2 to end its life at the second instruction and reg1 to
    be unmodified in between. All of them rewrite the second instruction,
    remove p and advance p to the instruction that followed it. If none
    applies, RemoveSuperfluousMove is tried on the next instruction using
    reg2. Returns true when the code was changed. }
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      NextUse, Follower: tai;
      DropP: Boolean;
    begin
      Result:=false;
      { every pattern, including the fallback, needs the next user of the
        destination register }
      if not GetNextInstructionUsingReg(p,NextUse,taicpu(p).oper[0]^.reg) then
        Exit;

      DropP:=false;
      { all folds need an unconditional two-operand instruction without postfix }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) then
        begin
          if MatchInstruction(NextUse, A_STR, [C_None], [PF_H]) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(NextUse)) and
            { the reference in strh must not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(NextUse).oper[1]^.ref^)) and
            { reg1 must not be modified in between }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,NextUse)) then
            begin
              { uxth + strh: store reg1 directly }
              DebugMsg('Peephole UXTHStrh2Strh done', p);
              taicpu(NextUse).loadReg(0,taicpu(p).oper[1]^.reg);
              DropP:=true;
            end
          else if taicpu(p).opcode=A_UXTH then
            begin
              if MatchInstruction(NextUse, A_UXTH, [C_None], [PF_None]) and
                (taicpu(NextUse).ops=2) and
                MatchOperand(taicpu(NextUse).oper[1]^, taicpu(p).oper[0]^.reg) and
                RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(NextUse)) and
                { reg1 must not be modified in between }
                not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,NextUse)) then
                begin
                  { uxth + uxth: let the second one read reg1 }
                  DebugMsg('Peephole UxthUxth2Uxth done', p);
                  AllocRegBetween(taicpu(p).oper[1]^.reg,p,NextUse,UsedRegs);
                  taicpu(NextUse).loadReg(1,taicpu(p).oper[1]^.reg);
                  DropP:=true;
                end
              else if MatchInstruction(NextUse, A_AND, [C_None], [PF_None]) and
                (taicpu(NextUse).ops=3) and
                (taicpu(NextUse).oper[2]^.typ=top_const) and
                ((taicpu(NextUse).oper[2]^.val and $FFFF)=$FFFF) and
                MatchOperand(taicpu(NextUse).oper[1]^, taicpu(p).oper[0]^.reg) and
                RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(NextUse)) and
                { reg1 must not be modified in between }
                not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,NextUse)) then
                begin
                  { uxth + and #0x*FFFF: turn the and into the uxth }
                  DebugMsg('Peephole UxthAndImm2Uxth done', p);
                  taicpu(NextUse).opcode:=A_UXTH;
                  taicpu(NextUse).ops:=2;
                  taicpu(NextUse).loadReg(1,taicpu(p).oper[1]^.reg);
                  DropP:=true;
                end;
            end;
        end;

      if DropP then
        begin
          { common tail of all folds: p is gone, continue with its successor }
          GetNextInstruction(p, Follower);
          asml.remove(p);
          p.free;
          p:=Follower;
          Result:=true;
        end
      else
        Result:=RemoveSuperfluousMove(p, NextUse, 'UxthMov2Data');
    end;
  { Pass-1 peephole optimizations that start at an sxtb instruction.
    p is the instruction under inspection; when a pattern removes p, p is
    advanced to the instruction that followed it. Returns true when the code
    was changed.

    Notes on the folds:
    - sxtb+sxth and sxtb+sxtb rewrite the SECOND instruction and remove p.
      Retargeting p instead would move the write of reg3 ahead of any
      instructions lying between the two, while the guard only ensures that
      reg1 is unmodified in between, which is what rewriting the second
      instruction needs.
    - sxtb followed by "and #$FF" yields the ZERO-extended low byte of reg1,
      so the replacement is uxtb, and it is only valid for a mask of exactly
      $FF: wider masks keep some of the sign bits and match neither
      extension. }
  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
          sxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb must not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          sxth reg3,reg2
          dealloc reg2
        to
          sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxth2Sxtb done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).opcode:=A_SXTB;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          sxtb reg3,reg2
          dealloc reg2
        to
          sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          and  reg3,reg2,#0xFF
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        { the mask has to cut off every sign-extension bit }
        (taicpu(hp1).oper[2]^.val=$FF) and
  {$ifdef AARCH64}
        { uxtb has no 64-bit destination form on AArch64 }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBQ) and
  {$endif AARCH64}
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified in between }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      { no pattern applied: try to fold a following mov into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
        Result:=true;
    end;
  {
    Pass-1 peephole optimizations for a sign-extend-halfword instruction.

    p is the instruction being inspected.  The first pattern matches p
    against its own opcode, so this routine relies on the caller having
    already dispatched on the opcode.

    Returns true when the instruction list was changed.  Whenever p itself
    is removed, p is advanced to the instruction that followed it.
  }
  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      {
        change
        sxth reg2,reg1
        strh reg2,[...]
        dealloc reg2
        to
        strh reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { the reference in strh might not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SXTHStrh2Strh done', p);
          { store the un-extended source directly, then drop the sxth }
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        sxth reg2,reg1
        sxth reg3,reg2
        dealloc reg2
        to
        sxth reg3,reg1
      }
      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops=2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxthSxth2Sxth done', p);
          { reg1 is now read at hp1, so keep it allocated up to there }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          { hp1 already is an sxth (checked above); only its source changes }
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      {
        change
        sxth reg2,reg1
        and  reg3,reg2,#65535
        dealloc reg2
        to
        uxth reg3,reg1

        Masking a sign-extended halfword with 0xFFFF throws the extension
        bits away again, so the pair is a zero extension, not a sign
        extension.  Only the exact mask 0xFFFF is accepted: a mask that
        keeps some but not all of the upper bits cannot be expressed by a
        single extend instruction.
      }
      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=$FFFF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        { destination and new source must have the same register width;
          NOTE(review): always true when all integer registers share one
          subregister kind, intended to avoid a mixed-width uxth elsewhere -
          confirm against the target's register model }
        (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(p).oper[1]^.reg)) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxthAndImm2Uxth done', p);
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p, hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
          result:=true;
        end
      { last resort: fold a following move of the result into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
        Result:=true;
    end;
  {
    Pass-1 peephole optimizations for an AND instruction.

    p is the AND being inspected.  When its second operand is a register
    and its third a constant, the routine tries, in this order:
      - merging it with a following AND on its result (AndAnd2And/Mov0),
      - dropping it in front of a byte store (AndStrb2Strb),
      - absorbing a following uxtb/uxth or sxtb/sxth (AndUxt2And/AndSxt2And),
      - simplifying a following lsl / lsl+lsr/asr pair (AndLsl...).
    Each of these exits immediately on success.  Otherwise a following
    move of the result is folded into p if possible (DataMov2Data).

    Returns true when the instruction list was changed.  Whenever p itself
    is removed, p is advanced to the instruction that followed it.
  }
  function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
      i: longint;
    begin
      Result:=false;
      {
        optimize
        and reg2,reg1,const1
        ...
      }
      if (taicpu(p).ops>2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[2]^.typ = top_const) then
        begin
          {
            change
            and reg2,reg1,const1
            ...
            and reg3,reg2,const2
            to
            and reg3,reg1,(const1 and const2)
          }
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
             MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
             RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
             MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
             (taicpu(hp1).oper[2]^.typ = top_const)
{$ifdef AARCH64}
             and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
                   ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
                  ) or
                  ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
{$endif AARCH64}
             then
            begin
              { variant 1: reg3 is untouched between p and hp1, so the merged
                instruction can live at p's position and hp1 is removed }
              if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                begin
                  DebugMsg('Peephole AndAnd2And done', p);
                  AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 1 done', p);
                      taicpu(p).opcode:=A_MOV;
                      taicpu(p).ops:=2;
                      taicpu(p).loadConst(1,0);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                      { hp1 is removed below, so the zero has to land in
                        hp1's destination (reg3), not in the dying reg2 }
                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 1 done', p);
                      taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                    end;
                  asml.remove(hp1);
                  hp1.free;
                  Result:=true;
                  exit;
                end
              { variant 2: reg1 is untouched between p and hp1, so the merged
                instruction can live at hp1's position and p is removed }
              else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                begin
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
                      taicpu(hp1).opcode:=A_MOV;
                      taicpu(hp1).loadConst(1,0);
                      taicpu(hp1).ops:=2;
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 2 done', hp1);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                      taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                    end;
                  GetNextInstruction(p, hp1);
                  RemoveCurrentP(p);
                  p:=hp1;
                  Result:=true;
                  exit;
                end;
            end
          {
            change
            and reg2,reg1,$xxxxxxFF
            strb reg2,[...]
            dealloc reg2
            to
            strb reg1,[...]
          }
          else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndStrb2Strb done', p);
{$ifdef AARCH64}
              { the store is given the R_SUBD view of reg1 }
              taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
{$else AARCH64}
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
{$endif AARCH64}
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              RemoveCurrentP(p);
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,x        (x has no bits set in $ffffff00)
            uxtb/uxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndUxt2And done', p);
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,x        (x has no bits set in $ffffff80)
            sxtb/sxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndSxt2And done', p);
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            from
            and reg1,reg0,2^n-1
            mov reg2,reg1, lsl imm1
            (mov reg3,reg2, lsr/asr imm1)
            remove either the and or the lsl/xsr sequence if possible

            i receives n, the number of low bits kept by the mask
          }
          else if (taicpu(p).oper[2]^.val < high(int64)) and
            cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
            (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
            (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
            begin
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                mov reg3,reg2, lsr/asr imm1
                =>
                and reg3,reg0,2^n-1
                if n<32-imm1, or n=32-imm1 and the second shift is lsr

                NOTE(review): the search for the second mov is keyed on
                reg1 (destination of p) while the operand check below is
                on reg2 (destination of hp1) - confirm this is intended.
              }
              if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
                MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
                (taicpu(hp2).ops=3) and
                MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
                (taicpu(hp2).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
                (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
                (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
                (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
                RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
                ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
                 ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                  (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
                begin
                  DebugMsg('Peephole AndLslXsr2And done', p);
                  { the and now writes the final destination directly }
                  taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
                  asml.Remove(hp1);
                  asml.Remove(hp2);
                  hp1.free;
                  hp2.free;
                  result:=true;
                  exit;
                end
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                =>
                mov reg2,reg0, lsl imm1
                if n>32-imm1, i.e. the shift discards every bit the
                mask would have cleared
              }
              else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
                begin
                  DebugMsg('Peephole AndLsl2Lsl done', p);
                  taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
                  GetNextInstruction(p, hp1);
                  asml.Remove(p);
                  p.free;
                  p:=hp1;
                  result:=true;
                  exit;
                end
            end;
        end;
      {
        change
        and reg1, ...
        mov reg2, reg1
        to
        and reg2, ...
      }
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        (taicpu(p).ops>=3) and
        RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
        Result:=true;
    end;
  1179. end.