aoptarm.pas 72 KB


  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object used commonly for ARM and AArch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$ifdef EXTDEBUG}
  22. {$define DEBUG_AOPTCPU}
  23. {$endif EXTDEBUG}
  24. Interface
  25. uses
  26. cgbase, cgutils, globtype, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  27. Type
  { Although ARM and AArch64 do not look very similar at first glance,
    several optimizations can be shared between both; this class hosts them }
  TARMAsmOptimizer = class(TAsmOptimizer)
    { Debug aid: when DEBUG_AOPTCPU is defined, s is inserted as an assembler
      comment in front of p; otherwise the implementation is an empty stub }
    procedure DebugMsg(const s : string; p : tai);

    { Tries to fold "<p> reg0,... ; mov reg2,reg0" into "<p> reg2,...";
      optimizer is the name printed in the debug message }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;

    { Forwards the source register of the register-to-register mov p into
      following instructions and removes movs that became redundant }
    function RedundantMovProcess(var p: tai; var hp1: tai): boolean;

    { Searches forward from Current for the next instruction involving reg }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; const reg: TRegister): Boolean;

  {$ifdef AARCH64}
    { Merges the extension instruction p into a following add/sub/cmp/cmn
      as an extended-register operand that uses shiftmode }
    function USxtOp2Op(var p, hp1: tai; shiftmode: tshiftmode): Boolean;
  {$endif AARCH64}

    { Per-opcode peephole handlers; their bodies are found in the
      implementation section of this unit }
    function OptPreSBFXUBFX(var p: tai): Boolean;
    function OptPass1UXTB(var p: tai): Boolean;
    function OptPass1UXTH(var p: tai): Boolean;
    function OptPass1SXTB(var p: tai): Boolean;
    function OptPass1SXTH(var p: tai): Boolean;

    { Declared virtual, so descendant optimizers can override these }
    function OptPass1LDR(var p: tai): Boolean; virtual;
    function OptPass1STR(var p: tai): Boolean; virtual;
    function OptPass1And(var p: tai): Boolean; virtual;

    function OptPass2AND(var p: tai): Boolean;
    function OptPass2TST(var p: tai): Boolean;
  End;
  { Instruction matchers.  Each one returns True only if instr is an
    instruction (ait_instruction) whose opcode, condition and postfix pass
    the given filters.  An empty cond or postfix set - and, for the set
    variant, an empty op set - means "do not filter on this field". }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  {$ifdef AARCH64}
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
  {$endif AARCH64}
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;

  { True if both references are non-volatile and all compared fields agree }
  function RefsEqual(const r1, r2: treference): boolean;

  { Operand matchers: compare an operand with a register, with another
    operand, or with an integer constant }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
  59. Implementation
  60. uses
  61. cutils,verbose,globals,
  62. systems,
  63. cpuinfo,
  64. cgobj,procinfo,
  65. aasmbase,aasmdata,itcpugas;
  {$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';

  { Debug build: record s as an assembler comment placed directly before p }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';

  { Release build: intentionally empty, declared inline }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Matches instr against a set of common opcodes, a set of conditions and a
    set of postfixes.  An empty set disables the corresponding filter. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the opcode must lie within the common range before it may be
        tested against the set }
      if (op<>[]) and
         not((taicpu(instr).opcode<=LastCommonAsmOp) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Matches instr against exactly one opcode plus condition and postfix
    sets.  An empty cond or postfix set disables that filter. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { Matches instr against any opcode listed in ops; an empty postfix set
    accepts every postfix }
  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
    var
      candidate : TAsmOp;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the postfix test is the same for every candidate opcode, so do it once }
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      for candidate in ops do
        if taicpu(instr).opcode=candidate then
          exit(true);
    end;
  {$endif AARCH64}
  { Matches instr against exactly one opcode; the condition is not looked
    at.  An empty postfix set accepts every postfix. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { True if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        Result:=false
      else
        Result:=(oper.reg=reg);
    end;
  { Field-by-field comparison of two references.  The result is always False
    when either reference carries a non-empty volatility set. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { registers, offset and scaling }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbol information }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      {$ifdef ARM}
      if r1.signindex<>r2.signindex then
        exit;
      {$endif ARM}
      { shift and addressing mode }
      Result:=
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { True if both operands are of the same kind and carry the same payload.
    Only constant, register, condition code, real constant and reference
    operands are compared; every other operand kind yields False. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=(oper1.reg=oper2.reg);
          top_const:
            Result:=(oper1.val=oper2.val);
          top_ref:
            Result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_conditioncode:
            Result:=(oper1.cc=oper2.cc);
          top_realconst:
            Result:=(oper1.val_real=oper2.val_real);
          else
            Result:=false;
        end;
    end;
  { True if oper is a constant operand whose value equals a }
  function MatchOperand(const oper: TOper; const a: TCGInt): boolean; inline;
    begin
      if oper.typ<>top_const then
        Result:=false
      else
        Result:=(oper.val=a);
    end;
  {$ifdef AARCH64}
  { Folds the two-operand extension instruction p (destination oper[0],
    source oper[1]) into hp1 when hp1 is

      add/sub  regX, regY, <dest of p>     (regY <> dest of p)
      cmp/cmn  regY, <dest of p>

    and the destination of p ends its life at hp1.  hp1 then reads the
    source register of p directly, through a shifter operand carrying
    shiftmode with a shift amount of 0, and p is removed. }
  function TARMAsmOptimizer.USxtOp2Op(var p,hp1: tai; shiftmode: tshiftmode): Boolean;
    var
      extendOp: tshifterop;
      leadingOps: Integer;
      isCompare, shapeOk: Boolean;
    begin
      Result:=false;
      if taicpu(p).ops<>2 then
        exit;

      isCompare:=MatchInstruction(hp1, [A_CMP,A_CMN], [C_None], [PF_None]);
      if isCompare then
        shapeOk:=
          (taicpu(hp1).ops=2) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg)
      else
        shapeOk:=
          MatchInstruction(hp1, [A_ADD,A_SUB], [C_None], [PF_None,PF_S]) and
          (taicpu(hp1).ops=3) and
          MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
          not(MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg));
      if not shapeOk then
        exit;

      if not RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
        exit;
      { reg1 must not be modified in between }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
        exit;

      DebugMsg('Peephole '+gas_op2str[taicpu(p).opcode]+gas_op2str[taicpu(hp1).opcode]+'2'+gas_op2str[taicpu(hp1).opcode]+' done', p);
      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);

      { cmp/cmn have no destination operand, add/sub have one in front }
      if isCompare then
        leadingOps:=0
      else
        leadingOps:=1;

      taicpu(hp1).loadReg(leadingOps+1,taicpu(p).oper[1]^.reg);
      taicpu(hp1).ops:=leadingOps+3;

      shifterop_reset(extendOp);
      extendOp.shiftmode:=shiftmode;
      extendOp.shiftimm:=0;
      taicpu(hp1).loadshifterop(leadingOps+2,extendOp);

      Result:=RemoveCurrentP(p);
    end;
  {$endif AARCH64}
  { Walks forward from Current and returns True, with Next pointing at it,
    as soon as an instruction is found for which RegInInstruction(reg, ...)
    holds.  The walk gives up (result False) when there is no further
    instruction, when cs_opt_level3 is not enabled (so only the directly
    following instruction is examined), when a non-instruction is reached,
    at calls/jumps and - on ARM - when the instruction modifies PC. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; const reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=False;
      while GetNextInstruction(Next,Next) do
        begin
          if RegInInstruction(reg,Next) then
            { Found something }
            Exit(True);

          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ<>ait_instruction) or
             is_calljmp(taicpu(Next).opcode)
             {$ifdef ARM}
             or RegModifiedByInstruction(NR_PC,Next)
             {$endif ARM}
             then
            Break;
        end;
    end;
  { Turns

      <p>  reg0, ...
      mov  reg2, reg0      (movp; reg0 is deallocated afterwards)

    into

      <p>  reg2, ...

    p        - instruction whose operand 0 receives a new value
    movp     - candidate mov following p
    optimizer- name of the calling optimization, used in the debug message
    Returns True if movp was removed (movp is freed in that case). }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      regAlloc,
      regDealloc : tai_regalloc;
    begin
      Result:=false;

      if not(
           MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
           { We can't optimize if there is a shiftop }
           (taicpu(movp).ops=2) and
           MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
           { don't mess with moves to fp }
           (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
           { the destination register of the mov must not be used between p and movp }
           not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
           {$ifdef ARM}
           { PC should be changed only by moves }
           (taicpu(movp).oper[0]^.reg<>NR_PC) and
           { cb[n]z are thumb instructions which require specific registers, with no wide forms }
           (taicpu(p).opcode<>A_CBZ) and
           (taicpu(p).opcode<>A_CBNZ) and
           { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
           not (
             (taicpu(p).opcode in [A_MLA, A_MUL]) and
             (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
             (current_settings.cputype < cpu_armv6)
           ) and
           {$endif ARM}
           { Take care to only do this for instructions which REALLY load to the first register.
             Otherwise
               str reg0, [reg1]
               mov reg2, reg0
             will be optimized to
               str reg2, [reg1]
           }
           RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p)
         ) then
        exit;

      { reg0 has to be released after the mov, otherwise its value is still needed }
      regDealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
      if not assigned(regDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regDealloc);
      regAlloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but move it right behind p }
        asml.InsertAfter(regDealloc,p);

      AllocRegBetween(taicpu(movp).oper[0]^.reg,p,movp,UsedRegs);

      { let p write to the destination of the mov directly }
      taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]

        Preindexing must be removed there, since the same register is used as the base and as the target.
        Such case is not allowed for ARM CPU and produces crash. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      { finally get rid of the mov }
      asml.remove(movp);
      movp.free;
    end;
  { Tries to make the register-to-register move p ("mov r1, r0") redundant.

    p   - the mov; on success it may be removed or replaced, see CAUTION below
    hp1 - instruction following p as determined by the caller; set to nil
          if this routine frees the instruction it points to

    Returns True if p was removed or replaced.  Note that following
    instructions may have been rewritten even when the result is False. }
  function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
    var
      I: Integer;
      current_hp, next_hp: tai;
      LDRChange: Boolean;
    begin
      Result:=false;
      {
        change
          mov r1, r0
          add r1, r1, #1
        to
          add r1, r0, #1

        Todo: Make it work for mov+cmp too

        CAUTION! If this one is successful p might not be a mov instruction anymore!
      }
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oppostfix = PF_NONE) then
        begin
          if
            MatchInstruction(hp1, [A_ADD, A_ADC,
                                   {$ifdef ARM}
                                   A_RSB, A_RSC,
                                   {$endif ARM}
                                   A_SUB, A_SBC,
                                   A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
                             [taicpu(p).condition], []) and
            { MOV and MVN might only have 2 ops }
            (taicpu(hp1).ops >= 2) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (
              (taicpu(hp1).ops = 2) or
              (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
            ) and
            {$ifdef AARCH64}
            (taicpu(p).oper[1]^.reg<>NR_SP) and
            { in this case you have to transform it to movk or the like }
            (getsupreg(taicpu(p).oper[1]^.reg)<>RS_XZR) and
            {$endif AARCH64}
            not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              { When we get here we still don't know if the registers match }
              for I:=1 to 2 do
                {
                  If the first loop was successful p will be replaced with hp1.
                  The checks will still be ok, because all required information
                  will also be in hp1 then.
                }
                if (taicpu(hp1).ops > I) and
                   { oper[2] may be a constant or a shifter operand: its reg
                     field must neither be read nor overwritten in that case }
                   (taicpu(hp1).oper[I]^.typ = top_reg) and
                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
                   {$ifdef ARM}
                   { prevent certain combinations on thumb(2), this is only a safe approximation }
                   and (not(GenerateThumbCode or GenerateThumb2Code) or
                     ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
                      (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
                   {$endif ARM}
                   then
                  begin
                    DebugMsg('Peephole RedundantMovProcess done', hp1);
                    taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
                    if p<>hp1 then
                      begin
                        asml.remove(p);
                        p.free;
                        p:=hp1;
                        Result:=true;
                      end;
                  end;

              if Result then Exit;
            end
          { Change:                       Change:
              mov     r1, r0                mov r1, r0
              ...                           ...
              ldr/str r2, [r1, etc.]        mov r2, r1
            To:                           To:
              ldr/str r2, [r0, etc.]        mov r2, r0
          }
          else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
            {$ifdef ARM}
            and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
            and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
            { Thumb does not support references with base and index one being SP }
            and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
            {$endif ARM}
            {$ifdef AARCH64}
            and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
            {$endif AARCH64}
            then
            begin
              current_hp := p;
              TransferUsedRegs(TmpUsedRegs);

              { Search local instruction block }
              while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
                begin
                  UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
                  LDRChange := False;

                  if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2) and
                     { the code below dereferences oper[1]^.ref, so make sure it is a reference }
                     (taicpu(next_hp).oper[1]^.typ = top_ref)
                     {$ifdef AARCH64}
                     { If r0 is the zero register, then this sequence of instructions will cause
                       an access violation, but that's better than an assembler error caused by
                       changing r0 to xzr inside the reference (Where it's illegal). [Kit] }
                     and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_XZR)
                     {$endif AARCH64}
                     then
                    begin
                      { Change the registers from r1 to r0 }
                      if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
                         {$ifdef ARM}
                         { This optimisation conflicts with something and raises
                           an access violation - needs further investigation. [Kit] }
                         (taicpu(next_hp).opcode <> A_LDR) and
                         {$endif ARM}
                         { Don't mess around with the base register if the
                           reference is pre- or post-indexed }
                         (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                        begin
                          taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange then
                        DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);

                      { Drop out if we're dealing with pre-indexed references }
                      if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
                         (
                           RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
                           RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
                         ) then
                        begin
                          { Remember to update register allocations }
                          if LDRChange then
                            AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                          Break;
                        end;

                      { The register being stored can be potentially changed (as long as it's not the stack pointer) }
                      if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                         MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
                          taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
                          LDRChange := True;
                        end;

                      if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
                        begin
                          AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
                          if (taicpu(p).oppostfix = PF_None) and
                             (
                               (
                                 (taicpu(next_hp).opcode = A_LDR) and
                                 MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
                               ) or
                               not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
                             ) and
                             { Double-check to see if the old registers were actually
                               changed (e.g. if the super registers matched, but not
                               the sizes, they won't be changed). }
                             (
                               (taicpu(next_hp).opcode = A_LDR) or
                               not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
                             ) and
                             not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
                            begin
                              DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2a done', p);
                              RemoveCurrentP(p);
                              Result := True;
                              Exit;
                            end;
                        end;
                    end
                  else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
                    (taicpu(next_hp).ops = 2) then
                    begin
                      if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
                        begin
                          { Found another mov that writes entirely to the register }
                          if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
                            begin
                              { Register was used beforehand }
                              if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
                                begin
                                  { This MOV is exactly the same as the first one.
                                    Since none of the registers have changed value
                                    at this point, we can remove it. }
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 3a done', next_hp);

                                  if (next_hp = hp1) then
                                    { Don't let hp1 become a dangling pointer }
                                    hp1 := nil;

                                  asml.Remove(next_hp);
                                  next_hp.Free;

                                  { We still have the original p, so we can continue optimising;
                                    if it was -O2 or below, this instruction appeared immediately
                                    after the first MOV, so we're technically not looking more
                                    than one instruction ahead after it's removed! [Kit] }
                                  Continue;
                                end
                              else
                                { Register changes value - drop out }
                                Break;
                            end;

                          { We can delete the first MOV (only if the second MOV is unconditional) }
                          {$ifdef ARM}
                          if (taicpu(p).oppostfix = PF_None) and
                             (taicpu(next_hp).condition = C_None) then
                          {$endif ARM}
                            begin
                              DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2b done', p);
                              RemoveCurrentP(p);
                              Result := True;
                            end;
                          Exit;
                        end
                      else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
                        begin
                          if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
                             { Be careful - if the entire register is not used, removing this
                               instruction will leave the unused part uninitialised }
                             {$ifdef AARCH64}
                             and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
                             {$endif AARCH64}
                             then
                            begin
                              { Instruction will become mov r1,r1 }
                              DebugMsg(SPeepholeOptimization + 'Mov2None 2 done', next_hp);

                              { Allocate r1 between the instructions; not doing
                                so may cause problems when removing superfluous
                                MOVs later (i38055) }
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              if (next_hp = hp1) then
                                { Don't let hp1 become a dangling pointer }
                                hp1 := nil;

                              asml.Remove(next_hp);
                              next_hp.Free;
                              Continue;
                            end;

                          { Change the old register (checking the first operand again
                            forces it to be left alone if the full register is not
                            used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
                          {$ifdef AARCH64}
                          if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
                          {$endif AARCH64}
                            begin
                              DebugMsg(SPeepholeOptimization + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
                              taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);

                              { If this was the only reference to the old register,
                                then we can remove the original MOV now }
                              if (taicpu(p).oppostfix = PF_None) and
                                 { A bit of a hack - sometimes registers aren't tracked properly, so do not
                                   remove if the register was apparently not allocated when its value is
                                   first set at the MOV command (this is especially true for the stack
                                   register). [Kit] }
                                 (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
                                 RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
                                 not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
                                begin
                                  DebugMsg(SPeepholeOptimization + 'RedundantMovProcess 2c done', p);
                                  RemoveCurrentP(p);
                                  Result := True;
                                  Exit;
                                end;
                            end;
                        end;
                    end;

                  { On low optimisation settings, don't search more than one instruction ahead }
                  if not(cs_opt_level3 in current_settings.optimizerswitches) or
                     { Stop at procedure calls and jumps }
                     is_calljmp(taicpu(next_hp).opcode) or
                     { If the read register has changed value, or the MOV
                       destination register has been used, drop out }
                     RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
                     RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
                    Break;

                  current_hp := next_hp;
                end;
            end;
        end;
    end;
  { Peephole handler for p = "uxtb reg2,reg1".  Looks at the next instruction
    that involves reg2 (hp1) and tries, in this order, to

      - store reg1 directly when hp1 is a strb of reg2,
      - collapse a following uxth / uxtb / "and #..FF" of reg2 into one uxtb,
      - (AArch64) fold the extension into add/sub/cmp/cmn via USxtOp2Op,
      - fold a following mov via RemoveSuperfluousMove.

    Returns True if one of the transformations was applied. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if not GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        exit;

      {
        change
          uxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
         assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
         { the reference in strb must not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          Result:=RemoveCurrentP(p);
        end
      {
        change
          uxtb reg2,reg1
          uxth reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          { p survives and takes over the destination of hp1 }
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          Result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxtb reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
         (taicpu(hp1).ops = 2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          { p survives and takes over the destination of hp1 }
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          Result:=true;
        end
      {
        change
          uxtb reg2,reg1
          and reg3,reg2,#0x*FF
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         { the mask keeps at least the low 8 bits }
         ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          { here hp1 survives: it is turned into the uxtb and p is dropped }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          Result:=RemoveCurrentP(p);
        end
      {$ifdef AARCH64}
      else if USxtOp2Op(p,hp1,SM_UXTB) then
        Result:=true
      {$endif AARCH64}
      else if RemoveSuperfluousMove(p, hp1, 'UxtbMov2Uxtb') then
        Result:=true;
    end;
  { Peephole handler for p = "uxth reg2,reg1".  Looks at the next instruction
    that involves reg2 (hp1) and tries, in this order, to

      - store reg1 directly when hp1 is a strh of reg2,
      - collapse a following uxth / "and #..FFFF" of reg2 into one uxth,
      - (AArch64) fold the extension into add/sub/cmp/cmn via USxtOp2Op,
      - fold a following mov via RemoveSuperfluousMove.

    Returns True if one of the transformations was applied. }
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if not GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        exit;

      {
        change
          uxth reg2,reg1
          strh reg2,[...]
          dealloc reg2
        to
          strh reg1,[...]
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { the reference in strh must not use reg2 }
         not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UXTHStrh2Strh done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          Result:=RemoveCurrentP(p);
        end
      {
        change
          uxth reg2,reg1
          uxth reg3,reg2
          dealloc reg2
        to
          uxth reg3,reg1
      }
      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
         (taicpu(hp1).ops=2) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxthUxth2Uxth done', p);
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
          { hp1 survives and reads reg1 directly; p is dropped }
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          Result:=RemoveCurrentP(p);
        end
      {
        change
          uxth reg2,reg1
          and reg3,reg2,#65535
          dealloc reg2
        to
          uxth reg3,reg1
      }
      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
         (taicpu(p).ops=2) and
         MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[2]^.typ=top_const) and
         { the mask keeps at least the low 16 bits }
         ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
         RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
         { reg1 must not be modified in between }
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxthAndImm2Uxth done', p);
          { hp1 is turned into the uxtb's 16-bit sibling and p is dropped }
          taicpu(hp1).opcode:=A_UXTH;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          Result:=RemoveCurrentP(p);
        end
      {$ifdef AARCH64}
      else if USxtOp2Op(p,hp1,SM_UXTH) then
        Result:=true
      {$endif AARCH64}
      else if RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
        Result:=true;
    end;
  { Pass 1 peephole optimisations that start at a SXTB instruction.

    p is the SXTB being examined.  The instruction returned by
    GetNextInstructionUsingReg for the destination register of p is inspected
    and, if one of the patterns below matches, the pair is folded into a single
    instruction.  Result is set when a transformation was applied; in the
    branches that delete p it is whatever RemoveCurrentP returns, and p is
    updated by that call. }
  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
            sxtb reg2,reg1
            strb reg2,[...]
            dealloc reg2
            to
            strb reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            { the value being stored must really be reg2, otherwise replacing
              the stored register by reg1 would store the wrong value }
            (taicpu(hp1).oper[0]^.typ=top_reg) and
            (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbStrb2Strb done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
            sxtb reg2,reg1
            sxth reg3,reg2
            dealloc reg2
            to
            sxtb reg3,reg1
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbSxth2Sxtb done', p);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              { keep p, retarget it to reg3 and drop the second extension }
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
            sxtb reg2,reg1
            sxtb reg3,reg2
            dealloc reg2
            to
            sxtb reg3,reg1
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
              AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end
          {
            change
            sxtb reg2,reg1
            and  reg3,reg2,#0xFF
            dealloc reg2
            to
            uxtb reg3,reg1

            The mask has to be exactly $FF: after a sign extension the bits
            above bit 7 are copies of the sign bit, so any wider mask would
            keep some of them while uxtb clears them all.
          }
          else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=$FF) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
              taicpu(hp1).opcode:=A_UXTB;
              taicpu(hp1).ops:=2;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          else if USxtOp2Op(p,hp1,SM_SXTB) then
            Result:=true
{$endif AARCH64}
          { hp1 is fetched again because USxtOp2Op receives it as a var
            parameter }
          else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
            RemoveSuperfluousMove(p, hp1, 'SxtbMov2Sxtb') then
            Result:=true;
        end;
    end;
  { Pass 1 peephole optimisations that start at a SXTH instruction.

    p is the SXTH being examined.  The instruction returned by
    GetNextInstructionUsingReg for the destination register of p is inspected
    and, if one of the patterns below matches, the pair is folded into a single
    instruction.  Result is set when a transformation was applied; in the
    branches that delete p it is whatever RemoveCurrentP returns, and p is
    updated by that call. }
  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) then
        begin
          {
            change
            sxth reg2,reg1
            strh reg2,[...]
            dealloc reg2
            to
            strh reg1,[...]
          }
          if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
            { the value being stored must really be reg2, otherwise replacing
              the stored register by reg1 would store the wrong value }
            (taicpu(hp1).oper[0]^.typ=top_reg) and
            (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { the reference in strh might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthStrh2Strh done', p);
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
          {
            change
            sxth reg2,reg1
            sxth reg3,reg2
            dealloc reg2
            to
            sxth reg3,reg1
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthSxth2Sxth done', p);
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              taicpu(hp1).opcode:=A_SXTH;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          {
            change
            sxth reg2,reg1
            sxtw reg3,reg2
            dealloc reg2
            to
            sxth reg3,reg1
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_SXTW, [C_None], [PF_None]) and
            (taicpu(hp1).ops=2) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthSxtw2Sxth done', p);
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              taicpu(hp1).opcode:=A_SXTH;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$endif AARCH64}
          {
            change
            sxth reg2,reg1
            and  reg3,reg2,#65535
            dealloc reg2
            to
            uxth reg3,reg1

            The mask has to be exactly $FFFF: after a sign extension the bits
            above bit 15 are copies of the sign bit, so any wider mask would
            keep some of them while uxth clears them all.
          }
          else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
            (taicpu(p).ops=2) and
            MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
            (taicpu(hp1).ops=3) and
            (taicpu(hp1).oper[2]^.typ=top_const) and
            (taicpu(hp1).oper[2]^.val=$FFFF) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole SxthAndImm2Uxth done', p);
              taicpu(hp1).opcode:=A_UXTH;
              taicpu(hp1).ops:=2;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              result:=RemoveCurrentP(p);
            end
{$ifdef AARCH64}
          else if USxtOp2Op(p,hp1,SM_SXTH) then
            Result:=true
{$endif AARCH64}
          { hp1 is fetched again because USxtOp2Op receives it as a var
            parameter }
          else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
            RemoveSuperfluousMove(p, hp1, 'SxthMov2Sxth') then
            Result:=true;
        end;
    end;
  { Pre-pass simplification of SBFX/UBFX.

    Convert:
      s/ubfx reg1,reg2,#0,#64 (or #32 for 32-bit registers)
    To:
      mov    reg1,reg2

    The instruction is rewritten in place; Result is True when the rewrite
    happened.  Operands 2 and 3 of p are read as constants without a type
    check, so the caller is expected to pass a four-operand bitfield
    extract. }
  function TARMAsmOptimizer.OptPreSBFXUBFX(var p: tai): Boolean;
    var
      CoversWholeRegister: Boolean;
    begin
      Result := False;

      { The extract has to start at bit 0 ... }
      if taicpu(p).oper[2]^.val <> 0 then
        Exit;

      { ... and span the full width of the destination register }
{$ifdef AARCH64}
      case getsubreg(taicpu(p).oper[0]^.reg) of
        R_SUBQ:
          CoversWholeRegister := (taicpu(p).oper[3]^.val = 64);
        R_SUBD:
          CoversWholeRegister := (taicpu(p).oper[3]^.val = 32);
        else
          CoversWholeRegister := False;
      end;
{$else AARCH64}
      CoversWholeRegister := (taicpu(p).oper[3]^.val = 32);
{$endif AARCH64}
      if not CoversWholeRegister then
        Exit;

      DebugMsg(SPeepholeOptimization + 'SBFX or UBFX -> MOV (full bitfield extract)', p);
      taicpu(p).opcode := A_MOV;
      taicpu(p).ops := 2;
      { Drop the now unused lsb/width operands }
      taicpu(p).clearop(2);
      taicpu(p).clearop(3);
      Result := True;
    end;
  { Pass 1 peephole optimisations for an unconditional two-operand LDR.

    Only the instruction directly following p is considered, and only when it
    is unconditional and carries the same postfix as p.  Handled pairs:

      ldr reg1,[ref]; str reg1,[ref]  -> the str is removed
      ldr reg1,[ref]; ldr reg1,[ref]  -> the second ldr is removed
      ldr reg1,[ref]; ldr reg2,[ref]  -> the second ldr becomes mov reg2,reg1
                                         (integer registers of equal
                                         sub-register size only)

    Result is True when the instruction stream was changed. }
  function TARMAsmOptimizer.OptPass1LDR(var p : tai) : Boolean;
    var
      hp1: tai;
      Reference: TReference;
      LoadedReg: TRegister;
    begin
      Result := False;
      if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
        Exit;

      Reference := taicpu(p).oper[1]^.ref^;
      LoadedReg := taicpu(p).oper[0]^.reg;

      if not (
          (Reference.addressmode = AM_OFFSET) and
          not RegInRef(LoadedReg, Reference) and
          { Delay calling GetNextInstruction for as long as possible }
          GetNextInstruction(p, hp1) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).condition = C_None) and
          (taicpu(hp1).oppostfix = taicpu(p).oppostfix)
        ) then
        Exit;

      case taicpu(hp1).opcode of
        A_STR:
          { With:
              ldr reg1,[ref]
              str reg2,[ref]

            If reg1 = reg2, Remove str
          }
          if RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
            (getregtype(LoadedReg) = getregtype(taicpu(hp1).oper[0]^.reg)) and
            (LoadedReg = taicpu(hp1).oper[0]^.reg) then
            begin
              DebugMsg(SPeepholeOptimization + 'Removed redundant store instruction (load/store -> load/nop)', hp1);
              RemoveInstruction(hp1);
              Result := True;
            end;
        A_LDR:
          { With:
              ldr reg1,[ref]
              ldr reg2,[ref]

            If reg1 = reg2, delete the second ldr
            If reg1 <> reg2, changing the 2nd ldr to a mov might introduce
              a dependency, but it will likely open up new optimisations, so
              do it for now and handle any new dependencies later.
          }
          if RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
            begin
              if LoadedReg = taicpu(hp1).oper[0]^.reg then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed duplicate load instruction (load/load -> load/nop)', hp1);
                  RemoveInstruction(hp1);
                  Result := True;
                end
              else if
                (getregtype(LoadedReg) = R_INTREGISTER) and
                (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
                (getsubreg(LoadedReg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
                begin
                  DebugMsg(SPeepholeOptimization + 'Changed second ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to mov (load/load -> load/move)', hp1);
                  taicpu(hp1).opcode := A_MOV;
                  taicpu(hp1).oppostfix := PF_None;
                  taicpu(hp1).loadreg(1, LoadedReg);
                  { reg1 is now read by the mov, so keep it allocated up to there }
                  AllocRegBetween(LoadedReg, p, hp1, UsedRegs);
                  Result := True;
                end;
            end;
        else
          ;
      end;
    end;
  { Pass 1 peephole optimisations for an unconditional two-operand STR.

    Only the instruction directly following p is considered, and only when it
    is unconditional, carries the same postfix as p and has a register as its
    first operand.  Handled pairs:

      str reg1,[ref]; ldr reg1,[ref]  -> the ldr is removed (no postfix only)
      str reg1,[ref]; ldr reg2,[ref]  -> the ldr becomes a mov or a zero/sign
                                         extension of reg1
      str reg1,[ref]; str reg1,[ref]  -> the second str is removed
      str reg1,[ref]; str reg2,[ref]  -> the first str is removed

    Result is True when the instruction stream was changed. }
  function TARMAsmOptimizer.OptPass1STR(var p : tai) : Boolean;
    var
      hp1: tai;
      Reference: TReference;
      StoredReg, NextReg: TRegister;
      NewOp: TAsmOp;
    begin
      Result := False;
      if (taicpu(p).ops <> 2) or (taicpu(p).condition <> C_None) then
        Exit;

      Reference := taicpu(p).oper[1]^.ref^;
      if not (
          (Reference.addressmode = AM_OFFSET) and
          not RegInRef(taicpu(p).oper[0]^.reg, Reference) and
          { Delay calling GetNextInstruction for as long as possible }
          GetNextInstruction(p, hp1) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).condition = C_None) and
          (taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
          (taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg)
        ) then
        Exit;

      { Saves constant dereferencing and makes it easier to change the size if necessary }
      StoredReg := taicpu(p).oper[0]^.reg;
      NextReg := taicpu(hp1).oper[0]^.reg;

      case taicpu(hp1).opcode of
        A_LDR:
          { NOTE(review): the guard above already demands identical postfixes,
            so the mixed B/SB, H/SH (and W/SW) alternatives listed here cannot
            currently be satisfied. }
          if RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
            (taicpu(hp1).oper[1]^.ref^.volatility=[]) and
            (
              (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
              ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
              ((taicpu(p).oppostfix = PF_H) and (taicpu(hp1).oppostfix = PF_SH))
{$ifdef AARCH64}
              or ((taicpu(p).oppostfix = PF_W) and (taicpu(hp1).oppostfix = PF_SW))
{$endif AARCH64}
            ) then
            begin
              { With:
                  str reg1,[ref]
                  ldr reg2,[ref]

                If reg1 = reg2, Remove ldr.
                If reg1 <> reg2, replace ldr with "mov reg2,reg1"
              }
              if (StoredReg = NextReg) and
                { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
                (taicpu(p).oppostfix=PF_None) then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed redundant load instruction (store/load -> store/nop)', hp1);
                  RemoveInstruction(hp1);
                  Result := True;
                  Exit;
                end
              else if (getregtype(StoredReg) = R_INTREGISTER) and
                (getregtype(NextReg) = R_INTREGISTER) and
                (getsubreg(StoredReg) = getsubreg(NextReg)) then
                begin
                  { Pick the register-to-register replacement for the load;
                    A_NONE means "leave the load alone" }
                  NewOp:=A_NONE;

                  if taicpu(hp1).oppostfix=PF_None then
                    NewOp:=A_MOV
                  else
{$ifdef ARM}
                    if (current_settings.cputype < cpu_armv6) then
                      begin
                        { The zero- and sign-extension operations were only
                          introduced under ARMv6 }
                        case taicpu(hp1).oppostfix of
                          PF_B:
                            begin
                              { The if-block afterwards will set the middle operand to the correct register }
                              taicpu(hp1).allocate_oper(3);
                              taicpu(hp1).ops := 3;
                              taicpu(hp1).loadconst(2, $FF);
                              NewOp := A_AND;
                            end;
                          PF_H:
                            { ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
                          PF_SB,
                          PF_SH:
                            { Do nothing - can't easily encode sign-extensions };
                          else
                            InternalError(2021043002);
                        end;
                      end
                    else
{$endif ARM}
                      case taicpu(hp1).oppostfix of
                        PF_B:
                          NewOp := A_UXTB;
                        PF_SB:
                          NewOp := A_SXTB;
                        PF_H:
                          NewOp := A_UXTH;
                        PF_SH:
                          NewOp := A_SXTH;
{$ifdef AARCH64}
                        PF_SW:
                          NewOp := A_SXTW;
                        PF_W:
                          NewOp := A_MOV;
{$endif AARCH64}
                        else
                          InternalError(2021043001);
                      end;

                  if (NewOp<>A_None) then
                    begin
                      DebugMsg(SPeepholeOptimization + 'Changed ldr' + oppostfix2str[taicpu(hp1).oppostfix] + ' to ' + gas_op2str[NewOp] + ' (store/load -> store/move)', hp1);

                      taicpu(hp1).oppostfix := PF_None;
                      taicpu(hp1).opcode := NewOp;
                      taicpu(hp1).loadreg(1, StoredReg);
                      { reg1 is now read by hp1, so keep it allocated up to there }
                      AllocRegBetween(StoredReg, p, hp1, UsedRegs);
                      Result := True;
                      Exit;
                    end;
                end
            end;
        A_STR:
          if RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) then
            begin
              { With:
                  str reg1,[ref]
                  str reg2,[ref]

                If reg1 <> reg2, delete the first str
                IF reg1 = reg2, delete the second str
              }
              if (StoredReg = NextReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
                  RemoveInstruction(hp1);
                  Result := True;
                  Exit;
                end
              else if
                { Registers same byte size? }
                (tcgsize2size[reg_cgsize(StoredReg)] = tcgsize2size[reg_cgsize(NextReg)]) and
                (taicpu(p).oper[1]^.ref^.volatility=[]) then
                begin
                  DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
                  RemoveCurrentP(p, hp1);
                  Result := True;
                  Exit;
                end;
            end;
        else
          ;
      end;
    end;
  { Pass 1 peephole optimisations that start at an AND instruction.

    For "and reg2,reg1,const" a number of follow-up instructions (another AND,
    STRB, UXTB/UXTH, SXTB/SXTH, MOV with LSL) are folded into, or replace, the
    AND.  Independently of the operand kinds, a following superfluous MOV of
    the result is handled through RemoveSuperfluousMove.

    Result is True when the instruction stream was changed; p may then point
    to a different instruction. }
  function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
      i: longint;
    begin
      Result:=false;
      {
        optimize
        and reg2,reg1,const1
        ...
      }
      if (taicpu(p).ops>2) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (taicpu(p).oper[2]^.typ = top_const) then
        begin
          {
            change
            and reg2,reg1,const1
            ...
            and reg3,reg2,const2
            to
            and reg3,reg1,(const1 and const2)
          }
          { NOTE(review): the 32-bit AArch64 alternative below tests for
            R_SUBL, whereas OptPreSBFXUBFX in this unit tests R_SUBD for
            32-bit registers - confirm which sub-register id is intended. }
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_const)
{$ifdef AARCH64}
            and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
                  ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
                 ) or
                 ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
{$endif AARCH64}
            then
            begin
              { Variant 1: reg3 is untouched between the two instructions, so
                the combined operation can be done at p and hp1 removed }
              if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                begin
                  DebugMsg('Peephole AndAnd2And done', p);
                  AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 1 done', p);
                      taicpu(p).opcode:=A_MOV;
                      taicpu(p).ops:=2;
                      taicpu(p).loadConst(1,0);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 1 done', p);
                      taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                    end;
                  asml.remove(hp1);
                  hp1.free;
                  Result:=true;
                  exit;
                end
              { Variant 2: reg1 is untouched between the two instructions, so
                the combined operation can be done at hp1 and p removed }
              else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                begin
                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
                    begin
                      DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
                      taicpu(hp1).opcode:=A_MOV;
                      taicpu(hp1).loadConst(1,0);
                      taicpu(hp1).ops:=2;
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                    end
                  else
                    begin
                      DebugMsg('Peephole AndAnd2And 2 done', hp1);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                      taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                    end;
                  GetNextInstruction(p, hp1);
                  RemoveCurrentP(p);
                  p:=hp1;
                  Result:=true;
                  exit;
                end;
            end
          {
            change
            and reg2,reg1,$xxxxxxFF
            strb reg2,[...]
            dealloc reg2
            to
            strb reg1,[...]
          }
          else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
            { the value being stored must really be reg2, otherwise replacing
              the stored register by reg1 would store the wrong value; only the
              super register is compared as the AArch64 path below re-sizes
              the stored register anyway }
            (taicpu(hp1).oper[0]^.typ=top_reg) and
            (getregtype(taicpu(hp1).oper[0]^.reg)=getregtype(taicpu(p).oper[0]^.reg)) and
            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)) and
            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
            { the reference in strb might not use reg2 }
            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndStrb2Strb done', p);
{$ifdef AARCH64}
              taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
{$else AARCH64}
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
{$endif AARCH64}
              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
              RemoveCurrentP(p);
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,255
            uxtb/uxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            ((((taicpu(p).oper[2]^.val and $ffffff00)=0) and MatchInstruction(hp1, A_UXTB, [C_None], [PF_None])) or
             (((taicpu(p).oper[2]^.val and $ffff0000)=0) and MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]))) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndUxt2And done', p);
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            change
            and reg2,reg1,x        (x has none of bits 7..31 set)
            sxtb/sxth reg3,reg2
            dealloc reg2
            to
            and reg3,reg1,x
          }
          else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
            (taicpu(hp1).ops = 2) and
            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            { reg1 might not be modified inbetween }
            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
            begin
              DebugMsg('Peephole AndSxt2And done', p);
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).ops:=3;
              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
              GetNextInstruction(p,hp1);
              asml.remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          {
            from
            and reg1,reg0,2^n-1
            mov reg2,reg1, lsl imm1
            (mov reg3,reg2, lsr/asr imm1)
            remove either the and or the lsl/xsr sequence if possible

            i receives n, as reported by ispowerof2 for const+1
          }
          else if (taicpu(p).oper[2]^.val < high(int64)) and
            cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
            MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
            (taicpu(hp1).ops=3) and
            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
            (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
            (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
            begin
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                mov reg3,reg2, lsr/asr imm1
                =>
                and reg3,reg0,2^n-1

                applied when n < 32-imm1, or n = 32-imm1 with lsr
              }
              { NOTE(review): the search for hp2 is keyed on reg1 (the
                destination of p) while the match below requires hp2 to read
                reg2 - confirm that this is intended. }
              if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
                MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
                (taicpu(hp2).ops=3) and
                MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
                (taicpu(hp2).oper[2]^.typ = top_shifterop) and
{$ifdef ARM}
                (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
{$endif ARM}
                (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
                (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
                RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
                ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
                 ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                  (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
                begin
                  DebugMsg('Peephole AndLslXsr2And done', p);
                  taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
                  asml.Remove(hp1);
                  asml.Remove(hp2);
                  hp1.free;
                  hp2.free;
                  result:=true;
                  exit;
                end
              {
                and reg1,reg0,2^n-1
                mov reg2,reg1, lsl imm1
                =>
                mov reg2,reg0, lsl imm1

                applied when n > 32-imm1
              }
              else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
                not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
                begin
                  DebugMsg('Peephole AndLsl2Lsl done', p);
                  taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
                  GetNextInstruction(p, hp1);
                  asml.Remove(p);
                  p.free;
                  p:=hp1;
                  result:=true;
                  exit;
                end
            end;
        end;
      {
        change
        and reg1, ...
        mov reg2, reg1
        to
        and reg2, ...
      }
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        (taicpu(p).ops>=3) and
        RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
        Result:=true;
    end;
  { Pass 2 peephole optimisation that merges an AND with a following
    "cmp reg,#0".

    p is the AND.  Result is True when the instruction stream was changed. }
  function TARMAsmOptimizer.OptPass2AND(var p: tai): Boolean;
    var
      hp1, hp2: tai;
      WorkingReg: TRegister;
    begin
      Result := False;
      {
        change
        and reg1, ...
        ...
        cmp reg1, #0
        b<ne/eq> @Lbl
        to
        ands reg1, ...

        Also:

        and reg1, ...
        ...
        cmp reg1, #0
        (reg1 end of life)
        b<ne/eq> @Lbl
        to
        tst reg1, ...
      }
      if (taicpu(p).condition = C_None) and
        (taicpu(p).ops>=3) and
        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
        MatchOperand(taicpu(hp1).oper[1]^, 0) and
{$ifdef AARCH64}
        (SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg)) and
        (
          (getsubreg(taicpu(hp1).oper[0]^.reg) = getsubreg(taicpu(p).oper[0]^.reg))
          or
          (
            (taicpu(p).oper[2]^.typ = top_const) and
            (taicpu(p).oper[2]^.val >= 0) and
            (taicpu(p).oper[2]^.val <= $FFFFFFFF)
          )
        ) and
{$else AARCH64}
        (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
{$endif AARCH64}
        not RegModifiedBetween(NR_DEFAULTFLAGS, p, hp1) and
        GetNextInstruction(hp1, hp2) then
        begin
          if MatchInstruction(hp2, [A_B, A_CMP, A_CMN, A_TST{$ifndef AARCH64}, A_TEQ{$endif not AARCH64}], [C_EQ, C_NE], [PF_None]) then
            begin
              AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);

              WorkingReg := taicpu(p).oper[0]^.reg;

              { The TST rewrite below only carries operands 1 and 2 over, so
                it is restricted to the three-operand form; an AND with a
                fourth (shifter) operand is turned into ANDS instead, which
                keeps all of its operands. }
              if (taicpu(p).ops = 3) and
                RegEndOfLife(WorkingReg, taicpu(hp1)) then
                begin
                  taicpu(p).opcode := A_TST;
                  taicpu(p).oppostfix := PF_None;
                  taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
                  taicpu(p).loadoper(1, taicpu(p).oper[2]^);
                  taicpu(p).ops := 2;
                  DebugMsg(SPeepholeOptimization + 'AND; CMP -> TST', p);
                end
              else
                begin
                  taicpu(p).oppostfix := PF_S;
                  DebugMsg(SPeepholeOptimization + 'AND; CMP -> ANDS', p);
                end;

              RemoveInstruction(hp1);

              { If a temporary register was used for and/cmp before, we might be
                able to deallocate the register so it can be used for other
                optimisations later }
              if (taicpu(p).opcode = A_TST) and TryRemoveRegAlloc(WorkingReg, p, p) then
                ExcludeRegFromUsedRegs(WorkingReg, UsedRegs);

              Result := True;
              Exit;
            end
          else if
            (hp2.typ = ait_label) or
            { Conditional comparison instructions have already been covered }
            RegModifiedByInstruction(NR_DEFAULTFLAGS, hp2) then
            begin
              { The comparison is a null operation }
              if RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                begin
                  DebugMsg(SPeepholeOptimization + 'AND; CMP -> nop', p);
                  RemoveInstruction(hp1);
                  RemoveCurrentP(p);
                end
              else
                begin
                  DebugMsg(SPeepholeOptimization + 'CMP -> nop', hp1);
                  RemoveInstruction(hp1);
                end;
              Result := True;
              Exit;
            end;
        end;
    end;
  { Pass 2 peephole optimisations for "tst; b.eq/b.ne" sequences.

    p is the TST.  When the branch is followed (possibly further down) by an
    AND or a TST with the same operands, the two are merged.  Result is True
    when the instruction stream was changed; p may then point to a different
    instruction. }
  function TARMAsmOptimizer.OptPass2TST(var p: tai): Boolean;
    var
      hp1, hp2: tai;
    begin
      Result := False;
      if
{$ifndef AARCH64}
        (taicpu(p).condition = C_None) and
{$endif AARCH64}
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_B, [C_EQ, C_NE], [PF_None]) and
        GetNextInstructionUsingReg(hp1, hp2, taicpu(p).oper[0]^.reg) and
        { hp2 is typecast to taicpu below; nothing visible here guarantees
          that the search returned an instruction, so check it first }
        (hp2.typ = ait_instruction) then
        begin
          case taicpu(hp2).opcode of
            A_AND:
              { Change:
                  tst  r1,##
                  (r2 not in use, or r2 = r1)
                  b.c  .Lbl
                  ...
                  and  r2,r1,##

                Optimise to:
                  ands r2,r1,##
                  b.c  .Lbl
                  ...
              }
              if (taicpu(hp2).oppostfix in [PF_None, PF_S]) and
{$ifndef AARCH64}
                (taicpu(hp2).condition = C_None) and
{$endif AARCH64}
                (taicpu(hp2).ops = taicpu(p).ops + 1) and
                not RegInUsedRegs(taicpu(hp2).oper[0]^.reg, UsedRegs) and
                MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^.reg) and
                MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[1]^) and
                (
                  (taicpu(hp2).ops = 3) or
                  MatchOperand(taicpu(hp2).oper[3]^, taicpu(p).oper[2]^)
                ) and
                (
                  not (cs_opt_level3 in current_settings.optimizerswitches) or
                  (
                    { Make sure the target register isn't used in between }
                    not RegUsedBetween(taicpu(hp2).oper[0]^.reg, hp1, hp2) and
                    (
                      { If the second operand is a register, make sure it isn't modified in between }
                      (taicpu(p).oper[1]^.typ <> top_reg) or
                      not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
                    )
                  )
                ) then
                begin
                  AllocRegBetween(taicpu(hp2).oper[0]^.reg, p, hp2, UsedRegs);

                  if (taicpu(hp2).oppostfix = PF_S) then
                    AllocRegBetween(NR_DEFAULTFLAGS, p, hp2, UsedRegs);

                  DebugMsg(SPeepholeOptimization + 'TST; B.c; AND -> ANDS; B.c (TstBcAnd2AndsBc)', p);
                  taicpu(hp2).oppostfix := PF_S;

                  { Move the (now flag-setting) AND in front of the branch and
                    drop the TST it replaces }
                  Asml.Remove(hp2);
                  Asml.InsertAfter(hp2, p);

                  RemoveCurrentP(p, hp2);
                  Result := True;

                  Exit;
                end;
            A_TST:
              { Change:
                  tst  r1,##
                  b.c  .Lbl
                  ... (flags not modified)
                  tst  r1,##

                Remove second tst
              }
              if
{$ifndef AARCH64}
                (taicpu(hp2).condition = C_None) and
{$endif AARCH64}
                (taicpu(hp2).ops = taicpu(p).ops) and
                MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) and
                MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) and
                (
                  (taicpu(hp2).ops = 2) or
                  MatchOperand(taicpu(hp2).oper[2]^, taicpu(p).oper[2]^)
                ) and
                (
                  not (cs_opt_level3 in current_settings.optimizerswitches) or
                  (
                    { Make sure the flags aren't modified in between }
                    not RegModifiedBetween(NR_DEFAULTFLAGS, hp1, hp2) and
                    (
                      { If the second operand is a register, make sure it isn't modified in between }
                      (taicpu(p).oper[1]^.typ <> top_reg) or
                      not RegModifiedBetween(taicpu(p).oper[1]^.reg, hp1, hp2)
                    )
                  )
                ) then
                begin
                  DebugMsg(SPeepholeOptimization + 'TST; B.c; TST -> TST; B.c (TstBcTst2TstBc)', p);

                  { The flags set by the first TST are now consumed further down }
                  AllocRegBetween(NR_DEFAULTFLAGS, hp1, hp2, UsedRegs);
                  RemoveInstruction(hp2);
                  Result := True;
                  Exit;
                end;
            else
              ;
          end;
        end;
    end;
  1694. end.