aoptcpu.pas 137 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  24. Type
  { CPU specific peephole optimizer, derived from the generic TAsmOptimizer }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
    { tries to remove the mov movp that copies the result register of p by
      letting p write to the destination of the mov directly; optimizer is
      the name shown in the debug message; returns true if movp was removed }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
    { updates AllUsedRegs with the register information following p and
      returns true if reg is still in use after p }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                     var AllUsedRegs: TAllUsedRegs): Boolean;
    { returns true if reg reaches its end of life at p, this means it is either
      reloaded with a new value or it is deallocated afterwards }
    function RegEndOfLife(reg: TRegister; p: taicpu): boolean;
    { gets the next tai object after Current that contains info relevant
      to the optimizer in Next which uses the given register or does a
      change in program flow.
      If there is none, it returns false and
      sets Next to nil }
    function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;
    { looks for the next LDR/STR after Current whose memory reference equals
      ref and returns it in Next; returns false if the search had to be
      given up before such an instruction was found. With StopOnStore set,
      any other STR/STM ends the search }
    function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  protected
    { p is an add/sub on a base register: try to fold it into a following
      ldr/str as pre-indexed addressing }
    function LookForPreindexedPattern(p: taicpu): boolean;
    { p is a ldr/str: try to fold a following add/sub of its base register
      into it as post-indexed addressing }
    function LookForPostindexedPattern(p: taicpu): boolean;
  End;
  { CPU specific instruction scheduler, derived from the generic TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 flavour of the peephole optimizer; overrides the pass entry
    points of TCpuAsmOptimizer }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  End;
  59. function MustBeLast(p : tai) : boolean;
  60. Implementation
  61. uses
  62. cutils,verbose,globtype,globals,
  63. systems,
  64. cpuinfo,
  65. cgobj,procinfo,
  66. aasmbase,aasmdata;
  { Returns true if p is an instruction that is not yet conditional and
    whose opcode is not excluded below. Always false when generating
    Thumb code. }
  function CanBeCond(p : tai) : boolean;
    var
      opc : tasmop;
    begin
      Result:=false;
      if GenerateThumbCode then
        exit;
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).condition<>C_None then
        exit;
      opc:=taicpu(p).opcode;
      { the IT family is excluded }
      if (opc>=A_IT) and (opc<=A_ITTTT) then
        exit;
      { as are cbz/cbnz and pld }
      if (opc=A_CBZ) or (opc=A_CBNZ) or (opc=A_PLD) then
        exit;
      { blx qualifies only when its first operand is a register }
      Result:=(opc<>A_BLX) or
        (taicpu(p).oper[0]^.typ=top_reg);
    end;
  { Field-wise comparison of two references; true only if every compared
    field is equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers, displacement and scale }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { index sign, shift and addressing mode }
      Result:=
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.shiftmode=r2.shiftmode) and
        (r1.addressmode=r2.addressmode);
    end;
  { Returns true if instr is an instruction whose opcode, condition and
    postfix are members of the given sets. An empty set matches anything. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { only opcodes with an ordinal below 256 are tested against the set }
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true if instr is an instruction with exactly the opcode op
    whose condition and postfix are members of the given sets. An empty
    set matches anything. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match if they are of the same kind and
    their constant, register, condition code or reference is equal;
    operands of any other kind never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Removes a "moveq reg,#const" that follows a "cmp reg,#const" with the
    same register and constant: when the move executes, reg already holds
    that value.

    cmpp - the compare instruction
    movp - the conditional move to examine; it is removed from asml and
           freed when it is redundant
    asml - the list both instructions live in

    Returns true if movp was removed.

    The operand kinds are verified before .reg/.val are read. These fields
    belong to different operand kinds, so reading them unchecked could
    compare a register against a constant. }
  function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
    begin
      Result:=false;
      if (taicpu(movp).condition = C_EQ) and
         { both instructions must address the same register ... }
         (taicpu(cmpp).oper[0]^.typ = top_reg) and
         (taicpu(movp).oper[0]^.typ = top_reg) and
         (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
         { ... and use the same immediate value }
         (taicpu(cmpp).oper[1]^.typ = top_const) and
         (taicpu(movp).oper[1]^.typ = top_const) and
         (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
        begin
          asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
          asml.remove(movp);
          movp.free;
          Result:=true;
        end;
    end;
  { Returns true if the instruction hp writes a new value to reg.

    Base registers updated by pre-/post-indexed addressing count as
    written. Returns false if hp is nil, is not an instruction, or has no
    operands at all. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      { an instruction without operands cannot write a register, and there is
        no oper[0] to look at below }
      if p.ops = 0 then
        exit;
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        {Take care of post/preincremented store and loads, they will change their base register}
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            {STR does not load into its first register}
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
        A_POP:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[0]^.regset^) or
                                   (reg=NR_STACK_POINTER_REG);
      end;
      if regLoadedWithNewValue then
        exit;
      { the generic case: look at the first operand }
      case p.oper[0]^.typ of
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also writes the register following the first one }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and
             (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        {LDM/STM might write a new value to their index register}
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (safe) heuristics which tell whether ref is known to be aligned to
    8 bytes. Only references relative to R13, or to R11 when it is the
    frame pointer, are ever accepted, and only for the ABIs listed below. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { non-negative offset from R13 which is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;
      { when using NR_R11, it has always a value of <qword align>+4 }
      Result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its
    operands: as a plain register, as a member of a register set, as the
    shift register of a shifter operand or as base/index of a reference.
    Returns false if hp is nil or not an instruction. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstop,
      opidx: longint;
    begin
      Result:=false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      instr:=taicpu(hp);
      { normally oper[0] is the destination and is skipped; for the
        instructions below oper[0] has to be checked as well }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;
      for opidx:=firstop to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              Result:=
                (instr.oper[opidx]^.reg = reg) or
                { STRD also stores the register following the first one }
                ((opidx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              Result:=getsupreg(reg) in instr.oper[opidx]^.regset^;
            top_shifterop:
              Result:=instr.oper[opidx]^.shifterop^.rs = reg;
            top_ref:
              Result:=
                (instr.oper[opidx]^.ref^.base = reg) or
                (instr.oper[opidx]^.ref^.index = reg);
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
        end;
    end;
  { Returns true if aoffset is accepted as constant offset of a load/store
    with postfix pf: -255..4095 when generating Thumb-2 code; otherwise
    |aoffset|<4096 for the postfixes PF_None and PF_B, and |aoffset|<256
    for all other postfixes. }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if GenerateThumb2Code then
        Result:=(aoffset>-256) and (aoffset<4096)
      else if pf in [PF_None,PF_B] then
        Result:=abs(aoffset)<4096
      else
        Result:=abs(aoffset)<256;
    end;
  { Updates AllUsedRegs with the register information following p and
    returns true if reg is still regarded as used after p. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      Result:=false;
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      { p itself overwrites reg }
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p refers to the instruction following the original p }
      if not GetNextInstruction(p,p) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Returns true if a deallocation of reg is found starting at the item
    after p, or if p itself loads reg with a new value. }
  function TCpuAsmOptimizer.RegEndOfLife(reg : TRegister;p : taicpu) : boolean;
    begin
      Result:=true;
      if assigned(FindRegDealloc(reg,tai(p.Next))) then
        exit;
      Result:=RegLoadedWithNewValue(reg,p);
    end;
  { Steps forward from Current and returns in Next the first item which is
    not an instruction, which uses reg, which is a call/jump or which
    modifies the PC. Without cs_opt_level3 only the directly following
    item is looked at. The result is the result of the last
    GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      Result:=GetNextInstruction(Next,Next);
      { keep skipping while the item found is an unrelated instruction }
      while Result and
            (cs_opt_level3 in current_settings.optimizerswitches) and
            (Next.typ=ait_instruction) and
            not(RegInInstruction(reg,Next)) and
            not(is_calljmp(taicpu(Next).opcode)) and
            not(RegModifiedByInstruction(NR_PC,Next)) do
        Result:=GetNextInstruction(Next,Next);
    end;
  { Steps forward from Current looking for an LDR or STR whose reference
    equals ref. Returns true with that instruction in Next if one is found.
    Returns false if the search ends first: at a non-instruction, a
    call/jump, a PC modification, any other STR/STM when StopOnStore is
    set, or after one step without cs_opt_level3. }
  function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
    Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    begin
      Next:=Current;
      while GetNextInstruction(Next,Next) do
        begin
          if Next.typ<>ait_instruction then
            break;
          if (taicpu(Next).opcode in [A_LDR, A_STR]) and
             RefsEqual(taicpu(Next).oper[1]^.ref^,ref) then
            begin
              {We've found an instruction LDR or STR with the same reference}
              Result:=true;
              exit;
            end;
          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             is_calljmp(taicpu(Next).opcode) or
             (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
             RegModifiedByInstruction(NR_PC,Next) then
            break;
        end;
      Result:=false;
    end;
  { Inserts s as a comment in front of p when DEBUG_AOPTCPU is defined;
    otherwise it is an empty inline procedure. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Turns
      <op>  reg1, ...
      mov   reg2, reg1
    into
      <op>  reg2, ...
    p is <op>, movp the mov. The change is only made if a deallocation of
    reg1 is found after movp. optimizer is the name put into the debug
    message. Returns true if movp was removed. }
  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      regalloc,
      regdealloc : tai_regalloc;
      prev : tai;
      srcreg,
      dstreg : TRegister;
    begin
      Result:=false;
      { movp has to be a plain mov with the condition of p and without shifterop }
      if not(MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
             (taicpu(movp).ops=2)) then
        exit;
      srcreg:=taicpu(p).oper[0]^.reg;
      dstreg:=taicpu(movp).oper[0]^.reg;
      { the mov must copy the first register of p }
      if not MatchOperand(taicpu(movp).oper[1]^, srcreg) then
        exit;
      { don't mess with moves to pc or lr }
      if (dstreg=NR_PC) or (dstreg=NR_R14) then
        exit;
      { the destination register of the mov might not be used between p and movp }
      if RegUsedBetween(dstreg,p,movp) then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (taicpu(p).opcode=A_CBZ) or (taicpu(p).opcode=A_CBNZ) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = dstreg) and
         (current_settings.cputype < cpu_armv6) then
        exit;
      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        will be optimized to
          str reg2, [reg1]
      }
      if not regLoadedWithNewValue(srcreg, p) then
        exit;
      regdealloc:=FindRegDeAlloc(srcreg,tai(movp.Next));
      if not assigned(regdealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { srcreg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regdealloc);
      regalloc:=FindRegAllocBackward(srcreg,tai(p.previous));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          regalloc.free;
          regdealloc.free;
        end
      else
        asml.InsertAfter(regdealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prev);
      regalloc:=FindRegAlloc(dstreg,tai(prev.Next));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          asml.InsertBefore(regalloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstreg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstreg);
      asml.remove(movp);
      movp.free;
    end;
  {
    optimize
      add/sub reg1,reg1,regY/const
      ...
      ldr/str regX,[reg1]
    into
      ldr/str regX,[reg1, regY/const]!

    p is the add/sub. On success only the ldr/str is rewritten; p itself is
    left in the list. Any opcode other than A_ADD is treated as a
    subtraction. Returns true if the ldr/str was changed.
  }
  function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
    var
      memop: tai;
      deltaok: boolean;
    begin
      Result:=false;
      { p must be "op reg1,reg1,x" and the next user of reg1 a plain ldr/str }
      if not(GenerateARMCode and
             (p.ops=3) and
             MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
             GetNextInstructionUsingReg(p, memop, p.oper[0]^.reg) and
             (not RegModifiedBetween(p.oper[0]^.reg, p, memop)) and
             MatchInstruction(memop, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB])) then
        exit;
      { the access has to be exactly [reg1] and must not transfer reg1 itself }
      if (taicpu(memop).oper[1]^.ref^.addressmode<>AM_OFFSET) or
         (taicpu(memop).oper[1]^.ref^.base<>p.oper[0]^.reg) or
         (taicpu(memop).oper[0]^.reg=p.oper[0]^.reg) or
         (taicpu(memop).oper[1]^.ref^.offset<>0) or
         (taicpu(memop).oper[1]^.ref^.index<>NR_NO) then
        exit;
      { the increment is either an unchanged register or a constant in range }
      case p.oper[2]^.typ of
        top_reg:
          deltaok:=not RegModifiedBetween(p.oper[2]^.reg, p, memop);
        top_const:
          deltaok:=
            (abs(p.oper[2]^.val) < 256) or
            ((abs(p.oper[2]^.val) < 4096) and
             (taicpu(memop).oppostfix in [PF_None,PF_B]));
        else
          deltaok:=false;
      end;
      if not deltaok then
        exit;

      taicpu(memop).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
      if p.oper[2]^.typ=top_reg then
        begin
          taicpu(memop).oper[1]^.ref^.index:=p.oper[2]^.reg;
          if p.opcode=A_ADD then
            taicpu(memop).oper[1]^.ref^.signindex:=1
          else
            taicpu(memop).oper[1]^.ref^.signindex:=-1;
        end
      else if p.opcode=A_ADD then
        taicpu(memop).oper[1]^.ref^.offset:=p.oper[2]^.val
      else
        taicpu(memop).oper[1]^.ref^.offset:=-p.oper[2]^.val;
      Result:=true;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p is the ldr/str. On success p is rewritten to post-indexed addressing
    and the add/sub is removed from the list and freed. Returns true if the
    optimization was done.

    Only a three operand add/sub is accepted: a fourth operand (a shifter
    operand) can not be expressed by the rewritten reference and would be
    lost. The third operand is looked at as a register only after it is
    known to be one.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
         { exactly "op reg1,reg1,x", no shifter operand }
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
         (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
         (
          { register increment: }
          ((taicpu(hp1).oper[2]^.typ=top_reg) and
           { don't apply the optimization if the (new) index register is loaded }
           (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
           not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
          ) or
          { constant increment: valid offset? }
          ((taicpu(hp1).oper[2]^.typ=top_const) and
           ((abs(taicpu(hp1).oper[2]^.val)<256) or
            ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
           )
          )
         ) and
         { don't apply the optimization if the base register is loaded }
         (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         GenerateARMCode then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          { the add/sub is now part of the memory access }
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  501. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  502. var
  503. hp1,hp2,hp3,hp4: tai;
  504. i, i2: longint;
  505. TmpUsedRegs: TAllUsedRegs;
  506. tempop: tasmop;
  507. oldreg: tregister;
  508. function IsPowerOf2(const value: DWord): boolean; inline;
  509. begin
  510. Result:=(value and (value - 1)) = 0;
  511. end;
  512. begin
  513. result := false;
  514. case p.typ of
  515. ait_instruction:
  516. begin
  517. {
  518. change
  519. <op> reg,x,y
  520. cmp reg,#0
  521. into
  522. <op>s reg,x,y
  523. }
  524. { this optimization can applied only to the currently enabled operations because
  525. the other operations do not update all flags and FPC does not track flag usage }
  526. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  527. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  528. GetNextInstruction(p, hp1) and
  529. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  530. (taicpu(hp1).oper[1]^.typ = top_const) and
  531. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  532. (taicpu(hp1).oper[1]^.val = 0) and
  533. GetNextInstruction(hp1, hp2) and
  534. { be careful here, following instructions could use other flags
  535. however after a jump fpc never depends on the value of flags }
  536. { All above instructions set Z and N according to the following
  537. Z := result = 0;
  538. N := result[31];
  539. EQ = Z=1; NE = Z=0;
  540. MI = N=1; PL = N=0; }
  541. (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
  542. { mov is also possible, but only if there is no shifter operand, it could be an rxx,
  543. we are too lazy to check if it is rxx or something else }
  544. (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
  545. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  546. begin
  547. DebugMsg('Peephole OpCmp2OpS done', p);
  548. taicpu(p).oppostfix:=PF_S;
  549. { move flag allocation if possible }
  550. GetLastInstruction(hp1, hp2);
  551. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  552. if assigned(hp2) then
  553. begin
  554. asml.Remove(hp2);
  555. asml.insertbefore(hp2, p);
  556. end;
  557. asml.remove(hp1);
  558. hp1.free;
  559. Result:=true;
  560. end
  561. else
  562. case taicpu(p).opcode of
  563. A_STR:
  564. begin
  565. { change
  566. str reg1,ref
  567. ldr reg2,ref
  568. into
  569. str reg1,ref
  570. mov reg2,reg1
  571. }
  572. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  573. (taicpu(p).oppostfix=PF_None) and
  574. (taicpu(p).condition=C_None) and
  575. GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
  576. MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
  577. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  578. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  579. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
  580. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
  581. begin
  582. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  583. begin
  584. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  585. asml.remove(hp1);
  586. hp1.free;
  587. end
  588. else
  589. begin
  590. taicpu(hp1).opcode:=A_MOV;
  591. taicpu(hp1).oppostfix:=PF_None;
  592. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  593. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  594. end;
  595. result := true;
  596. end
  597. { change
  598. str reg1,ref
  599. str reg2,ref
  600. into
  601. strd reg1,ref
  602. }
  603. else if (GenerateARMCode or GenerateThumb2Code) and
  604. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  605. (taicpu(p).oppostfix=PF_None) and
  606. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  607. GetNextInstruction(p,hp1) and
  608. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  609. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  610. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  611. { str ensures that either base or index contain no register, else ldr wouldn't
  612. use an offset either
  613. }
  614. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  615. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  616. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  617. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  618. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  619. begin
  620. DebugMsg('Peephole StrStr2Strd done', p);
  621. taicpu(p).oppostfix:=PF_D;
  622. asml.remove(hp1);
  623. hp1.free;
  624. result:=true;
  625. end;
  626. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  627. end;
  628. A_LDR:
  629. begin
  630. { change
  631. ldr reg1,ref
  632. ldr reg2,ref
  633. into ...
  634. }
  635. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  636. GetNextInstruction(p,hp1) and
  637. { ldrd is not allowed here }
  638. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  639. begin
  640. {
  641. ...
  642. ldr reg1,ref
  643. mov reg2,reg1
  644. }
  645. if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
  646. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  647. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  648. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  649. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  650. begin
  651. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  652. begin
  653. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  654. asml.remove(hp1);
  655. hp1.free;
  656. end
  657. else
  658. begin
  659. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  660. taicpu(hp1).opcode:=A_MOV;
  661. taicpu(hp1).oppostfix:=PF_None;
  662. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  663. end;
  664. result := true;
  665. end
  666. {
  667. ...
  668. ldrd reg1,ref
  669. }
  670. else if (GenerateARMCode or GenerateThumb2Code) and
  671. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  672. { ldrd does not allow any postfixes ... }
  673. (taicpu(p).oppostfix=PF_None) and
  674. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  675. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  676. { ldr ensures that either base or index contain no register, else ldr wouldn't
  677. use an offset either
  678. }
  679. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  680. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  681. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  682. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  683. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  684. begin
  685. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  686. taicpu(p).oppostfix:=PF_D;
  687. asml.remove(hp1);
  688. hp1.free;
  689. result:=true;
  690. end;
  691. end;
  692. {
  693. Change
  694. ldrb dst1, [REF]
  695. and dst2, dst1, #255
  696. into
  697. ldrb dst2, [ref]
  698. }
  699. if not(GenerateThumbCode) and
  700. (taicpu(p).oppostfix=PF_B) and
  701. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  702. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
  703. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  704. (taicpu(hp1).oper[2]^.typ = top_const) and
  705. (taicpu(hp1).oper[2]^.val = $FF) and
  706. not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  707. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  708. begin
  709. DebugMsg('Peephole LdrbAnd2Ldrb done', p);
  710. taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
  711. asml.remove(hp1);
  712. hp1.free;
  713. result:=true;
  714. end;
  715. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  716. { Remove superfluous mov after ldr
  717. changes
  718. ldr reg1, ref
  719. mov reg2, reg1
  720. to
  721. ldr reg2, ref
  722. conditions are:
  723. * no ldrd usage
  724. * reg1 must be released after mov
  725. * mov can not contain shifterops
  726. * ldr+mov have the same conditions
  727. * mov does not set flags
  728. }
  729. if (taicpu(p).oppostfix<>PF_D) and
  730. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  731. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
  732. Result:=true;
  733. end;
  734. A_MOV:
  735. begin
  736. { fold
  737. mov reg1,reg0, shift imm1
  738. mov reg1,reg1, shift imm2
  739. }
  740. if (taicpu(p).ops=3) and
  741. (taicpu(p).oper[2]^.typ = top_shifterop) and
  742. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  743. getnextinstruction(p,hp1) and
  744. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  745. (taicpu(hp1).ops=3) and
  746. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  747. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  748. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  749. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  750. begin
  751. { fold
  752. mov reg1,reg0, lsl 16
  753. mov reg1,reg1, lsr 16
  754. strh reg1, ...
  755. dealloc reg1
  756. to
  757. strh reg1, ...
  758. dealloc reg1
  759. }
  760. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  761. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  762. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  763. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  764. getnextinstruction(hp1,hp2) and
  765. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  766. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  767. begin
  768. CopyUsedRegs(TmpUsedRegs);
  769. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  770. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  771. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  772. begin
  773. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  774. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  775. asml.remove(p);
  776. asml.remove(hp1);
  777. p.free;
  778. hp1.free;
  779. p:=hp2;
  780. Result:=true;
  781. end;
  782. ReleaseUsedRegs(TmpUsedRegs);
  783. end
  784. { fold
  785. mov reg1,reg0, shift imm1
  786. mov reg1,reg1, shift imm2
  787. to
  788. mov reg1,reg0, shift imm1+imm2
  789. }
  790. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  791. { asr makes no use after a lsr, the asr can be foled into the lsr }
  792. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  793. begin
  794. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  795. { avoid overflows }
  796. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  797. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  798. SM_ROR:
  799. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  800. SM_ASR:
  801. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  802. SM_LSR,
  803. SM_LSL:
  804. begin
  805. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  806. InsertLLItem(p.previous, p.next, hp2);
  807. p.free;
  808. p:=hp2;
  809. end;
  810. else
  811. internalerror(2008072803);
  812. end;
  813. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  814. asml.remove(hp1);
  815. hp1.free;
  816. result := true;
  817. end
  818. { fold
  819. mov reg1,reg0, shift imm1
  820. mov reg1,reg1, shift imm2
  821. mov reg1,reg1, shift imm3 ...
  822. mov reg2,reg1, shift imm3 ...
  823. }
  824. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  825. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  826. (taicpu(hp2).ops=3) and
  827. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  828. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  829. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  830. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  831. begin
  832. { mov reg1,reg0, lsl imm1
  833. mov reg1,reg1, lsr/asr imm2
  834. mov reg2,reg1, lsl imm3 ...
  835. to
  836. mov reg1,reg0, lsl imm1
  837. mov reg2,reg1, lsr/asr imm2-imm3
  838. if
  839. imm1>=imm2
  840. }
  841. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  842. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  843. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  844. begin
  845. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  846. begin
  847. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  848. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  849. begin
  850. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
  851. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  852. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  853. asml.remove(hp1);
  854. asml.remove(hp2);
  855. hp1.free;
  856. hp2.free;
  857. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  858. begin
  859. taicpu(p).freeop(1);
  860. taicpu(p).freeop(2);
  861. taicpu(p).loadconst(1,0);
  862. end;
  863. result := true;
  864. end;
  865. end
  866. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  867. begin
  868. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
  869. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  870. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  871. asml.remove(hp2);
  872. hp2.free;
  873. result := true;
  874. end;
  875. end
  876. { mov reg1,reg0, lsr/asr imm1
  877. mov reg1,reg1, lsl imm2
  878. mov reg1,reg1, lsr/asr imm3 ...
  879. if imm3>=imm1 and imm2>=imm1
  880. to
  881. mov reg1,reg0, lsl imm2-imm1
  882. mov reg1,reg1, lsr/asr imm3 ...
  883. }
  884. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  885. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  886. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  887. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  888. begin
  889. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  890. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  891. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  892. asml.remove(p);
  893. p.free;
  894. p:=hp2;
  895. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  896. begin
  897. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  898. asml.remove(hp1);
  899. hp1.free;
  900. p:=hp2;
  901. end;
  902. result := true;
  903. end;
  904. end;
  905. end;
  906. { Change the common
  907. mov r0, r0, lsr #xxx
  908. and r0, r0, #yyy/bic r0, r0, #xxx
  909. and remove the superfluous and/bic if possible
  910. This could be extended to handle more cases.
  911. }
  912. if (taicpu(p).ops=3) and
  913. (taicpu(p).oper[2]^.typ = top_shifterop) and
  914. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  915. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  916. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  917. (hp1.typ=ait_instruction) and
  918. (taicpu(hp1).ops>=1) and
  919. (taicpu(hp1).oper[0]^.typ=top_reg) and
  920. (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  921. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  922. begin
  923. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  924. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  925. (taicpu(hp1).ops=3) and
  926. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  927. (taicpu(hp1).oper[2]^.typ = top_const) and
  928. { Check if the AND actually would only mask out bits being already zero because of the shift
  929. }
  930. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) =
  931. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  932. begin
  933. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  934. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  935. asml.remove(hp1);
  936. hp1.free;
  937. result:=true;
  938. end
  939. else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  940. (taicpu(hp1).ops=3) and
  941. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  942. (taicpu(hp1).oper[2]^.typ = top_const) and
  943. { Check if the BIC actually would only mask out bits being already zero because of the shift }
  944. (taicpu(hp1).oper[2]^.val<>0) and
  945. (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  946. begin
  947. DebugMsg('Peephole LsrBic2Lsr done', hp1);
  948. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  949. asml.remove(hp1);
  950. hp1.free;
  951. result:=true;
  952. end;
  953. end;
  954. { Change
  955. mov rx, ry, lsr/ror #xxx
  956. uxtb/uxth rz,rx/and rz,rx,0xFF
  957. dealloc rx
  958. to
  959. uxtb/uxth rz,ry,ror #xxx
  960. }
  961. if (taicpu(p).ops=3) and
  962. (taicpu(p).oper[2]^.typ = top_shifterop) and
  963. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  964. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  965. (GenerateThumb2Code) and
  966. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  967. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  968. begin
  969. if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  970. (taicpu(hp1).ops = 2) and
  971. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  972. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  973. begin
  974. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  975. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  976. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  977. taicpu(hp1).ops := 3;
  978. GetNextInstruction(p,hp1);
  979. asml.Remove(p);
  980. p.Free;
  981. p:=hp1;
  982. result:=true;
  983. exit;
  984. end
  985. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  986. (taicpu(hp1).ops=2) and
  987. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  988. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  989. begin
  990. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  991. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  992. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  993. taicpu(hp1).ops := 3;
  994. GetNextInstruction(p,hp1);
  995. asml.Remove(p);
  996. p.Free;
  997. p:=hp1;
  998. result:=true;
  999. exit;
  1000. end
  1001. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1002. (taicpu(hp1).ops = 3) and
  1003. (taicpu(hp1).oper[2]^.typ = top_const) and
  1004. (taicpu(hp1).oper[2]^.val = $FF) and
  1005. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1006. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1007. begin
  1008. taicpu(hp1).ops := 3;
  1009. taicpu(hp1).opcode := A_UXTB;
  1010. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1011. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1012. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1013. GetNextInstruction(p,hp1);
  1014. asml.Remove(p);
  1015. p.Free;
  1016. p:=hp1;
  1017. result:=true;
  1018. exit;
  1019. end;
  1020. end;
  1021. {
  1022. optimize
  1023. mov rX, yyyy
  1024. ....
  1025. }
  1026. if (taicpu(p).ops = 2) and
  1027. GetNextInstruction(p,hp1) and
  1028. (tai(hp1).typ = ait_instruction) then
  1029. begin
  1030. {
  1031. This changes the very common
  1032. mov r0, #0
  1033. str r0, [...]
  1034. mov r0, #0
  1035. str r0, [...]
  1036. and removes all superfluous mov instructions
  1037. }
  1038. if (taicpu(p).oper[1]^.typ = top_const) and
  1039. (taicpu(hp1).opcode=A_STR) then
  1040. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1041. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1042. GetNextInstruction(hp1, hp2) and
  1043. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1044. (taicpu(hp2).ops = 2) and
  1045. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1046. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1047. begin
  1048. DebugMsg('Peephole MovStrMov done', hp2);
  1049. GetNextInstruction(hp2,hp1);
  1050. asml.remove(hp2);
  1051. hp2.free;
  1052. result:=true;
  1053. if not assigned(hp1) then break;
  1054. end
  1055. {
  1056. This removes the first mov from
  1057. mov rX,...
  1058. mov rX,...
  1059. }
  1060. else if taicpu(hp1).opcode=A_MOV then
  1061. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1062. (taicpu(hp1).ops = 2) and
  1063. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1064. { don't remove the first mov if the second is a mov rX,rX }
  1065. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  1066. begin
  1067. DebugMsg('Peephole MovMov done', p);
  1068. asml.remove(p);
  1069. p.free;
  1070. p:=hp1;
  1071. GetNextInstruction(hp1,hp1);
  1072. result:=true;
  1073. if not assigned(hp1) then
  1074. break;
  1075. end;
  1076. end;
  1077. {
  1078. change
  1079. mov r1, r0
  1080. add r1, r1, #1
  1081. to
  1082. add r1, r0, #1
  1083. Todo: Make it work for mov+cmp too
  1084. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1085. }
  1086. if (taicpu(p).ops = 2) and
  1087. (taicpu(p).oper[1]^.typ = top_reg) and
  1088. (taicpu(p).oppostfix = PF_NONE) and
  1089. GetNextInstruction(p, hp1) and
  1090. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1091. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  1092. [taicpu(p).condition], []) and
  1093. {MOV and MVN might only have 2 ops}
  1094. (taicpu(hp1).ops >= 2) and
  1095. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  1096. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1097. (
  1098. (taicpu(hp1).ops = 2) or
  1099. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  1100. ) then
  1101. begin
  1102. { When we get here we still don't know if the registers match}
  1103. for I:=1 to 2 do
  1104. {
  1105. If the first loop was successful p will be replaced with hp1.
  1106. The checks will still be ok, because all required information
  1107. will also be in hp1 then.
  1108. }
  1109. if (taicpu(hp1).ops > I) and
  1110. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) and
  1111. { prevent certain combinations on thumb(2), this is only a safe approximation }
  1112. (not(GenerateThumbCode or GenerateThumb2Code) or
  1113. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  1114. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15))
  1115. ) then
  1116. begin
  1117. DebugMsg('Peephole RedundantMovProcess done', hp1);
  1118. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  1119. if p<>hp1 then
  1120. begin
  1121. asml.remove(p);
  1122. p.free;
  1123. p:=hp1;
  1124. Result:=true;
  1125. end;
  1126. end;
  1127. end;
  1128. { Fold the very common sequence
  1129. mov regA, regB
  1130. ldr* regA, [regA]
  1131. to
  1132. ldr* regA, [regB]
  1133. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1134. }
  1135. if (taicpu(p).opcode = A_MOV) and
  1136. (taicpu(p).ops = 2) and
  1137. (taicpu(p).oper[1]^.typ = top_reg) and
  1138. (taicpu(p).oppostfix = PF_NONE) and
  1139. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1140. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1141. { We can change the base register only when the instruction uses AM_OFFSET }
  1142. ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1143. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1144. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1145. ) and
  1146. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1147. // Make sure that Thumb code doesn't propagate a high register into a reference
  1148. ((GenerateThumbCode and
  1149. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or
  1150. (not GenerateThumbCode)) and
  1151. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1152. begin
  1153. DebugMsg('Peephole MovLdr2Ldr done', hp1);
  1154. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1155. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1156. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1157. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1158. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1159. GetNextInstruction(p, hp1);
  1160. asml.remove(p);
  1161. p.free;
  1162. p:=hp1;
  1163. result:=true;
  1164. end;
  1165. { This folds shifterops into following instructions
  1166. mov r0, r1, lsl #8
  1167. add r2, r3, r0
  1168. to
  1169. add r2, r3, r1, lsl #8
  1170. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1171. }
  1172. if (taicpu(p).opcode = A_MOV) and
  1173. (taicpu(p).ops = 3) and
  1174. (taicpu(p).oper[1]^.typ = top_reg) and
  1175. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1176. (taicpu(p).oppostfix = PF_NONE) and
  1177. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1178. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1179. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1180. A_CMP, A_CMN],
  1181. [taicpu(p).condition], [PF_None]) and
  1182. (not ((GenerateThumb2Code) and
  1183. (taicpu(hp1).opcode in [A_SBC]) and
  1184. (((taicpu(hp1).ops=3) and
  1185. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
  1186. ((taicpu(hp1).ops=2) and
  1187. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
  1188. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1189. (taicpu(hp1).ops >= 2) and
  1190. {Currently we can't fold into another shifterop}
  1191. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  1192. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1193. NR_DEFAULTFLAGS for modification}
  1194. (
  1195. {Everything is fine if we don't use RRX}
  1196. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1197. (
  1198. {If it is RRX, then check if we're just accessing the next instruction}
  1199. GetNextInstruction(p, hp2) and
  1200. (hp1 = hp2)
  1201. )
  1202. ) and
  1203. { reg1 must not be modified in between }
  1204. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1205. { The shifterop can contain a register, which must not be modified either }
  1206. (
  1207. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1208. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
  1209. ) and
  1210. (
  1211. {Only ONE of the two src operands is allowed to match}
  1212. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  1213. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  1214. ) then
  1215. begin
  1216. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  1217. I2:=0
  1218. else
  1219. I2:=1;
  1220. for I:=I2 to taicpu(hp1).ops-1 do
  1221. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  1222. begin
  1223. { If the parameter matched on the second op from the RIGHT
  1224. we have to switch the parameters; this will not happen for CMP
  1225. where we are only evaluating the rightmost parameter
  1226. }
  1227. if I <> taicpu(hp1).ops-1 then
  1228. begin
  1229. {The SUB operators need to be changed when we swap parameters}
  1230. case taicpu(hp1).opcode of
  1231. A_SUB: tempop:=A_RSB;
  1232. A_SBC: tempop:=A_RSC;
  1233. A_RSB: tempop:=A_SUB;
  1234. A_RSC: tempop:=A_SBC;
  1235. else tempop:=taicpu(hp1).opcode;
  1236. end;
  1237. if taicpu(hp1).ops = 3 then
  1238. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1239. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  1240. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1241. else
  1242. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1243. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1244. taicpu(p).oper[2]^.shifterop^);
  1245. end
  1246. else
  1247. if taicpu(hp1).ops = 3 then
  1248. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  1249. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  1250. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1251. else
  1252. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  1253. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1254. taicpu(p).oper[2]^.shifterop^);
  1255. asml.insertbefore(hp2, hp1);
  1256. GetNextInstruction(p, hp2);
  1257. asml.remove(p);
  1258. asml.remove(hp1);
  1259. p.free;
  1260. hp1.free;
  1261. p:=hp2;
  1262. DebugMsg('Peephole FoldShiftProcess done', p);
  1263. Result:=true;
  1264. break;
  1265. end;
  1266. end;
  1267. {
  1268. Fold
  1269. mov r1, r1, lsl #2
  1270. ldr/ldrb r0, [r0, r1]
  1271. to
  1272. ldr/ldrb r0, [r0, r1, lsl #2]
  1273. XXX: This still needs some work, as we quite often encounter something like
  1274. mov r1, r2, lsl #2
  1275. add r2, r3, #imm
  1276. ldr r0, [r2, r1]
  1277. which can't be folded because r2 is overwritten between the shift and the ldr.
  1278. We could try to shuffle the registers around and fold it into.
  1279. add r1, r3, #imm
  1280. ldr r0, [r1, r2, lsl #2]
  1281. }
  1282. if (not(GenerateThumbCode)) and
  1283. (taicpu(p).opcode = A_MOV) and
  1284. (taicpu(p).ops = 3) and
  1285. (taicpu(p).oper[1]^.typ = top_reg) and
  1286. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1287. { RRX is tough to handle, because it requires tracking the C-Flag,
  1288. it is also extremely unlikely to be emitted this way}
  1289. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1290. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1291. { thumb2 allows only lsl #0..#3 }
  1292. (not(GenerateThumb2Code) or
  1293. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1294. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1295. )
  1296. ) and
  1297. (taicpu(p).oppostfix = PF_NONE) and
  1298. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1299. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1300. (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1301. (GenerateThumb2Code and
  1302. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1303. ) and
  1304. (
  1305. {If this is address by offset, one of the two registers can be used}
  1306. ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1307. (
  1308. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1309. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1310. )
  1311. ) or
  1312. {For post and preindexed only the index register can be used}
  1313. ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1314. (
  1315. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1316. (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1317. ) and
  1318. (not GenerateThumb2Code)
  1319. )
  1320. ) and
  1321. { Only fold if there isn't another shifterop already, and offset is zero. }
  1322. (taicpu(hp1).oper[1]^.ref^.offset = 0) and
  1323. (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
  1324. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1325. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1326. begin
  1327. { If the register we want to do the shift for resides in base, we need to swap that}
  1328. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1329. taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
  1330. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1331. taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1332. taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1333. DebugMsg('Peephole FoldShiftLdrStr done', hp1);
  1334. GetNextInstruction(p, hp1);
  1335. asml.remove(p);
  1336. p.free;
  1337. p:=hp1;
  1338. Result:=true;
  1339. end;
  1340. {
  1341. Often we see shifts and then a superfluous mov to another register
  1342. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1343. }
  1344. if (taicpu(p).opcode = A_MOV) and
  1345. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1346. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  1347. Result:=true;
  1348. end;
  1349. A_ADD,
  1350. A_ADC,
  1351. A_RSB,
  1352. A_RSC,
  1353. A_SUB,
  1354. A_SBC,
  1355. A_AND,
  1356. A_BIC,
  1357. A_EOR,
  1358. A_ORR,
  1359. A_MLA,
  1360. A_MLS,
  1361. A_MUL:
  1362. begin
  1363. {
  1364. optimize
  1365. and reg2,reg1,const1
  1366. ...
  1367. }
  1368. if (taicpu(p).opcode = A_AND) and
  1369. (taicpu(p).ops>2) and
  1370. (taicpu(p).oper[1]^.typ = top_reg) and
  1371. (taicpu(p).oper[2]^.typ = top_const) then
  1372. begin
  1373. {
  1374. change
  1375. and reg2,reg1,const1
  1376. ...
  1377. and reg3,reg2,const2
  1378. to
  1379. and reg3,reg1,(const1 and const2)
  1380. }
  1381. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1382. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1383. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1384. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1385. (taicpu(hp1).oper[2]^.typ = top_const) then
  1386. begin
  1387. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1388. begin
  1389. DebugMsg('Peephole AndAnd2And done', p);
  1390. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1391. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1392. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1393. asml.remove(hp1);
  1394. hp1.free;
  1395. Result:=true;
  1396. end
  1397. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1398. begin
  1399. DebugMsg('Peephole AndAnd2And done', hp1);
  1400. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1401. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1402. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1403. GetNextInstruction(p, hp1);
  1404. asml.remove(p);
  1405. p.free;
  1406. p:=hp1;
  1407. Result:=true;
  1408. end;
  1409. end
  1410. {
  1411. change
  1412. and reg2,reg1,$xxxxxxFF
  1413. strb reg2,[...]
  1414. dealloc reg2
  1415. to
  1416. strb reg1,[...]
  1417. }
  1418. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1419. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1420. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1421. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1422. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1423. { the reference in strb must not use reg2 }
  1424. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1425. { reg1 must not be modified in between }
  1426. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1427. begin
  1428. DebugMsg('Peephole AndStrb2Strb done', p);
  1429. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1430. GetNextInstruction(p, hp1);
  1431. asml.remove(p);
  1432. p.free;
  1433. p:=hp1;
  1434. result:=true;
  1435. end
  1436. {
  1437. change
  1438. and reg2,reg1,255
  1439. uxtb/uxth reg3,reg2
  1440. dealloc reg2
  1441. to
  1442. and reg3,reg1,x
  1443. }
  1444. else if (taicpu(p).oper[2]^.val = $FF) and
  1445. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1446. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1447. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1448. (taicpu(hp1).ops = 2) and
  1449. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1450. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1451. { reg1 must not be modified in between }
  1452. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1453. begin
  1454. DebugMsg('Peephole AndUxt2And done', p);
  1455. taicpu(hp1).opcode:=A_AND;
  1456. taicpu(hp1).ops:=3;
  1457. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1458. taicpu(hp1).loadconst(2,255);
  1459. GetNextInstruction(p,hp1);
  1460. asml.remove(p);
  1461. p.Free;
  1462. p:=hp1;
  1463. result:=true;
  1464. end
  1465. {
  1466. from
  1467. and reg1,reg0,2^n-1
  1468. mov reg2,reg1, lsl imm1
  1469. (mov reg3,reg2, lsr/asr imm1)
  1470. remove either the and or the lsl/xsr sequence if possible
  1471. }
  1472. else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1473. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1474. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1475. (taicpu(hp1).ops=3) and
  1476. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1477. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1478. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1479. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1480. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1481. begin
  1482. {
  1483. and reg1,reg0,2^n-1
  1484. mov reg2,reg1, lsl imm1
  1485. mov reg3,reg2, lsr/asr imm1
  1486. =>
  1487. and reg1,reg0,2^n-1
  1488. if lsr and n<=32-imm1 or asr and n<32-imm1
  1489. }
  1490. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1491. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1492. (taicpu(hp2).ops=3) and
  1493. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1494. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1495. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1496. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1497. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1498. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1499. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1500. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1501. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1502. begin
  1503. DebugMsg('Peephole AndLslXsr2And done', p);
  1504. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1505. asml.Remove(hp1);
  1506. asml.Remove(hp2);
  1507. hp1.free;
  1508. hp2.free;
  1509. result:=true;
  1510. end
  1511. {
  1512. and reg1,reg0,2^n-1
  1513. mov reg2,reg1, lsl imm1
  1514. =>
  1515. mov reg2,reg0, lsl imm1
  1516. if n>32-imm1
  1517. }
  1518. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1519. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1520. begin
  1521. DebugMsg('Peephole AndLsl2Lsl done', p);
  1522. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1523. GetNextInstruction(p, hp1);
  1524. asml.Remove(p);
  1525. p.free;
  1526. p:=hp1;
  1527. result:=true;
  1528. end
  1529. end;
  1530. end;
  1531. {
  1532. change
  1533. add/sub reg2,reg1,const1
  1534. str/ldr reg3,[reg2,const2]
  1535. dealloc reg2
  1536. to
  1537. str/ldr reg3,[reg1,const2+/-const1]
  1538. }
  1539. if (not GenerateThumbCode) and
  1540. (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1541. (taicpu(p).ops>2) and
  1542. (taicpu(p).oper[1]^.typ = top_reg) and
  1543. (taicpu(p).oper[2]^.typ = top_const) then
  1544. begin
  1545. hp1:=p;
  1546. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1547. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1548. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1549. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1550. { don't optimize if the register is stored/overwritten }
  1551. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1552. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1553. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1554. { new offset must be valid: either in the range of 8 or 12 bit, depending on the
  1555. ldr postfix }
  1556. (((taicpu(p).opcode=A_ADD) and
  1557. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1558. ) or
  1559. ((taicpu(p).opcode=A_SUB) and
  1560. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1561. )
  1562. ) do
  1563. begin
  1564. { neither reg1 nor reg2 may be changed in between }
  1565. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1566. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1567. break;
  1568. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1569. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1570. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1571. begin
  1572. { remember last instruction }
  1573. hp2:=hp1;
  1574. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1575. hp1:=p;
  1576. { fix all ldr/str }
  1577. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1578. begin
  1579. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1580. if taicpu(p).opcode=A_ADD then
  1581. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1582. else
  1583. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1584. if hp1=hp2 then
  1585. break;
  1586. end;
  1587. GetNextInstruction(p,hp1);
  1588. asml.remove(p);
  1589. p.free;
  1590. p:=hp1;
  1591. result:=true;
  1592. break;
  1593. end;
  1594. end;
  1595. end;
  1596. {
  1597. change
  1598. add reg1, ...
  1599. mov reg2, reg1
  1600. to
  1601. add reg2, ...
  1602. }
  1603. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1604. (taicpu(p).ops>=3) and
  1605. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1606. Result:=true;
  1607. if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  1608. LookForPreindexedPattern(taicpu(p)) then
  1609. begin
  1610. GetNextInstruction(p,hp1);
  1611. DebugMsg('Peephole Add/Sub to Preindexed done', p);
  1612. asml.remove(p);
  1613. p.free;
  1614. p:=hp1;
  1615. Result:=true;
  1616. end;
  1617. {
  1618. Turn
  1619. mul reg0, z,w
  1620. sub/add x, y, reg0
  1621. dealloc reg0
  1622. into
  1623. mls/mla x,z,w,y
  1624. }
  1625. if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
  1626. (taicpu(p).ops=3) and
  1627. (taicpu(p).oper[0]^.typ = top_reg) and
  1628. (taicpu(p).oper[1]^.typ = top_reg) and
  1629. (taicpu(p).oper[2]^.typ = top_reg) and
  1630. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1631. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  1632. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1633. (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
  1634. (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
  1635. ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
  1636. // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
  1637. // TODO: A workaround would be to swap Rm and Rs
  1638. (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
  1639. (((taicpu(hp1).ops=3) and
  1640. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1641. ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  1642. (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
  1643. ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1644. (taicpu(hp1).opcode=A_ADD) and
  1645. (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
  1646. ((taicpu(hp1).ops=2) and
  1647. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1648. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1649. (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
  1650. begin
  1651. if taicpu(hp1).opcode=A_ADD then
  1652. begin
  1653. taicpu(hp1).opcode:=A_MLA;
  1654. if taicpu(hp1).ops=3 then
  1655. begin
  1656. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
  1657. oldreg:=taicpu(hp1).oper[2]^.reg
  1658. else
  1659. oldreg:=taicpu(hp1).oper[1]^.reg;
  1660. end
  1661. else
  1662. oldreg:=taicpu(hp1).oper[0]^.reg;
  1663. taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
  1664. taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
  1665. taicpu(hp1).loadreg(3,oldreg);
  1666. DebugMsg('MulAdd2MLA done', p);
  1667. taicpu(hp1).ops:=4;
  1668. asml.remove(p);
  1669. p.free;
  1670. p:=hp1;
  1671. end
  1672. else
  1673. begin
  1674. taicpu(hp1).opcode:=A_MLS;
  1675. taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
  1676. if taicpu(hp1).ops=2 then
  1677. taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
  1678. else
  1679. taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
  1680. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  1681. DebugMsg('MulSub2MLS done', p);
  1682. taicpu(hp1).ops:=4;
  1683. asml.remove(p);
  1684. p.free;
  1685. p:=hp1;
  1686. end;
  1687. result:=true;
  1688. end
  1689. end;
  1690. {$ifdef dummy}
  1691. A_MVN:
  1692. begin
  1693. {
  1694. change
  1695. mvn reg2,reg1
  1696. and reg3,reg4,reg2
  1697. dealloc reg2
  1698. to
  1699. bic reg3,reg4,reg1
  1700. }
  1701. if (taicpu(p).oper[1]^.typ = top_reg) and
  1702. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1703. MatchInstruction(hp1,A_AND,[],[]) and
  1704. (((taicpu(hp1).ops=3) and
  1705. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1706. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1707. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1708. ((taicpu(hp1).ops=2) and
  1709. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1710. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1711. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1712. { reg1 might not be modified inbetween }
  1713. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1714. begin
  1715. DebugMsg('Peephole MvnAnd2Bic done', p);
  1716. taicpu(hp1).opcode:=A_BIC;
  1717. if taicpu(hp1).ops=3 then
  1718. begin
  1719. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1720. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1721. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1722. end
  1723. else
  1724. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1725. GetNextInstruction(p, hp1);
  1726. asml.remove(p);
  1727. p.free;
  1728. p:=hp1;
  1729. end;
  1730. end;
  1731. {$endif dummy}
  1732. A_UXTB:
  1733. begin
  1734. {
  1735. change
  1736. uxtb reg2,reg1
  1737. strb reg2,[...]
  1738. dealloc reg2
  1739. to
  1740. strb reg1,[...]
  1741. }
  1742. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1743. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1744. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1745. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1746. { the reference in strb might not use reg2 }
  1747. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1748. { reg1 might not be modified inbetween }
  1749. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1750. begin
  1751. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1752. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1753. GetNextInstruction(p,hp2);
  1754. asml.remove(p);
  1755. p.free;
  1756. p:=hp2;
  1757. result:=true;
  1758. end
  1759. {
  1760. change
  1761. uxtb reg2,reg1
  1762. uxth reg3,reg2
  1763. dealloc reg2
  1764. to
  1765. uxtb reg3,reg1
  1766. }
  1767. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1768. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1769. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1770. (taicpu(hp1).ops = 2) and
  1771. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1772. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1773. { reg1 might not be modified inbetween }
  1774. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1775. begin
  1776. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1777. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1778. asml.remove(hp1);
  1779. hp1.free;
  1780. result:=true;
  1781. end
  1782. {
  1783. change
  1784. uxtb reg2,reg1
  1785. uxtb reg3,reg2
  1786. dealloc reg2
  1787. to
  1788. uxtb reg3,reg1
  1789. }
  1790. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1791. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1792. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1793. (taicpu(hp1).ops = 2) and
  1794. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1795. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1796. { reg1 might not be modified inbetween }
  1797. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1798. begin
  1799. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1800. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1801. asml.remove(hp1);
  1802. hp1.free;
  1803. result:=true;
  1804. end
  1805. {
  1806. change
  1807. uxtb reg2,reg1
  1808. and reg3,reg2,#0x*FF
  1809. dealloc reg2
  1810. to
  1811. uxtb reg3,reg1
  1812. }
  1813. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1814. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1815. (taicpu(p).ops=2) and
  1816. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1817. (taicpu(hp1).ops=3) and
  1818. (taicpu(hp1).oper[2]^.typ=top_const) and
  1819. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  1820. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1821. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1822. { reg1 might not be modified inbetween }
  1823. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1824. begin
  1825. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  1826. taicpu(hp1).opcode:=A_UXTB;
  1827. taicpu(hp1).ops:=2;
  1828. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1829. GetNextInstruction(p,hp2);
  1830. asml.remove(p);
  1831. p.free;
  1832. p:=hp2;
  1833. result:=true;
  1834. end
  1835. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1836. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  1837. Result:=true;
  1838. end;
  1839. A_UXTH:
  1840. begin
  1841. {
  1842. change
  1843. uxth reg2,reg1
  1844. strh reg2,[...]
  1845. dealloc reg2
  1846. to
  1847. strh reg1,[...]
  1848. }
  1849. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1850. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1851. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1852. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1853. { the reference in strb might not use reg2 }
  1854. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1855. { reg1 might not be modified inbetween }
  1856. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1857. begin
  1858. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1859. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1860. GetNextInstruction(p, hp1);
  1861. asml.remove(p);
  1862. p.free;
  1863. p:=hp1;
  1864. result:=true;
  1865. end
  1866. {
  1867. change
  1868. uxth reg2,reg1
  1869. uxth reg3,reg2
  1870. dealloc reg2
  1871. to
  1872. uxth reg3,reg1
  1873. }
  1874. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1875. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1876. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1877. (taicpu(hp1).ops=2) and
  1878. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1879. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1880. { reg1 might not be modified inbetween }
  1881. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1882. begin
  1883. DebugMsg('Peephole UxthUxth2Uxth done', p);
  1884. taicpu(hp1).opcode:=A_UXTH;
  1885. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1886. GetNextInstruction(p, hp1);
  1887. asml.remove(p);
  1888. p.free;
  1889. p:=hp1;
  1890. result:=true;
  1891. end
  1892. {
  1893. change
  1894. uxth reg2,reg1
  1895. and reg3,reg2,#65535
  1896. dealloc reg2
  1897. to
  1898. uxth reg3,reg1
  1899. }
  1900. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1901. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1902. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1903. (taicpu(hp1).ops=3) and
  1904. (taicpu(hp1).oper[2]^.typ=top_const) and
  1905. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  1906. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1907. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1908. { reg1 might not be modified inbetween }
  1909. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1910. begin
  1911. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  1912. taicpu(hp1).opcode:=A_UXTH;
  1913. taicpu(hp1).ops:=2;
  1914. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1915. GetNextInstruction(p, hp1);
  1916. asml.remove(p);
  1917. p.free;
  1918. p:=hp1;
  1919. result:=true;
  1920. end
  1921. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1922. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  1923. Result:=true;
  1924. end;
  1925. A_CMP:
  1926. begin
  1927. {
  1928. change
  1929. cmp reg,const1
  1930. moveq reg,const1
  1931. movne reg,const2
  1932. to
  1933. cmp reg,const1
  1934. movne reg,const2
  1935. }
  1936. if (taicpu(p).oper[1]^.typ = top_const) and
  1937. GetNextInstruction(p, hp1) and
  1938. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1939. (taicpu(hp1).oper[1]^.typ = top_const) and
  1940. GetNextInstruction(hp1, hp2) and
  1941. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1942. (taicpu(hp1).oper[1]^.typ = top_const) then
  1943. begin
  1944. Result:=RemoveRedundantMove(p, hp1, asml) or Result;
  1945. Result:=RemoveRedundantMove(p, hp2, asml) or Result;
  1946. end;
  1947. end;
  1948. A_STM:
  1949. begin
  1950. {
  1951. change
  1952. stmfd r13!,[r14]
  1953. sub r13,r13,#4
  1954. bl abc
  1955. add r13,r13,#4
  1956. ldmfd r13!,[r15]
  1957. into
  1958. b abc
  1959. }
  1960. if not(ts_thumb_interworking in current_settings.targetswitches) and
  1961. MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
  1962. GetNextInstruction(p, hp1) and
  1963. GetNextInstruction(hp1, hp2) and
  1964. SkipEntryExitMarker(hp2, hp2) and
  1965. GetNextInstruction(hp2, hp3) and
  1966. SkipEntryExitMarker(hp3, hp3) and
  1967. GetNextInstruction(hp3, hp4) and
  1968. (taicpu(p).oper[0]^.typ = top_ref) and
  1969. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  1970. (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1971. (taicpu(p).oper[0]^.ref^.offset=0) and
  1972. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  1973. (taicpu(p).oper[1]^.typ = top_regset) and
  1974. (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
  1975. MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
  1976. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1977. (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
  1978. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
  1979. (taicpu(hp1).oper[2]^.typ = top_const) and
  1980. MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
  1981. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
  1982. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
  1983. MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
  1984. MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
  1985. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1986. MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
  1987. MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
  1988. (taicpu(hp4).oper[1]^.typ = top_regset) and
  1989. (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
  1990. begin
  1991. asml.Remove(p);
  1992. asml.Remove(hp1);
  1993. asml.Remove(hp3);
  1994. asml.Remove(hp4);
  1995. taicpu(hp2).opcode:=A_B;
  1996. p.free;
  1997. hp1.free;
  1998. hp3.free;
  1999. hp4.free;
  2000. p:=hp2;
  2001. DebugMsg('Peephole Bl2B done', p);
  2002. end;
  2003. end;
  2004. end;
  2005. end;
  2006. end;
  2007. end;
  { Tells whether p has to be the final instruction of a run of instructions
    that is being made conditional (original note: instructions modifying the
    CPSR can be only the last instruction).

    The result is true when p is an instruction and at least one of the
    following holds:
      - its opcode is BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF or CMFE
        (MSR is commented out of the set),
      - it carries the S postfix,
      - it has at least one operand and the first operand is the register PC.
    For anything that is not an instruction the result is false. }
  function MustBeLast(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      { only real instructions can qualify; this also guards the cast below }
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if instr.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if instr.oppostfix=PF_S then
        Result:=true
      else
        Result:=(instr.ops>=1) and
                (instr.oper[0]^.typ=top_reg) and
                (instr.oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass: replaces short forward conditional branches by
    conditionally executed instructions. Only conditional A_B instructions
    are looked at, and only when no Thumb code is generated.

    Two shapes are handled:

      1)   Bxx xxx
           <several instructions>
         xxx:
       The skipped instructions get the inverse condition and the branch is
       removed.

      2)   Bcc xxx
           <several instructions 1>
           B yyy
         xxx:
           <several instructions 2>
         yyy:
       Block 1 gets the inverse condition, block 2 the original one, and both
       jumps are removed. This is only tried when block 1 did not end in an
       instruction for which MustBeLast is true.

    Whenever a rewrite happened, p already points at the first instruction
    after the removed conditional branch and the loop is continued without
    stepping to p.next. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,scan,condjump,uncondjump : tai;
      count : longint;
      newcond : tasmcond;
      sawfinal,stop : boolean;
    begin
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_B) and
             (taicpu(p).condition<>C_None) and
             not(GenerateThumbCode) then
            begin
              { count the instructions following the branch that could be
                made conditional; stop on labels, after an instruction that
                must be last, or once the counter exceeds 4 }
              count:=0;
              sawfinal:=false;
              GetNextInstruction(p,scan);
              while assigned(scan) and
                    (count<=4) and
                    CanBeCond(scan) and
                    (scan.typ<>ait_label) do
                begin
                  inc(count);
                  sawfinal:=MustBeLast(scan);
                  GetNextInstruction(scan,scan);
                  if sawfinal then
                    break;
                end;

              if assigned(scan) then
                begin
                  if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),scan) then
                    begin
                      { shape 1: the branch target directly follows the run }
                      if (count>0) and (count<=4) then
                        begin
                          newcond:=inverse_cond(taicpu(p).condition);
                          condjump:=p;
                          GetNextInstruction(p,scan);
                          p:=scan;
                          repeat
                            if scan.typ=ait_instruction then
                              taicpu(scan).condition:=newcond;
                            stop:=MustBeLast(scan);
                            GetNextInstruction(scan,scan);
                            if stop then
                              break;
                          until not(assigned(scan)) or
                                not(CanBeCond(scan)) or
                                (scan.typ=ait_label);
                          { the branch is removed only now: otherwise
                            GetNextInstruction could ignore the label if the
                            jump was its only user }
                          tasmlabel(taicpu(condjump).oper[0]^.ref^.symbol).decrefs;
                          asml.remove(condjump);
                          condjump.free;
                          continue;
                        end;
                    end
                  else if not sawfinal then
                    begin
                      { shape 2; not attempted if block 1 contains an
                        instruction which can not be optimized }
                      { uncondjump is the candidate for "B yyy" }
                      uncondjump:=scan;
                      { advance scan to xxx }
                      GetNextInstruction(scan,scan);
                      if assigned(uncondjump) and
                         assigned(scan) and
                         (count<=3) and
                         (uncondjump.typ=ait_instruction) and
                         (taicpu(uncondjump).is_jmp) and
                         (taicpu(uncondjump).condition=C_None) and
                         { real label and jump, no further references to the
                           label are allowed }
                         (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                         FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),scan) then
                        begin
                          count:=0;
                          { walk over block 2 }
                          GetNextInstruction(scan,scan);
                          while assigned(scan) and
                                CanBeCond(scan) do
                            begin
                              inc(count);
                              GetNextInstruction(scan,scan);
                            end;
                          { scan is expected to be at yyy: now }
                          if assigned(scan) and
                             FindLabel(tasmlabel(taicpu(uncondjump).oper[0]^.ref^.symbol),scan) then
                            begin
                              { block 1: executed when the branch is NOT taken }
                              newcond:=inverse_cond(taicpu(p).condition);
                              GetNextInstruction(p,scan);
                              condjump:=p;
                              p:=scan;
                              repeat
                                if scan.typ=ait_instruction then
                                  taicpu(scan).condition:=newcond;
                                GetNextInstruction(scan,scan);
                              until not(assigned(scan)) or
                                    not(CanBeCond(scan));
                              { restart behind the unconditional jump; block 2
                                gets the original branch condition }
                              GetNextInstruction(uncondjump,scan);
                              newcond:=inverse_cond(newcond);
                              GetNextInstruction(scan,scan);
                              repeat
                                taicpu(scan).condition:=newcond;
                                GetNextInstruction(scan,scan);
                              until not(assigned(scan)) or
                                    not(CanBeCond(scan)) or
                                    (scan.typ=ait_label);
                              { remove Bcc }
                              tasmlabel(taicpu(condjump).oper[0]^.ref^.symbol).decrefs;
                              asml.remove(condjump);
                              condjump.free;
                              { remove jmp }
                              tasmlabel(taicpu(uncondjump).oper[0]^.ref^.symbol).decrefs;
                              asml.remove(uncondjump);
                              uncondjump.free;
                              continue;
                            end;
                        end;
                    end;
                end;
            end;
          p:=tai(p.next);
        end;
    end;
  { ARM specific refinement of the inherited RegInInstruction.

    Returns true without consulting the inherited implementation when
      - p1 is a BL instruction (reported as using any register), or
      - p1 is an LDR/STR with the D postfix and Reg is the register whose
        super register number is one above that of the first operand.
    In every other case the inherited result is returned. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=true;
      if (p1.typ=ait_instruction) and (taicpu(p1).opcode=A_BL) then
        exit;
      { the D postfix variant also touches the register following oper[0] }
      if MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
         (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(Reg)) then
        exit;
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  const
    { Opcodes which write, or might write, to memory. The pre-regalloc
      scheduler consults this set before it moves a load in front of the
      preceding instruction. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { control transfers and breakpoint }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { STR family and store multiple }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining store opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD];
  { Adjusts the register live range information kept by the register
    allocator (cg.rg[].live_start / live_end) for the case that the two
    instructions p and hp1 are going to be swapped.

    Precondition: p is directly followed by hp1.

    - If the live range of a register referenced by p starts at p and hp1
      uses that register too, the start is moved to hp1.
    - If the live range of a register referenced by hp1 ends at hp1 and p
      uses that register too, the end is moved to p.

    Registers are collected from plain register operands, from base and
    index of references, from the shift register of shifter operands and from
    register sets (R0..R15). }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { moves the live end of reg from hp1 to p if p also uses reg }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { moves the live start of reg from p to hp1 if hp1 also uses reg }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop fixes up live ENDs, so the shift register has to be
              handled like every other operand kind here (it used to call
              CheckLiveStart, a copy/paste slip from the loop above) }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre register allocation scheduling pass.

    Walks the block from BlockStart to BlockEnd looking for the sequence
        p          (any instruction)
        load       (LDR/LDRB/LDRH/LDRSB/LDRSH)
        user       (instruction using the loaded register)
    and, if it is provably harmless, swaps p and the load so that the load is
    issued one instruction earlier. Register deallocations belonging to p are
    moved together with p, and the live range bookkeeping is updated through
    SwapRegLive.

    The parameter p is overwritten and used as the loop cursor. The function
    always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }

    { Decides whether the memory/PC effects of prev permit executing the load
      ldr before it. For now nothing is rescheduled if the previous
      instruction potentially changes a memory location, unless the accesses
      can be shown not to matter. }
    function MemoryOrderIrrelevant(prev,ldr : taicpu) : boolean;
      begin
        Result:=
          (not(prev.opcode in opcode_could_mem_write) and
           not(RegModifiedByInstruction(NR_PC,prev))
          ) or
          ((prev.opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
           ((ldr.oper[1]^.ref^.base=NR_PC) or
            (assigned(ldr.oper[1]^.ref^.symboldata) and
             (ldr.oper[1]^.ref^.offset=0)
            )
           ) or
           { try to prove that the memory accesses don't overlap }
           ((prev.opcode in [A_STRB,A_STRH,A_STR]) and
            (prev.oper[1]^.ref^.base=ldr.oper[1]^.ref^.base) and
            (prev.oppostfix=PF_None) and
            (ldr.oppostfix=PF_None) and
            (prev.oper[1]^.ref^.index=NR_NO) and
            (ldr.oper[1]^.ref^.index=NR_NO) and
            { get operand sizes and check if the offset distance is large enough to ensure no overlap }
            (abs(prev.oper[1]^.ref^.offset-ldr.oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(prev.oper[0]^.reg)],tcgsize2size[reg_cgsize(ldr.oper[0]^.reg)]))
           )
          );
      end;

    var
      loadinstr,userinstr,before,after,moved,insertpos : tai;
      pending : TAsmList;
    begin
      result:=true;
      { collects p and its deallocations until they are re-inserted }
      pending:=TAsmList.create_without_marker;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             GetNextInstruction(p,loadinstr) and
             (loadinstr.typ=ait_instruction) and
             (taicpu(loadinstr).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
             (taicpu(loadinstr).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
             MemoryOrderIrrelevant(taicpu(p),taicpu(loadinstr)) and
             GetNextInstruction(loadinstr,userinstr) and
             (userinstr.typ=ait_instruction) and
             { loaded register used by next instruction? }
             RegInInstruction(taicpu(loadinstr).oper[0]^.reg,userinstr) and
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(loadinstr).oper[0]^.reg,p)) and
             { same condition? }
             (taicpu(p).condition=taicpu(loadinstr).condition) and
             { first instruction must not change the register used as base }
             ((taicpu(loadinstr).oper[1]^.ref^.base=NR_NO) or
              not(RegModifiedByInstruction(taicpu(loadinstr).oper[1]^.ref^.base,p))
             ) and
             { first instruction must not change the register used as index }
             ((taicpu(loadinstr).oper[1]^.ref^.index=NR_NO) or
              not(RegModifiedByInstruction(taicpu(loadinstr).oper[1]^.ref^.index,p))
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(loadinstr).oper[1]^.ref^.addressmode = AM_OFFSET) or
              not(instructionLoadsFromReg(taicpu(loadinstr).oper[1]^.ref^.base,p))) then
            begin
              before:=tai(p.Previous);
              after:=tai(p.next);
              asml.Remove(p);

              { deallocations of registers used by p which sit in front of p
                travel together with p }
              while assigned(before) and (before.typ<>ait_instruction) do
                begin
                  moved:=before;
                  before:=tai(before.Previous);
                  if (moved.typ=ait_regalloc) and
                     (tai_regalloc(moved).ratype=ra_dealloc) and
                     RegInInstruction(tai_regalloc(moved).reg,p) then
                    begin
                      asml.Remove(moved);
                      pending.Concat(moved);
                    end;
                end;

              pending.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadinstr));

              { ... and so do those located between p and the load }
              while assigned(after) and (after.typ<>ait_instruction) do
                begin
                  moved:=after;
                  after:=tai(after.next);
                  if (moved.typ=ait_regalloc) and
                     (tai_regalloc(moved).ratype=ra_dealloc) and
                     RegInInstruction(tai_regalloc(moved).reg,p) then
                    begin
                      asml.Remove(moved);
                      pending.Concat(moved);
                    end;
                end;

              asml.Remove(loadinstr);

              { if there are address labels associated with the user
                instruction, those must stay with it (e.g. for GOT-less PIC) }
              insertpos:=userinstr;
              while assigned(userinstr.previous) and
                    (tai(userinstr.previous).typ<>ait_instruction) do
                begin
                  userinstr:=tai(userinstr.previous);
                  if (userinstr.typ=ait_label) and
                     (tai_label(userinstr).labsym.typ=AT_ADDR) then
                    insertpos:=userinstr;
                end;
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
{$endif DEBUG_PREREGSCHEDULER}
              { new order: load, then p with its deallocations, then user }
              asml.InsertBefore(loadinstr,insertpos);
              asml.InsertListBefore(insertpos,pending);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            p:=loadinstr
          else
            p:=tai(p.next);
        end;
      pending.Free;
    end;
  { Shortens the IT block that precedes p by one slot.

    Starting at the entry before p, the list is searched backwards; only
    instructions are counted (l is the 1-based distance in instructions) and
    the search gives up after 4 instructions or at the start of the list.
    The first IT/ITx instruction found ends the search:
      - a plain IT at distance 1 is removed from list and freed,
      - an ITx whose number of slots equals the distance l loses its last
        slot (e.g. ITTE at distance 3 becomes ITT),
      - in every other case the instruction is left untouched.

    list : the assembler list containing p
    p    : the entry whose preceding IT instruction is adjusted }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
            (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                 (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                     (l=1) then
                    begin
                      { the removed instruction is owned by nobody any more,
                        so release it instead of leaking it; hp is not
                        touched again because the loop is left right below }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific first peephole pass.

    The inherited pass is tried first; if it reports a change, true is
    returned. Otherwise the following rewrites are attempted on p:
      Stm2Push   - unconditional STM (FD/DB postfix), pre-indexed on the
                   stack pointer, register set without R8..R13 and R15
                   -> PUSH with the same register set
      Ldm2Pop    - unconditional LDM (FD/IA postfix), pre-indexed on the
                   stack pointer, register set without R8..R14
                   -> POP with the same register set
      AndR2Uxt   - two operand AND with constant 255 / 65535
                   -> UXTB / UXTH reg,reg
      AndRR2Uxt  - three operand AND with constant 255 / 65535
                   -> UXTB / UXTH using the first two operands

    p      : current list entry; replaced by the new instruction when a
             PUSH/POP is created
    result : true if anything was changed }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      { hp1 and hp2 are only needed by the disabled CBZ/CBNZ rewrite below }
      hp1,hp2 : tai;
    begin
      result:=false;
      if inherited PeepHoleOptPass1Cpu(p) then
        result:=true
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          DebugMsg('Peephole Stm2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced STM; it was leaked before, unlike in the
            Ldm2Pop case below }
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          DebugMsg('Peephole Str2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          DebugMsg('Peephole Ldm2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          DebugMsg('Peephole Ldr2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          DebugMsg('Peephole AndR2Uxt done', p);
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { and reg,#mask  ->  uxt reg,reg }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          DebugMsg('Peephole AndRR2Uxt done', p);
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { the constant operand is dropped by shrinking the operand count }
          taicpu(p).ops:=2;
          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
    end;
  { Second peephole pass for the Thumb-2 optimizer.

    Walks the block from BlockStart to BlockEnd looking for a conditional
    branch that merely skips a short run of instructions:

          Bxx   xxx
          <one to four instructions that can be made conditional>
       xxx:

    When found, the skipped instructions are given the inverse condition,
    an IT/ITT/ITTT/ITTTT instruction with that condition is inserted in
    place of the branch, the label's reference count is decremented and the
    branch is removed. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      WasLast: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_B) and
             (taicpu(p).condition<>C_None) then
            begin
              { Count how many instructions following the branch can be
                made conditional. The loop may leave l at 5, which the
                range check below rejects. }
              l:=0;
              GetNextInstruction(p, hp1);
              while assigned(hp1) and
                (l<=4) and
                CanBeCond(hp1) and
                { stop on labels }
                not(hp1.typ=ait_label) do
                begin
                  inc(l);
                  { MustBeLast has to be evaluated on the current
                    instruction before hp1 is advanced past it }
                  WasLast:=MustBeLast(hp1);
                  GetNextInstruction(hp1,hp1);
                  if WasLast then
                    break;
                end;

              { The conditions are evaluated left to right with short-circuit
                semantics, exactly as the original nested ifs did: FindLabel
                is called whenever hp1 is assigned, then the count is checked. }
              if assigned(hp1) and
                 FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) and
                 (l<=4) and (l>0) then
                begin
                  condition:=inverse_cond(taicpu(p).condition);
                  { hp2 keeps the branch; p moves on to the first skipped
                    instruction so the outer loop resumes there }
                  hp2:=p;
                  GetNextInstruction(p,hp1);
                  p:=hp1;
                  { Apply the inverse condition to every skipped instruction }
                  repeat
                    if hp1.typ=ait_instruction then
                      taicpu(hp1).condition:=condition;
                    WasLast:=MustBeLast(hp1);
                    GetNextInstruction(hp1,hp1);
                    if WasLast then
                      break;
                  until not(assigned(hp1)) or
                    not(CanBeCond(hp1)) or
                    (hp1.typ=ait_label);
                  { The branch is removed only now: removing it earlier
                    could make GetNextInstruction ignore the label if the
                    branch was its only user }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                  { NOTE(review): DecrementPreceedingIT is defined elsewhere;
                    presumably it shortens an IT block that covered the
                    branch being removed - confirm against its definition }
                  DecrementPreceedingIT(asml, hp2);
                  { One IT variant per number of conditionalised instructions;
                    it is inserted directly after the branch, i.e. before the
                    'Collapsed' comment }
                  case l of
                    1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                    2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                    3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                    4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                  end;
                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(hp2);
                  hp2.free;
                  { p already points at the next item to inspect }
                  continue;
                end;
            end;
          p := tai(p.next)
        end;
    end;
  { Post-peephole rewrites for a single Thumb-2 instruction.

    p      : the list entry to inspect; it is modified in place but never
             replaced or removed by this function.
    result : true when p was rewritten, false otherwise.

    Every branch rewrites p either into its flag-setting (S) form, into a
    two-operand form, or both. Rewrites that start setting flags are only
    done when NR_DEFAULTFLAGS is not currently in UsedRegs.
    NOTE(review): the purpose is presumably to let the assembler choose the
    shorter 16-bit Thumb encodings - this is not stated in the visible code. }
  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;

    { True when the flags register is not recorded as in use at p. }
    function FlagsAreFree: boolean;
      begin
        result:=not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
      end;

    { Brackets p with an alloc/dealloc of the flags register, records the
      flags as used and switches p to the S postfix. }
    procedure MakeFlagSetting;
      begin
        asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
        asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
        IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
        taicpu(p).oppostfix:=PF_S;
      end;

    begin
      result:=false;
      if p.typ <> ait_instruction then
        exit;

      { mov rd,#0..255  ->  movs rd,#imm }
      if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val >= 0) and
        (taicpu(p).oper[1]^.val < 256) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Movs done', p);
          MakeFlagSetting;
          result:=true;
        end
      { mvn rd,rm  ->  mvns rd,rm }
      else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole Mvn2Mvns done', p);
          MakeFlagSetting;
          result:=true;
        end
      { rsb rd,rn,#0  ->  rsbs rd,rn,#0 }
      else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val=0) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole Rsb2Rsbs done', p);
          MakeFlagSetting;
          result:=true;
        end
      { add/sub rd,rd,#0..255 (rd <> sp)  ->  adds/subs rd,#imm }
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val >= 0) and
        (taicpu(p).oper[2]^.val < 256) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          MakeFlagSetting;
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).ops := 2;
          result:=true;
        end
      { add/sub rd,rm (neither is sp)  ->  adds/subs rd,rm }
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
        (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          MakeFlagSetting;
          result:=true;
        end
      { add rd,rd,rm  ->  add rd,rm (flags are not touched here) }
      else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) then
        begin
          DebugMsg('Peephole AddRRR2AddRR done', p);
          { copy operand 2 down before shrinking the operand count, as the
            AddSub branch above does }
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).ops := 2;
          result:=true;
        end
      { op rd,rd,rm  ->  ops rd,rm }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole opXXY2opsXY done', p);
          MakeFlagSetting;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).ops := 2;
          result:=true;
        end
      { ops rd,rd,<reg|const>  ->  ops rd,<reg|const>; already flag-setting }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
        begin
          DebugMsg('Peephole opXXY2opXY done', p);
          if taicpu(p).oper[2]^.typ=top_reg then
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
          else
            taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).ops := 2;
          result:=true;
        end
      { op rd,rn,rd  ->  ops rd,rn: dropping the third operand is enough
        because operand 1 already holds the other source }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole opXYX2opsXY done', p);
          MakeFlagSetting;
          taicpu(p).ops := 2;
          result:=true;
        end
      { mov rd,rm,<shift>  ->  lsls/lsrs/asrs/rors rd,rm,<reg|imm> }
      else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
        (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_shifterop) and
        (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
        //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Shift done', p);
          MakeFlagSetting;
          { the shift mode becomes the opcode ... }
          case taicpu(p).oper[2]^.shifterop^.shiftmode of
            SM_LSL: taicpu(p).opcode:=A_LSL;
            SM_LSR: taicpu(p).opcode:=A_LSR;
            SM_ASR: taicpu(p).opcode:=A_ASR;
            SM_ROR: taicpu(p).opcode:=A_ROR;
          end;
          { ... and the shift amount (register or immediate) replaces the
            shifter operand }
          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
            taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
          else
            taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
          result:=true;
        end;
    end;
  { Unit initialization: assigns this unit's pre-register-allocation
    scheduler class and assembler optimizer class to the global class
    variables. }
  begin
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.