aoptcpu.pas 90 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
  24. Type
  { Peephole optimizer for the ARM target; specialises TAsmOptimizer. }
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. { no constructor is declared here; the inherited one is used unchanged }
  27. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  28. procedure PeepHoleOptPass2;override;
  29. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  { Tries to fold "p ; mov regB,regA" (regA being the first operand of p)
    into p writing regB directly, removing movp. optimizer is only used as
    the name shown in the emitted debug comment. }
  30. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  { Reports whether reg is still needed after instruction p; AllUsedRegs is
    updated with the item following p as a side effect. }
  31. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  32. var AllUsedRegs: TAllUsedRegs): Boolean;
  33. { Starting after Current, advances Next until it reaches an item that
  34. is not an instruction, an instruction that uses the given register or
  35. NR_PC, or a call/jump (i.e. a change in program flow).
  36. Returns false when the search ran out of instructions.
  37. NOTE(review): Next is presumably nil in that case - confirm against GetNextInstruction }
  38. Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
  39. { outputs a debug message into the assembler file (only when DEBUG_AOPTCPU is defined) }
  40. procedure DebugMsg(const s: string; p: tai);
  41. protected
  { Merges "ldr/str regX,[reg1]" with a later "add/sub reg1,reg1,..." into a
    post-indexed load/store; returns true when the rewrite was done. }
  42. function LookForPostindexedPattern(p: taicpu): boolean;
  43. End;
  { Scheduler run before register allocation (going by the class name);
    the implementation is not part of the visible source. }
  44. TCpuPreRegallocScheduler = class(TAsmScheduler)
  45. function SchedulerPass1Cpu(var p: tai): boolean;override;
  46. procedure SwapRegLive(p, hp1: taicpu);
  47. end;
  { Thumb-2 variant of the optimizer; overrides both peephole passes. }
  48. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  49. { no constructor is declared here; the inherited one is used unchanged }
  50. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  51. procedure PeepHoleOptPass2;override;
  52. End;
  53. function MustBeLast(p : tai) : boolean;
  54. Implementation
  55. uses
  56. cutils,verbose,globals,
  57. systems,
  58. cpuinfo,
  59. cgobj,cgutils,procinfo,
  60. aasmbase,aasmdata;
  { Reports whether p is an instruction that can still be given a condition.
    Rejected are: non-instructions, instructions that already carry a
    condition, every opcode in the A_IT..A_ITTTT range, CBZ, CBNZ and PLD.
    BLX is accepted only when its first operand is a register. }
  function CanBeCond(p : tai) : boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      if ins.condition<>C_None then
        exit;
      if (ins.opcode>=A_IT) and (ins.opcode<=A_ITTTT) then
        exit;
      case ins.opcode of
        A_CBZ,
        A_CBNZ,
        A_PLD:
          exit;
        A_BLX:
          result:=ins.oper[0]^.typ=top_reg;
        else
          result:=true;
      end;
    end;
  { Field-by-field comparison of two references; true only when every
    compared field (registers, offset, symbols, scaling, shift and
    addressing mode) is identical. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and displacement }
      if (r1.base<>r2.base) or (r1.index<>r2.index) or (r1.offset<>r2.offset) then
        exit;
      { index scaling, sign and shift }
      if (r1.scalefactor<>r2.scalefactor) or (r1.signindex<>r2.signindex) then
        exit;
      if (r1.shiftimm<>r2.shiftimm) or (r1.shiftmode<>r2.shiftmode) then
        exit;
      { symbolic parts }
      if (r1.symbol<>r2.symbol) or (r1.relsymbol<>r2.relsymbol) or (r1.refaddr<>r2.refaddr) then
        exit;
      Result:=r1.addressmode=r2.addressmode;
    end;
  { Checks whether instr is an instruction whose opcode, condition and
    postfix are members of the given sets. An empty set acts as a wildcard
    for that property. Opcodes with an ordinal of 256 or more never match a
    non-empty opcode set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if (op<>[]) and not((ord(ins.opcode)<256) and (ins.opcode in op)) then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly opcode op whose
    condition and postfix are members of the given sets. An empty set acts
    as a wildcard for that property. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if ins.opcode<>op then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Compares two operands. They match when they have the same operand type
    and, for registers, constants, condition codes and references, the same
    payload. Any other operand type never matches. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
      end;
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Deletes movp from asml when it is an EQ-conditional move that writes the
    value cmpp has just compared the same register against. A comment
    documenting the removal is left in its place.
    Both arguments are cast to taicpu without a type check and their first
    two operands are read as register and constant respectively. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpins,
      movins: taicpu;
    begin
      cmpins:=taicpu(cmpp);
      movins:=taicpu(movp);
      if movins.condition<>C_EQ then
        exit;
      if cmpins.oper[0]^.reg<>movins.oper[0]^.reg then
        exit;
      if cmpins.oper[1]^.val<>movins.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Reports whether instruction hp writes a new value into reg.
    Returns false when hp is nil or not an instruction.

    The opcode is inspected first for the special cases (compare/branch
    style instructions that write nothing, base-register writeback of
    LDR/STR, the second destination of the long multiplies, the register
    set of LDM). If none of these applies, the first operand decides. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR does not load into its first register }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor }
        {
          MCR/MRC is currently not supported in FPC
          A_MRC:
            regLoadedWithNewValue :=
              (p.oper[2]^.typ = top_reg) and
              (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;
      if regLoadedWithNewValue then
        exit;
      { An instruction without operands has no oper[0] to inspect and
        cannot write reg; bail out instead of dereferencing it. }
      if p.ops < 1 then
        exit;
      case p.oper[0]^.typ of
        { This is the case }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also writes the register following its first operand }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and
             (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic telling whether ref is known to be aligned to 8 bytes.
    Only answers true for the EABI-style ABIs listed below, and only for
    references that go through R13 or through R11 when R11 is the current
    frame pointer. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { R13 based: non-negative offset that is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;
      { R11 based: when using NR_R11, it has always a value of <qword align>+4 }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Reports whether instruction hp reads reg through any of its source
    operands. Returns false when hp is nil or not an instruction.

    Operand 0 is normally treated as the destination and skipped; for the
    opcodes listed below it is a source as well and is included. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i,
      first: longint;
    begin
      Result := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);
      { For these instructions we have to start on oper[0] }
      if p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                      A_CMP, A_CMN, A_TST, A_TEQ,
                      A_B, A_BL, A_BX, A_BLX,
                      A_SMLAL, A_UMLAL] then
        first:=0
      else
        first:=1;
      for i:=first to p.ops-1 do
        begin
          case p.oper[i]^.typ of
            top_reg:
              Result :=
                (p.oper[i]^.reg = reg) or
                { STRD also stores the register following its first operand }
                ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and
                 (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              Result := (getsupreg(reg) in p.oper[i]^.regset^);
            top_shifterop:
              Result := p.oper[i]^.shifterop^.rs = reg;
            top_ref:
              Result :=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
        end;
    end;
  { Checks whether aoffset fits the constant offset range of a load/store.
    For the CPU types in cpu_thumb2 the range is -255..4095 regardless of
    the postfix. Otherwise the magnitude must be below 4096 for the PF_None
    and PF_B postfixes and below 256 for all other postfixes. }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if current_settings.cputype in cpu_thumb2 then
        begin
          result := (aoffset>-256) and (aoffset<4096);
          exit;
        end;
      if pf in [PF_None,PF_B] then
        result := abs(aoffset)<4096
      else
        result := abs(aoffset)<256;
    end;
  { Reports whether reg is still needed after instruction p.
    AllUsedRegs is first updated with the item following p. reg counts as
    used afterwards when it is marked used there, p itself does not
    overwrite it, and either there is no following instruction, or the
    following instruction reads reg or does not overwrite it. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result := false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p refers to the instruction following the original p }
      if not GetNextInstruction(p,p) then
        begin
          Result := true;
          exit;
        end;
      Result :=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Advances Next, starting after Current, until one of the following holds:
      - GetNextInstruction fails (the function then returns false),
      - Next is not an instruction,
      - Next uses reg,
      - Next is a call or jump,
      - Next uses NR_PC.
    The result is that of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: inserts s as an assembler comment in front of p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug build: the message is dropped. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Folds
      <op> regA, ...        (p)
      mov  regB, regA       (movp)
    into
      <op> regB, ...
    provided that regA is deallocated after movp and all checks below hold.
    On success movp is removed and freed, the register allocation markers
    of regA and regB are adjusted, and a debug comment naming optimizer is
    emitted. Nothing happens when any check fails.

    p    : instruction producing regA in its first operand
    movp : candidate mov; must be a plain two operand mov with the same
           condition as p
    optimizer : name used in the debug comment only }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov might not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
         (taicpu(p).opcode<>A_CBZ) and
         (taicpu(p).opcode<>A_CBNZ) and
         {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) and
         { Take care to only do this for instructions which REALLY load to the first register.
           Otherwise
             str reg0, [reg1]
             mov reg2, reg0
           will be optimized to
             str reg2, [reg1]
         }
         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              { Only search for the allocation when there really is a
                preceding instruction to start from; otherwise hp1 must
                not be dereferenced. }
              alloc:=nil;
              if GetLastInstruction(p,hp1) and assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { allocation not found: keep the register allocated up to p }
                asml.InsertAfter(dealloc,p);
              { try to move the allocation of the target register }
              alloc:=nil;
              if GetLastInstruction(movp,hp1) and assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;
              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p must be a load/store whose second operand is the memory reference.
    Returns true when the add/sub was merged into p (and removed),
    false when nothing was changed.

    The add/sub is only accepted in its plain three operand form: a fourth
    (shifter) operand could not be carried over into the reference and
    would be lost.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
      upd : taicpu;
    begin
      Result:=false;
      { p has to address memory as plain [base] }
      if (p.oper[1]^.typ<>top_ref) or
         (p.oper[1]^.ref^.addressmode<>AM_OFFSET) or
         (p.oper[1]^.ref^.index<>NR_NO) or
         (p.oper[1]^.ref^.offset<>0) then
        exit;
      if not GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) then
        exit;
      { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
      if not MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) then
        exit;
      upd:=taicpu(hp1);
      { must be "add/sub base,base,<x>" without an additional shifter operand }
      if (upd.ops<>3) or
         (upd.oper[1]^.typ<>top_reg) or
         (upd.oper[0]^.reg<>p.oper[1]^.ref^.base) or
         (upd.oper[1]^.reg<>p.oper[1]^.ref^.base) then
        exit;
      case upd.oper[2]^.typ of
        top_reg:
          begin
            { don't apply the optimization if the (new) index register is loaded;
              this is only meaningful (and only checked) for a register operand }
            if p.oper[0]^.reg=upd.oper[2]^.reg then
              exit;
            if RegModifiedBetween(upd.oper[2]^.reg,p,hp1) then
              exit;
          end;
        top_const:
          begin
            { valid offset? }
            if not(
                 (abs(upd.oper[2]^.val)<256) or
                 ((abs(upd.oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
               ) then
              exit;
          end;
        else
          exit;
      end;
      { don't apply the optimization if the base register is loaded }
      if p.oper[0]^.reg=p.oper[1]^.ref^.base then
        exit;
      if RegModifiedBetween(upd.oper[0]^.reg,p,hp1) then
        exit;

      DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
      p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
      if upd.oper[2]^.typ=top_const then
        begin
          { constant step: sub becomes a negative offset }
          if upd.opcode=A_ADD then
            p.oper[1]^.ref^.offset:=upd.oper[2]^.val
          else
            p.oper[1]^.ref^.offset:=-upd.oper[2]^.val;
        end
      else
        begin
          { register step: sub becomes a negative index sign }
          p.oper[1]^.ref^.index:=upd.oper[2]^.reg;
          if upd.opcode=A_ADD then
            p.oper[1]^.ref^.signindex:=1
          else
            p.oper[1]^.ref^.signindex:=-1;
        end;
      asml.Remove(hp1);
      hp1.Free;
      Result:=true;
    end;
  406. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  407. var
  408. hp1,hp2: tai;
  409. i, i2: longint;
  410. TmpUsedRegs: TAllUsedRegs;
  411. tempop: tasmop;
  412. function IsPowerOf2(const value: DWord): boolean; inline;
  413. begin
  414. Result:=(value and (value - 1)) = 0;
  415. end;
  416. begin
  417. result := false;
  418. case p.typ of
  419. ait_instruction:
  420. begin
  421. {
  422. change
  423. <op> reg,x,y
  424. cmp reg,#0
  425. into
  426. <op>s reg,x,y
  427. }
  428. { this optimization can applied only to the currently enabled operations because
  429. the other operations do not update all flags and FPC does not track flag usage }
  430. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  431. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  432. GetNextInstruction(p, hp1) and
  433. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  434. (taicpu(hp1).oper[1]^.typ = top_const) and
  435. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  436. (taicpu(hp1).oper[1]^.val = 0) and
  437. GetNextInstruction(hp1, hp2) and
  438. { be careful here, following instructions could use other flags
  439. however after a jump fpc never depends on the value of flags }
  440. { All above instructions set Z and N according to the following
  441. Z := result = 0;
  442. N := result[31];
  443. EQ = Z=1; NE = Z=0;
  444. MI = N=1; PL = N=0; }
  445. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  446. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  447. begin
  448. DebugMsg('Peephole OpCmp2OpS done', p);
  449. taicpu(p).oppostfix:=PF_S;
  450. { move flag allocation if possible }
  451. GetLastInstruction(hp1, hp2);
  452. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  453. if assigned(hp2) then
  454. begin
  455. asml.Remove(hp2);
  456. asml.insertbefore(hp2, p);
  457. end;
  458. asml.remove(hp1);
  459. hp1.free;
  460. end
  461. else
  462. case taicpu(p).opcode of
  463. A_STR:
  464. begin
  465. { change
  466. str reg1,ref
  467. ldr reg2,ref
  468. into
  469. str reg1,ref
  470. mov reg2,reg1
  471. }
  472. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  473. (taicpu(p).oppostfix=PF_None) and
  474. GetNextInstruction(p,hp1) and
  475. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  476. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  477. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  478. begin
  479. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  480. begin
  481. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  482. asml.remove(hp1);
  483. hp1.free;
  484. end
  485. else
  486. begin
  487. taicpu(hp1).opcode:=A_MOV;
  488. taicpu(hp1).oppostfix:=PF_None;
  489. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  490. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  491. end;
  492. result := true;
  493. end
  494. { change
  495. str reg1,ref
  496. str reg2,ref
  497. into
  498. strd reg1,ref
  499. }
  500. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  501. (taicpu(p).oppostfix=PF_None) and
  502. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  503. GetNextInstruction(p,hp1) and
  504. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  505. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  506. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  507. { str ensures that either base or index contain no register, else ldr wouldn't
  508. use an offset either
  509. }
  510. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  511. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  512. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  513. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  514. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  515. begin
  516. DebugMsg('Peephole StrStr2Strd done', p);
  517. taicpu(p).oppostfix:=PF_D;
  518. asml.remove(hp1);
  519. hp1.free;
  520. end;
  521. LookForPostindexedPattern(taicpu(p));
  522. end;
  523. A_LDR:
  524. begin
  525. { change
  526. ldr reg1,ref
  527. ldr reg2,ref
  528. into ...
  529. }
  530. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  531. GetNextInstruction(p,hp1) and
  532. { ldrd is not allowed here }
  533. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  534. begin
  535. {
  536. ...
  537. ldr reg1,ref
  538. mov reg2,reg1
  539. }
  540. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  541. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  542. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  543. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  544. begin
  545. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  546. begin
  547. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  548. asml.remove(hp1);
  549. hp1.free;
  550. end
  551. else
  552. begin
  553. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  554. taicpu(hp1).opcode:=A_MOV;
  555. taicpu(hp1).oppostfix:=PF_None;
  556. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  557. end;
  558. result := true;
  559. end
  560. {
  561. ...
  562. ldrd reg1,ref
  563. }
  564. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  565. { ldrd does not allow any postfixes ... }
  566. (taicpu(p).oppostfix=PF_None) and
  567. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  568. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  569. { ldr ensures that either base or index contain no register, else ldr wouldn't
  570. use an offset either
  571. }
  572. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  573. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  574. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  575. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  576. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  577. begin
  578. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  579. taicpu(p).oppostfix:=PF_D;
  580. asml.remove(hp1);
  581. hp1.free;
  582. end;
  583. end;
  584. LookForPostindexedPattern(taicpu(p));
  585. { Remove superfluous mov after ldr
  586. changes
  587. ldr reg1, ref
  588. mov reg2, reg1
  589. to
  590. ldr reg2, ref
  591. conditions are:
  592. * no ldrd usage
  593. * reg1 must be released after mov
  594. * mov can not contain shifterops
  595. * ldr+mov have the same conditions
  596. * mov does not set flags
  597. }
  598. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  599. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  600. end;
  601. A_MOV:
  602. begin
  603. { fold
  604. mov reg1,reg0, shift imm1
  605. mov reg1,reg1, shift imm2
  606. }
  607. if (taicpu(p).ops=3) and
  608. (taicpu(p).oper[2]^.typ = top_shifterop) and
  609. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  610. getnextinstruction(p,hp1) and
  611. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  612. (taicpu(hp1).ops=3) and
  613. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  614. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  615. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  616. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  617. begin
  618. { fold
  619. mov reg1,reg0, lsl 16
  620. mov reg1,reg1, lsr 16
  621. strh reg1, ...
  622. dealloc reg1
  623. to
  624. strh reg1, ...
  625. dealloc reg1
  626. }
  627. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  628. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  629. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  630. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  631. getnextinstruction(hp1,hp2) and
  632. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  633. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  634. begin
  635. CopyUsedRegs(TmpUsedRegs);
  636. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  637. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  638. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  639. begin
  640. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  641. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  642. asml.remove(p);
  643. asml.remove(hp1);
  644. p.free;
  645. hp1.free;
  646. p:=hp2;
  647. end;
  648. ReleaseUsedRegs(TmpUsedRegs);
  649. end
  650. { fold
  651. mov reg1,reg0, shift imm1
  652. mov reg1,reg1, shift imm2
  653. to
  654. mov reg1,reg0, shift imm1+imm2
  655. }
  656. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  657. { asr makes no use after a lsr, the asr can be foled into the lsr }
  658. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  659. begin
  660. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  661. { avoid overflows }
  662. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  663. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  664. SM_ROR:
  665. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  666. SM_ASR:
  667. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  668. SM_LSR,
  669. SM_LSL:
  670. begin
  671. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  672. InsertLLItem(p.previous, p.next, hp1);
  673. p.free;
  674. p:=hp1;
  675. end;
  676. else
  677. internalerror(2008072803);
  678. end;
  679. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  680. asml.remove(hp1);
  681. hp1.free;
  682. result := true;
  683. end
  684. { fold
  685. mov reg1,reg0, shift imm1
  686. mov reg1,reg1, shift imm2
  687. mov reg1,reg1, shift imm3 ...
  688. }
  689. else if getnextinstruction(hp1,hp2) and
  690. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  691. (taicpu(hp2).ops=3) and
  692. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  693. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  694. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  695. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  696. begin
  697. { mov reg1,reg0, lsl imm1
  698. mov reg1,reg1, lsr/asr imm2
  699. mov reg1,reg1, lsl imm3 ...
  700. if imm3<=imm1 and imm2>=imm3
  701. to
  702. mov reg1,reg0, lsl imm1
  703. mov reg1,reg1, lsr/asr imm2-imm3
  704. }
  705. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  706. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  707. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  708. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  709. begin
  710. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  711. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  712. asml.remove(hp2);
  713. hp2.free;
  714. result := true;
  715. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  716. begin
  717. asml.remove(hp1);
  718. hp1.free;
  719. end;
  720. end
  721. { mov reg1,reg0, lsr/asr imm1
  722. mov reg1,reg1, lsl imm2
  723. mov reg1,reg1, lsr/asr imm3 ...
  724. if imm3>=imm1 and imm2>=imm1
  725. to
  726. mov reg1,reg0, lsl imm2-imm1
  727. mov reg1,reg1, lsr/asr imm3 ...
  728. }
  729. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  730. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  731. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  732. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  733. begin
  734. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  735. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  736. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  737. asml.remove(p);
  738. p.free;
  739. p:=hp2;
  740. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  741. begin
  742. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  743. asml.remove(hp1);
  744. hp1.free;
  745. p:=hp2;
  746. end;
  747. result := true;
  748. end;
  749. end;
  750. end;
  751. { Change the common
  752. mov r0, r0, lsr #24
  753. and r0, r0, #255
  754. and remove the superfluous and
  755. This could be extended to handle more cases.
  756. }
  757. if (taicpu(p).ops=3) and
  758. (taicpu(p).oper[2]^.typ = top_shifterop) and
  759. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  760. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  761. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  762. getnextinstruction(p,hp1) and
  763. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  764. (taicpu(hp1).ops=3) and
  765. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  766. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  767. (taicpu(hp1).oper[2]^.typ = top_const) and
  768. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  769. For LSR #25 and an AndConst of 255 that whould go like this:
  770. 255 and ((2 shl (32-25))-1)
  771. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  772. LSR #25 and AndConst of 254:
  773. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  774. }
  775. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  776. begin
  777. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  778. asml.remove(hp1);
  779. hp1.free;
  780. end;
  781. {
  782. optimize
  783. mov rX, yyyy
  784. ....
  785. }
  786. if (taicpu(p).ops = 2) and
  787. GetNextInstruction(p,hp1) and
  788. (tai(hp1).typ = ait_instruction) then
  789. begin
  790. {
  791. This changes the very common
  792. mov r0, #0
  793. str r0, [...]
  794. mov r0, #0
  795. str r0, [...]
  796. and removes all superfluous mov instructions
  797. }
  798. if (taicpu(p).oper[1]^.typ = top_const) and
  799. (taicpu(hp1).opcode=A_STR) then
  800. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  801. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  802. GetNextInstruction(hp1, hp2) and
  803. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  804. (taicpu(hp2).ops = 2) and
  805. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  806. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  807. begin
  808. DebugMsg('Peephole MovStrMov done', hp2);
  809. GetNextInstruction(hp2,hp1);
  810. asml.remove(hp2);
  811. hp2.free;
  812. if not assigned(hp1) then break;
  813. end
  814. {
  815. This removes the first mov from
  816. mov rX,...
  817. mov rX,...
  818. }
  819. else if taicpu(hp1).opcode=A_MOV then
  820. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  821. (taicpu(hp1).ops = 2) and
  822. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  823. { don't remove the first mov if the second is a mov rX,rX }
  824. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  825. begin
  826. DebugMsg('Peephole MovMov done', p);
  827. asml.remove(p);
  828. p.free;
  829. p:=hp1;
  830. GetNextInstruction(hp1,hp1);
  831. if not assigned(hp1) then
  832. break;
  833. end;
  834. end;
  835. {
  836. change
  837. mov r1, r0
  838. add r1, r1, #1
  839. to
  840. add r1, r0, #1
  841. Todo: Make it work for mov+cmp too
  842. CAUTION! If this one is successful p might not be a mov instruction anymore!
  843. }
  844. if (taicpu(p).ops = 2) and
  845. (taicpu(p).oper[1]^.typ = top_reg) and
  846. (taicpu(p).oppostfix = PF_NONE) and
  847. GetNextInstruction(p, hp1) and
  848. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  849. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  850. [taicpu(p).condition], []) and
  851. {MOV and MVN might only have 2 ops}
  852. (taicpu(hp1).ops = 3) and
  853. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  854. (taicpu(hp1).oper[1]^.typ = top_reg) and
  855. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  856. begin
  857. { When we get here we still don't know if the registers match}
  858. for I:=1 to 2 do
  859. {
  860. If the first loop was successful p will be replaced with hp1.
  861. The checks will still be ok, because all required information
  862. will also be in hp1 then.
  863. }
  864. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  865. begin
  866. DebugMsg('Peephole RedundantMovProcess done', hp1);
  867. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  868. if p<>hp1 then
  869. begin
  870. asml.remove(p);
  871. p.free;
  872. p:=hp1;
  873. end;
  874. end;
  875. end;
  876. { This folds shifterops into following instructions
  877. mov r0, r1, lsl #8
  878. add r2, r3, r0
  879. to
  880. add r2, r3, r1, lsl #8
  881. CAUTION! If this one is successful p might not be a mov instruction anymore!
  882. }
  883. if (taicpu(p).opcode = A_MOV) and
  884. (taicpu(p).ops = 3) and
  885. (taicpu(p).oper[1]^.typ = top_reg) and
  886. (taicpu(p).oper[2]^.typ = top_shifterop) and
  887. (taicpu(p).oppostfix = PF_NONE) and
  888. GetNextInstruction(p, hp1) and
  889. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  890. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  891. A_CMP, A_CMN],
  892. [taicpu(p).condition], [PF_None]) and
  893. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  894. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  895. (
  896. {Only ONE of the two src operands is allowed to match}
  897. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  898. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  899. ) then
  900. begin
  901. CopyUsedRegs(TmpUsedRegs);
  902. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  903. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  904. I2:=0
  905. else
  906. I2:=1;
  907. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  908. for I:=I2 to taicpu(hp1).ops-1 do
  909. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  910. begin
  911. { If the parameter matched on the second op from the RIGHT
  912. we have to switch the parameters, this will not happen for CMP
  913. were we're only evaluating the most right parameter
  914. }
  915. if I <> taicpu(hp1).ops-1 then
  916. begin
  917. {The SUB operators need to be changed when we swap parameters}
  918. case taicpu(hp1).opcode of
  919. A_SUB: tempop:=A_RSB;
  920. A_SBC: tempop:=A_RSC;
  921. A_RSB: tempop:=A_SUB;
  922. A_RSC: tempop:=A_SBC;
  923. else tempop:=taicpu(hp1).opcode;
  924. end;
  925. if taicpu(hp1).ops = 3 then
  926. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  927. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  928. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  929. else
  930. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  931. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  932. taicpu(p).oper[2]^.shifterop^);
  933. end
  934. else
  935. if taicpu(hp1).ops = 3 then
  936. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  937. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  938. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  939. else
  940. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  941. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  942. taicpu(p).oper[2]^.shifterop^);
  943. asml.insertbefore(hp2, p);
  944. asml.remove(p);
  945. asml.remove(hp1);
  946. p.free;
  947. hp1.free;
  948. p:=hp2;
  949. GetNextInstruction(p,hp1);
  950. DebugMsg('Peephole FoldShiftProcess done', p);
  951. break;
  952. end;
  953. ReleaseUsedRegs(TmpUsedRegs);
  954. end;
  955. {
  956. Often we see shifts and then a superfluous mov to another register
  957. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  958. }
  959. if (taicpu(p).opcode = A_MOV) and
  960. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  961. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  962. end;
  963. A_ADD,
  964. A_ADC,
  965. A_RSB,
  966. A_RSC,
  967. A_SUB,
  968. A_SBC,
  969. A_AND,
  970. A_BIC,
  971. A_EOR,
  972. A_ORR,
  973. A_MLA,
  974. A_MUL:
  975. begin
  976. {
  977. optimize
  978. and reg2,reg1,const1
  979. ...
  980. }
  981. if (taicpu(p).opcode = A_AND) and
  982. (taicpu(p).oper[1]^.typ = top_reg) and
  983. (taicpu(p).oper[2]^.typ = top_const) then
  984. begin
  985. {
  986. change
  987. and reg2,reg1,const1
  988. and reg3,reg2,const2
  989. to
  990. and reg3,reg1,(const1 and const2)
  991. }
  992. if GetNextInstruction(p, hp1) and
  993. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  994. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  995. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  996. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  997. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  998. (taicpu(hp1).oper[2]^.typ = top_const) then
  999. begin
  1000. DebugMsg('Peephole AndAnd2And done', p);
  1001. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1002. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1003. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1004. asml.remove(hp1);
  1005. hp1.free;
  1006. end
  1007. {
  1008. change
  1009. and reg2,reg1,255
  1010. strb reg2,[...]
  1011. dealloc reg2
  1012. to
  1013. strb reg1,[...]
  1014. }
  1015. else if (taicpu(p).oper[2]^.val = 255) and
  1016. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1017. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1018. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1019. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1020. { the reference in strb might not use reg2 }
  1021. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1022. { reg1 might not be modified inbetween }
  1023. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1024. begin
  1025. DebugMsg('Peephole AndStrb2Strb done', p);
  1026. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1027. asml.remove(p);
  1028. p.free;
  1029. p:=hp1;
  1030. end;
  1031. end;
  1032. {
  1033. change
  1034. add/sub reg2,reg1,const1
  1035. str/ldr reg3,[reg2,const2]
  1036. dealloc reg2
  1037. to
  1038. str/ldr reg3,[reg1,const2+/-const1]
  1039. }
  1040. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1041. (taicpu(p).oper[1]^.typ = top_reg) and
  1042. (taicpu(p).oper[2]^.typ = top_const) then
  1043. begin
  1044. hp1:=p;
  1045. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1046. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1047. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1048. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1049. { don't optimize if the register is stored/overwritten }
  1050. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1051. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1052. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1053. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1054. ldr postfix }
  1055. (((taicpu(p).opcode=A_ADD) and
  1056. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1057. ) or
  1058. ((taicpu(p).opcode=A_SUB) and
  1059. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1060. )
  1061. ) do
  1062. begin
  1063. { neither reg1 nor reg2 might be changed inbetween }
  1064. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1065. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1066. break;
  1067. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1068. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1069. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1070. begin
  1071. { remember last instruction }
  1072. hp2:=hp1;
  1073. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1074. hp1:=p;
  1075. { fix all ldr/str }
  1076. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1077. begin
  1078. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1079. if taicpu(p).opcode=A_ADD then
  1080. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1081. else
  1082. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1083. if hp1=hp2 then
  1084. break;
  1085. end;
  1086. GetNextInstruction(p,hp1);
  1087. asml.remove(p);
  1088. p.free;
  1089. p:=hp1;
  1090. break;
  1091. end;
  1092. end;
  1093. end;
  1094. {
  1095. change
  1096. add reg1, ...
  1097. mov reg2, reg1
  1098. to
  1099. add reg2, ...
  1100. }
  1101. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1102. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1103. end;
  1104. A_MVN:
  1105. begin
  1106. {
  1107. change
  1108. mvn reg2,reg1
  1109. and reg3,reg4,reg2
  1110. dealloc reg2
  1111. to
  1112. bic reg3,reg4,reg1
  1113. }
  1114. if (taicpu(p).oper[1]^.typ = top_reg) and
  1115. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1116. MatchInstruction(hp1,A_AND,[],[]) and
  1117. (((taicpu(hp1).ops=3) and
  1118. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1119. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1120. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1121. ((taicpu(hp1).ops=2) and
  1122. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1123. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1124. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1125. { reg1 might not be modified inbetween }
  1126. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1127. begin
  1128. DebugMsg('Peephole MvnAnd2Bic done', p);
  1129. taicpu(hp1).opcode:=A_BIC;
  1130. if taicpu(hp1).ops=3 then
  1131. begin
  1132. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1133. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1134. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1135. end
  1136. else
  1137. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1138. asml.remove(p);
  1139. p.free;
  1140. p:=hp1;
  1141. end;
  1142. end;
  1143. A_UXTB:
  1144. begin
  1145. {
  1146. change
  1147. uxtb reg2,reg1
  1148. strb reg2,[...]
  1149. dealloc reg2
  1150. to
  1151. strb reg1,[...]
  1152. }
  1153. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1154. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1155. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1156. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1157. { the reference in strb might not use reg2 }
  1158. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1159. { reg1 might not be modified inbetween }
  1160. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1161. begin
  1162. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1163. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1164. asml.remove(p);
  1165. p.free;
  1166. p:=hp1;
  1167. end
  1168. {
  1169. change
  1170. uxtb reg2,reg1
  1171. uxth reg3,reg2
  1172. dealloc reg2
  1173. to
  1174. uxtb reg3,reg1
  1175. }
  1176. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1177. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1178. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1179. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1180. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1181. { reg1 might not be modified inbetween }
  1182. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1183. begin
  1184. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1185. taicpu(hp1).opcode:=A_UXTB;
  1186. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1187. asml.remove(p);
  1188. p.free;
  1189. p:=hp1;
  1190. end;
  1191. end;
  1192. A_UXTH:
  1193. begin
  1194. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1195. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1196. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1197. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1198. { the reference in strb might not use reg2 }
  1199. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1200. { reg1 might not be modified inbetween }
  1201. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1202. begin
  1203. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1204. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1205. asml.remove(p);
  1206. p.free;
  1207. p:=hp1;
  1208. end;
  1209. end;
  1210. A_CMP:
  1211. begin
  1212. {
  1213. change
  1214. cmp reg,const1
  1215. moveq reg,const1
  1216. movne reg,const2
  1217. to
  1218. cmp reg,const1
  1219. movne reg,const2
  1220. }
  1221. if (taicpu(p).oper[1]^.typ = top_const) and
  1222. GetNextInstruction(p, hp1) and
  1223. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1224. (taicpu(hp1).oper[1]^.typ = top_const) and
  1225. GetNextInstruction(hp1, hp2) and
  1226. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1227. (taicpu(hp1).oper[1]^.typ = top_const) then
  1228. begin
  1229. RemoveRedundantMove(p, hp1, asml);
  1230. RemoveRedundantMove(p, hp2, asml);
  1231. end;
  1232. end;
  1233. end;
  1234. end;
  1235. end;
  1236. end;
  { Instructions modifying the CPSR can only be the last instruction of a
    conditionally executed sequence.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF or CMFE
        (MSR is deliberately not in the list),
      - its first operand is the PC register,
      - it carries the S postfix.
    Any entry that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      { only real instructions can qualify }
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if instr.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE] then
        Result:=true
      else if (instr.ops>=1) and
        (instr.oper[0]^.typ=top_reg) and
        (instr.oper[0]^.reg=NR_PC) then
        Result:=true
      else
        Result:=(instr.oppostfix=PF_S);
    end;
  { Second peephole pass: walks the entries from BlockStart up to BlockEnd and
    tries to replace short forward conditional branches by conditionally
    executed instructions.

    Only A_B instructions with a condition other than C_None are looked at.
    Two shapes are recognised:

      1)  Bcc xxx                 the 1..4 entries between branch and label
          <block 1>        ==>    get the inverse condition, the Bcc is removed
        xxx:

      2)  Bcc xxx                 block 1 gets the inverse condition, block 2
          <block 1>               gets that condition inverted once more,
          B yyy            ==>    and both branches are removed
        xxx:
          <block 2>
        yyy:

    CanBeCond, FindLabel and GetNextInstruction are defined outside this
    block; presumably CanBeCond tells whether an entry may carry a condition
    code - verify against its definition. }
  1245. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  1246. var
  1247. p,hp1,hp2: tai;
  1248. l : longint;
  1249. condition : tasmcond;
  1250. hp3: tai;
  1251. WasLast: boolean;
  1252. { UsedRegs, TmpUsedRegs: TRegSet; }
  1253. begin
  1254. p := BlockStart;
  1255. { UsedRegs := []; }
  1256. while (p <> BlockEnd) Do
  1257. begin
  1258. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  1259. case p.Typ Of
  1260. Ait_Instruction:
  1261. begin
  1262. case taicpu(p).opcode Of
  1263. A_B:
  1264. if taicpu(p).condition<>C_None then
  1265. begin
  1266. { check for
  1267. Bxx xxx
  1268. <several instructions>
  1269. xxx:
  1270. }
  { l counts the entries stepped over after the branch; WasLast records that
    the scan stopped on an entry for which MustBeLast returned true }
  1271. l:=0;
  1272. WasLast:=False;
  1273. GetNextInstruction(p, hp1);
  1274. while assigned(hp1) and
  1275. (l<=4) and
  1276. CanBeCond(hp1) and
  1277. { stop on labels }
  1278. not(hp1.typ=ait_label) do
  1279. begin
  1280. inc(l);
  1281. if MustBeLast(hp1) then
  1282. begin
  1283. WasLast:=True;
  1284. GetNextInstruction(hp1,hp1);
  1285. break;
  1286. end
  1287. else
  1288. GetNextInstruction(hp1,hp1);
  1289. end;
  { hp1 is now the first entry that was not accepted by the scan above }
  1290. if assigned(hp1) then
  1291. begin
  1292. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1293. begin
  { shape 1: the branch target follows block 1 directly }
  1294. if (l<=4) and (l>0) then
  1295. begin
  1296. condition:=inverse_cond(taicpu(p).condition);
  { hp2 keeps the branch so it can be removed later; p moves on to the first
    entry after the branch, which is where the outer loop resumes }
  1297. hp2:=p;
  1298. GetNextInstruction(p,hp1);
  1299. p:=hp1;
  1300. repeat
  1301. if hp1.typ=ait_instruction then
  1302. taicpu(hp1).condition:=condition;
  1303. if MustBeLast(hp1) then
  1304. begin
  1305. GetNextInstruction(hp1,hp1);
  1306. break;
  1307. end
  1308. else
  1309. GetNextInstruction(hp1,hp1);
  1310. until not(assigned(hp1)) or
  1311. not(CanBeCond(hp1)) or
  1312. (hp1.typ=ait_label);
  1313. { wait with removing else GetNextInstruction could
  1314. ignore the label if it was the only usage in the
  1315. jump moved away }
  1316. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1317. asml.remove(hp2);
  1318. hp2.free;
  { continue skips the p := tai(p.next) at the bottom of the loop, so the new p
    is examined itself }
  1319. continue;
  1320. end;
  1321. end
  1322. else
  1323. { do not perform further optimizations if there is an instruction
  1324. in block #1 which can not be optimized.
  1325. }
  1326. if not WasLast then
  1327. begin
  1328. { check further for
  1329. Bcc xxx
  1330. <several instructions 1>
  1331. B yyy
  1332. xxx:
  1333. <several instructions 2>
  1334. yyy:
  1335. }
  1336. { hp2 points to jmp yyy }
  1337. hp2:=hp1;
  1338. { skip hp1 to xxx }
  1339. GetNextInstruction(hp1, hp1);
  1340. if assigned(hp2) and
  1341. assigned(hp1) and
  1342. (l<=3) and
  1343. (hp2.typ=ait_instruction) and
  1344. (taicpu(hp2).is_jmp) and
  1345. (taicpu(hp2).condition=C_None) and
  1346. { real label and jump, no further references to the
  1347. label are allowed }
  1348. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1349. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1350. begin
  { l is reused to count the entries of block 2; the count is not limited and
    not read again in this block }
  1351. l:=0;
  1352. { skip hp1 to <several moves 2> }
  1353. GetNextInstruction(hp1, hp1);
  1354. while assigned(hp1) and
  1355. CanBeCond(hp1) do
  1356. begin
  1357. inc(l);
  1358. GetNextInstruction(hp1, hp1);
  1359. end;
  1360. { hp1 points to yyy: }
  1361. if assigned(hp1) and
  1362. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1363. begin
  { shape 2 confirmed: block 1 gets the inverse of the Bcc condition }
  1364. condition:=inverse_cond(taicpu(p).condition);
  1365. GetNextInstruction(p,hp1);
  { hp3 keeps the Bcc for removal, p moves to the first entry of block 1 }
  1366. hp3:=p;
  1367. p:=hp1;
  1368. repeat
  1369. if hp1.typ=ait_instruction then
  1370. taicpu(hp1).condition:=condition;
  1371. GetNextInstruction(hp1,hp1);
  1372. until not(assigned(hp1)) or
  1373. not(CanBeCond(hp1));
  1374. { hp2 is still at jmp yyy }
  1375. GetNextInstruction(hp2,hp1);
  1376. { hp1 is now at xxx: }
  1377. condition:=inverse_cond(condition);
  1378. GetNextInstruction(hp1,hp1);
  1379. { hp1 is now at <several movs 2> }
  { NOTE(review): unlike the loop for block 1, this loop sets the condition
    without checking hp1.typ=ait_instruction first - confirm that every entry
    reached here is an instruction }
  1380. repeat
  1381. taicpu(hp1).condition:=condition;
  1382. GetNextInstruction(hp1,hp1);
  1383. until not(assigned(hp1)) or
  1384. not(CanBeCond(hp1)) or
  1385. (hp1.typ=ait_label);
  1386. {
  1387. asml.remove(hp1.next)
  1388. hp1.next.free;
  1389. asml.remove(hp1);
  1390. hp1.free;
  1391. }
  1392. { remove Bcc }
  1393. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1394. asml.remove(hp3);
  1395. hp3.free;
  1396. { remove jmp }
  1397. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1398. asml.remove(hp2);
  1399. hp2.free;
  { resume at the new p (first entry of block 1) without stepping past it }
  1400. continue;
  1401. end;
  1402. end;
  1403. end;
  1404. end;
  1405. end;
  1406. end;
  1407. end;
  1408. end;
  1409. p := tai(p.next)
  1410. end;
  1411. end;
  { Reports whether Reg is used by the entry p1.
    A BL instruction always answers true, whatever register is asked for;
    every other entry is delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ<>ait_instruction) or
        (taicpu(p1).opcode<>A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        { BL: answer true unconditionally }
        Result:=true;
    end;
  1419. const
  1420. { Set of opcodes which might or do write to memory. Besides the store
    opcodes it also lists the branch and call opcodes and BKPT.
    SchedulerPass1Cpu consults this set before moving a load in front of the
    preceding instruction. }
  1421. { TODO : extend armins.dat to contain r/w info }
  1422. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1423. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  { Adjusts the register live range information kept in cg.rg[] when the two
    instructions p and hp1 are swapped.

    Precondition: p is directly followed by hp1 before the swap.

    - For every register operand of p: if its live range starts at p and hp1
      uses the register too, the start is moved to hp1.
    - For every register operand of hp1: if its live range ends at hp1 and p
      uses the register too, the end is moved to p.

    Registers are collected from plain register operands, from the base and
    index of references, from the rs field of shifter operands and from
    register sets (R0..R15). NR_NO is ignored. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { Moves the live end of reg from hp1 to p when hp1 currently ends the
      range and p uses reg as well. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { Moves the live start of reg from p to hp1 when p currently starts the
      range and hp1 uses reg as well. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { operands of hp1 need the live-END check like all other cases of
              this loop; the live-start check belongs to the loop over p }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass over the range BlockStart..BlockEnd.

    Looks for three consecutive instructions
        p     any instruction
        hp1   LDR/LDRB/LDRH/LDRSB/LDRSH
        hp2   instruction using the register loaded by hp1
    and, when the checks below allow it, reorders them to
        hp1, p, hp2
    so that one instruction sits between the load and the first use of its
    result (presumably to hide the load latency - TODO confirm).
    Register deallocation entries that belong to p travel with p.

    The incoming value of p is ignored: the pass always starts at BlockStart.
    The function result is always true. }
  1495. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  1496. { TODO : schedule also forward }
  1497. { TODO : schedule distance > 1 }
  1498. var
  1499. hp1,hp2,hp3,hp4,hp5 : tai;
  1500. list : TAsmList;
  1501. begin
  1502. result:=true;
  { list temporarily collects p and its dealloc entries while they are moved }
  1503. list:=TAsmList.Create;
  1504. p:=BlockStart;
  1505. while p<>BlockEnd Do
  1506. begin
  1507. if (p.typ=ait_instruction) and
  1508. GetNextInstruction(p,hp1) and
  1509. (hp1.typ=ait_instruction) and
  1510. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1511. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  { the parenthesised group below accepts p when one of these holds:
      a) p is not in opcode_could_mem_write and does not modify the PC
      b) p is STM/STRB/STRH/STR and the load is PC based, or has symboldata
         assigned with offset 0
      c) p is STRB/STRH/STR using the same base as the load, neither has a
         postfix nor an index, and the offsets are far enough apart }
  1512. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1513. not(RegModifiedByInstruction(NR_PC,p))
  1514. ) or
  1515. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1516. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1517. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1518. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1519. )
  1520. ) or
  1521. { try to prove that the memory accesses don't overlap }
  1522. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1523. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1524. (taicpu(p).oppostfix=PF_None) and
  1525. (taicpu(hp1).oppostfix=PF_None) and
  1526. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1527. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1528. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  1529. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1530. )
  1531. )
  1532. ) and
  1533. GetNextInstruction(hp1,hp2) and
  1534. (hp2.typ=ait_instruction) and
  1535. { loaded register used by next instruction? }
  1536. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1537. { loaded register not used by previous instruction? }
  1538. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1539. { same condition? }
  1540. (taicpu(p).condition=taicpu(hp1).condition) and
  1541. { first instruction might not change the register used as base }
  1542. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1543. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1544. ) and
  1545. { first instruction might not change the register used as index }
  1546. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1547. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1548. ) then
  1549. begin
  { remember the neighbours of p before it is unlinked: hp3 scans backwards,
    hp5 scans forwards for dealloc entries that belong to p }
  1550. hp3:=tai(p.Previous);
  1551. hp5:=tai(p.next);
  1552. asml.Remove(p);
  1553. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1554. { before the instruction? }
  1555. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1556. begin
  1557. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1558. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1559. begin
  1560. hp4:=hp3;
  1561. hp3:=tai(hp3.Previous);
  1562. asml.Remove(hp4);
  1563. list.Concat(hp4);
  1564. end
  1565. else
  1566. hp3:=tai(hp3.Previous);
  1567. end;
  1568. list.Concat(p);
  1569. SwapRegLive(taicpu(p),taicpu(hp1));
  1570. { after the instruction? }
  1571. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1572. begin
  1573. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1574. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1575. begin
  1576. hp4:=hp5;
  1577. hp5:=tai(hp5.next);
  1578. asml.Remove(hp4);
  1579. list.Concat(hp4);
  1580. end
  1581. else
  1582. hp5:=tai(hp5.Next);
  1583. end;
  { re-insert the load directly in front of hp2, then put p and its dealloc
    entries between the load and hp2 }
  1584. asml.Remove(hp1);
  1585. {$ifdef DEBUG_PREREGSCHEDULER}
  1586. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1587. {$endif DEBUG_PREREGSCHEDULER}
  1588. asml.InsertBefore(hp1,hp2);
  1589. asml.InsertListBefore(hp2,list);
  1590. p:=tai(p.next)
  1591. end
  { NOTE(review): in the branch below hp1 is whatever GetNextInstruction(p,hp1)
    left in it - confirm it cannot be nil before BlockEnd is reached }
  1592. else if p.typ=ait_instruction then
  1593. p:=hp1
  1594. else
  1595. p:=tai(p.next);
  1596. end;
  1597. list.Free;
  1598. end;
  { Shrinks the IT instruction that precedes p by one slot.

    Walks backwards from p through the entries of list. Only instructions are
    counted (l is the 1-based count of the instruction being looked at);
    other entries are stepped over. The walk ends after four instructions, at
    the start of the list, or at the first instruction whose opcode lies in
    the range A_IT..A_ITTTT.

    When such an instruction is found:
      - a plain IT found as the first counted instruction (l=1) is removed
        from list and freed,
      - otherwise, if l equals the number of slots named by the opcode, the
        opcode is replaced by the variant without its last letter
        (e.g. ITTE at l=3 becomes ITT); for any other l the opcode is left
        untouched.

    Parameters:
      list - assembler list that contains p
      p    - entry in front of which the IT instruction is searched }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { Remove only unlinks the entry; free it as well so the
                        instruction does not leak. hp is not touched again
                        because of the break below. }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;
                  { the nearest IT instruction has been handled, stop searching }
                  break;
                end;
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  1658. function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1659. var
  1660. hp : taicpu;
  1661. hp1,hp2 : tai;
  1662. begin
  1663. if (p.typ=ait_instruction) and
  1664. MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
  1665. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  1666. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  1667. ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
  1668. begin
  1669. hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  1670. AsmL.InsertAfter(hp, p);
  1671. asml.Remove(p);
  1672. p:=hp;
  1673. result:=true;
  1674. end
  1675. else if (p.typ=ait_instruction) and
  1676. MatchInstruction(p, A_STR, [C_None], [PF_None]) and
  1677. (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
  1678. (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
  1679. (taicpu(p).oper[1]^.ref^.offset=-4) and
  1680. (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
  1681. begin
  1682. hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
  1683. asml.InsertAfter(hp, p);
  1684. asml.Remove(p);
  1685. p.Free;
  1686. p:=hp;
  1687. result:=true;
  1688. end
  1689. else if (p.typ=ait_instruction) and
  1690. MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
  1691. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  1692. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  1693. ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
  1694. begin
  1695. hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  1696. asml.InsertBefore(hp, p);
  1697. asml.Remove(p);
  1698. p.Free;
  1699. p:=hp;
  1700. result:=true;
  1701. end
  1702. else if (p.typ=ait_instruction) and
  1703. MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
  1704. (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
  1705. (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
  1706. (taicpu(p).oper[1]^.ref^.offset=4) and
  1707. (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
  1708. begin
  1709. hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
  1710. asml.InsertBefore(hp, p);
  1711. asml.Remove(p);
  1712. p.Free;
  1713. p:=hp;
  1714. result:=true;
  1715. end
  1716. else if (p.typ=ait_instruction) and
  1717. MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
  1718. (taicpu(p).oper[1]^.typ=top_const) and
  1719. (taicpu(p).oper[1]^.val >= 0) and
  1720. (taicpu(p).oper[1]^.val < 256) and
  1721. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1722. begin
  1723. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1724. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  1725. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1726. taicpu(p).oppostfix:=PF_S;
  1727. result:=true;
  1728. end
  1729. else if (p.typ=ait_instruction) and
  1730. MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
  1731. (taicpu(p).oper[1]^.typ=top_reg) and
  1732. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1733. begin
  1734. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1735. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  1736. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1737. taicpu(p).oppostfix:=PF_S;
  1738. result:=true;
  1739. end
  1740. else if (p.typ=ait_instruction) and
  1741. MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  1742. (taicpu(p).ops = 3) and
  1743. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  1744. (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
  1745. (taicpu(p).oper[2]^.typ=top_const) and
  1746. (taicpu(p).oper[2]^.val >= 0) and
  1747. (taicpu(p).oper[2]^.val < 256) and
  1748. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1749. begin
  1750. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1751. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  1752. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1753. taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
  1754. taicpu(p).oppostfix:=PF_S;
  1755. taicpu(p).ops := 2;
  1756. result:=true;
  1757. end
  1758. else if (p.typ=ait_instruction) and
  1759. MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None,PF_S]) and
  1760. (taicpu(p).ops = 3) and
  1761. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  1762. (taicpu(p).oper[2]^.typ=top_reg) and
  1763. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1764. begin
  1765. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1766. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  1767. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1768. taicpu(p).ops := 2;
  1769. taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
  1770. taicpu(p).oppostfix:=PF_S;
  1771. result:=true;
  1772. end
  1773. else if (p.typ=ait_instruction) and
  1774. MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
  1775. (taicpu(p).ops = 3) and
  1776. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
  1777. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1778. begin
  1779. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1780. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  1781. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1782. taicpu(p).oppostfix:=PF_S;
  1783. taicpu(p).ops := 2;
  1784. result:=true;
  1785. end
  1786. else if (p.typ=ait_instruction) and
  1787. MatchInstruction(p, [A_AND], [], [PF_None]) and
  1788. (taicpu(p).ops = 2) and
  1789. (taicpu(p).oper[1]^.typ=top_const) and
  1790. ((taicpu(p).oper[1]^.val=255) or
  1791. (taicpu(p).oper[1]^.val=65535)) then
  1792. begin
  1793. if taicpu(p).oper[1]^.val=255 then
  1794. taicpu(p).opcode:=A_UXTB
  1795. else
  1796. taicpu(p).opcode:=A_UXTH;
  1797. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  1798. result := true;
  1799. end
  1800. else if (p.typ=ait_instruction) and
  1801. MatchInstruction(p, [A_AND], [], [PF_None]) and
  1802. (taicpu(p).ops = 3) and
  1803. (taicpu(p).oper[2]^.typ=top_const) and
  1804. ((taicpu(p).oper[2]^.val=255) or
  1805. (taicpu(p).oper[2]^.val=65535)) then
  1806. begin
  1807. if taicpu(p).oper[2]^.val=255 then
  1808. taicpu(p).opcode:=A_UXTB
  1809. else
  1810. taicpu(p).opcode:=A_UXTH;
  1811. taicpu(p).ops:=2;
  1812. result := true;
  1813. end
  1814. {else if (p.typ=ait_instruction) and
  1815. MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
  1816. (taicpu(p).oper[1]^.typ=top_const) and
  1817. (taicpu(p).oper[1]^.val=0) and
  1818. GetNextInstruction(p,hp1) and
  1819. (taicpu(hp1).opcode=A_B) and
  1820. (taicpu(hp1).condition in [C_EQ,C_NE]) then
  1821. begin
  1822. if taicpu(hp1).condition = C_EQ then
  1823. hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
  1824. else
  1825. hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
  1826. taicpu(hp2).is_jmp := true;
  1827. asml.InsertAfter(hp2, hp1);
  1828. asml.Remove(hp1);
  1829. hp1.Free;
  1830. asml.Remove(p);
  1831. p.Free;
  1832. p := hp2;
  1833. result := true;
  1834. end}
  1835. else
  1836. Result := inherited PeepHoleOptPass1Cpu(p);
  1837. end;
  { Second peephole pass of the Thumb-2 optimizer.

    Walks the instruction list from BlockStart up to (not including) BlockEnd
    and looks for a conditional branch that merely skips a short run of
    instructions:

        Bxx   xxx
        <1..4 instructions>
      xxx:

    When that shape is found, the skipped instructions receive the inverse of
    the branch condition, an IT/ITT/ITTT/ITTTT instruction matching their
    count is inserted in front of them, and the branch itself is removed.

    NOTE(review): CanBeCond, MustBeLast, FindLabel and DecrementPreceedingIT
    are defined outside this block; the remarks about them below describe how
    they are used here, not their full contract. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      { number of instructions found between the branch and its target }
      l : longint;
      condition : tasmcond;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                              Bxx   xxx
                              <several instructions>
                            xxx:
                        }
                        l:=0;
                        GetNextInstruction(p, hp1);
                        { Count the instructions following the branch. The
                          loop lets l reach 5 so that "too many" can be told
                          apart from "exactly 4" by the l<=4 test below. }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                { this instruction has to end the run: step
                                  past it so hp1 is whatever follows, then
                                  stop counting }
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            { presumably true only if the branch target label
                              is located at hp1 - confirm against FindLabel }
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the branch; p moves on to the
                                      first skipped instruction so the outer
                                      loop resumes there after "continue" }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    { give every skipped instruction the
                                      inverted condition, using the same stop
                                      criteria as the counting loop above }
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { The branch is removed only now, after
                                      the walk above: otherwise
                                      GetNextInstruction could ignore the
                                      label if this jump was its only user. }
                                    asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                                    { NOTE(review): presumably adjusts an IT
                                      block that ends at the branch being
                                      removed - see DecrementPreceedingIT }
                                    DecrementPreceedingIT(asml, hp2);
                                    { one "T" per conditional instruction;
                                      l is in 1..4 here }
                                    case l of
                                      1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                      2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                      3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                      4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                    end;
                                    { drop the branch's reference to its
                                      target label, then delete the branch }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points behind the removed
                                      branch: do not advance it again }
                                    continue;
                                  end;
                              end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Unit initialization: stores this unit's optimizer and pre-register-
    allocation scheduler classes in the global class-reference variables
    casmoptimizer and cpreregallocscheduler.
    NOTE(review): both variables are declared outside this unit section;
    presumably the generic optimizer driver instantiates the classes through
    them - confirm against their declarations. }
  begin
    casmoptimizer         := TCpuAsmOptimizer;
    cpreregallocscheduler := TCpuPreRegallocScheduler;
  end.