aoptcpu.pas 71 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  24. Type
  { ARM specific assembler optimizer.
    No constructor is declared here, so the inherited one is used. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { cpu specific part of peephole pass 1, working on the item p;
      p is a var parameter and may be replaced by the pass }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
    { tries to fold the move movp, which copies the result register of p,
      into p itself; optimizer is only used as a label in the debug message }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    { updates AllUsedRegs with the items following p and reports whether
      reg still carries a needed value after p }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
      var AllUsedRegs: TAllUsedRegs): Boolean;
    { Starting behind Current, advances Next until it reaches
        - an item that is not an instruction,
        - an instruction using reg or the program counter, or
        - a call/jump.
      Returns false if the instruction list ran out before that. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  protected
    { tries to merge a following add/sub on the base register of the
      ldr/str p into p as postindexed addressing }
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { Instruction scheduler derived from TAsmScheduler.
    NOTE(review): judging by the name it runs before register allocation;
    the implementation is outside this part of the file - confirm. }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Optimizer variant that inherits everything from TCpuAsmOptimizer
    (including the constructor) and only replaces peephole pass 2. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    procedure PeepHoleOptPass2; override;
  end;
  52. Implementation
  53. uses
  54. cutils,verbose,globals,
  55. systems,
  56. cpuinfo,
  57. cgobj,cgutils,procinfo,
  58. aasmbase,aasmdata;
  { Returns true if p is an instruction that
      - carries no condition yet,
      - is not a PLD, and
      - is not a BLX, unless the first operand of the BLX is a register.
    p must not be nil. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { only the register form of BLX passes }
      Result:=(instr.opcode<>A_BLX) or (instr.oper[0]^.typ=top_reg);
    end;
  { Field-wise comparison of two references: returns true only if all
    compared fields (registers, offset, scale factor, symbols, address
    kind, index sign, shift and addressing mode) are identical. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and displacement }
      if (r1.base<>r2.base) or (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { index sign, shift and addressing mode }
      Result:=
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.shiftmode=r2.shiftmode) and
        (r1.addressmode=r2.addressmode);
    end;
  { Checks whether instr is an instruction matching the given filters.
    An empty set for op, cond or postfix means "do not filter on this".
    Opcodes with an ordinal of 256 or above never match a non-empty op set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if (op<>[]) and
         not((ord(ins.opcode)<256) and (ins.opcode in op)) then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      Result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly the opcode op.
    An empty set for cond or postfix means "do not filter on this". }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if ins.opcode<>op then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      Result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Compares two operands. They match if they have the same operand type
    and the same payload; only constants, registers, condition codes and
    references are compared, every other operand type never matches. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_const:
            Result:=(oper1.val=oper2.val);
          top_reg:
            Result:=(oper1.reg=oper2.reg);
          top_conditioncode:
            Result:=(oper1.cc=oper2.cc);
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          else
            Result:=false;
        end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=(oper.reg=reg)
      else
        Result:=false;
    end;
  { Removes movp from asml (and frees it) if it is executed on EQ only and
    assigns to the register compared by cmpp the very value it was compared
    against. A comment is left in the list at the place of the removed move.
    Operand 0 of both instructions is read as a register and operand 1 as
    a constant without further checks. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpinstr,
      movinstr : taicpu;
    begin
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.
    Returns false if hp is nil or not an instruction. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;

    { true if the reference operand o uses pre- or postindexed addressing
      with reg as its base register (such accesses change the base register) }
    function ChangesBaseReg(const o: toper): boolean;
      begin
        ChangesBaseReg:=
          (o.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
          (o.ref^.base=reg);
      end;

    var
      instr : taicpu;
    begin
      Result:=false;
      if not assigned(hp) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);

      { opcode specific handling of operands other than the first one }
      case instr.opcode of
        { these instructions do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { pre/postindexed loads and stores change their base register }
        A_STR, A_LDR:
          begin
            Result:=
              (instr.oper[1]^.typ=top_ref) and
              ChangesBaseReg(instr.oper[1]^);
            { STR does not load into its first register, so this is final }
            if instr.opcode=A_STR then
              exit;
          end;
        { these four write into their first two registers
          (UMLAL and SMLAL also read from them) }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          Result:=
            (instr.oper[1]^.typ=top_reg) and
            (instr.oper[1]^.reg=reg);
        { MCR/MRC (load to oper[2] from a coprocessor) is currently not
          supported in FPC, so there is no A_MRC case here }
        { LDM loads all registers of its register set }
        A_LDM:
          Result:=(getsupreg(reg) in instr.oper[1]^.regset^);
      end;
      if Result then
        exit;

      { the general case: look at the first operand }
      case instr.oper[0]^.typ of
        top_reg:
          Result:=
            (instr.oper[0]^.reg=reg) or
            { LDRD also loads the register following its first operand }
            ((instr.opcode=A_LDR) and (instr.oppostfix=PF_D) and
             (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          Result:=ChangesBaseReg(instr.oper[0]^);
      end;
    end;
  { (Safe) heuristic telling whether ref is known to be aligned to 8 bytes.
    Only answers true for the eabi, armeb and eabihf ABIs and only for
    references relative to r13 or to r11 when r11 is the frame pointer. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        Result:=false
      else if (ref.offset>=0) and
              ((ref.offset mod 8)=0) and
              ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        { non-negative multiple of 8 relative to r13 }
        Result:=true
      else
        { when using NR_R11, it has always a value of <qword align>+4 }
        Result:=
          (ref.offset<=0) and
          ((abs(ref.offset+4) mod 8)=0) and
          (current_procinfo.framepointer=NR_R11) and
          ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its source
    operands: directly, as member of a register set, as shift register of a
    shifter operand, or as base/index of a reference.
    Returns false if hp is nil or not an instruction. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr : taicpu;
      first,
      idx : longint;
      found : boolean;
    begin
      Result:=false;
      if not assigned(hp) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { normally oper[0] is the destination and is skipped; for these
        instructions the scan has to start at oper[0] }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        first:=0
      else
        first:=1;
      for idx:=first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              found:=
                (instr.oper[idx]^.reg=reg) or
                { STRD also stores the register following its first operand }
                ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              found:=(getsupreg(reg) in instr.oper[idx]^.regset^);
            top_shifterop:
              found:=(instr.oper[idx]^.shifterop^.rs=reg);
            top_ref:
              found:=
                (instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
            else
              found:=false;
          end;
          { bail out at the first operand reading reg }
          if found then
            begin
              Result:=true;
              exit;
            end;
        end;
    end;
  { Updates AllUsedRegs with the items following p and returns true if reg
    is still in use after p: reg must be marked as used, p itself must not
    overwrite it, and the instruction following p (if any) must either read
    reg or at least not overwrite it. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result:=false;
      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p is the instruction following the original p }
      if not(GetNextInstruction(p,p)) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Walks forward from Current with GetNextInstruction and stops at the
    first item that
      - is not an instruction,
      - uses reg,
      - is a call or jump, or
      - uses the program counter.
    Next receives the item the walk stopped at. The result is false only
    if GetNextInstruction failed before such an item was found. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(Result) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { With DEBUG_AOPTCPU defined, s is inserted as an assembler comment in
    front of p. Without it the procedure is empty and declared inline. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Folds a register move into the instruction producing the moved value:

      <op>  reg1, ...
      mov   reg2, reg1
    becomes
      <op>  reg2, ...

    p         instruction whose first operand (a register) is its result
    movp      candidate mov instruction following p
    optimizer name of the calling optimization, only used in the debug message

    Nothing is changed unless all conditions of the if statement below hold
    and a deallocation of reg1 is found behind movp. On success movp is
    removed from the list and freed, and the register allocation info of
    reg1 and reg2 is adjusted. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
      haveprev : boolean;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and { we can't optimize if there is a shifterop }
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { there is a special requirement for MUL and MLA: oper[0] and oper[1]
           are not allowed to be the same }
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) and
         { Take care to only do this for instructions which REALLY load to the
           first register. Otherwise
             str reg0, [reg1]
             mov reg2, reg0
           will be optimized to
             str reg2, [reg1]
         }
         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its
                allocation and remove it if possible }
              haveprev:=GetLastInstruction(p,hp1);
              asml.Remove(dealloc);
              { hp1 is only valid if a previous instruction was found; without
                one the allocation is treated as not found instead of
                dereferencing hp1 }
              if haveprev and assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  { allocation and deallocation are both superfluous now }
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { keep the deallocation, but move it directly behind p }
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register in front of p }
              if GetLastInstruction(movp,hp1) and assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  {
    optimize
      ldr/str    regX,[reg1]
      ...
      add/sub    reg1,reg1,regY/const

    into

      ldr/str    regX,[reg1], regY/const

    p must be a ldr/str whose second operand is a reference.
    Returns true if the pattern was found; in that case p has been changed
    to postindexed addressing and the add/sub has been removed and freed.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
         { oper[1] and oper[2] are inspected below, so they have to exist }
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
         { only read .reg of an operand which really is a register }
         (taicpu(hp1).oper[1]^.typ=top_reg) and
         (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
         (
          (taicpu(hp1).oper[2]^.typ=top_reg) or
          { valid offset? }
          ((taicpu(hp1).oper[2]^.typ=top_const) and
           ((abs(taicpu(hp1).oper[2]^.val)<256) or
            ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
           )
          )
         ) and
         { don't apply the optimization if the base register is loaded }
         (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         { don't apply the optimization if the (new) index register is loaded;
           this only applies if the add/sub has a register operand at all,
           for a constant there is no register to look at }
         (
          (taicpu(hp1).oper[2]^.typ<>top_reg) or
          (
           (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
           not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
          )
         ) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              { constant: becomes the (signed) offset of the reference }
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { register: becomes the index, the sign encodes add/sub }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  389. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  390. var
  391. hp1,hp2: tai;
  392. i, i2: longint;
  393. TmpUsedRegs: TAllUsedRegs;
  394. tempop: tasmop;
  395. function IsPowerOf2(const value: DWord): boolean; inline;
  396. begin
  397. Result:=(value and (value - 1)) = 0;
  398. end;
  399. begin
  400. result := false;
  401. case p.typ of
  402. ait_instruction:
  403. begin
  404. {
  405. change
  406. <op> reg,x,y
  407. cmp reg,#0
  408. into
  409. <op>s reg,x,y
  410. }
  411. { this optimization can be applied only to the currently enabled operations because
  412. the other operations do not update all flags and FPC does not track flag usage }
  413. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  414. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  415. GetNextInstruction(p, hp1) and
  416. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  417. (taicpu(hp1).oper[1]^.typ = top_const) and
  418. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  419. (taicpu(hp1).oper[1]^.val = 0) and
  420. GetNextInstruction(hp1, hp2) and
  421. { be careful here, following instructions could use other flags
  422. however after a jump fpc never depends on the value of flags }
  423. { All above instructions set Z and N according to the following
  424. Z := result = 0;
  425. N := result[31];
  426. EQ = Z=1; NE = Z=0;
  427. MI = N=1; PL = N=0; }
  428. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  429. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  430. begin
  431. DebugMsg('Peephole OpCmp2OpS done', p);
  432. taicpu(p).oppostfix:=PF_S;
  433. { move flag allocation if possible }
  434. GetLastInstruction(hp1, hp2);
  435. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  436. if assigned(hp2) then
  437. begin
  438. asml.Remove(hp2);
  439. asml.insertbefore(hp2, p);
  440. end;
  441. asml.remove(hp1);
  442. hp1.free;
  443. end
  444. else
  445. case taicpu(p).opcode of
  446. A_STR:
  447. begin
  448. { change
  449. str reg1,ref
  450. ldr reg2,ref
  451. into
  452. str reg1,ref
  453. mov reg2,reg1
  454. }
  455. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  456. (taicpu(p).oppostfix=PF_None) and
  457. GetNextInstruction(p,hp1) and
  458. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  459. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  460. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  461. begin
  462. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  463. begin
  464. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  465. asml.remove(hp1);
  466. hp1.free;
  467. end
  468. else
  469. begin
  470. taicpu(hp1).opcode:=A_MOV;
  471. taicpu(hp1).oppostfix:=PF_None;
  472. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  473. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  474. end;
  475. result := true;
  476. end
  477. { change
  478. str reg1,ref
  479. str reg2,ref
  480. into
  481. strd reg1,ref
  482. }
  483. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  484. (taicpu(p).oppostfix=PF_None) and
  485. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  486. GetNextInstruction(p,hp1) and
  487. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  488. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  489. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  490. { str ensures that either base or index contain no register, else ldr wouldn't
  491. use an offset either
  492. }
  493. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  494. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  495. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  496. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  497. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  498. begin
  499. DebugMsg('Peephole StrStr2Strd done', p);
  500. taicpu(p).oppostfix:=PF_D;
  501. asml.remove(hp1);
  502. hp1.free;
  503. end;
  504. LookForPostindexedPattern(taicpu(p));
  505. end;
  506. A_LDR:
  507. begin
  508. { change
  509. ldr reg1,ref
  510. ldr reg2,ref
  511. into ...
  512. }
  513. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  514. GetNextInstruction(p,hp1) and
  515. { ldrd is not allowed here }
  516. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  517. begin
  518. {
  519. ...
  520. ldr reg1,ref
  521. mov reg2,reg1
  522. }
  523. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  524. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  525. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  526. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  527. begin
  528. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  529. begin
  530. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  531. asml.remove(hp1);
  532. hp1.free;
  533. end
  534. else
  535. begin
  536. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  537. taicpu(hp1).opcode:=A_MOV;
  538. taicpu(hp1).oppostfix:=PF_None;
  539. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  540. end;
  541. result := true;
  542. end
  543. {
  544. ...
  545. ldrd reg1,ref
  546. }
  547. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  548. { ldrd does not allow any postfixes ... }
  549. (taicpu(p).oppostfix=PF_None) and
  550. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  551. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  552. { ldr ensures that either base or index contain no register, else ldr wouldn't
  553. use an offset either
  554. }
  555. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  556. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  557. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  558. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  559. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  560. begin
  561. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  562. taicpu(p).oppostfix:=PF_D;
  563. asml.remove(hp1);
  564. hp1.free;
  565. end;
  566. end;
  567. LookForPostindexedPattern(taicpu(p));
  568. { Remove superfluous mov after ldr
  569. changes
  570. ldr reg1, ref
  571. mov reg2, reg1
  572. to
  573. ldr reg2, ref
  574. conditions are:
  575. * no ldrd usage
  576. * reg1 must be released after mov
  577. * mov can not contain shifterops
  578. * ldr+mov have the same conditions
  579. * mov does not set flags
  580. }
  581. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  582. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  583. end;
  584. A_MOV:
  585. begin
  586. { fold
  587. mov reg1,reg0, shift imm1
  588. mov reg1,reg1, shift imm2
  589. }
  590. if (taicpu(p).ops=3) and
  591. (taicpu(p).oper[2]^.typ = top_shifterop) and
  592. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  593. getnextinstruction(p,hp1) and
  594. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  595. (taicpu(hp1).ops=3) and
  596. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  597. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  598. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  599. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  600. begin
  601. { fold
  602. mov reg1,reg0, lsl 16
  603. mov reg1,reg1, lsr 16
  604. strh reg1, ...
  605. dealloc reg1
  606. to
  607. strh reg1, ...
  608. dealloc reg1
  609. }
  610. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  611. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  612. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  613. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  614. getnextinstruction(hp1,hp2) and
  615. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  616. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  617. begin
  618. CopyUsedRegs(TmpUsedRegs);
  619. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  620. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  621. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  622. begin
  623. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  624. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  625. asml.remove(p);
  626. asml.remove(hp1);
  627. p.free;
  628. hp1.free;
  629. p:=hp2;
  630. end;
  631. ReleaseUsedRegs(TmpUsedRegs);
  632. end
  633. { fold
  634. mov reg1,reg0, shift imm1
  635. mov reg1,reg1, shift imm2
  636. to
  637. mov reg1,reg0, shift imm1+imm2
  638. }
  639. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  640. { an asr following a lsr behaves like a lsr, so the asr can be folded into the lsr }
  641. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  642. begin
  643. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  644. { avoid overflows }
  645. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  646. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  647. SM_ROR:
  648. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  649. SM_ASR:
  650. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  651. SM_LSR,
  652. SM_LSL:
  653. begin
  654. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  655. InsertLLItem(p.previous, p.next, hp1);
  656. p.free;
  657. p:=hp1;
  658. end;
  659. else
  660. internalerror(2008072803);
  661. end;
  662. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  663. asml.remove(hp1);
  664. hp1.free;
  665. result := true;
  666. end
  667. { fold
  668. mov reg1,reg0, shift imm1
  669. mov reg1,reg1, shift imm2
  670. mov reg1,reg1, shift imm3 ...
  671. }
  672. else if getnextinstruction(hp1,hp2) and
  673. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  674. (taicpu(hp2).ops=3) and
  675. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  676. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  677. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  678. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  679. begin
  680. { mov reg1,reg0, lsl imm1
  681. mov reg1,reg1, lsr/asr imm2
  682. mov reg1,reg1, lsl imm3 ...
  683. if imm3<=imm1 and imm2>=imm3
  684. to
  685. mov reg1,reg0, lsl imm1
  686. mov reg1,reg1, lsr/asr imm2-imm3
  687. }
  688. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  689. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  690. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  691. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  692. begin
  693. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  694. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  695. asml.remove(hp2);
  696. hp2.free;
  697. result := true;
  698. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  699. begin
  700. asml.remove(hp1);
  701. hp1.free;
  702. end;
  703. end
  704. { mov reg1,reg0, lsr/asr imm1
  705. mov reg1,reg1, lsl imm2
  706. mov reg1,reg1, lsr/asr imm3 ...
  707. if imm3>=imm1 and imm2>=imm1
  708. to
  709. mov reg1,reg0, lsl imm2-imm1
  710. mov reg1,reg1, lsr/asr imm3 ...
  711. }
  712. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  713. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  714. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  715. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  716. begin
  717. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  718. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  719. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  720. asml.remove(p);
  721. p.free;
  722. p:=hp2;
  723. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  724. begin
  725. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  726. asml.remove(hp1);
  727. hp1.free;
  728. p:=hp2;
  729. end;
  730. result := true;
  731. end;
  732. end;
  733. end;
  734. { Change the common
  735. mov r0, r0, lsr #24
  736. and r0, r0, #255
  737. and remove the superfluous and
  738. This could be extended to handle more cases.
  739. }
  740. if (taicpu(p).ops=3) and
  741. (taicpu(p).oper[2]^.typ = top_shifterop) and
  742. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  743. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  744. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  745. getnextinstruction(p,hp1) and
  746. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  747. (taicpu(hp1).ops=3) and
  748. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  749. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  750. (taicpu(hp1).oper[2]^.typ = top_const) and
  751. { Check if the AND actually would only mask out bits being already zero because of the shift
  752. For LSR #25 and an AndConst of 255 that would go like this:
  753. 255 and ((2 shl (32-25))-1)
  754. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  755. LSR #25 and AndConst of 254:
  756. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  757. }
  758. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  759. begin
  760. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  761. asml.remove(hp1);
  762. hp1.free;
  763. end;
  764. {
  765. optimize
  766. mov rX, yyyy
  767. ....
  768. }
  769. if (taicpu(p).ops = 2) and
  770. GetNextInstruction(p,hp1) and
  771. (tai(hp1).typ = ait_instruction) then
  772. begin
  773. {
  774. This changes the very common
  775. mov r0, #0
  776. str r0, [...]
  777. mov r0, #0
  778. str r0, [...]
  779. and removes all superfluous mov instructions
  780. }
  781. if (taicpu(p).oper[1]^.typ = top_const) and
  782. (taicpu(hp1).opcode=A_STR) then
  783. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  784. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  785. GetNextInstruction(hp1, hp2) and
  786. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  787. (taicpu(hp2).ops = 2) and
  788. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  789. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  790. begin
  791. DebugMsg('Peephole MovStrMov done', hp2);
  792. GetNextInstruction(hp2,hp1);
  793. asml.remove(hp2);
  794. hp2.free;
  795. if not assigned(hp1) then break;
  796. end
  797. {
  798. This removes the first mov from
  799. mov rX,...
  800. mov rX,...
  801. }
  802. else if taicpu(hp1).opcode=A_MOV then
  803. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  804. (taicpu(hp1).ops = 2) and
  805. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  806. { don't remove the first mov if the second is a mov rX,rX }
  807. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  808. begin
  809. DebugMsg('Peephole MovMov done', p);
  810. asml.remove(p);
  811. p.free;
  812. p:=hp1;
  813. GetNextInstruction(hp1,hp1);
  814. if not assigned(hp1) then
  815. break;
  816. end;
  817. end;
  818. {
  819. change
  820. mov r1, r0
  821. add r1, r1, #1
  822. to
  823. add r1, r0, #1
  824. Todo: Make it work for mov+cmp too
  825. CAUTION! If this one is successful p might not be a mov instruction anymore!
  826. }
  827. if (taicpu(p).ops = 2) and
  828. (taicpu(p).oper[1]^.typ = top_reg) and
  829. (taicpu(p).oppostfix = PF_NONE) and
  830. GetNextInstruction(p, hp1) and
  831. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  832. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  833. [taicpu(p).condition], []) and
  834. {MOV and MVN might only have 2 ops}
  835. (taicpu(hp1).ops = 3) and
  836. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  837. (taicpu(hp1).oper[1]^.typ = top_reg) and
  838. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  839. begin
  840. { When we get here we still don't know if the registers match}
  841. for I:=1 to 2 do
  842. {
  843. If the first loop was successful p will be replaced with hp1.
  844. The checks will still be ok, because all required information
  845. will also be in hp1 then.
  846. }
  847. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  848. begin
  849. DebugMsg('Peephole RedundantMovProcess done', hp1);
  850. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  851. if p<>hp1 then
  852. begin
  853. asml.remove(p);
  854. p.free;
  855. p:=hp1;
  856. end;
  857. end;
  858. end;
  859. { This folds shifterops into following instructions
  860. mov r0, r1, lsl #8
  861. add r2, r3, r0
  862. to
  863. add r2, r3, r1, lsl #8
  864. CAUTION! If this one is successful p might not be a mov instruction anymore!
  865. }
  866. if (taicpu(p).opcode = A_MOV) and
  867. (taicpu(p).ops = 3) and
  868. (taicpu(p).oper[1]^.typ = top_reg) and
  869. (taicpu(p).oper[2]^.typ = top_shifterop) and
  870. (taicpu(p).oppostfix = PF_NONE) and
  871. GetNextInstruction(p, hp1) and
  872. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  873. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  874. A_CMP, A_CMN],
  875. [taicpu(p).condition], [PF_None]) and
  876. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  877. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  878. (
  879. {Only ONE of the two src operands is allowed to match}
  880. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  881. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  882. ) then
  883. begin
  884. CopyUsedRegs(TmpUsedRegs);
  885. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  886. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  887. I2:=0
  888. else
  889. I2:=1;
  890. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  891. for I:=I2 to taicpu(hp1).ops-1 do
  892. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  893. begin
  894. { If the parameter matched on the second op from the RIGHT
  895. we have to switch the parameters, this will not happen for CMP
  896. were we're only evaluating the most right parameter
  897. }
  898. if I <> taicpu(hp1).ops-1 then
  899. begin
  900. {The SUB operators need to be changed when we swap parameters}
  901. case taicpu(hp1).opcode of
  902. A_SUB: tempop:=A_RSB;
  903. A_SBC: tempop:=A_RSC;
  904. A_RSB: tempop:=A_SUB;
  905. A_RSC: tempop:=A_SBC;
  906. else tempop:=taicpu(hp1).opcode;
  907. end;
  908. if taicpu(hp1).ops = 3 then
  909. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  910. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  911. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  912. else
  913. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  914. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  915. taicpu(p).oper[2]^.shifterop^);
  916. end
  917. else
  918. if taicpu(hp1).ops = 3 then
  919. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  920. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  921. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  922. else
  923. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  924. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  925. taicpu(p).oper[2]^.shifterop^);
  926. asml.insertbefore(hp2, p);
  927. asml.remove(p);
  928. asml.remove(hp1);
  929. p.free;
  930. hp1.free;
  931. p:=hp2;
  932. GetNextInstruction(p,hp1);
  933. DebugMsg('Peephole FoldShiftProcess done', p);
  934. break;
  935. end;
  936. ReleaseUsedRegs(TmpUsedRegs);
  937. end;
  938. {
  939. Often we see shifts and then a superfluous mov to another register
  940. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  941. }
  942. if (taicpu(p).opcode = A_MOV) and
  943. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  944. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  945. end;
  946. A_ADD,
  947. A_ADC,
  948. A_RSB,
  949. A_RSC,
  950. A_SUB,
  951. A_SBC,
  952. A_AND,
  953. A_BIC,
  954. A_EOR,
  955. A_ORR,
  956. A_MLA,
  957. A_MUL:
  958. begin
  959. {
  960. optimize
  961. and reg2,reg1,const1
  962. ...
  963. }
  964. if (taicpu(p).opcode = A_AND) and
  965. (taicpu(p).oper[1]^.typ = top_reg) and
  966. (taicpu(p).oper[2]^.typ = top_const) then
  967. begin
  968. {
  969. change
  970. and reg2,reg1,const1
  971. and reg3,reg2,const2
  972. to
  973. and reg3,reg1,(const1 and const2)
  974. }
  975. if GetNextInstruction(p, hp1) and
  976. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  977. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  978. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  979. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  980. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  981. (taicpu(hp1).oper[2]^.typ = top_const) then
  982. begin
  983. DebugMsg('Peephole AndAnd2And done', p);
  984. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  985. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  986. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  987. asml.remove(hp1);
  988. hp1.free;
  989. end
  990. {
  991. change
  992. and reg2,reg1,255
  993. strb reg2,[...]
  994. dealloc reg2
  995. to
  996. strb reg1,[...]
  997. }
  998. else if (taicpu(p).oper[2]^.val = 255) and
  999. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1000. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1001. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1002. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1003. { the reference in strb might not use reg2 }
  1004. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1005. { reg1 might not be modified inbetween }
  1006. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1007. begin
  1008. DebugMsg('Peephole AndStrb2Strb done', p);
  1009. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1010. asml.remove(p);
  1011. p.free;
  1012. p:=hp1;
  1013. end;
  1014. end;
  1015. {
  1016. change
  1017. add/sub reg2,reg1,const1
  1018. str/ldr reg3,[reg2,const2]
  1019. dealloc reg2
  1020. to
  1021. str/ldr reg3,[reg1,const2+/-const1]
  1022. }
  1023. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1024. (taicpu(p).oper[1]^.typ = top_reg) and
  1025. (taicpu(p).oper[2]^.typ = top_const) then
  1026. begin
  1027. hp1:=p;
  1028. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1029. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1030. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1031. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1032. { don't optimize if the register is stored/overwritten }
  1033. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1034. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1035. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1036. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1037. ldr postfix }
  1038. (((taicpu(p).opcode=A_ADD) and
  1039. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  1040. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<4096)) or
  1041. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<256)
  1042. )
  1043. ) or
  1044. ((taicpu(p).opcode=A_SUB) and
  1045. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  1046. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<4096)) or
  1047. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<256)
  1048. )
  1049. )
  1050. ) do
  1051. begin
  1052. { neither reg1 nor reg2 might be changed inbetween }
  1053. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1054. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1055. break;
  1056. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1057. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1058. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1059. begin
  1060. { remember last instruction }
  1061. hp2:=hp1;
  1062. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1063. hp1:=p;
  1064. { fix all ldr/str }
  1065. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1066. begin
  1067. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1068. if taicpu(p).opcode=A_ADD then
  1069. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1070. else
  1071. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1072. if hp1=hp2 then
  1073. break;
  1074. end;
  1075. GetNextInstruction(p,hp1);
  1076. asml.remove(p);
  1077. p.free;
  1078. p:=hp1;
  1079. break;
  1080. end;
  1081. end;
  1082. end;
  1083. {
  1084. change
  1085. add reg1, ...
  1086. mov reg2, reg1
  1087. to
  1088. add reg2, ...
  1089. }
  1090. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1091. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1092. end;
  1093. A_CMP:
  1094. begin
  1095. {
  1096. change
  1097. cmp reg,const1
  1098. moveq reg,const1
  1099. movne reg,const2
  1100. to
  1101. cmp reg,const1
  1102. movne reg,const2
  1103. }
  1104. if (taicpu(p).oper[1]^.typ = top_const) and
  1105. GetNextInstruction(p, hp1) and
  1106. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1107. (taicpu(hp1).oper[1]^.typ = top_const) and
  1108. GetNextInstruction(hp1, hp2) and
  1109. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1110. (taicpu(hp1).oper[1]^.typ = top_const) then
  1111. begin
  1112. RemoveRedundantMove(p, hp1, asml);
  1113. RemoveRedundantMove(p, hp2, asml);
  1114. end;
  1115. end;
  1116. end;
  1117. end;
  1118. end;
  1119. end;
  { Instructions modifying the CPSR can only be the last instruction of a
    conditionally executed run.

    Returns true when p is an instruction and at least one of the following
    holds:
      - its opcode is one of the call/compare/test/swi opcodes listed below,
      - its first operand is the register PC,
      - it carries the S postfix.
    Returns false for every tai that is not an instruction. }
  function MustBeLast(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      { only real instructions can qualify }
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      if ins.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (ins.ops>=1) and
        (ins.oper[0]^.typ=top_reg) and
        (ins.oper[0]^.reg=NR_PC) then
        { first operand is PC }
        Result:=true
      else
        { flag-setting variant of an instruction }
        Result:=(ins.oppostfix=PF_S);
    end;
  {
    Second peephole pass over the current block (BlockStart..BlockEnd).

    For every conditional branch (A_B with a condition other than C_None)
    it tries to replace the branch by conditional execution of the
    instructions it jumps over. Two shapes are recognised:

      1)    Bcc   xxx
            <1..4 instructions>
          xxx:

         The skipped instructions get the inverse condition and the Bcc is
         removed.

      2)    Bcc   xxx
            <up to 3 instructions 1>
            B     yyy
          xxx:
            <several instructions 2>
          yyy:

         Block 1 gets the inverse condition, block 2 the original
         condition, and both branches are removed. This is only done when
         the label xxx has exactly two references and block 1 contains no
         instruction for which MustBeLast returns true.
  }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                              Bxx   xxx
                              <several instructions>
                            xxx:
                        }
                        { l counts the instructions that can be made
                          conditional; WasLast records that the run ended
                          with an instruction that must be the last one }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 remembers the Bcc, p moves on to
                                      the first skipped instruction }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized }
                              if not WasLast then
                                begin
                                  { check further for
                                        Bcc   xxx
                                        <several instructions 1>
                                        B     yyy
                                      xxx:
                                        <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      { NOTE(review): l is recounted for block 2 here but the
                                        count is never checked afterwards, so the length of
                                        block 2 is not limited }
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 remembers the Bcc, p moves on to
                                            the first instruction of block 1 }
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            { only instructions carry a condition: the until
                                              clause below shows that a label can show up
                                              here, so guard the cast exactly like the other
                                              conditionalising loops of this procedure do }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is used by p1.
    Every BL instruction is reported as using Reg, whatever Reg is; all
    other cases are delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ <> ait_instruction) or (taicpu(p1).opcode <> A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        { BL: answer true for any register }
        Result:=true;
    end;
  const
    { Set of opcodes which write to memory or might do so.
      TODO: extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      A_STR,A_STRT,A_STRB,A_STRBT,A_STRH,A_STM,
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information (cg.rg[..].live_start/live_end)
    when the two instructions p and hp1 are swapped.

    p   : the instruction that currently comes first
    hp1 : the instruction that currently comes second; it must directly
          follow p

    Every register mentioned in an operand of p (plain register, base and
    index of a reference, shift register of a shifter operand, members of a
    register set) whose live range starts at p and which is also used by
    hp1 gets its live start moved to hp1. Symmetrically, every register
    mentioned in an operand of hp1 whose live range ends at hp1 and which is
    also used by p gets its live end moved to p. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the live range of reg ends at hp1 and p uses reg as well,
      the range now has to end at p; NR_NO is ignored }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { if the live range of reg starts at p and hp1 uses reg as well,
      the range now has to start at hp1; NR_NO is ignored }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { the shift register is an operand of hp1 like any other, so it
              is its live END that has to be checked in this loop }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  {
    Scheduling pass run on the block BlockStart..BlockEnd.

    Looks for three consecutive instructions

        p        (any instruction)
        hpLoad   (LDR/LDRB/LDRH/LDRSB/LDRSH)
        hpUse    (uses the register loaded by hpLoad)

    and, when p and hpLoad are independent of each other, reorders them to

        hpLoad
        p
        hpUse

    so that the load no longer sits directly in front of its consumer
    (presumably to hide the load latency -- the code itself only shows the
    reordering). Register deallocation markers around p that refer to a
    register used by p are moved together with p.

    The incoming value of p is overwritten with BlockStart; the function
    always returns true.
  }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      hpLoad,hpUse,hpBefore,hpAfter,hpMoved : tai;
      movedItems : TAsmList;
    begin
      result:=true;

      { collects p and its deallocation markers while they are out of asml }
      movedItems:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd Do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,hpLoad) and
            (hpLoad.typ=ait_instruction) and
            (taicpu(hpLoad).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction
              potentially changes a memory location ... }
            (
              ( not(taicpu(p).opcode in opcode_could_mem_write) and
                not(RegModifiedByInstruction(NR_PC,p))
              ) or
              (
                { ... unless it is a store and the load is either PC relative
                  or goes through a symbol with offset 0 ... }
                (taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
                ( (taicpu(hpLoad).oper[1]^.ref^.base=NR_PC) or
                  ( assigned(taicpu(hpLoad).oper[1]^.ref^.symboldata) and
                    (taicpu(hpLoad).oper[1]^.ref^.offset=0)
                  )
                ) or
                { ... or we can prove that the memory accesses don't overlap:
                  same base, no index, no postfix, and offsets at least one
                  operand size apart }
                ( (taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                  (taicpu(p).oper[1]^.ref^.base=taicpu(hpLoad).oper[1]^.ref^.base) and
                  (taicpu(p).oppostfix=PF_None) and
                  (taicpu(hpLoad).oppostfix=PF_None) and
                  (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                  (taicpu(hpLoad).oper[1]^.ref^.index=NR_NO) and
                  (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hpLoad).oper[1]^.ref^.offset)>=
                    max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],
                        tcgsize2size[reg_cgsize(taicpu(hpLoad).oper[0]^.reg)]))
                )
              )
            ) and
            GetNextInstruction(hpLoad,hpUse) and
            (hpUse.typ=ait_instruction) and
            { the loaded register must be used by the following instruction }
            (RegInInstruction(taicpu(hpLoad).oper[0]^.reg,hpUse)) and
            { but must not be used by the preceding one }
            not(RegInInstruction(taicpu(hpLoad).oper[0]^.reg,p)) and
            { both instructions must execute under the same condition }
            (taicpu(p).condition=taicpu(hpLoad).condition) and
            { p must not change the base register of the load }
            ( (taicpu(hpLoad).oper[1]^.ref^.base=NR_NO) or
              not(RegModifiedByInstruction(taicpu(hpLoad).oper[1]^.ref^.base,p))
            ) and
            { p must not change the index register of the load }
            ( (taicpu(hpLoad).oper[1]^.ref^.index=NR_NO) or
              not(RegModifiedByInstruction(taicpu(hpLoad).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember the neighbours of p before taking it out of the list }
              hpBefore:=tai(p.Previous);
              hpAfter:=tai(p.next);
              asml.Remove(p);

              { deallocations of registers used by p that sit in front of p
                (up to the previous instruction) travel with p }
              while assigned(hpBefore) and (hpBefore.typ<>ait_instruction) do
                begin
                  hpMoved:=hpBefore;
                  hpBefore:=tai(hpBefore.Previous);
                  if (hpMoved.typ=ait_regalloc) and
                    (tai_regalloc(hpMoved).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(hpMoved).reg,p) then
                    begin
                      asml.Remove(hpMoved);
                      movedItems.Concat(hpMoved);
                    end;
                end;

              movedItems.Concat(p);
              SwapRegLive(taicpu(p),taicpu(hpLoad));

              { the same for deallocations behind p (up to the next instruction) }
              while assigned(hpAfter) and (hpAfter.typ<>ait_instruction) do
                begin
                  hpMoved:=hpAfter;
                  hpAfter:=tai(hpAfter.next);
                  if (hpMoved.typ=ait_regalloc) and
                    (tai_regalloc(hpMoved).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(hpMoved).reg,p) then
                    begin
                      asml.Remove(hpMoved);
                      movedItems.Concat(hpMoved);
                    end;
                end;

              { re-insert in the new order: load, then p with its markers,
                both in front of the consumer }
              asml.Remove(hpLoad);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hpUse);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(hpLoad,hpUse);
              asml.InsertListBefore(hpUse,movedItems);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { nothing to reschedule: continue with the instruction found by
              GetNextInstruction above }
            p:=hpLoad
          else
            p:=tai(p.next);
        end;
      movedItems.Free;
    end;
  { Second peephole pass of the Thumb-2 optimizer: intentionally empty,
    no optimizations are implemented yet. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  { unit initialization: store the ARM optimizer and pre-register-allocation
    scheduler classes in the class references casmoptimizer and
    cpreregallocscheduler }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  end.