aoptcpu.pas 66 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  24. Type
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor of its own: uses the same constructor as TAopObj }

    { cpu specific part of the first peephole pass, called for the entry p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

    { second peephole pass }
    procedure PeepHoleOptPass2; override;

    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;

    { tries to get rid of the register move movp which follows p by letting
      p write directly into the destination register of the move;
      optimizer is the name shown in the emitted debug message }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                     var AllUsedRegs: TAllUsedRegs): Boolean;

    { gets the next tai object after Current that contains info relevant
      to the optimizer and stores it in Next; the search stops at an object
      which uses the given register or does a change in program flow.

      If there is none, it returns false and sets Next to nil }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  end;
  { instruction scheduler variant declared on top of TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { cpu specific part of the first scheduler pass, called for the entry p }
    function SchedulerPass1Cpu(var p: tai): boolean; override;

    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { optimizer derived from TCpuAsmOptimizer which replaces only the
    second peephole pass }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor of its own: uses the same constructor as TAopObj }
    procedure PeepHoleOptPass2; override;
  end;
  50. Implementation
  51. uses
  52. cutils,verbose,globals,
  53. systems,
  54. cpuinfo,
  55. cgobj,cgutils,procinfo,
  56. aasmbase,aasmdata;
  { Returns true if p is an instruction which carries no condition yet,
    is not a PLD and, in case of a BLX, takes a register as its first
    operand. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional, or a PLD }
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX qualifies only in its register form }
      if instr.opcode=A_BLX then
        result:=instr.oper[0]^.typ=top_reg
      else
        result:=true;
    end;
  { Field by field comparison of two references. Returns true only if
    offset, base, index, scalefactor, symbol, refaddr, relsymbol,
    signindex, shiftimm, addressmode and shiftmode are all equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual := false;

      { registers and their scaling }
      if (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        exit;

      { constant offset and symbolic parts }
      if (r1.offset <> r2.offset) or
         (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) or
         (r1.refaddr <> r2.refaddr) then
        exit;

      { ARM specific addressing details }
      if (r1.signindex <> r2.signindex) or
         (r1.shiftimm <> r2.shiftimm) or
         (r1.shiftmode <> r2.shiftmode) or
         (r1.addressmode <> r2.addressmode) then
        exit;

      RefsEqual := true;
    end;
  { Returns true if instr is an instruction with opcode op whose condition
    is contained in cond and whose postfix is contained in postfix.
    An empty set for cond or postfix accepts every condition/postfix. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      { a non-empty condition set must contain the condition of instr }
      if (cond <> []) and not (taicpu(instr).condition in cond) then
        exit;
      { same rule for the postfix }
      result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match if they have the same operand type
    and the same payload; only constants, registers, condition codes and
    references are compared, every other operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := false;
      if oper1.typ <> oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_const:
          result := oper1.val = oper2.val;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          result := oper1.cc = oper2.cc;
        else
          result := false;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ <> top_reg then
        result := false
      else
        result := oper.reg = reg;
    end;
  { Removes movp from asml (and frees it) if it is executed on condition EQ,
    writes the register compared by cmpp and has the same value in its
    second operand as cmpp. A comment documenting the removal is left in
    the list. The operand types are not checked here. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpInstr,
      movInstr: taicpu;
    begin
      cmpInstr := taicpu(cmpp);
      movInstr := taicpu(movp);

      if movInstr.condition <> C_EQ then
        exit;
      if cmpInstr.oper[0]^.reg <> movInstr.oper[0]^.reg then
        exit;
      if cmpInstr.oper[1]^.val <> movInstr.oper[1]^.val then
        exit;

      { the comment has to be inserted while movp is still part of the list }
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.

    reg : register to look for
    hp  : entry to inspect; nil and entries which are no instruction
          yield false

    Handled specially:
      * compare/branch style instructions never write a register
      * LDR/STR write their base register when pre/postindexed
      * STR never writes its first operand, this is the register being stored
      * UMLAL/UMULL/SMLAL/SMULL write their first two registers
      * LDM writes every register of its register set
      * LDR with postfix D (ldrd) also writes the register following oper[0]
    For all remaining instructions oper[0] is treated as the destination. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first register, so the generic oper[0]
              check below must not be applied to it }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor }
        {
          MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        { the common case: oper[0] is the destination register }
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (safe) heuristics to ensure that ref is aligned to 8 bytes.
    Only the ABIs abi_eabi, abi_armeb and abi_eabihf are accepted and the
    reference must be either
      * based on/indexed by R13 with a non-negative offset which is a
        multiple of 8, or
      * based on/indexed by R11 while R11 is the frame pointer of the
        current procedure, with a non-positive offset for which offset+4
        is a multiple of 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        result:=true
      else
        { when using NR_R11, it has always a value of <qword align>+4 }
        result:=(ref.offset<=0) and
          ((abs(ref.offset+4) mod 8)=0) and
          (current_procinfo.framepointer=NR_R11) and
          ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its
    operands: as register, as member of a register set, as shift register
    of a shifter operand or as base/index of a reference.
    nil and entries which are no instruction yield false. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstSource,
      opIdx: longint;
    begin
      result := false;
      if (not assigned(hp)) or (hp.typ <> ait_instruction) then
        exit;
      instr := taicpu(hp);

      { Usually oper[0] is skipped; for these instructions the scan
        has to start on oper[0] }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstSource := 0
      else
        firstSource := 1;

      for opIdx := firstSource to instr.ops-1 do
        begin
          case instr.oper[opIdx]^.typ of
            top_reg:
              result := (instr.oper[opIdx]^.reg = reg) or
                { STRD }
                ((opIdx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              result := (getsupreg(reg) in instr.oper[opIdx]^.regset^);
            top_shifterop:
              result := instr.oper[opIdx]^.shifterop^.rs = reg;
            top_ref:
              result :=
                (instr.oper[opIdx]^.ref^.base = reg) or
                (instr.oper[opIdx]^.ref^.index = reg);
          end;
          { bail out as soon as a reading operand was found }
          if result then
            exit;
        end;
    end;
  { Updates the used register information of AllUsedRegs with the entry
    following p and returns true if reg is still in use after p.

    reg counts as used if it is marked as used, p itself does not load a
    new value into it and one of the following holds:
      * there is no instruction after p
      * the instruction after p reads reg
      * the instruction after p does not load a new value into reg }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      Result:=false;
      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;

      { from here on p refers to the next instruction }
      if not(GetNextInstruction(p,p)) then
        Result:=true
      else
        Result:=instructionLoadsFromReg(reg,p) or
          not(regLoadedWithNewValue(reg,p));
    end;
  { Steps forward from Current with GetNextInstruction and stops at the
    first entry for which one of the following holds:
      * GetNextInstruction failed (the result is false then)
      * the entry is no instruction
      * the entry uses reg
      * the entry is a call or jump
      * the entry uses the program counter
    The entry found is returned in Next. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not(Result) then
          break;
        { only instructions can be inspected any further }
        if Next.typ<>ait_instruction then
          break;
      until RegInInstruction(reg,Next) or
            is_calljmp(taicpu(Next).opcode) or
            RegInInstruction(NR_PC,Next);
    end;
  {$ifndef DEBUG_AOPTCPU}
  { debugging disabled: the message is dropped }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { debugging enabled: s is inserted as assembler comment in front of p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Removes the move movp which copies the result register of p into
    another register by making p write into that register directly.

    p         : instruction whose oper[0] is the register copied by movp
    movp      : candidate for removal
    optimizer : name used in the debug message

    Nothing is changed unless
      * movp is a mov with two operands, no postfix and the condition of p
      * its source is oper[0] of p
      * its destination is neither pc nor lr (R14)
      * its destination is not used between p and movp
      * p is no MUL/MLA with oper[1] equal to the destination of the mov
      * a deallocation of the old register is found after movp }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      regAlloc,
      regDealloc : tai_regalloc;
      prevInstr : tai;
    begin
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) then
        exit;
      { We can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;
      if not MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) then
        exit;
      { don't mess with moves to pc or to lr }
      if (taicpu(movp).oper[0]^.reg=NR_PC) or
         (taicpu(movp).oper[0]^.reg=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1]
        are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) then
        exit;

      regDealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
      if not assigned(regDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);

      { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,prevInstr);
      asml.Remove(regDealloc);
      regAlloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(prevInstr.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but directly behind p }
        asml.InsertAfter(regDealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prevInstr);
      regAlloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(prevInstr.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          asml.InsertBefore(regAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
      asml.remove(movp);
      movp.free;
    end;
  308. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  309. var
  310. hp1,hp2: tai;
  311. i, i2: longint;
  312. TmpUsedRegs: TAllUsedRegs;
  313. tempop: tasmop;
  { Returns true if value has no bit in common with value-1,
    i.e. if at most one bit of value is set. }
  function IsPowerOf2(const value: DWord): boolean; inline;
    var
      predecessor: DWord;
    begin
      predecessor:=value - 1;
      if (value and predecessor) = 0 then
        Result:=true
      else
        Result:=false;
    end;
  318. begin
  319. result := false;
  320. case p.typ of
  321. ait_instruction:
  322. begin
  323. {
  324. change
  325. <op> reg,x,y
  326. cmp reg,#0
  327. into
  328. <op>s reg,x,y
  329. }
  330. { this optimization can applied only to the currently enabled operations because
  331. the other operations do not update all flags and FPC does not track flag usage }
  332. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,
  333. A_ORR,A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA])
  334. ) and
  335. (taicpu(p).oppostfix = PF_None) and
  336. (taicpu(p).condition = C_None) and
  337. GetNextInstruction(p, hp1) and
  338. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  339. (taicpu(hp1).oper[1]^.typ = top_const) and
  340. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  341. (taicpu(hp1).oper[1]^.val = 0) and
  342. GetNextInstruction(hp1, hp2) and
  343. (tai(hp2).typ = ait_instruction) and
  344. { be careful here, following instructions could use other flags
  345. however after a jump fpc never depends on the value of flags }
  346. (taicpu(hp2).opcode = A_B) and
  347. { All above instructions set Z and N according to the following
  348. Z := result = 0;
  349. N := result[31];
  350. EQ = Z=1; NE = Z=0;
  351. MI = N=1; PL = N=0; }
  352. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) and
  353. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  354. begin
  355. DebugMsg('Peephole OpCmp2OpS done', p);
  356. taicpu(p).oppostfix:=PF_S;
  357. { move flag allocation if possible }
  358. GetLastInstruction(hp1, hp2);
  359. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  360. if assigned(hp2) then
  361. begin
  362. asml.Remove(hp2);
  363. asml.insertbefore(hp2, p);
  364. end;
  365. asml.remove(hp1);
  366. hp1.free;
  367. end
  368. else
  369. case taicpu(p).opcode of
  370. A_STR:
  371. begin
  372. { change
  373. str reg1,ref
  374. ldr reg2,ref
  375. into
  376. str reg1,ref
  377. mov reg2,reg1
  378. }
  379. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  380. (taicpu(p).oppostfix=PF_None) and
  381. GetNextInstruction(p,hp1) and
  382. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  383. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  384. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  385. begin
  386. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  387. begin
  388. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  389. asml.remove(hp1);
  390. hp1.free;
  391. end
  392. else
  393. begin
  394. taicpu(hp1).opcode:=A_MOV;
  395. taicpu(hp1).oppostfix:=PF_None;
  396. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  397. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  398. end;
  399. result := true;
  400. end
  401. { change
  402. str reg1,ref
  403. str reg2,ref
  404. into
  405. strd reg1,ref
  406. }
  407. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  408. (taicpu(p).oppostfix=PF_None) and
  409. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  410. GetNextInstruction(p,hp1) and
  411. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  412. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  413. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  414. { str ensures that either base or index contain no register, else ldr wouldn't
  415. use an offset either
  416. }
  417. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  418. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  419. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  420. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  421. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  422. begin
  423. DebugMsg('Peephole StrStr2Strd done', p);
  424. taicpu(p).oppostfix:=PF_D;
  425. asml.remove(hp1);
  426. hp1.free;
  427. end;
  428. end;
  429. A_LDR:
  430. begin
  431. { change
  432. ldr reg1,ref
  433. ldr reg2,ref
  434. into ...
  435. }
  436. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  437. GetNextInstruction(p,hp1) and
  438. { ldrd is not allowed here }
  439. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  440. begin
  441. {
  442. ...
  443. ldr reg1,ref
  444. mov reg2,reg1
  445. }
  446. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  447. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  448. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  449. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  450. begin
  451. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  452. begin
  453. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  454. asml.remove(hp1);
  455. hp1.free;
  456. end
  457. else
  458. begin
  459. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  460. taicpu(hp1).opcode:=A_MOV;
  461. taicpu(hp1).oppostfix:=PF_None;
  462. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  463. end;
  464. result := true;
  465. end
  466. {
  467. ...
  468. ldrd reg1,ref
  469. }
  470. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  471. { ldrd does not allow any postfixes ... }
  472. (taicpu(p).oppostfix=PF_None) and
  473. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  474. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  475. { ldr ensures that either base or index contain no register, else ldr wouldn't
  476. use an offset either
  477. }
  478. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  479. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  480. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  481. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  482. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  483. begin
  484. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  485. taicpu(p).oppostfix:=PF_D;
  486. asml.remove(hp1);
  487. hp1.free;
  488. end;
  489. end;
  490. { Remove superfluous mov after ldr
  491. changes
  492. ldr reg1, ref
  493. mov reg2, reg1
  494. to
  495. ldr reg2, ref
  496. conditions are:
  497. * no ldrd usage
  498. * reg1 must be released after mov
  499. * mov can not contain shifterops
  500. * ldr+mov have the same conditions
  501. * mov does not set flags
  502. }
  503. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  504. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  505. end;
  506. A_MOV:
  507. begin
  508. { fold
  509. mov reg1,reg0, shift imm1
  510. mov reg1,reg1, shift imm2
  511. }
  512. if (taicpu(p).ops=3) and
  513. (taicpu(p).oper[2]^.typ = top_shifterop) and
  514. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  515. getnextinstruction(p,hp1) and
  516. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  517. (taicpu(hp1).ops=3) and
  518. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  519. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  520. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  521. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  522. begin
  523. { fold
  524. mov reg1,reg0, lsl 16
  525. mov reg1,reg1, lsr 16
  526. strh reg1, ...
  527. dealloc reg1
  528. to
  529. strh reg1, ...
  530. dealloc reg1
  531. }
  532. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  533. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  534. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  535. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  536. getnextinstruction(hp1,hp2) and
  537. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  538. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  539. begin
  540. CopyUsedRegs(TmpUsedRegs);
  541. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  542. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  543. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  544. begin
  545. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  546. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  547. asml.remove(p);
  548. asml.remove(hp1);
  549. p.free;
  550. hp1.free;
  551. p:=hp2;
  552. end;
  553. ReleaseUsedRegs(TmpUsedRegs);
  554. end
  555. { fold
  556. mov reg1,reg0, shift imm1
  557. mov reg1,reg1, shift imm2
  558. to
  559. mov reg1,reg0, shift imm1+imm2
  560. }
  561. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  562. { asr makes no use after a lsr, the asr can be foled into the lsr }
  563. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  564. begin
  565. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  566. { avoid overflows }
  567. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  568. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  569. SM_ROR:
  570. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  571. SM_ASR:
  572. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  573. SM_LSR,
  574. SM_LSL:
  575. begin
  576. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  577. InsertLLItem(p.previous, p.next, hp1);
  578. p.free;
  579. p:=hp1;
  580. end;
  581. else
  582. internalerror(2008072803);
  583. end;
  584. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  585. asml.remove(hp1);
  586. hp1.free;
  587. result := true;
  588. end
  589. { fold
  590. mov reg1,reg0, shift imm1
  591. mov reg1,reg1, shift imm2
  592. mov reg1,reg1, shift imm3 ...
  593. }
  594. else if getnextinstruction(hp1,hp2) and
  595. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  596. (taicpu(hp2).ops=3) and
  597. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  598. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  599. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  600. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  601. begin
  602. { mov reg1,reg0, lsl imm1
  603. mov reg1,reg1, lsr/asr imm2
  604. mov reg1,reg1, lsl imm3 ...
  605. if imm3<=imm1 and imm2>=imm3
  606. to
  607. mov reg1,reg0, lsl imm1
  608. mov reg1,reg1, lsr/asr imm2-imm3
  609. }
  610. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  611. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  612. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  613. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  614. begin
  615. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  616. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  617. asml.remove(hp2);
  618. hp2.free;
  619. result := true;
  620. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  621. begin
  622. asml.remove(hp1);
  623. hp1.free;
  624. end;
  625. end
  626. { mov reg1,reg0, lsr/asr imm1
  627. mov reg1,reg1, lsl imm2
  628. mov reg1,reg1, lsr/asr imm3 ...
  629. if imm3>=imm1 and imm2>=imm1
  630. to
  631. mov reg1,reg0, lsl imm2-imm1
  632. mov reg1,reg1, lsr/asr imm3 ...
  633. }
  634. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  635. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  636. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  637. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  638. begin
  639. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  640. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  641. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  642. asml.remove(p);
  643. p.free;
  644. p:=hp2;
  645. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  646. begin
  647. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  648. asml.remove(hp1);
  649. hp1.free;
  650. p:=hp2;
  651. end;
  652. result := true;
  653. end;
  654. end;
  655. end;
  656. { Change the common
  657. mov r0, r0, lsr #24
  658. and r0, r0, #255
  659. and remove the superfluous and
  660. This could be extended to handle more cases.
  661. }
  662. if (taicpu(p).ops=3) and
  663. (taicpu(p).oper[2]^.typ = top_shifterop) and
  664. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  665. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  666. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  667. getnextinstruction(p,hp1) and
  668. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  669. (taicpu(hp1).ops=3) and
  670. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  671. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  672. (taicpu(hp1).oper[2]^.typ = top_const) and
  673. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  674. For LSR #25 and an AndConst of 255 that whould go like this:
  675. 255 and ((2 shl (32-25))-1)
  676. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  677. LSR #25 and AndConst of 254:
  678. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  679. }
  680. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  681. begin
  682. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  683. asml.remove(hp1);
  684. hp1.free;
  685. end;
  686. {
  687. optimize
  688. mov rX, yyyy
  689. ....
  690. }
  691. if (taicpu(p).ops = 2) and
  692. GetNextInstruction(p,hp1) and
  693. (tai(hp1).typ = ait_instruction) then
  694. begin
  695. {
  696. This changes the very common
  697. mov r0, #0
  698. str r0, [...]
  699. mov r0, #0
  700. str r0, [...]
  701. and removes all superfluous mov instructions
  702. }
  703. if (taicpu(p).oper[1]^.typ = top_const) and
  704. (taicpu(hp1).opcode=A_STR) then
  705. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  706. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  707. GetNextInstruction(hp1, hp2) and
  708. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  709. (taicpu(hp2).ops = 2) and
  710. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  711. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  712. begin
  713. DebugMsg('Peephole MovStrMov done', hp2);
  714. GetNextInstruction(hp2,hp1);
  715. asml.remove(hp2);
  716. hp2.free;
  717. if not assigned(hp1) then break;
  718. end
  719. {
  720. This removes the first mov from
  721. mov rX,...
  722. mov rX,...
  723. }
  724. else if taicpu(hp1).opcode=A_MOV then
  725. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  726. (taicpu(hp1).ops = 2) and
  727. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  728. { don't remove the first mov if the second is a mov rX,rX }
  729. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  730. begin
  731. DebugMsg('Peephole MovMov done', p);
  732. asml.remove(p);
  733. p.free;
  734. p:=hp1;
  735. GetNextInstruction(hp1,hp1);
  736. if not assigned(hp1) then
  737. break;
  738. end;
  739. end;
  740. {
  741. change
  742. mov r1, r0
  743. add r1, r1, #1
  744. to
  745. add r1, r0, #1
  746. Todo: Make it work for mov+cmp too
  747. CAUTION! If this one is successful p might not be a mov instruction anymore!
  748. }
  749. if (taicpu(p).ops = 2) and
  750. (taicpu(p).oper[1]^.typ = top_reg) and
  751. (taicpu(p).oppostfix = PF_NONE) and
  752. GetNextInstruction(p, hp1) and
  753. (tai(hp1).typ = ait_instruction) and
  754. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  755. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  756. {MOV and MVN might only have 2 ops}
  757. (taicpu(hp1).ops = 3) and
  758. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  759. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  760. (taicpu(hp1).oper[1]^.typ = top_reg) and
  761. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  762. begin
  763. { When we get here we still don't know if the registers match}
  764. for I:=1 to 2 do
  765. {
  766. If the first loop was successful p will be replaced with hp1.
  767. The checks will still be ok, because all required information
  768. will also be in hp1 then.
  769. }
  770. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  771. begin
  772. DebugMsg('Peephole RedundantMovProcess done', hp1);
  773. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  774. if p<>hp1 then
  775. begin
  776. asml.remove(p);
  777. p.free;
  778. p:=hp1;
  779. end;
  780. end;
  781. end;
  782. { This folds shifterops into following instructions
  783. mov r0, r1, lsl #8
  784. add r2, r3, r0
  785. to
  786. add r2, r3, r1, lsl #8
  787. CAUTION! If this one is successful p might not be a mov instruction anymore!
  788. }
  789. if (taicpu(p).opcode = A_MOV) and
  790. (taicpu(p).ops = 3) and
  791. (taicpu(p).oper[1]^.typ = top_reg) and
  792. (taicpu(p).oper[2]^.typ = top_shifterop) and
  793. (taicpu(p).oppostfix = PF_NONE) and
  794. GetNextInstruction(p, hp1) and
  795. (tai(hp1).typ = ait_instruction) and
  796. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  797. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  798. (taicpu(hp1).oppostfix = PF_NONE) and
  799. (taicpu(hp1).condition = taicpu(p).condition) and
  800. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  801. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  802. A_CMP, A_CMN]) and
  803. (
  804. {Only ONE of the two src operands is allowed to match}
  805. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  806. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  807. ) then
  808. begin
  809. CopyUsedRegs(TmpUsedRegs);
  810. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  811. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  812. I2:=0
  813. else
  814. I2:=1;
  815. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  816. for I:=I2 to taicpu(hp1).ops-1 do
  817. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  818. begin
  819. { If the parameter matched on the second op from the RIGHT
  820. we have to switch the parameters, this will not happen for CMP
  821. were we're only evaluating the most right parameter
  822. }
  823. if I <> taicpu(hp1).ops-1 then
  824. begin
  825. {The SUB operators need to be changed when we swap parameters}
  826. case taicpu(hp1).opcode of
  827. A_SUB: tempop:=A_RSB;
  828. A_SBC: tempop:=A_RSC;
  829. A_RSB: tempop:=A_SUB;
  830. A_RSC: tempop:=A_SBC;
  831. else tempop:=taicpu(hp1).opcode;
  832. end;
  833. if taicpu(hp1).ops = 3 then
  834. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  835. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  836. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  837. else
  838. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  839. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  840. taicpu(p).oper[2]^.shifterop^);
  841. end
  842. else
  843. if taicpu(hp1).ops = 3 then
  844. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  845. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  846. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  847. else
  848. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  849. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  850. taicpu(p).oper[2]^.shifterop^);
  851. asml.insertbefore(hp2, p);
  852. asml.remove(p);
  853. asml.remove(hp1);
  854. p.free;
  855. hp1.free;
  856. p:=hp2;
  857. GetNextInstruction(p,hp1);
  858. DebugMsg('Peephole FoldShiftProcess done', p);
  859. break;
  860. end;
  861. ReleaseUsedRegs(TmpUsedRegs);
  862. end;
  863. {
  864. Often we see shifts and then a superfluous mov to another register
  865. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  866. }
  867. if (taicpu(p).opcode = A_MOV) and
  868. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  869. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  870. end;
  871. A_ADD,
  872. A_ADC,
  873. A_RSB,
  874. A_RSC,
  875. A_SUB,
  876. A_SBC,
  877. A_AND,
  878. A_BIC,
  879. A_EOR,
  880. A_ORR,
  881. A_MLA,
  882. A_MUL:
  883. begin
  884. {
  885. change
  886. and reg2,reg1,const1
  887. and reg2,reg2,const2
  888. to
  889. and reg2,reg1,(const1 and const2)
  890. }
  891. if (taicpu(p).opcode = A_AND) and
  892. (taicpu(p).oper[1]^.typ = top_reg) and
  893. (taicpu(p).oper[2]^.typ = top_const) and
  894. GetNextInstruction(p, hp1) and
  895. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  896. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  897. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  898. (taicpu(hp1).oper[2]^.typ = top_const) then
  899. begin
  900. DebugMsg('Peephole AndAnd2And done', p);
  901. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  902. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  903. asml.remove(hp1);
  904. hp1.free;
  905. end;
  906. {
  907. change
  908. add/sub reg2,reg1,const1
  909. str/ldr reg3,[reg2,const2]
  910. dealloc reg2
  911. to
  912. str/ldr reg3,[reg1,const2+/-const1]
  913. }
  914. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  915. (taicpu(p).oper[1]^.typ = top_reg) and
  916. (taicpu(p).oper[2]^.typ = top_const) then
  917. begin
  918. hp1:=p;
  919. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  920. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  921. (MatchInstruction(hp1, A_LDR, [C_None], []) or
  922. MatchInstruction(hp1, A_STR, [C_None], [])) and
  923. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  924. { don't optimize if the register is stored/overwritten }
  925. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  926. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  927. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  928. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  929. ldr postfix }
  930. (((taicpu(p).opcode=A_ADD) and
  931. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  932. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<4096)) or
  933. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<256)
  934. )
  935. ) or
  936. ((taicpu(p).opcode=A_SUB) and
  937. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  938. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<4096)) or
  939. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<256)
  940. )
  941. )
  942. ) do
  943. begin
  944. { neither reg1 nor reg2 might be changed inbetween }
  945. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  946. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  947. break;
  948. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  949. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  950. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  951. begin
  952. { remember last instruction }
  953. hp2:=hp1;
  954. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  955. hp1:=p;
  956. { fix all ldr/str }
  957. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  958. begin
  959. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  960. if taicpu(p).opcode=A_ADD then
  961. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  962. else
  963. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  964. if hp1=hp2 then
  965. break;
  966. end;
  967. GetNextInstruction(p,hp1);
  968. asml.remove(p);
  969. p.free;
  970. p:=hp1;
  971. break;
  972. end;
  973. end;
  974. end;
  975. {
  976. change
  977. add reg1, ...
  978. mov reg2, reg1
  979. to
  980. add reg2, ...
  981. }
  982. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  983. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  984. end;
  985. A_CMP:
  986. begin
  987. {
  988. change
  989. cmp reg,const1
  990. moveq reg,const1
  991. movne reg,const2
  992. to
  993. cmp reg,const1
  994. movne reg,const2
  995. }
  996. if (taicpu(p).oper[1]^.typ = top_const) and
  997. GetNextInstruction(p, hp1) and
  998. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  999. (taicpu(hp1).oper[1]^.typ = top_const) and
  1000. GetNextInstruction(hp1, hp2) and
  1001. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1002. (taicpu(hp1).oper[1]^.typ = top_const) then
  1003. begin
  1004. RemoveRedundantMove(p, hp1, asml);
  1005. RemoveRedundantMove(p, hp2, asml);
  1006. end;
  1007. end;
  1008. end;
  1009. end;
  1010. end;
  1011. end;
  { Tells whether p has to be the final instruction of a conditionally
    executed sequence.

    True is returned only for instructions that
      - have one of the opcodes listed below (calls, compares/tests, SWI), or
      - have a register as first operand and that register is PC, or
      - carry the S postfix.
    Anything that is not an instruction yields False. }
  function MustBeLast(p : tai) : boolean;
    begin
      Result:=false;
      { non-instructions never qualify }
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (taicpu(p).ops>=1) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        (taicpu(p).oper[0]^.reg=NR_PC) then
        { first operand is the program counter }
        Result:=true
      else
        { an S postfix means the instruction updates the flags }
        Result:=(taicpu(p).oppostfix=PF_S);
    end;
  { Second peephole pass over the current block (BlockStart..BlockEnd).

    For every conditional branch "Bcc xxx" it tries to get rid of the branch
    by making the instructions that the branch jumps over conditional
    (using the inverse condition).  Two shapes are recognised:

      1)  Bcc xxx                      2)  Bcc xxx
          <up to 4 instructions>           <up to 3 instructions 1>
        xxx:                               B   yyy
                                         xxx:
                                           <several instructions 2>
                                         yyy:

    In shape 2 block 1 receives the inverse condition and block 2 the
    original one; both branches are removed.

    CanBeCond, FindLabel, GetNextInstruction and inverse_cond are defined
    outside of this procedure; the comments below only describe how they are
    used here.

    NOTE(review): in shape 2 the instructions of block 2 are not checked with
    MustBeLast - confirm that a flag-modifying instruction in the middle of
    block 2 cannot occur or is handled elsewhere. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                            Bxx   xxx
                            <several instructions>
                          xxx:
                        }
                        { l counts the instructions that were skipped,
                          WasLast records that the scan was stopped by a
                          MustBeLast instruction }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            { shape 1: the scan ended at the branch target }
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 remembers the Bcc, p continues at
                                      the first instruction behind it }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points to the next item to
                                      look at, so skip the p.next below }
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                      Bcc   xxx
                                      <several instructions 1>
                                      B   yyy
                                    xxx:
                                      <several instructions 2>
                                    yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 remembers the Bcc, p continues
                                            at the first instruction of block 1 }
                                          hp3:=p;
                                          p:=hp1;
                                          { block 1 gets the inverse condition }
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          { block 2 gets the original condition }
                                          repeat
                                            { the first item is reached without a
                                              CanBeCond test (block 2 may be empty),
                                              so make sure it really is an
                                              instruction before touching it }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          { p already points to the next item
                                            to look at }
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is used by p1.  A BL instruction is always reported
    as using the register; everything else is decided by the inherited
    implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ<>ait_instruction) or (taicpu(p1).opcode<>A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        { BL: treated as touching every register }
        Result:=true;
    end;
  const
    { Opcodes which write to memory or might do so; used by the scheduler to
      decide whether a load may be moved in front of an instruction. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branches and calls }
      A_B,A_BL,A_BLX,A_BKPT,A_BX,
      { stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,
      A_STF,A_SFM,A_STM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information (cg.rg[...].live_start/live_end)
    when the two instructions p and hp1 are swapped.

    p   : the instruction which currently comes first
    hp1 : the instruction which currently follows p directly and which will
          be executed before p after the swap

    Every register mentioned in an operand of p (plain register, base/index
    of a reference, shifter register, members of a register set) is passed
    to CheckLiveStart, every register mentioned in an operand of hp1 to
    CheckLiveEnd. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { If the live range of reg ends at hp1 and p uses reg as well, the range
      has to end at p now. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { If the live range of reg starts at p and hp1 uses reg as well, the
      range has to start at hp1 now. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop handles the live END of hp1's registers, exactly like
              the other operand kinds; calling CheckLiveStart here would
              leave a live range that ends at hp1 unadjusted }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduler pass run over the current block (BlockStart..BlockEnd).

    It looks for three consecutive instructions  p, hp1, hp2  where hp1 is a
    load (LDR/LDRB/LDRH/LDRSB/LDRSH) whose destination register is used by
    hp2 but not by p.  If p and hp1 can be exchanged, hp1 is moved in front
    of p so that p sits between the load and the first use of the loaded
    value.  Register deallocation markers which sit next to p and name a
    register used by p are moved together with p.

    The incoming value of p is ignored (p is reset to BlockStart); the
    function always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      hp1,hp2,scanprev,scannext,dealloctai : tai;
      movedlist : TAsmList;
    begin
      result:=true;
      { collects p and its deallocation markers while they are unlinked }
      movedlist:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,hp1) and
            (hp1.typ=ait_instruction) and
            (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction changes potentially a memory location }
            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
                 (taicpu(hp1).oper[1]^.ref^.offset=0)
                )
               ) or
               { try to prove that the memory accesses don't overlap }
               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                (taicpu(p).oppostfix=PF_None) and
                (taicpu(hp1).oppostfix=PF_None) and
                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
                { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
               )
              )
            ) and
            GetNextInstruction(hp1,hp2) and
            (hp2.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(hp1).condition) and
            { first instruction might not change the register used as base }
            ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember the neighbours of p before it is unlinked }
              scanprev:=tai(p.Previous);
              scannext:=tai(p.next);
              asml.Remove(p);

              { deallocation markers of registers used by p travel with p;
                first the ones located in front of p ... }
              while assigned(scanprev) and (scanprev.typ<>ait_instruction) do
                begin
                  dealloctai:=scanprev;
                  scanprev:=tai(scanprev.Previous);
                  if (dealloctai.typ=ait_regalloc) and
                    (tai_regalloc(dealloctai).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(dealloctai).reg,p) then
                    begin
                      asml.Remove(dealloctai);
                      movedlist.Concat(dealloctai);
                    end;
                end;

              movedlist.Concat(p);
              SwapRegLive(taicpu(p),taicpu(hp1));

              { ... then the ones located behind p }
              while assigned(scannext) and (scannext.typ<>ait_instruction) do
                begin
                  dealloctai:=scannext;
                  scannext:=tai(scannext.next);
                  if (dealloctai.typ=ait_regalloc) and
                    (tai_regalloc(dealloctai).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(dealloctai).reg,p) then
                    begin
                      asml.Remove(dealloctai);
                      movedlist.Concat(dealloctai);
                    end;
                end;

              { re-insert in swapped order: hp1 first, then p with its
                markers, both directly in front of hp2 }
              asml.Remove(hp1);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(hp1,hp2);
              asml.InsertListBefore(hp2,movedlist);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { nothing to reschedule: carry on with the instruction found by
              GetNextInstruction }
            p:=hp1
          else
            p:=tai(p.next);
        end;
      movedlist.Free;
    end;
  { Second peephole pass of the Thumb-2 optimizer: intentionally a no-op for
    the time being. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  { unit initialization: publish the ARM implementations through the
    class-reference variables casmoptimizer and cpreregallocscheduler }
  begin
    { peephole optimizer class }
    casmoptimizer:=TCpuAsmOptimizer;
    { pre register allocation scheduler class }
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  end.