aoptcpu.pas 146 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  24. Type
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here: the inherited one is used }

    { hooks overridden from the generic assembler optimizer }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;

    { try to fold the mov (resp. vmov) "movp", which copies the register
      written by p, into p itself; "optimizer" only names the calling
      peephole in the debug message }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
    function RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string): boolean;

    { walks forward from Current and stops at the first tai that uses reg,
      is a call/jump, modifies the PC or is not an instruction at all;
      Next receives the tai the walk stopped at and the result tells
      whether such a tai could be fetched.
      Without optimization level 3 the walk stops at the directly
      following instruction. }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): boolean;

    { walks forward from Current looking for an LDR/STR with a memory
      operand equal to ref; returns true only if one is found before the
      walk has to be given up }
    function GetNextInstructionUsingRef(Current: tai; out Next: tai; const ref: TReference; StopOnStore: boolean = true): boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
  protected
    { add/sub + ldr/str  ->  ldr/str with pre-indexed addressing }
    function LookForPreindexedPattern(p: taicpu): boolean;
    { ldr/str + add/sub  ->  ldr/str with post-indexed addressing }
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { instruction scheduler derived from TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific scheduling hook overridden from TAsmScheduler }
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    { helper working on the instruction pair p/hp1; implemented further
      down in this unit }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { optimizer variant that replaces some passes of TCpuAsmOptimizer }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here: the inherited one is used }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  end;
  57. function MustBeLast(p : tai) : boolean;
  58. Implementation
  59. uses
  60. cutils,verbose,globtype,globals,
  61. systems,
  62. cpuinfo,
  63. cgobj,procinfo,
  64. aasmbase,aasmdata;
  65. { Range check must be disabled explicitly as conversions between signed and unsigned
  66. 32-bit values are done without explicit typecasts }
  67. {$R-}
  { Returns true if p is an (still unconditional) instruction to which a
    condition code may be attached. Never true when generating Thumb code. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      if GenerateThumbCode or (p.typ<>ait_instruction) then
        exit;
      instr:=taicpu(p);
      { already conditional }
      if instr.condition<>C_None then
        exit;
      { IT..ITTTT can never be made conditional }
      if (instr.opcode>=A_IT) and (instr.opcode<=A_ITTTT) then
        exit;
      case instr.opcode of
        A_CBZ,
        A_CBNZ,
        A_PLD:
          exit;
        { BL may need to be converted into BLX by the linker -- could possibly
          be allowed in case it's to a local symbol of which we know that it
          uses the same instruction set as the current one.
          Calls through a register are fine. }
        A_BL,
        A_BLX:
          Result:=instr.oper[0]^.typ=top_reg;
        else
          Result:=true;
      end;
    end;
  { Field-wise comparison of two references. References carrying any
    volatility flag are never reported as equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { address registers and constant part }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { ARM specific addressing attributes }
      Result:=
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.shiftmode=r2.shiftmode) and
        (r1.addressmode=r2.addressmode);
    end;
  { Checks whether instr is an instruction whose opcode, condition and
    postfix are members of the given sets; an empty set matches anything.
    Opcodes with an ordinal of 256 or more never match a non-empty op set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly the opcode op whose
    condition and postfix are members of the given sets; an empty set
    matches anything. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. Only constants, registers, condition codes and
    references can match; every other operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
        else
          ;
      end;
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Removes movp from asml (leaving a comment in its place) if it is an
    EQ-conditional instruction whose operand 0 register and operand 1 value
    are the same as those of cmpp. Returns true if movp was removed and
    freed.
    Both cmpp and movp are accessed as taicpu without further checks. }
  function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
    var
      cmpinstr,
      movinstr : taicpu;
    begin
      Result:=false;
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
      Result:=true;
    end;
  { (safe) heuristics to decide whether ref is aligned to 8 bytes.
    Only ever true for the eabi, armeb and eabihf ABIs. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { r13 relative with a non-negative offset that is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;
      { r11 relative with a non-positive offset:
        when using NR_R11, it has always a value of <qword align>+4 }
      Result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Tells whether aoffset is accepted as constant offset of a load/store
    with postfix pf.

    Thumb-2:  -256 < aoffset < 4096
    otherwise:
      postfix none or B:   -4096 < aoffset < 4096
      any other postfix:    -256 < aoffset < 256

    The limits are tested with explicit comparisons instead of abs():
    range and overflow checks are off in this unit, so abs(low(longint))
    would stay negative and the most negative offset would wrongly be
    reported as valid. }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if GenerateThumb2Code then
        result:=(aoffset<4096) and (aoffset>-256)
      else if pf in [PF_None,PF_B] then
        result:=(aoffset>-4096) and (aoffset<4096)
      else
        result:=(aoffset>-256) and (aoffset<256);
    end;
  { Returns true if the instruction hp reads reg: as a plain register
    operand, as member of a register set, as shift register of a shifter
    operand or as base/index of a memory reference.

    For most instructions operand 0 is the destination and is therefore
    skipped; for the instructions listed below operand 0 is read as well,
    so the scan starts there.

    Returns false if hp is nil or not an instruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      Result:=false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);
      {For these instructions we have to start on oper[0]}
      if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                       A_CMP, A_CMN, A_TST, A_TEQ,
                       A_B, A_BL, A_BX, A_BLX,
                       A_SMLAL, A_UMLAL]) or
         { VSTR stores and VCMP compares its first operand, neither writes
           it (see RegLoadedWithNewValue), so it is a source as well.
           They are compared one by one instead of being added to the set
           above because opcode sets in this unit only hold opcodes with
           an ordinal below 256 (see MatchInstruction) - presumably the
           VFP opcodes lie beyond that limit }
         (p.opcode=A_VSTR) or
         (p.opcode=A_VCMP) then
        i:=0
      else
        i:=1;
      while (i<p.ops) do
        begin
          case p.oper[i]^.typ of
            top_reg:
              Result:=(p.oper[i]^.reg = reg) or
                { STRD: the register following oper[0] is stored too }
                ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              Result:=(getsupreg(reg) in p.oper[i]^.regset^);
            top_shifterop:
              Result:=p.oper[i]^.shifterop^.rs = reg;
            top_ref:
              Result:=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
            else
              ;
          end;
          {Bailout if we found something}
          if Result then
            exit;
          Inc(i);
        end;
    end;
  { Returns true if the instruction hp writes a new value into reg.
    Returns false if hp is nil or not an instruction. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      Result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      case instr.opcode of
        { These instructions do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
        A_VCMP,
        { STR/VSTR do not load into their first register.
          Post-/preindexed addressing is deliberately not taken into
          account for them (nor for LDR below): it only modifies the base
          register, it does not load a new value into it }
        A_STR,
        A_VSTR:
          exit;
        { These four are writing into the first 2 registers, UMLAL and SMLAL
          will also read from them; oper[0] is covered by the generic check
          at the end }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          Result:=
            (instr.oper[1]^.typ=top_reg) and
            (instr.oper[1]^.reg=reg);
        { MCR/MRC (load to oper[2] from a coprocessor) is currently not
          supported in FPC, so there is no case for it }
        { Loads to all registers in the register set }
        A_LDM, A_VLDM:
          Result:=getsupreg(reg) in instr.oper[1]^.regset^;
        A_POP:
          Result:=
            (getsupreg(reg) in instr.oper[0]^.regset^) or
            (reg=NR_STACK_POINTER_REG);
        else
          ;
      end;
      if Result then
        exit;
      { generic case: look at the first operand }
      case instr.oper[0]^.typ of
        top_reg:
          Result:=
            (instr.oper[0]^.reg=reg) or
            { LDRD also loads the register following oper[0] }
            ((instr.opcode=A_LDR) and
             (instr.oppostfix=PF_D) and
             (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          Result:=
            (instr.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (instr.oper[0]^.ref^.base=reg);
        else
          ;
      end;
    end;
  { Walks forward from Current. The walk ends at the first tai for which
    one of the following holds (checked in this order):
      - GetNextInstruction could not deliver anything (result is false)
      - optimization level 3 is not enabled
      - the tai is not an instruction
      - the instruction uses reg
      - the instruction is a call or jump
      - the instruction modifies the PC
    Next receives the tai the walk ended at; the result is the result of
    the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if not(cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegModifiedByInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Walks forward from Current looking for an LDR/STR (or LDRD/STRD) whose
    memory operand equals ref. Returns true with Next pointing to that
    instruction if one is found.
    The search is given up (result false) as soon as no further instruction
    can be fetched, a non-instruction is met, optimization level 3 is not
    enabled, a call/jump is met, a STR/STM is met while StopOnStore is set,
    or the PC gets modified. }
  function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
    Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;

    { true if instr is an LDR/STR with 2 operands, or an LDRD/STRD with 3
      operands, whose memory operand equals ref }
    function AccessesRef(instr: taicpu): boolean;
      var
        refidx: longint;
      begin
        Result:=false;
        if not(instr.opcode in [A_LDR, A_STR]) then
          exit;
        case instr.ops of
          2:
            refidx:=1;
          { LDRD/STRD }
          3:
            refidx:=2;
          else
            exit;
        end;
        Result:=
          (instr.oper[refidx]^.typ=top_ref) and
          RefsEqual(instr.oper[refidx]^.ref^,ref);
      end;

    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          {We've found an instruction LDR or STR with the same reference}
          if AccessesRef(taicpu(Next)) then
            exit;
          if not(cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM]) then
            break;
          if RegModifiedByInstruction(NR_PC,Next) then
            break;
        end;
      Result:=false;
    end;
  { Inserts s as comment in front of p into the assembler list when
    DEBUG_AOPTCPU is defined; otherwise it is an empty inline procedure. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Folds
      <op>  reg1, ...
      mov   reg2, reg1
    into
      <op>  reg2, ...
    provided reg1 is deallocated after the mov.

    p         : the instruction writing reg1 (accessed as taicpu)
    movp      : the candidate mov; it is removed and freed on success
    optimizer : name of the calling peephole, only used for the debug message
    Result    : true if the mov has been removed

    The register allocation information is adjusted: the allocation of
    reg1 is dropped (or its deallocation moved directly behind p) and the
    allocation of reg2 is moved in front of p. }
  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      Result:=false;
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         { We can't optimize if there is a shiftop }
         (taicpu(movp).ops=2) and
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
         (taicpu(p).opcode<>A_CBZ) and
         (taicpu(p).opcode<>A_CBNZ) and
         { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
           (current_settings.cputype < cpu_armv6)
         ) and
         { Take care to only do this for instructions which REALLY load to the first register.
           Otherwise
             str reg0, [reg1]
             mov reg2, reg0
           will be optimized to
             str reg2, [reg1]
         }
         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              result:=true;

              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);

              { Remove preindexing and postindexing for LDR in some cases.
                For example:
                  ldr reg2,[reg1, xxx]!
                  mov reg1,reg2
                must be translated to:
                  ldr reg1,[reg1, xxx]
                Preindexing must be removed there, since the same register is used as the base and as the target.
                Such case is not allowed for ARM CPU and produces crash.

                A postindexed load
                  ldr reg2,[reg1], xxx
                  mov reg1,reg2
                reads from [reg1] and applies xxx only afterwards, so it must become
                  ldr reg1,[reg1]
                i.e. the offset/index has to be dropped together with the
                writeback, otherwise the load would read from reg1+xxx. }
              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
                 and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base) then
                begin
                  if taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED then
                    begin
                      taicpu(p).oper[1]^.ref^.offset:=0;
                      taicpu(p).oper[1]^.ref^.index:=NR_NO;
                    end;
                  taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
                end;

              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  { Folds
      <vop>  reg1, ...
      vmov   reg2, reg1
    into
      <vop>  reg2, ...
    provided reg1 is deallocated after the vmov.

    p         : the instruction writing reg1 (accessed as taicpu)
    movp      : the candidate vmov; it is removed and freed on success
    optimizer : name of the calling peephole, only used for the debug message
    Result    : true if the vmov has been removed }
  function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;

    { true if movp is a vmov with the condition of p whose postfix fits
      the result written by p }
    function VMovFitsProducer: boolean;
      begin
        { same postfix as p; the target must be an mm register unless p is a vldr }
        Result:=
          MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
          ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR));
        if Result then
          exit;
        { p has one of the F64xxx postfixes or an R_SUBFD target, movp is a vmov.f64 }
        Result:=
          ((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or
           (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and
          MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64]);
        if Result then
          exit;
        { p has one of the F32xxx postfixes or an R_SUBFS target, movp is a vmov.f32 }
        Result:=
          ((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or
           (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and
          MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]);
      end;

    var
      regalloc,
      regdealloc : tai_regalloc;
      prev : tai;
    begin
      Result:=false;
      if not VMovFitsProducer then
        exit;
      if taicpu(movp).ops<>2 then
        exit;
      { the vmov must copy the register written by p }
      if not MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp) then
        exit;
      { Take care to only do this for instructions which REALLY load to the first register.
        Otherwise
          vstr reg0, [reg1]
          vmov reg2, reg0
        will be optimized to
          vstr reg2, [reg1]
      }
      if not regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        exit;
      regdealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
      if not assigned(regdealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp);
      Result:=true;

      { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regdealloc);
      regalloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          regalloc.free;
          regdealloc.free;
        end
      else
        asml.InsertAfter(regdealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prev);
      regalloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(prev.Next));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          asml.InsertBefore(regalloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
        end;

      { change
          vldr reg0,[reg1]
          vmov reg2,reg0
        into
          ldr reg2,[reg1]
        if reg2 is an int register
      }
      if (taicpu(p).opcode=A_VLDR) and
         (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
        taicpu(p).opcode:=A_LDR;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
      asml.remove(movp);
      movp.free;
    end;
  {
    optimize
      add/sub reg1,reg1,regY/const
      ...
      ldr/str regX,[reg1]
    into
      ldr/str regX,[reg1, regY/const]!

    p is the add/sub. On success only the ldr/str is rewritten and true is
    returned; p itself is left in the list by this function.
  }
  function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
    var
      memop: tai;
    begin
      Result:=false;
      if not GenerateARMCode then
        exit;
      { add/sub reg1,reg1,<x> }
      if p.ops<>3 then
        exit;
      if not MatchOperand(p.oper[0]^, p.oper[1]^.reg) then
        exit;
      { next user of reg1, reg1 must stay untouched in between }
      if not GetNextInstructionUsingReg(p, memop, p.oper[0]^.reg) then
        exit;
      if RegModifiedBetween(p.oper[0]^.reg, p, memop) then
        exit;
      { it has to be an unconditional ldr/str regX,[reg1] with regX<>reg1 }
      if not MatchInstruction(memop, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) then
        exit;
      if taicpu(memop).oper[1]^.ref^.addressmode<>AM_OFFSET then
        exit;
      if taicpu(memop).oper[1]^.ref^.base<>p.oper[0]^.reg then
        exit;
      if taicpu(memop).oper[0]^.reg=p.oper[0]^.reg then
        exit;
      if taicpu(memop).oper[1]^.ref^.offset<>0 then
        exit;
      if taicpu(memop).oper[1]^.ref^.index<>NR_NO then
        exit;
      { the added/subtracted value must be usable as index resp. offset }
      case p.oper[2]^.typ of
        top_reg:
          if RegModifiedBetween(p.oper[2]^.reg, p, memop) then
            exit;
        top_const:
          if not(
               (abs(p.oper[2]^.val) < 256) or
               ((abs(p.oper[2]^.val) < 4096) and
                (taicpu(memop).oppostfix in [PF_None,PF_B]))
             ) then
            exit;
        else
          exit;
      end;

      taicpu(memop).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
      if p.oper[2]^.typ=top_reg then
        begin
          taicpu(memop).oper[1]^.ref^.index:=p.oper[2]^.reg;
          if p.opcode=A_ADD then
            taicpu(memop).oper[1]^.ref^.signindex:=1
          else
            taicpu(memop).oper[1]^.ref^.signindex:=-1;
        end
      else if p.opcode=A_ADD then
        taicpu(memop).oper[1]^.ref^.offset:=p.oper[2]^.val
      else
        taicpu(memop).oper[1]^.ref^.offset:=-p.oper[2]^.val;
      Result:=true;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p is the ldr/str. On success p is switched to post-indexed addressing,
    the add/sub is removed from the list and freed, and true is returned.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if GenerateARMCode and
         (p.oper[1]^.typ = top_ref) and
         (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
         { exactly three operands (same requirement as in
           LookForPreindexedPattern): an add/sub carrying an additional
           shifter operand cannot be expressed by the plain index set
           below, folding it would silently drop the shift }
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
         (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
         (
           { register index: only read oper[2]^.reg if it really is a register }
           ((taicpu(hp1).oper[2]^.typ=top_reg) and
            { don't apply the optimization if the (new) index register is loaded }
            (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
            not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
           ) or
           { valid offset? }
           ((taicpu(hp1).oper[2]^.typ=top_const) and
            ((abs(taicpu(hp1).oper[2]^.val)<256) or
             ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
            )
           )
         ) and
         { don't apply the optimization if the base register is loaded }
         (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              { sub is expressed by a negative offset }
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { sub is expressed by a negative sign of the index }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          { the add/sub is now done by the ldr/str itself }
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  595. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  596. var
  597. hp1,hp2,hp3,hp4: tai;
  598. i, i2: longint;
  599. tempop: tasmop;
  600. oldreg: tregister;
  601. dealloc: tai_regalloc;
  602. function IsPowerOf2(const value: DWord): boolean; inline;
  603. begin
  604. Result:=(value and (value - 1)) = 0;
  605. end;
  606. begin
  607. result := false;
  608. case p.typ of
  609. ait_instruction:
  610. begin
  611. {
  612. change
  613. <op> reg,x,y
  614. cmp reg,#0
  615. into
  616. <op>s reg,x,y
  617. }
  618. { this optimization can applied only to the currently enabled operations because
  619. the other operations do not update all flags and FPC does not track flag usage }
  620. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  621. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  622. GetNextInstruction(p, hp1) and
  623. { mlas is only allowed in arm mode }
  624. ((taicpu(p).opcode<>A_MLA) or
  625. (current_settings.instructionset<>is_thumb)) and
  626. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  627. (taicpu(hp1).oper[1]^.typ = top_const) and
  628. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  629. (taicpu(hp1).oper[1]^.val = 0) and
  630. GetNextInstruction(hp1, hp2) and
  631. { be careful here, following instructions could use other flags
  632. however after a jump fpc never depends on the value of flags }
  633. { All above instructions set Z and N according to the following
  634. Z := result = 0;
  635. N := result[31];
  636. EQ = Z=1; NE = Z=0;
  637. MI = N=1; PL = N=0; }
  638. (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
  639. { mov is also possible, but only if there is no shifter operand, it could be an rxx,
  640. we are too lazy to check if it is rxx or something else }
  641. (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
  642. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  643. begin
  644. DebugMsg('Peephole OpCmp2OpS done', p);
  645. taicpu(p).oppostfix:=PF_S;
  646. { move flag allocation if possible }
  647. GetLastInstruction(hp1, hp2);
  648. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  649. if assigned(hp2) then
  650. begin
  651. asml.Remove(hp2);
  652. asml.insertbefore(hp2, p);
  653. end;
  654. asml.remove(hp1);
  655. hp1.free;
  656. Result:=true;
  657. end
  658. else
  659. case taicpu(p).opcode of
  660. A_STR:
  661. begin
  662. { change
  663. str reg1,ref
  664. ldr reg2,ref
  665. into
  666. str reg1,ref
  667. mov reg2,reg1
  668. }
  669. if (taicpu(p).oper[1]^.typ = top_ref) and
  670. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  671. (taicpu(p).oppostfix=PF_None) and
  672. (taicpu(p).condition=C_None) and
  673. GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
  674. MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
  675. (taicpu(hp1).oper[1]^.typ=top_ref) and
  676. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  677. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  678. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
  679. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
  680. begin
  681. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  682. begin
  683. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  684. asml.remove(hp1);
  685. hp1.free;
  686. end
  687. else
  688. begin
  689. taicpu(hp1).opcode:=A_MOV;
  690. taicpu(hp1).oppostfix:=PF_None;
  691. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  692. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  693. end;
  694. result := true;
  695. end
  696. { change
  697. str reg1,ref
  698. str reg2,ref
  699. into
  700. strd reg1,reg2,ref
  701. }
  702. else if (GenerateARMCode or GenerateThumb2Code) and
  703. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  704. (taicpu(p).oppostfix=PF_None) and
  705. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  706. GetNextInstruction(p,hp1) and
  707. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  708. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  709. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  710. { str ensures that either base or index contain no register, else ldr wouldn't
  711. use an offset either
  712. }
  713. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  714. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  715. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  716. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  717. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  718. begin
  719. DebugMsg('Peephole StrStr2Strd done', p);
  720. taicpu(p).oppostfix:=PF_D;
  721. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  722. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  723. taicpu(p).ops:=3;
  724. asml.remove(hp1);
  725. hp1.free;
  726. result:=true;
  727. end;
  728. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  729. end;
  730. A_LDR:
  731. begin
  732. { change
  733. ldr reg1,ref
  734. ldr reg2,ref
  735. into ...
  736. }
  737. if (taicpu(p).oper[1]^.typ = top_ref) and
  738. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  739. GetNextInstruction(p,hp1) and
  740. { ldrd is not allowed here }
  741. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  742. begin
  743. {
  744. ...
  745. ldr reg1,ref
  746. mov reg2,reg1
  747. }
  748. if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
  749. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  750. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  751. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  752. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  753. begin
  754. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  755. begin
  756. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  757. asml.remove(hp1);
  758. hp1.free;
  759. end
  760. else
  761. begin
  762. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  763. taicpu(hp1).opcode:=A_MOV;
  764. taicpu(hp1).oppostfix:=PF_None;
  765. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  766. end;
  767. result := true;
  768. end
  769. {
  770. ...
  771. ldrd reg1,reg1+1,ref
  772. }
  773. else if (GenerateARMCode or GenerateThumb2Code) and
  774. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  775. { ldrd does not allow any postfixes ... }
  776. (taicpu(p).oppostfix=PF_None) and
  777. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  778. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  779. { ldr ensures that either base or index contain no register, else ldr wouldn't
  780. use an offset either
  781. }
  782. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  783. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  784. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  785. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  786. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  787. begin
  788. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  789. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  790. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  791. taicpu(p).ops:=3;
  792. taicpu(p).oppostfix:=PF_D;
  793. asml.remove(hp1);
  794. hp1.free;
  795. result:=true;
  796. end;
  797. end;
  798. {
  799. Change
  800. ldrb dst1, [REF]
  801. and dst2, dst1, #255
  802. into
  803. ldrb dst2, [ref]
  804. }
  805. if not(GenerateThumbCode) and
  806. (taicpu(p).oppostfix=PF_B) and
  807. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  808. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
  809. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  810. (taicpu(hp1).oper[2]^.typ = top_const) and
  811. (taicpu(hp1).oper[2]^.val = $FF) and
  812. not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  813. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  814. begin
  815. DebugMsg('Peephole LdrbAnd2Ldrb done', p);
  816. taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
  817. asml.remove(hp1);
  818. hp1.free;
  819. result:=true;
  820. end;
  821. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  822. { Remove superfluous mov after ldr
  823. changes
  824. ldr reg1, ref
  825. mov reg2, reg1
  826. to
  827. ldr reg2, ref
  828. conditions are:
  829. * no ldrd usage
  830. * reg1 must be released after mov
  831. * mov can not contain shifterops
  832. * ldr+mov have the same conditions
  833. * mov does not set flags
  834. }
  835. if (taicpu(p).oppostfix<>PF_D) and
  836. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  837. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
  838. Result:=true;
  839. end;
  840. A_MOV:
  841. begin
  842. { fold
  843. mov reg1,reg0, shift imm1
  844. mov reg1,reg1, shift imm2
  845. }
  846. if (taicpu(p).ops=3) and
  847. (taicpu(p).oper[2]^.typ = top_shifterop) and
  848. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  849. getnextinstruction(p,hp1) and
  850. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  851. (taicpu(hp1).ops=3) and
  852. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  853. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  854. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  855. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  856. begin
  857. { fold
  858. mov reg1,reg0, lsl 16
  859. mov reg1,reg1, lsr 16
  860. strh reg1, ...
  861. dealloc reg1
  862. to
  863. strh reg1, ...
  864. dealloc reg1
  865. }
  866. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  867. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  868. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  869. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  870. getnextinstruction(hp1,hp2) and
  871. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  872. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  873. begin
  874. TransferUsedRegs(TmpUsedRegs);
  875. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  876. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  877. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  878. begin
  879. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  880. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  881. asml.remove(p);
  882. asml.remove(hp1);
  883. p.free;
  884. hp1.free;
  885. p:=hp2;
  886. Result:=true;
  887. end;
  888. end
  889. { fold
  890. mov reg1,reg0, shift imm1
  891. mov reg1,reg1, shift imm2
  892. to
  893. mov reg1,reg0, shift imm1+imm2
  894. }
  895. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  896. { an asr is of no use after an lsr, so the asr can be folded into the lsr }
  897. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  898. begin
  899. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  900. { avoid overflows }
  901. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  902. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  903. SM_ROR:
  904. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  905. SM_ASR:
  906. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  907. SM_LSR,
  908. SM_LSL:
  909. begin
  910. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  911. InsertLLItem(p.previous, p.next, hp2);
  912. p.free;
  913. p:=hp2;
  914. end;
  915. else
  916. internalerror(2008072803);
  917. end;
  918. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  919. asml.remove(hp1);
  920. hp1.free;
  921. result := true;
  922. end
  923. { fold
  924. mov reg1,reg0, shift imm1
  925. mov reg1,reg1, shift imm2
  926. mov reg1,reg1, shift imm3 ...
  927. mov reg2,reg1, shift imm3 ...
  928. }
  929. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  930. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  931. (taicpu(hp2).ops=3) and
  932. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  933. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  934. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  935. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  936. begin
  937. { mov reg1,reg0, lsl imm1
  938. mov reg1,reg1, lsr/asr imm2
  939. mov reg2,reg1, lsl imm3 ...
  940. to
  941. mov reg1,reg0, lsl imm1
  942. mov reg2,reg1, lsr/asr imm2-imm3
  943. if
  944. imm1>=imm2
  945. }
  946. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  947. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  948. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  949. begin
  950. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  951. begin
  952. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  953. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  954. begin
  955. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
  956. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  957. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  958. asml.remove(hp1);
  959. asml.remove(hp2);
  960. hp1.free;
  961. hp2.free;
  962. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  963. begin
  964. taicpu(p).freeop(1);
  965. taicpu(p).freeop(2);
  966. taicpu(p).loadconst(1,0);
  967. end;
  968. result := true;
  969. end;
  970. end
  971. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  972. begin
  973. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
  974. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  975. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  976. asml.remove(hp2);
  977. hp2.free;
  978. result := true;
  979. end;
  980. end
  981. { mov reg1,reg0, lsr/asr imm1
  982. mov reg1,reg1, lsl imm2
  983. mov reg1,reg1, lsr/asr imm3 ...
  984. if imm3>=imm1 and imm2>=imm1
  985. to
  986. mov reg1,reg0, lsl imm2-imm1
  987. mov reg1,reg1, lsr/asr imm3 ...
  988. }
  989. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  990. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  991. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  992. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  993. begin
  994. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  995. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  996. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  997. asml.remove(p);
  998. p.free;
  999. p:=hp2;
  1000. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  1001. begin
  1002. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  1003. asml.remove(hp1);
  1004. hp1.free;
  1005. p:=hp2;
  1006. end;
  1007. result := true;
  1008. end;
  1009. end;
  1010. end;
  1011. { Change the common
  1012. mov r0, r0, lsr #xxx
  1013. and r0, r0, #yyy/bic r0, r0, #xxx
  1014. and remove the superfluous and/bic if possible
  1015. This could be extended to handle more cases.
  1016. }
  1017. if (taicpu(p).ops=3) and
  1018. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1019. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1020. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  1021. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  1022. (hp1.typ=ait_instruction) and
  1023. (taicpu(hp1).ops>=1) and
  1024. (taicpu(hp1).oper[0]^.typ=top_reg) and
  1025. (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  1026. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1027. begin
  1028. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1029. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1030. (taicpu(hp1).ops=3) and
  1031. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  1032. (taicpu(hp1).oper[2]^.typ = top_const) and
  1033. { Check if the AND actually would only mask out bits being already zero because of the shift
  1034. }
  1035. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) =
  1036. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1037. begin
  1038. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  1039. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  1040. asml.remove(hp1);
  1041. hp1.free;
  1042. result:=true;
  1043. end
  1044. else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1045. (taicpu(hp1).ops=3) and
  1046. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  1047. (taicpu(hp1).oper[2]^.typ = top_const) and
  1048. { Check if the BIC actually would only mask out bits being already zero because of the shift }
  1049. (taicpu(hp1).oper[2]^.val<>0) and
  1050. (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1051. begin
  1052. DebugMsg('Peephole LsrBic2Lsr done', hp1);
  1053. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  1054. asml.remove(hp1);
  1055. hp1.free;
  1056. result:=true;
  1057. end;
  1058. end;
  1059. { Change
  1060. mov rx, ry, lsr/ror #xxx
  1061. uxtb/uxth rz,rx/and rz,rx,0xFF
  1062. dealloc rx
  1063. to
  1064. uxtb/uxth rz,ry,ror #xxx
  1065. }
  1066. if (taicpu(p).ops=3) and
  1067. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1068. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1069. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1070. (GenerateThumb2Code) and
  1071. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  1072. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1073. begin
  1074. if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1075. (taicpu(hp1).ops = 2) and
  1076. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1077. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1078. begin
  1079. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1080. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1081. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1082. taicpu(hp1).ops := 3;
  1083. GetNextInstruction(p,hp1);
  1084. asml.Remove(p);
  1085. p.Free;
  1086. p:=hp1;
  1087. result:=true;
  1088. exit;
  1089. end
  1090. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1091. (taicpu(hp1).ops=2) and
  1092. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1093. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1094. begin
  1095. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1096. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1097. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1098. taicpu(hp1).ops := 3;
  1099. GetNextInstruction(p,hp1);
  1100. asml.Remove(p);
  1101. p.Free;
  1102. p:=hp1;
  1103. result:=true;
  1104. exit;
  1105. end
  1106. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1107. (taicpu(hp1).ops = 3) and
  1108. (taicpu(hp1).oper[2]^.typ = top_const) and
  1109. (taicpu(hp1).oper[2]^.val = $FF) and
  1110. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1111. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1112. begin
  1113. taicpu(hp1).ops := 3;
  1114. taicpu(hp1).opcode := A_UXTB;
  1115. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1116. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1117. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1118. GetNextInstruction(p,hp1);
  1119. asml.Remove(p);
  1120. p.Free;
  1121. p:=hp1;
  1122. result:=true;
  1123. exit;
  1124. end;
  1125. end;
  1126. {
  1127. optimize
  1128. mov rX, yyyy
  1129. ....
  1130. }
  1131. if (taicpu(p).ops = 2) and
  1132. GetNextInstruction(p,hp1) and
  1133. (tai(hp1).typ = ait_instruction) then
  1134. begin
  1135. {
  1136. This changes the very common
  1137. mov r0, #0
  1138. str r0, [...]
  1139. mov r0, #0
  1140. str r0, [...]
  1141. and removes all superfluous mov instructions
  1142. }
  1143. if (taicpu(p).oper[1]^.typ = top_const) and
  1144. (taicpu(hp1).opcode=A_STR) then
  1145. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1146. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1147. GetNextInstruction(hp1, hp2) and
  1148. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1149. (taicpu(hp2).ops = 2) and
  1150. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1151. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1152. begin
  1153. DebugMsg('Peephole MovStrMov done', hp2);
  1154. GetNextInstruction(hp2,hp1);
  1155. asml.remove(hp2);
  1156. hp2.free;
  1157. result:=true;
  1158. if not assigned(hp1) then break;
  1159. end
  1160. {
  1161. This removes the first mov from
  1162. mov rX,...
  1163. mov rX,...
  1164. }
  1165. else if taicpu(hp1).opcode=A_MOV then
  1166. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1167. (taicpu(hp1).ops = 2) and
  1168. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1169. { don't remove the first mov if the second is a mov rX,rX }
  1170. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  1171. begin
  1172. DebugMsg('Peephole MovMov done', p);
  1173. asml.remove(p);
  1174. p.free;
  1175. p:=hp1;
  1176. GetNextInstruction(hp1,hp1);
  1177. result:=true;
  1178. if not assigned(hp1) then
  1179. break;
  1180. end;
  1181. end;
  1182. {
  1183. change
  1184. mov r1, r0
  1185. add r1, r1, #1
  1186. to
  1187. add r1, r0, #1
  1188. Todo: Make it work for mov+cmp too
  1189. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1190. }
  1191. if (taicpu(p).ops = 2) and
  1192. (taicpu(p).oper[1]^.typ = top_reg) and
  1193. (taicpu(p).oppostfix = PF_NONE) and
  1194. GetNextInstruction(p, hp1) and
  1195. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1196. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  1197. [taicpu(p).condition], []) and
  1198. {MOV and MVN might only have 2 ops}
  1199. (taicpu(hp1).ops >= 2) and
  1200. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  1201. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1202. (
  1203. (taicpu(hp1).ops = 2) or
  1204. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  1205. ) then
  1206. begin
  1207. { When we get here we still don't know if the registers match}
  1208. for I:=1 to 2 do
  1209. {
  1210. If the first loop was successful p will be replaced with hp1.
  1211. The checks will still be ok, because all required information
  1212. will also be in hp1 then.
  1213. }
  1214. if (taicpu(hp1).ops > I) and
  1215. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) and
  1216. { prevent certain combinations on thumb(2), this is only a safe approximation }
  1217. (not(GenerateThumbCode or GenerateThumb2Code) or
  1218. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  1219. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15))
  1220. ) then
  1221. begin
  1222. DebugMsg('Peephole RedundantMovProcess done', hp1);
  1223. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  1224. if p<>hp1 then
  1225. begin
  1226. asml.remove(p);
  1227. p.free;
  1228. p:=hp1;
  1229. Result:=true;
  1230. end;
  1231. end;
  1232. end;
  1233. { Fold the very common sequence
  1234. mov regA, regB
  1235. ldr* regA, [regA]
  1236. to
  1237. ldr* regA, [regB]
  1238. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1239. }
  1240. if (taicpu(p).opcode = A_MOV) and
  1241. (taicpu(p).ops = 2) and
  1242. (taicpu(p).oper[1]^.typ = top_reg) and
  1243. (taicpu(p).oppostfix = PF_NONE) and
  1244. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1245. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1246. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1247. { We can change the base register only when the instruction uses AM_OFFSET }
  1248. ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1249. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1250. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1251. ) and
  1252. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1253. // Make sure that Thumb code doesn't propagate a high register into a reference
  1254. ((GenerateThumbCode and
  1255. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or
  1256. (not GenerateThumbCode)) and
  1257. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1258. begin
  1259. DebugMsg('Peephole MovLdr2Ldr done', hp1);
  1260. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1261. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1262. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1263. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1264. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1265. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1266. if Assigned(dealloc) then
  1267. begin
  1268. asml.remove(dealloc);
  1269. asml.InsertAfter(dealloc,hp1);
  1270. end;
  1271. GetNextInstruction(p, hp1);
  1272. asml.remove(p);
  1273. p.free;
  1274. p:=hp1;
  1275. result:=true;
  1276. end;
  1277. { This folds shifterops into following instructions
  1278. mov r0, r1, lsl #8
  1279. add r2, r3, r0
  1280. to
  1281. add r2, r3, r1, lsl #8
  1282. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1283. }
  1284. if (taicpu(p).opcode = A_MOV) and
  1285. (taicpu(p).ops = 3) and
  1286. (taicpu(p).oper[1]^.typ = top_reg) and
  1287. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1288. (taicpu(p).oppostfix = PF_NONE) and
  1289. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1290. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1291. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1292. A_CMP, A_CMN],
  1293. [taicpu(p).condition], [PF_None]) and
  1294. (not ((GenerateThumb2Code) and
  1295. (taicpu(hp1).opcode in [A_SBC]) and
  1296. (((taicpu(hp1).ops=3) and
  1297. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
  1298. ((taicpu(hp1).ops=2) and
  1299. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
  1300. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1301. (taicpu(hp1).ops >= 2) and
  1302. {Currently we can't fold into another shifterop}
  1303. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  1304. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1305. NR_DEFAULTFLAGS for modification}
  1306. (
  1307. {Everything is fine if we don't use RRX}
  1308. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1309. (
  1310. {If it is RRX, then check if we're just accessing the next instruction}
  1311. GetNextInstruction(p, hp2) and
  1312. (hp1 = hp2)
  1313. )
  1314. ) and
  1315. { reg1 must not be modified in between }
  1316. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1317. { The shifterop can contain a register, which must not be modified either }
  1318. (
  1319. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1320. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
  1321. ) and
  1322. (
  1323. {Only ONE of the two src operands is allowed to match}
  1324. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  1325. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  1326. ) then
  1327. begin
  1328. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  1329. I2:=0
  1330. else
  1331. I2:=1;
  1332. for I:=I2 to taicpu(hp1).ops-1 do
  1333. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  1334. begin
  1335. { If the parameter matched on the second op from the RIGHT
  1336. we have to switch the parameters, this will not happen for CMP
  1337. where we're only evaluating the rightmost parameter
  1338. }
  1339. if I <> taicpu(hp1).ops-1 then
  1340. begin
  1341. {The SUB operators need to be changed when we swap parameters}
  1342. case taicpu(hp1).opcode of
  1343. A_SUB: tempop:=A_RSB;
  1344. A_SBC: tempop:=A_RSC;
  1345. A_RSB: tempop:=A_SUB;
  1346. A_RSC: tempop:=A_SBC;
  1347. else tempop:=taicpu(hp1).opcode;
  1348. end;
  1349. if taicpu(hp1).ops = 3 then
  1350. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1351. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  1352. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1353. else
  1354. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1355. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1356. taicpu(p).oper[2]^.shifterop^);
  1357. end
  1358. else
  1359. if taicpu(hp1).ops = 3 then
  1360. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  1361. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  1362. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1363. else
  1364. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  1365. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1366. taicpu(p).oper[2]^.shifterop^);
  1367. asml.insertbefore(hp2, hp1);
  1368. GetNextInstruction(p, hp2);
  1369. asml.remove(p);
  1370. asml.remove(hp1);
  1371. p.free;
  1372. hp1.free;
  1373. p:=hp2;
  1374. DebugMsg('Peephole FoldShiftProcess done', p);
  1375. Result:=true;
  1376. break;
  1377. end;
  1378. end;
  1379. {
  1380. Fold
  1381. mov r1, r1, lsl #2
  1382. ldr/ldrb r0, [r0, r1]
  1383. to
  1384. ldr/ldrb r0, [r0, r1, lsl #2]
  1385. XXX: This still needs some work, as we quite often encounter something like
  1386. mov r1, r2, lsl #2
  1387. add r2, r3, #imm
  1388. ldr r0, [r2, r1]
  1389. which can't be folded because r2 is overwritten between the shift and the ldr.
  1390. We could try to shuffle the registers around and fold it into.
  1391. add r1, r3, #imm
  1392. ldr r0, [r1, r2, lsl #2]
  1393. }
  1394. if (not(GenerateThumbCode)) and
  1395. (taicpu(p).opcode = A_MOV) and
  1396. (taicpu(p).ops = 3) and
  1397. (taicpu(p).oper[1]^.typ = top_reg) and
  1398. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1399. { RRX is tough to handle, because it requires tracking the C-Flag,
  1400. it is also extremely unlikely to be emitted this way}
  1401. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1402. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1403. { thumb2 allows only lsl #0..#3 }
  1404. (not(GenerateThumb2Code) or
  1405. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1406. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1407. )
  1408. ) and
  1409. (taicpu(p).oppostfix = PF_NONE) and
  1410. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1411. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1412. (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1413. (GenerateThumb2Code and
  1414. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1415. ) and
  1416. (
  1417. {If this is address by offset, one of the two registers can be used}
  1418. ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1419. (
  1420. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1421. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1422. )
  1423. ) or
  1424. {For post and preindexed only the index register can be used}
  1425. ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1426. (
  1427. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1428. (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1429. ) and
  1430. (not GenerateThumb2Code)
  1431. )
  1432. ) and
  1433. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1434. (taicpu(hp1).oper[1]^.ref^.index<>NR_NO) and
  1435. (taicpu(hp1).oper[1]^.ref^.base<>NR_NO) and
  1436. { Only fold if there isn't another shifterop already, and offset is zero. }
  1437. (taicpu(hp1).oper[1]^.ref^.offset = 0) and
  1438. (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
  1439. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1440. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1441. begin
  1442. { If the register we want to do the shift for resides in base, we need to swap that}
  1443. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1444. taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
  1445. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1446. taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1447. taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1448. DebugMsg('Peephole FoldShiftLdrStr done', hp1);
  1449. GetNextInstruction(p, hp1);
  1450. asml.remove(p);
  1451. p.free;
  1452. p:=hp1;
  1453. Result:=true;
  1454. end;
  1455. {
  1456. Often we see shifts and then a superfluous mov to another register
  1457. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1458. }
  1459. if (taicpu(p).opcode = A_MOV) and
  1460. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1461. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  1462. Result:=true;
  1463. end;
  1464. A_ADD,
  1465. A_ADC,
  1466. A_RSB,
  1467. A_RSC,
  1468. A_SUB,
  1469. A_SBC,
  1470. A_AND,
  1471. A_BIC,
  1472. A_EOR,
  1473. A_ORR,
  1474. A_MLA,
  1475. A_MLS,
  1476. A_MUL:
  1477. begin
  1478. {
  1479. optimize
  1480. and reg2,reg1,const1
  1481. ...
  1482. }
  1483. if (taicpu(p).opcode = A_AND) and
  1484. (taicpu(p).ops>2) and
  1485. (taicpu(p).oper[1]^.typ = top_reg) and
  1486. (taicpu(p).oper[2]^.typ = top_const) then
  1487. begin
  1488. {
  1489. change
  1490. and reg2,reg1,const1
  1491. ...
  1492. and reg3,reg2,const2
  1493. to
  1494. and reg3,reg1,(const1 and const2)
  1495. }
  1496. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1497. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1498. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1499. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1500. (taicpu(hp1).oper[2]^.typ = top_const) then
  1501. begin
  1502. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1503. begin
  1504. DebugMsg('Peephole AndAnd2And done', p);
  1505. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1506. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1507. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1508. asml.remove(hp1);
  1509. hp1.free;
  1510. Result:=true;
  1511. end
  1512. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1513. begin
  1514. DebugMsg('Peephole AndAnd2And done', hp1);
  1515. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1516. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1517. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1518. GetNextInstruction(p, hp1);
  1519. asml.remove(p);
  1520. p.free;
  1521. p:=hp1;
  1522. Result:=true;
  1523. end;
  1524. end
  1525. {
  1526. change
  1527. and reg2,reg1,$xxxxxxFF
  1528. strb reg2,[...]
  1529. dealloc reg2
  1530. to
  1531. strb reg1,[...]
  1532. }
  1533. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1534. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1535. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1536. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1537. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1538. { the reference in strb must not use reg2 }
  1539. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1540. { reg1 must not be modified in between }
  1541. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1542. begin
  1543. DebugMsg('Peephole AndStrb2Strb done', p);
  1544. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1545. GetNextInstruction(p, hp1);
  1546. asml.remove(p);
  1547. p.free;
  1548. p:=hp1;
  1549. result:=true;
  1550. end
  1551. {
  1552. change
  1553. and reg2,reg1,255
  1554. uxtb/uxth reg3,reg2
  1555. dealloc reg2
  1556. to
  1557. and reg3,reg1,x
  1558. }
  1559. else if (taicpu(p).oper[2]^.val = $FF) and
  1560. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1561. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1562. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1563. (taicpu(hp1).ops = 2) and
  1564. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1565. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1566. { reg1 must not be modified in between }
  1567. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1568. begin
  1569. DebugMsg('Peephole AndUxt2And done', p);
  1570. taicpu(hp1).opcode:=A_AND;
  1571. taicpu(hp1).ops:=3;
  1572. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1573. taicpu(hp1).loadconst(2,255);
  1574. GetNextInstruction(p,hp1);
  1575. asml.remove(p);
  1576. p.Free;
  1577. p:=hp1;
  1578. result:=true;
  1579. end
  1580. {
  1581. from
  1582. and reg1,reg0,2^n-1
  1583. mov reg2,reg1, lsl imm1
  1584. (mov reg3,reg2, lsr/asr imm1)
  1585. remove either the and or the lsl/xsr sequence if possible
  1586. }
  1587. else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1588. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1589. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1590. (taicpu(hp1).ops=3) and
  1591. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1592. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1593. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1594. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1595. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1596. begin
  1597. {
  1598. and reg1,reg0,2^n-1
  1599. mov reg2,reg1, lsl imm1
  1600. mov reg3,reg2, lsr/asr imm1
  1601. =>
  1602. and reg3,reg0,2^n-1
  1603. if i<32-imm1 or (i=32-imm1 and the second shift is lsr)
  1604. }
  1605. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1606. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1607. (taicpu(hp2).ops=3) and
  1608. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1609. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1610. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1611. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1612. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1613. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1614. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1615. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1616. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1617. begin
  1618. DebugMsg('Peephole AndLslXsr2And done', p);
  1619. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1620. asml.Remove(hp1);
  1621. asml.Remove(hp2);
  1622. hp1.free;
  1623. hp2.free;
  1624. result:=true;
  1625. end
  1626. {
  1627. and reg1,reg0,2^n-1
  1628. mov reg2,reg1, lsl imm1
  1629. =>
  1630. mov reg2,reg0, lsl imm1
  1631. if i>32-imm1
  1632. }
  1633. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1634. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1635. begin
  1636. DebugMsg('Peephole AndLsl2Lsl done', p);
  1637. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1638. GetNextInstruction(p, hp1);
  1639. asml.Remove(p);
  1640. p.free;
  1641. p:=hp1;
  1642. result:=true;
  1643. end
  1644. end;
  1645. end;
  1646. {
  1647. change
  1648. add/sub reg2,reg1,const1
  1649. str/ldr reg3,[reg2,const2]
  1650. dealloc reg2
  1651. to
  1652. str/ldr reg3,[reg1,const2+/-const1]
  1653. }
  1654. if (not GenerateThumbCode) and
  1655. (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1656. (taicpu(p).ops>2) and
  1657. (taicpu(p).oper[1]^.typ = top_reg) and
  1658. (taicpu(p).oper[2]^.typ = top_const) then
  1659. begin
  1660. hp1:=p;
  1661. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1662. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1663. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1664. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1665. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1666. { don't optimize if the register is stored/overwritten }
  1667. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1668. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1669. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1670. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1671. ldr postfix }
  1672. (((taicpu(p).opcode=A_ADD) and
  1673. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1674. ) or
  1675. ((taicpu(p).opcode=A_SUB) and
  1676. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1677. )
  1678. ) do
  1679. begin
  1680. { neither reg1 nor reg2 might be changed inbetween }
  1681. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1682. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1683. break;
  1684. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1685. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1686. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1687. begin
  1688. { remember last instruction }
  1689. hp2:=hp1;
  1690. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1691. hp1:=p;
  1692. { fix all ldr/str }
  1693. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1694. begin
  1695. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1696. if taicpu(p).opcode=A_ADD then
  1697. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1698. else
  1699. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1700. if hp1=hp2 then
  1701. break;
  1702. end;
  1703. GetNextInstruction(p,hp1);
  1704. asml.remove(p);
  1705. p.free;
  1706. p:=hp1;
  1707. result:=true;
  1708. break;
  1709. end;
  1710. end;
  1711. end;
  1712. {
  1713. change
  1714. add reg1, ...
  1715. mov reg2, reg1
  1716. to
  1717. add reg2, ...
  1718. }
  1719. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1720. (taicpu(p).ops>=3) and
  1721. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1722. Result:=true;
  1723. if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  1724. LookForPreindexedPattern(taicpu(p)) then
  1725. begin
  1726. GetNextInstruction(p,hp1);
  1727. DebugMsg('Peephole Add/Sub to Preindexed done', p);
  1728. asml.remove(p);
  1729. p.free;
  1730. p:=hp1;
  1731. Result:=true;
  1732. end;
  1733. {
  1734. Turn
  1735. mul reg0, z,w
  1736. sub/add x, y, reg0
  1737. dealloc reg0
  1738. into
  1739. mls/mla x,z,w,y
  1740. }
  1741. if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
  1742. (taicpu(p).ops=3) and
  1743. (taicpu(p).oper[0]^.typ = top_reg) and
  1744. (taicpu(p).oper[1]^.typ = top_reg) and
  1745. (taicpu(p).oper[2]^.typ = top_reg) and
  1746. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1747. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  1748. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1749. (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
  1750. (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
  1751. ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
  1752. // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
  1753. // TODO: A workaround would be to swap Rm and Rs
  1754. (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
  1755. (((taicpu(hp1).ops=3) and
  1756. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1757. ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  1758. (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
  1759. ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1760. (taicpu(hp1).opcode=A_ADD) and
  1761. (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
  1762. ((taicpu(hp1).ops=2) and
  1763. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1764. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1765. (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
  1766. begin
  1767. if taicpu(hp1).opcode=A_ADD then
  1768. begin
  1769. taicpu(hp1).opcode:=A_MLA;
  1770. if taicpu(hp1).ops=3 then
  1771. begin
  1772. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
  1773. oldreg:=taicpu(hp1).oper[2]^.reg
  1774. else
  1775. oldreg:=taicpu(hp1).oper[1]^.reg;
  1776. end
  1777. else
  1778. oldreg:=taicpu(hp1).oper[0]^.reg;
  1779. taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
  1780. taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
  1781. taicpu(hp1).loadreg(3,oldreg);
  1782. DebugMsg('MulAdd2MLA done', p);
  1783. taicpu(hp1).ops:=4;
  1784. asml.remove(p);
  1785. p.free;
  1786. p:=hp1;
  1787. end
  1788. else
  1789. begin
  1790. taicpu(hp1).opcode:=A_MLS;
  1791. taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
  1792. if taicpu(hp1).ops=2 then
  1793. taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
  1794. else
  1795. taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
  1796. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  1797. DebugMsg('MulSub2MLS done', p);
  1798. taicpu(hp1).ops:=4;
  1799. asml.remove(p);
  1800. p.free;
  1801. p:=hp1;
  1802. end;
  1803. result:=true;
  1804. end
  1805. end;
  1806. {$ifdef dummy}
  1807. A_MVN:
  1808. begin
  1809. {
  1810. change
  1811. mvn reg2,reg1
  1812. and reg3,reg4,reg2
  1813. dealloc reg2
  1814. to
  1815. bic reg3,reg4,reg1
  1816. }
  1817. if (taicpu(p).oper[1]^.typ = top_reg) and
  1818. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1819. MatchInstruction(hp1,A_AND,[],[]) and
  1820. (((taicpu(hp1).ops=3) and
  1821. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1822. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1823. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1824. ((taicpu(hp1).ops=2) and
  1825. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1826. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1827. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1828. { reg1 might not be modified inbetween }
  1829. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1830. begin
  1831. DebugMsg('Peephole MvnAnd2Bic done', p);
  1832. taicpu(hp1).opcode:=A_BIC;
  1833. if taicpu(hp1).ops=3 then
  1834. begin
  1835. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1836. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1837. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1838. end
  1839. else
  1840. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1841. GetNextInstruction(p, hp1);
  1842. asml.remove(p);
  1843. p.free;
  1844. p:=hp1;
  1845. end;
  1846. end;
  1847. {$endif dummy}
  1848. A_UXTB:
  1849. begin
  1850. {
  1851. change
  1852. uxtb reg2,reg1
  1853. strb reg2,[...]
  1854. dealloc reg2
  1855. to
  1856. strb reg1,[...]
  1857. }
  1858. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1859. (taicpu(p).ops=2) and
  1860. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1861. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1862. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1863. { the reference in strb might not use reg2 }
  1864. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1865. { reg1 might not be modified inbetween }
  1866. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1867. begin
  1868. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1869. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1870. GetNextInstruction(p,hp2);
  1871. asml.remove(p);
  1872. p.free;
  1873. p:=hp2;
  1874. result:=true;
  1875. end
  1876. {
  1877. change
  1878. uxtb reg2,reg1
  1879. uxth reg3,reg2
  1880. dealloc reg2
  1881. to
  1882. uxtb reg3,reg1
  1883. }
  1884. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1885. (taicpu(p).ops=2) and
  1886. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1887. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1888. (taicpu(hp1).ops = 2) and
  1889. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1890. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1891. { reg1 might not be modified inbetween }
  1892. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1893. begin
  1894. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1895. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1896. asml.remove(hp1);
  1897. hp1.free;
  1898. result:=true;
  1899. end
  1900. {
  1901. change
  1902. uxtb reg2,reg1
  1903. uxtb reg3,reg2
  1904. dealloc reg2
  1905. to
  1906. uxtb reg3,reg1
  1907. }
  1908. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1909. (taicpu(p).ops=2) and
  1910. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1911. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1912. (taicpu(hp1).ops = 2) and
  1913. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1914. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1915. { reg1 might not be modified inbetween }
  1916. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1917. begin
  1918. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1919. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1920. asml.remove(hp1);
  1921. hp1.free;
  1922. result:=true;
  1923. end
  1924. {
  1925. change
  1926. uxtb reg2,reg1
  1927. and reg3,reg2,#0x*FF
  1928. dealloc reg2
  1929. to
  1930. uxtb reg3,reg1
  1931. }
  1932. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1933. (taicpu(p).ops=2) and
  1934. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1935. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1936. (taicpu(hp1).ops=3) and
  1937. (taicpu(hp1).oper[2]^.typ=top_const) and
  1938. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  1939. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1940. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1941. { reg1 might not be modified inbetween }
  1942. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1943. begin
  1944. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  1945. taicpu(hp1).opcode:=A_UXTB;
  1946. taicpu(hp1).ops:=2;
  1947. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1948. GetNextInstruction(p,hp2);
  1949. asml.remove(p);
  1950. p.free;
  1951. p:=hp2;
  1952. result:=true;
  1953. end
  1954. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1955. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  1956. Result:=true;
  1957. end;
  1958. A_UXTH:
  1959. begin
  1960. {
  1961. change
  1962. uxth reg2,reg1
  1963. strh reg2,[...]
  1964. dealloc reg2
  1965. to
  1966. strh reg1,[...]
  1967. }
  1968. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1969. (taicpu(p).ops=2) and
  1970. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1971. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1972. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1973. { the reference in strb might not use reg2 }
  1974. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1975. { reg1 might not be modified inbetween }
  1976. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1977. begin
  1978. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1979. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1980. GetNextInstruction(p, hp1);
  1981. asml.remove(p);
  1982. p.free;
  1983. p:=hp1;
  1984. result:=true;
  1985. end
  1986. {
  1987. change
  1988. uxth reg2,reg1
  1989. uxth reg3,reg2
  1990. dealloc reg2
  1991. to
  1992. uxth reg3,reg1
  1993. }
  1994. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1995. (taicpu(p).ops=2) and
  1996. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1997. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1998. (taicpu(hp1).ops=2) and
  1999. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  2000. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  2001. { reg1 might not be modified inbetween }
  2002. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  2003. begin
  2004. DebugMsg('Peephole UxthUxth2Uxth done', p);
  2005. taicpu(hp1).opcode:=A_UXTH;
  2006. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  2007. GetNextInstruction(p, hp1);
  2008. asml.remove(p);
  2009. p.free;
  2010. p:=hp1;
  2011. result:=true;
  2012. end
  2013. {
  2014. change
  2015. uxth reg2,reg1
  2016. and reg3,reg2,#65535
  2017. dealloc reg2
  2018. to
  2019. uxth reg3,reg1
  2020. }
  2021. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  2022. (taicpu(p).ops=2) and
  2023. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  2024. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  2025. (taicpu(hp1).ops=3) and
  2026. (taicpu(hp1).oper[2]^.typ=top_const) and
  2027. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  2028. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  2029. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  2030. { reg1 might not be modified inbetween }
  2031. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  2032. begin
  2033. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  2034. taicpu(hp1).opcode:=A_UXTH;
  2035. taicpu(hp1).ops:=2;
  2036. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  2037. GetNextInstruction(p, hp1);
  2038. asml.remove(p);
  2039. p.free;
  2040. p:=hp1;
  2041. result:=true;
  2042. end
  2043. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  2044. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  2045. Result:=true;
  2046. end;
  2047. A_CMP:
  2048. begin
  2049. {
  2050. change
  2051. cmp reg,const1
  2052. moveq reg,const1
  2053. movne reg,const2
  2054. to
  2055. cmp reg,const1
  2056. movne reg,const2
  2057. }
  2058. if (taicpu(p).oper[1]^.typ = top_const) and
  2059. GetNextInstruction(p, hp1) and
  2060. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  2061. (taicpu(hp1).oper[1]^.typ = top_const) and
  2062. GetNextInstruction(hp1, hp2) and
  2063. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  2064. (taicpu(hp1).oper[1]^.typ = top_const) then
  2065. begin
  2066. Result:=RemoveRedundantMove(p, hp1, asml) or Result;
  2067. Result:=RemoveRedundantMove(p, hp2, asml) or Result;
  2068. end;
  2069. end;
  2070. A_STM:
  2071. begin
  2072. {
  2073. change
  2074. stmfd r13!,[r14]
  2075. sub r13,r13,#4
  2076. bl abc
  2077. add r13,r13,#4
  2078. ldmfd r13!,[r15]
  2079. into
  2080. b abc
  2081. }
  2082. if not(ts_thumb_interworking in current_settings.targetswitches) and
  2083. MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
  2084. GetNextInstruction(p, hp1) and
  2085. GetNextInstruction(hp1, hp2) and
  2086. SkipEntryExitMarker(hp2, hp2) and
  2087. GetNextInstruction(hp2, hp3) and
  2088. SkipEntryExitMarker(hp3, hp3) and
  2089. GetNextInstruction(hp3, hp4) and
  2090. (taicpu(p).oper[0]^.typ = top_ref) and
  2091. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2092. (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  2093. (taicpu(p).oper[0]^.ref^.offset=0) and
  2094. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2095. (taicpu(p).oper[1]^.typ = top_regset) and
  2096. (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
  2097. MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
  2098. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2099. (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
  2100. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
  2101. (taicpu(hp1).oper[2]^.typ = top_const) and
  2102. MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
  2103. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
  2104. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
  2105. MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
  2106. MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
  2107. (taicpu(hp2).oper[0]^.typ = top_ref) and
  2108. MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
  2109. MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
  2110. (taicpu(hp4).oper[1]^.typ = top_regset) and
  2111. (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
  2112. begin
  2113. asml.Remove(p);
  2114. asml.Remove(hp1);
  2115. asml.Remove(hp3);
  2116. asml.Remove(hp4);
  2117. taicpu(hp2).opcode:=A_B;
  2118. p.free;
  2119. hp1.free;
  2120. hp3.free;
  2121. hp4.free;
  2122. p:=hp2;
  2123. DebugMsg('Peephole Bl2B done', p);
  2124. end;
  2125. end;
  2126. A_VMOV:
  2127. begin
  2128. {
  2129. change
  2130. vmov reg0,reg1,reg2
  2131. vmov reg1,reg2,reg0
  2132. into
  2133. vmov reg0,reg1,reg2
  2134. can be applied regardless if reg0 or reg2 is the vfp register
  2135. }
  2136. if (taicpu(p).ops = 3) and
  2137. GetNextInstruction(p, hp1) and
  2138. MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  2139. (taicpu(hp1).ops = 3) and
  2140. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
  2141. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
  2142. MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) then
  2143. begin
  2144. asml.Remove(hp1);
  2145. hp1.free;
  2146. DebugMsg('Peephole VMovVMov2VMov done', p);
  2147. end;
  2148. end;
  2149. A_VLDR,
  2150. A_VADD,
  2151. A_VMUL,
  2152. A_VDIV,
  2153. A_VSUB,
  2154. A_VSQRT,
  2155. A_VNEG,
  2156. A_VCVT,
  2157. A_VABS:
  2158. begin
  2159. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  2160. RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
  2161. Result:=true;
  2162. end
  2163. else
  2164. ;
  2165. end;
  2166. end;
  2167. else
  2168. ;
  2169. end;
  2170. end;
  2171. { instructions modifying the CPSR can be only the last instruction }
  2172. function MustBeLast(p : tai) : boolean;
  2173. begin
  2174. Result:=(p.typ=ait_instruction) and
  2175. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  2176. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  2177. (taicpu(p).oppostfix=PF_S));
  2178. end;
  2179. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  2180. var
  2181. p,hp1,hp2: tai;
  2182. l : longint;
  2183. condition : tasmcond;
  2184. hp3: tai;
  2185. WasLast: boolean;
  2186. { UsedRegs, TmpUsedRegs: TRegSet; }
  2187. begin
  2188. p := BlockStart;
  2189. { UsedRegs := []; }
  2190. while (p <> BlockEnd) Do
  2191. begin
  2192. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  2193. case p.Typ Of
  2194. Ait_Instruction:
  2195. begin
  2196. case taicpu(p).opcode Of
  2197. A_B:
  2198. if (taicpu(p).condition<>C_None) and
  2199. not(GenerateThumbCode) then
  2200. begin
  2201. { check for
  2202. Bxx xxx
  2203. <several instructions>
  2204. xxx:
  2205. }
  2206. l:=0;
  2207. WasLast:=False;
  2208. GetNextInstruction(p, hp1);
  2209. while assigned(hp1) and
  2210. (l<=4) and
  2211. CanBeCond(hp1) and
  2212. { stop on labels }
  2213. not(hp1.typ=ait_label) and
  2214. { avoid that we cannot recognize the case BccB2Cond }
  2215. not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
  2216. begin
  2217. inc(l);
  2218. if MustBeLast(hp1) then
  2219. begin
  2220. WasLast:=True;
  2221. GetNextInstruction(hp1,hp1);
  2222. break;
  2223. end
  2224. else
  2225. GetNextInstruction(hp1,hp1);
  2226. end;
  2227. if assigned(hp1) then
  2228. begin
  2229. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2230. begin
  2231. if (l<=4) and (l>0) then
  2232. begin
  2233. condition:=inverse_cond(taicpu(p).condition);
  2234. hp2:=p;
  2235. GetNextInstruction(p,hp1);
  2236. p:=hp1;
  2237. repeat
  2238. if hp1.typ=ait_instruction then
  2239. taicpu(hp1).condition:=condition;
  2240. if MustBeLast(hp1) then
  2241. begin
  2242. GetNextInstruction(hp1,hp1);
  2243. break;
  2244. end
  2245. else
  2246. GetNextInstruction(hp1,hp1);
  2247. until not(assigned(hp1)) or
  2248. not(CanBeCond(hp1)) or
  2249. (hp1.typ=ait_label);
  2250. DebugMsg('Peephole Bcc2Cond done',hp2);
  2251. { wait with removing else GetNextInstruction could
  2252. ignore the label if it was the only usage in the
  2253. jump moved away }
  2254. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2255. asml.remove(hp2);
  2256. hp2.free;
  2257. continue;
  2258. end;
  2259. end
  2260. else
  2261. { do not perform further optimizations if there is inctructon
  2262. in block #1 which can not be optimized.
  2263. }
  2264. if not WasLast then
  2265. begin
  2266. { check further for
  2267. Bcc xxx
  2268. <several instructions 1>
  2269. B yyy
  2270. xxx:
  2271. <several instructions 2>
  2272. yyy:
  2273. }
  2274. { hp2 points to jmp yyy }
  2275. hp2:=hp1;
  2276. { skip hp1 to xxx }
  2277. GetNextInstruction(hp1, hp1);
  2278. if assigned(hp2) and
  2279. assigned(hp1) and
  2280. (l<=3) and
  2281. (hp2.typ=ait_instruction) and
  2282. (taicpu(hp2).is_jmp) and
  2283. (taicpu(hp2).condition=C_None) and
  2284. { real label and jump, no further references to the
  2285. label are allowed }
  2286. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2287. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2288. begin
  2289. l:=0;
  2290. { skip hp1 to <several moves 2> }
  2291. GetNextInstruction(hp1, hp1);
  2292. while assigned(hp1) and
  2293. CanBeCond(hp1) and
  2294. (l<=3) do
  2295. begin
  2296. inc(l);
  2297. if MustBeLast(hp1) then
  2298. begin
  2299. GetNextInstruction(hp1, hp1);
  2300. break;
  2301. end
  2302. else
  2303. GetNextInstruction(hp1, hp1);
  2304. end;
  2305. { hp1 points to yyy: }
  2306. if assigned(hp1) and
  2307. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2308. begin
  2309. condition:=inverse_cond(taicpu(p).condition);
  2310. GetNextInstruction(p,hp1);
  2311. hp3:=p;
  2312. p:=hp1;
  2313. repeat
  2314. if hp1.typ=ait_instruction then
  2315. taicpu(hp1).condition:=condition;
  2316. if MustBeLast(hp1) then
  2317. begin
  2318. GetNextInstruction(hp1, hp1);
  2319. break;
  2320. end
  2321. else
  2322. GetNextInstruction(hp1, hp1);
  2323. until not(assigned(hp1)) or
  2324. not(CanBeCond(hp1)) or
  2325. ((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B));
  2326. { hp2 is still at jmp yyy }
  2327. GetNextInstruction(hp2,hp1);
  2328. { hp1 is now at xxx: }
  2329. condition:=inverse_cond(condition);
  2330. GetNextInstruction(hp1,hp1);
  2331. { hp1 is now at <several movs 2> }
  2332. repeat
  2333. if hp1.typ=ait_instruction then
  2334. taicpu(hp1).condition:=condition;
  2335. GetNextInstruction(hp1,hp1);
  2336. until not(assigned(hp1)) or
  2337. not(CanBeCond(hp1)) or
  2338. (hp1.typ=ait_label);
  2339. DebugMsg('Peephole BccB2Cond done',hp3);
  2340. { remove Bcc }
  2341. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2342. asml.remove(hp3);
  2343. hp3.free;
  2344. { remove B }
  2345. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2346. asml.remove(hp2);
  2347. hp2.free;
  2348. continue;
  2349. end;
  2350. end;
  2351. end;
  2352. end;
  2353. end;
  2354. else
  2355. ;
  2356. end;
  2357. end;
  2358. else
  2359. ;
  2360. end;
  2361. p := tai(p.next)
  2362. end;
  2363. end;
  2364. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  2365. begin
  2366. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  2367. Result:=true
  2368. else If MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
  2369. (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
  2370. Result:=true
  2371. else
  2372. Result:=inherited RegInInstruction(Reg, p1);
  2373. end;
  2374. const
  2375. { set of opcode which might or do write to memory }
  2376. { TODO : extend armins.dat to contain r/w info }
  2377. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  2378. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  2379. { adjust the register live information when swapping the two instructions p and hp1,
  2380. they must follow one after the other }
  2381. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  2382. procedure CheckLiveEnd(reg : tregister);
  2383. var
  2384. supreg : TSuperRegister;
  2385. regtype : TRegisterType;
  2386. begin
  2387. if reg=NR_NO then
  2388. exit;
  2389. regtype:=getregtype(reg);
  2390. supreg:=getsupreg(reg);
  2391. if (cg.rg[regtype].live_end[supreg]=hp1) and
  2392. RegInInstruction(reg,p) then
  2393. cg.rg[regtype].live_end[supreg]:=p;
  2394. end;
  2395. procedure CheckLiveStart(reg : TRegister);
  2396. var
  2397. supreg : TSuperRegister;
  2398. regtype : TRegisterType;
  2399. begin
  2400. if reg=NR_NO then
  2401. exit;
  2402. regtype:=getregtype(reg);
  2403. supreg:=getsupreg(reg);
  2404. if (cg.rg[regtype].live_start[supreg]=p) and
  2405. RegInInstruction(reg,hp1) then
  2406. cg.rg[regtype].live_start[supreg]:=hp1;
  2407. end;
  2408. var
  2409. i : longint;
  2410. r : TSuperRegister;
  2411. begin
  2412. { assumption: p is directly followed by hp1 }
  2413. { if live of any reg used by p starts at p and hp1 uses this register then
  2414. set live start to hp1 }
  2415. for i:=0 to p.ops-1 do
  2416. case p.oper[i]^.typ of
  2417. Top_Reg:
  2418. CheckLiveStart(p.oper[i]^.reg);
  2419. Top_Ref:
  2420. begin
  2421. CheckLiveStart(p.oper[i]^.ref^.base);
  2422. CheckLiveStart(p.oper[i]^.ref^.index);
  2423. end;
  2424. Top_Shifterop:
  2425. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  2426. Top_RegSet:
  2427. for r:=RS_R0 to RS_R15 do
  2428. if r in p.oper[i]^.regset^ then
  2429. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2430. else
  2431. ;
  2432. end;
  2433. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  2434. set live end to p }
  2435. for i:=0 to hp1.ops-1 do
  2436. case hp1.oper[i]^.typ of
  2437. Top_Reg:
  2438. CheckLiveEnd(hp1.oper[i]^.reg);
  2439. Top_Ref:
  2440. begin
  2441. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  2442. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  2443. end;
  2444. Top_Shifterop:
  2445. CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
  2446. Top_RegSet:
  2447. for r:=RS_R0 to RS_R15 do
  2448. if r in hp1.oper[i]^.regset^ then
  2449. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2450. else
  2451. ;
  2452. end;
  2453. end;
  2454. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  2455. { TODO : schedule also forward }
  2456. { TODO : schedule distance > 1 }
  2457. { returns true if p might be a load of a pc relative tls offset }
  2458. function PossibleTLSLoad(const p: tai) : boolean;
  2459. begin
  2460. Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
  2461. (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
  2462. (taicpu(p).oper[1]^.ref^.index=NR_PC)));
  2463. end;
  2464. var
  2465. hp1,hp2,hp3,hp4,hp5,insertpos : tai;
  2466. list : TAsmList;
  2467. begin
  2468. result:=true;
  2469. list:=TAsmList.create;
  2470. p:=BlockStart;
  2471. while p<>BlockEnd Do
  2472. begin
  2473. if (p.typ=ait_instruction) and
  2474. GetNextInstruction(p,hp1) and
  2475. (hp1.typ=ait_instruction) and
  2476. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  2477. (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
  2478. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  2479. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  2480. not(RegModifiedByInstruction(NR_PC,p))
  2481. ) or
  2482. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  2483. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  2484. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  2485. (taicpu(hp1).oper[1]^.ref^.offset=0)
  2486. )
  2487. ) or
  2488. { try to prove that the memory accesses don't overlapp }
  2489. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  2490. (taicpu(p).oper[1]^.typ = top_ref) and
  2491. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  2492. (taicpu(p).oppostfix=PF_None) and
  2493. (taicpu(hp1).oppostfix=PF_None) and
  2494. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  2495. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  2496. { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
  2497. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  2498. )
  2499. )
  2500. ) and
  2501. GetNextInstruction(hp1,hp2) and
  2502. (hp2.typ=ait_instruction) and
  2503. { loaded register used by next instruction?
  2504. if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
  2505. the bl may not be scheduled away from the bl) and it needs to be taken care of this case
  2506. }
  2507. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  2508. { loaded register not used by previous instruction? }
  2509. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  2510. { same condition? }
  2511. (taicpu(p).condition=taicpu(hp1).condition) and
  2512. { first instruction might not change the register used as base }
  2513. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  2514. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  2515. ) and
  2516. { first instruction might not change the register used as index }
  2517. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  2518. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  2519. ) and
  2520. { if we modify the basereg AND the first instruction used that reg, we can not schedule }
  2521. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
  2522. not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
  2523. not(PossibleTLSLoad(p)) and
  2524. not(PossibleTLSLoad(hp1)) then
  2525. begin
  2526. hp3:=tai(p.Previous);
  2527. hp5:=tai(p.next);
  2528. asml.Remove(p);
  2529. { if there is a reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
  2530. associated with p, move it together with p }
  2531. { before the instruction? }
  2532. { find reg allocs,deallocs and PIC labels }
  2533. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  2534. begin
  2535. if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
  2536. RegInInstruction(tai_regalloc(hp3).reg,p) )
  2537. or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
  2538. then
  2539. begin
  2540. hp4:=hp3;
  2541. hp3:=tai(hp3.Previous);
  2542. asml.Remove(hp4);
  2543. list.Insert(hp4);
  2544. end
  2545. else
  2546. hp3:=tai(hp3.Previous);
  2547. end;
  2548. list.Concat(p);
  2549. SwapRegLive(taicpu(p),taicpu(hp1));
  2550. { after the instruction? }
  2551. { find reg deallocs and reg syncs }
  2552. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  2553. begin
  2554. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
  2555. RegInInstruction(tai_regalloc(hp5).reg,p) then
  2556. begin
  2557. hp4:=hp5;
  2558. hp5:=tai(hp5.next);
  2559. asml.Remove(hp4);
  2560. list.Concat(hp4);
  2561. end
  2562. else
  2563. hp5:=tai(hp5.Next);
  2564. end;
  2565. asml.Remove(hp1);
  2566. { if there are address labels associated with hp2, those must
  2567. stay with hp2 (e.g. for GOT-less PIC) }
  2568. insertpos:=hp2;
  2569. while assigned(hp2.previous) and
  2570. (tai(hp2.previous).typ<>ait_instruction) do
  2571. begin
  2572. hp2:=tai(hp2.previous);
  2573. if (hp2.typ=ait_label) and
  2574. (tai_label(hp2).labsym.typ=AT_ADDR) then
  2575. insertpos:=hp2;
  2576. end;
  2577. {$ifdef DEBUG_PREREGSCHEDULER}
  2578. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
  2579. {$endif DEBUG_PREREGSCHEDULER}
  2580. asml.InsertBefore(hp1,insertpos);
  2581. asml.InsertListBefore(insertpos,list);
  2582. p:=tai(p.next);
  2583. end
  2584. else if p.typ=ait_instruction then
  2585. p:=hp1
  2586. else
  2587. p:=tai(p.next);
  2588. end;
  2589. list.Free;
  2590. end;
  { Shrinks the Thumb-2 IT instruction that covers p by one slot.

    list : assembler list that contains p
    p    : the instruction that is about to be dropped from its IT block

    The routine walks backwards from p. Only entries of type ait_instruction
    are counted (l starts at 1 for the first one found); other list entries
    are stepped over without counting. The walk stops after four counted
    instructions or at the first opcode in the range A_IT..A_ITTTT
    (NOTE(review): this relies on those opcodes being contiguous in the
    opcode enumeration - confirm against the opcode table).

    When such an IT opcode is found:
      - a plain IT directly in front of p (l=1) is removed from the list
        and freed;
      - otherwise, if l equals the number of slots of the IT opcode, the
        opcode is replaced by the variant with the last slot dropped;
      - in every other case the IT instruction is left untouched. }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { Remove only unlinks the node, so it has to be
                        released explicitly to avoid leaking it }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      { two-slot blocks }
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      { three-slot blocks }
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      { four-slot blocks }
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                      else
                        ;
                    end;
                  { hp may have been freed above; it is not touched again }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  { First-pass peephole optimizations specific to Thumb-2.

    p : the list entry to inspect; replaced by the new instruction when a
        rewrite creates one.
    Returns true when the inherited pass or one of the rewrites below
    changed something.

    The inherited pass is tried first. If it did nothing, the following
    rewrites are attempted in order:
      Stm2Push   - unconditional STM with FD/DB postfix, pre-indexed on the
                   stack pointer, whose register set does not intersect
                   [8..13,15]  ->  PUSH with the same register set
      Ldm2Pop    - unconditional LDM with FD/IA postfix, pre-indexed on the
                   stack pointer, whose register set does not intersect
                   [8..14]     ->  POP with the same register set
      AndR2Uxt   - two-operand AND with constant 255/65535 -> UXTB/UXTH
      AndRR2Uxt  - three-operand AND with constant 255/65535 -> UXTB/UXTH
    The commented-out sections are disabled rewrites kept for reference. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      old : tai;
      //hp1,hp2 : tai;
    begin
      result:=false;
      if inherited PeepHoleOptPass1Cpu(p) then
        result:=true
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          DebugMsg('Peephole Stm2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          { Remove only unlinks the old STM; free it as the Ldm2Pop branch
            does, otherwise the instruction object is leaked }
          old:=p;
          asml.Remove(old);
          p:=hp;
          old.Free;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          DebugMsg('Peephole Str2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          DebugMsg('Peephole Ldm2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          DebugMsg('Peephole Ldr2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          DebugMsg('Peephole AndR2Uxt done', p);
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { the two-operand form uses the destination as source as well }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          DebugMsg('Peephole AndRR2Uxt done', p);
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { drop the constant operand; operands 0 and 1 stay as they are }
          taicpu(p).ops:=2;
          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
    end;
  { Second peephole pass for Thumb-2: turns a short conditional forward
    branch into an IT block.

    Pattern looked for, between BlockStart and BlockEnd:
        Bxx  target
        <1..4 instructions accepted by CanBeCond>
      target:
    On a match the skipped instructions receive the inverse of the branch
    condition, an IT/ITT/ITTT/ITTTT with that condition is inserted in place
    of the branch, a 'Collapsed' comment is added, the reference count of
    the target label is decremented and the branch is removed and freed.
    Any IT instruction that covered the branch is adjusted through
    DecrementPreceedingIT. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p, scan, branch : tai;
      count : longint;
      condition : tasmcond;
      wasLast : boolean;
    begin
      p := BlockStart;
      while p <> BlockEnd do
        begin
          if (p.typ = ait_instruction) and
             (taicpu(p).opcode = A_B) and
             (taicpu(p).condition <> C_None) then
            begin
              { count the instructions that could be made conditional;
                stop on labels and after an instruction that must be the
                last one of its group }
              count := 0;
              GetNextInstruction(p, scan);
              while assigned(scan) and
                    (count <= 4) and
                    CanBeCond(scan) and
                    (scan.typ <> ait_label) do
                begin
                  inc(count);
                  wasLast := MustBeLast(scan);
                  GetNextInstruction(scan, scan);
                  if wasLast then
                    break;
                end;

              { the entry reached by the scan has to be the branch target,
                and there must be between one and four instructions }
              if assigned(scan) and
                 FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), scan) and
                 (count <= 4) and
                 (count > 0) then
                begin
                  condition := inverse_cond(taicpu(p).condition);
                  branch := p;
                  GetNextInstruction(p, scan);
                  { processing resumes at the first instruction after the
                    branch }
                  p := scan;
                  repeat
                    if scan.typ = ait_instruction then
                      taicpu(scan).condition := condition;
                    wasLast := MustBeLast(scan);
                    GetNextInstruction(scan, scan);
                  until wasLast or
                        not assigned(scan) or
                        not CanBeCond(scan) or
                        (scan.typ = ait_label);

                  { the branch is removed only now: otherwise
                    GetNextInstruction could ignore the label if the branch
                    was its only user }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), branch);
                  DecrementPreceedingIT(asml, branch);
                  case count of
                    1: asml.InsertAfter(taicpu.op_cond(A_IT, condition), branch);
                    2: asml.InsertAfter(taicpu.op_cond(A_ITT, condition), branch);
                    3: asml.InsertAfter(taicpu.op_cond(A_ITTT, condition), branch);
                    4: asml.InsertAfter(taicpu.op_cond(A_ITTTT, condition), branch);
                  end;

                  tasmlabel(taicpu(branch).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(branch);
                  branch.free;
                  { p already points at the next entry to examine }
                  continue;
                end;
            end;
          p := tai(p.next);
        end;
    end;
  { Post-peephole rewrites for Thumb-2, applied to the instruction at p.

    Returns true when p was rewritten, false otherwise (including when p is
    not an instruction).

    Most rewrites switch an unconditional instruction to its S-postfixed
    form and/or reduce it from three to two operands; those that add the S
    postfix are only applied while NR_DEFAULTFLAGS is not in UsedRegs.
    NOTE(review): presumably the aim is to reach the shorter 16-bit Thumb
    encodings - confirm against the instruction encoder. }
  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;

    { True while the default flags register is not marked as in use }
    function FlagsAreFree: boolean;
      begin
        FlagsAreFree:=not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
      end;

    { Surrounds p with an alloc/dealloc of the flags register, records the
      flags as used and gives p the S postfix }
    procedure MakeFlagSetting;
      begin
        asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
        asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
        IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
        taicpu(p).oppostfix:=PF_S;
      end;

    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;

      { mov rX,#0..255 -> movs }
      if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_const) and
         (taicpu(p).oper[1]^.val >= 0) and
         (taicpu(p).oper[1]^.val < 256) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Movs done', p);
          MakeFlagSetting;
          exit(true);
        end;

      { mvn rX,rY -> mvns }
      if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_reg) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole Mvn2Mvns done', p);
          MakeFlagSetting;
          exit(true);
        end;

      { rsb rX,rY,#0 -> rsbs }
      if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val=0) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole Rsb2Rsbs done', p);
          MakeFlagSetting;
          exit(true);
        end;

      { add/sub rX,rX,#0..255 (rX not the stack pointer) -> adds/subs rX,#imm }
      if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val >= 0) and
         (taicpu(p).oper[2]^.val < 256) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          MakeFlagSetting;
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).ops := 2;
          exit(true);
        end;

      { add/sub rX,rY (neither being the stack pointer) -> adds/subs }
      if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
         (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ=top_reg) and
         (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
         (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          MakeFlagSetting;
          exit(true);
        end;

      { add rX,rX,rY -> add rX,rY (flags untouched) }
      if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ=top_reg) then
        begin
          DebugMsg('Peephole AddRRR2AddRR done', p);
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          exit(true);
        end;

      { op rX,rX,rY -> ops rX,rY }
      if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ=top_reg) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole opXXY2opsXY done', p);
          MakeFlagSetting;
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          exit(true);
        end;

      { ops rX,rX,rY/#imm -> ops rX,rY/#imm (already flag setting) }
      if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
        begin
          DebugMsg('Peephole opXXY2opXY done', p);
          taicpu(p).ops := 2;
          if taicpu(p).oper[2]^.typ=top_reg then
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
          else
            taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          exit(true);
        end;

      { op rX,rY,rX -> ops rX,rY; only the operand count is reduced, so
        operand 1 stays as it was }
      if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole opXYX2opsXY done', p);
          MakeFlagSetting;
          taicpu(p).ops := 2;
          exit(true);
        end;

      { mov rX,rY,<shift> -> lsls/lsrs/asrs/rors rX,rY,rZ/#imm }
      if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
         (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_shifterop) and
         (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
         //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Shift done', p);
          MakeFlagSetting;
          case taicpu(p).oper[2]^.shifterop^.shiftmode of
            SM_LSL: taicpu(p).opcode:=A_LSL;
            SM_LSR: taicpu(p).opcode:=A_LSR;
            SM_ASR: taicpu(p).opcode:=A_ASR;
            SM_ROR: taicpu(p).opcode:=A_ROR;
            else
              internalerror(2019050912);
          end;
          { replace the shifter operand by a plain register or constant }
          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
            taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
          else
            taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
          exit(true);
        end;
    end;
  { Unit initialization: assigns the optimizer and pre-register-allocation
    scheduler classes implemented in this unit to the corresponding
    class-reference variables. }
  begin
    cpreregallocscheduler := TCpuPreRegallocScheduler;
    casmoptimizer := TCpuAsmOptimizer;
  end.