aoptcpu.pas 142 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  24. Type
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. { uses the same constructor as TAopObj }
  27. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  28. procedure PeepHoleOptPass2;override;
  29. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  30. function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
  31. function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;
  32. { gets the next tai object after current that contains info relevant
  33. to the optimizer in p1 which used the given register or does a
  34. change in program flow.
  35. If there is none, it returns false and
  36. sets p1 to nil }
  37. Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;
  38. Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
  39. { outputs a debug message into the assembler file }
  40. procedure DebugMsg(const s: string; p: tai);
  41. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  42. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  43. protected
  44. function LookForPreindexedPattern(p: taicpu): boolean;
  45. function LookForPostindexedPattern(p: taicpu): boolean;
  46. End;
  47. TCpuPreRegallocScheduler = class(TAsmScheduler)
  48. function SchedulerPass1Cpu(var p: tai): boolean;override;
  49. procedure SwapRegLive(p, hp1: taicpu);
  50. end;
  51. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  52. { uses the same constructor as TAopObj }
  53. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  54. procedure PeepHoleOptPass2;override;
  55. function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  56. End;
  57. function MustBeLast(p : tai) : boolean;
  58. Implementation
  59. uses
  60. cutils,verbose,globtype,globals,
  61. systems,
  62. cpuinfo,
  63. cgobj,procinfo,
  64. aasmbase,aasmdata;
  65. { Range check must be disabled explicitly as conversions between signed and unsigned
  66. 32-bit values are done without explicit typecasts }
  67. {$R-}
  68. function CanBeCond(p : tai) : boolean;
  69. begin
  70. result:=
  71. not(GenerateThumbCode) and
  72. (p.typ=ait_instruction) and
  73. (taicpu(p).condition=C_None) and
  74. ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
  75. (taicpu(p).opcode<>A_CBZ) and
  76. (taicpu(p).opcode<>A_CBNZ) and
  77. (taicpu(p).opcode<>A_PLD) and
  78. (((taicpu(p).opcode<>A_BLX) and
  79. { BL may need to be converted into BLX by the linker -- could possibly
  80. be allowed in case it's to a local symbol of which we know that it
  81. uses the same instruction set as the current one }
  82. (taicpu(p).opcode<>A_BL)) or
  83. (taicpu(p).oper[0]^.typ=top_reg));
  84. end;
  85. function RefsEqual(const r1, r2: treference): boolean;
  86. begin
  87. refsequal :=
  88. (r1.offset = r2.offset) and
  89. (r1.base = r2.base) and
  90. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  91. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  92. (r1.relsymbol = r2.relsymbol) and
  93. (r1.signindex = r2.signindex) and
  94. (r1.shiftimm = r2.shiftimm) and
  95. (r1.addressmode = r2.addressmode) and
  96. (r1.shiftmode = r2.shiftmode);
  97. end;
  98. function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  99. begin
  100. result :=
  101. (instr.typ = ait_instruction) and
  102. ((op = []) or ((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op))) and
  103. ((cond = []) or (taicpu(instr).condition in cond)) and
  104. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  105. end;
  106. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  107. begin
  108. result :=
  109. (instr.typ = ait_instruction) and
  110. (taicpu(instr).opcode = op) and
  111. ((cond = []) or (taicpu(instr).condition in cond)) and
  112. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  113. end;
  114. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  115. begin
  116. result := oper1.typ = oper2.typ;
  117. if result then
  118. case oper1.typ of
  119. top_const:
  120. Result:=oper1.val = oper2.val;
  121. top_reg:
  122. Result:=oper1.reg = oper2.reg;
  123. top_conditioncode:
  124. Result:=oper1.cc = oper2.cc;
  125. top_ref:
  126. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  127. else Result:=false;
  128. end
  129. end;
  130. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  131. begin
  132. result := (oper.typ = top_reg) and (oper.reg = reg);
  133. end;
  134. function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
  135. begin
  136. Result:=false;
  137. if (taicpu(movp).condition = C_EQ) and
  138. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  139. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  140. begin
  141. asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
  142. asml.remove(movp);
  143. movp.free;
  144. Result:=true;
  145. end;
  146. end;
  147. function AlignedToQWord(const ref : treference) : boolean;
  148. begin
  149. { (safe) heuristics to ensure alignment }
  150. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  151. (((ref.offset>=0) and
  152. ((ref.offset mod 8)=0) and
  153. ((ref.base=NR_R13) or
  154. (ref.index=NR_R13))
  155. ) or
  156. ((ref.offset<=0) and
  157. { when using NR_R11, it has always a value of <qword align>+4 }
  158. ((abs(ref.offset+4) mod 8)=0) and
  159. (current_procinfo.framepointer=NR_R11) and
  160. ((ref.base=NR_R11) or
  161. (ref.index=NR_R11))
  162. )
  163. );
  164. end;
  165. function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
  166. begin
  167. if GenerateThumb2Code then
  168. result := (aoffset<4096) and (aoffset>-256)
  169. else
  170. result := ((pf in [PF_None,PF_B]) and
  171. (abs(aoffset)<4096)) or
  172. (abs(aoffset)<256);
  173. end;
  174. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  175. var
  176. p: taicpu;
  177. i: longint;
  178. begin
  179. instructionLoadsFromReg := false;
  180. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  181. exit;
  182. p:=taicpu(hp);
  183. i:=1;
  184. {For these instructions we have to start on oper[0]}
  185. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  186. A_CMP, A_CMN, A_TST, A_TEQ,
  187. A_B, A_BL, A_BX, A_BLX,
  188. A_SMLAL, A_UMLAL]) then i:=0;
  189. while(i<p.ops) do
  190. begin
  191. case p.oper[I]^.typ of
  192. top_reg:
  193. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  194. { STRD }
  195. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  196. top_regset:
  197. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  198. top_shifterop:
  199. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  200. top_ref:
  201. instructionLoadsFromReg :=
  202. (p.oper[I]^.ref^.base = reg) or
  203. (p.oper[I]^.ref^.index = reg);
  204. end;
  205. if instructionLoadsFromReg then exit; {Bailout if we found something}
  206. Inc(I);
  207. end;
  208. end;
  209. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  210. var
  211. p: taicpu;
  212. begin
  213. p := taicpu(hp);
  214. Result := false;
  215. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  216. exit;
  217. case p.opcode of
  218. { These operands do not write into a register at all }
  219. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
  220. A_VCMP:
  221. exit;
  222. {Take care of post/preincremented store and loads, they will change their base register}
  223. A_STR, A_LDR:
  224. begin
  225. Result := false;
  226. { actually, this does not apply here because post-/preindexed does not mean that a register
  227. is loaded with a new value, it is only modified
  228. (taicpu(p).oper[1]^.typ=top_ref) and
  229. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  230. (taicpu(p).oper[1]^.ref^.base = reg);
  231. }
  232. { STR does not load into it's first register }
  233. if p.opcode = A_STR then
  234. exit;
  235. end;
  236. A_VSTR:
  237. begin
  238. Result := false;
  239. exit;
  240. end;
  241. { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
  242. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  243. Result :=
  244. (p.oper[1]^.typ = top_reg) and
  245. (p.oper[1]^.reg = reg);
  246. {Loads to oper2 from coprocessor}
  247. {
  248. MCR/MRC is currently not supported in FPC
  249. A_MRC:
  250. Result :=
  251. (p.oper[2]^.typ = top_reg) and
  252. (p.oper[2]^.reg = reg);
  253. }
  254. {Loads to all register in the registerset}
  255. A_LDM, A_VLDM:
  256. Result := (getsupreg(reg) in p.oper[1]^.regset^);
  257. A_POP:
  258. Result := (getsupreg(reg) in p.oper[0]^.regset^) or
  259. (reg=NR_STACK_POINTER_REG);
  260. end;
  261. if Result then
  262. exit;
  263. case p.oper[0]^.typ of
  264. {This is the case}
  265. top_reg:
  266. Result := (p.oper[0]^.reg = reg) or
  267. { LDRD }
  268. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
  269. {LDM/STM might write a new value to their index register}
  270. top_ref:
  271. Result :=
  272. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  273. (taicpu(p).oper[0]^.ref^.base = reg);
  274. end;
  275. end;
  276. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  277. Out Next: tai; reg: TRegister): Boolean;
  278. begin
  279. Next:=Current;
  280. repeat
  281. Result:=GetNextInstruction(Next,Next);
  282. until not (Result) or
  283. not(cs_opt_level3 in current_settings.optimizerswitches) or
  284. (Next.typ<>ait_instruction) or
  285. RegInInstruction(reg,Next) or
  286. is_calljmp(taicpu(Next).opcode) or
  287. RegModifiedByInstruction(NR_PC,Next);
  288. end;
  289. function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
  290. Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
  291. begin
  292. Next:=Current;
  293. repeat
  294. Result:=GetNextInstruction(Next,Next);
  295. if Result and
  296. (Next.typ=ait_instruction) and
  297. (taicpu(Next).opcode in [A_LDR, A_STR]) and
  298. (
  299. ((taicpu(Next).ops = 2) and
  300. (taicpu(Next).oper[1]^.typ = top_ref) and
  301. RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
  302. ((taicpu(Next).ops = 3) and { LDRD/STRD }
  303. (taicpu(Next).oper[2]^.typ = top_ref) and
  304. RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
  305. ) then
  306. {We've found an instruction LDR or STR with the same reference}
  307. exit;
  308. until not(Result) or
  309. (Next.typ<>ait_instruction) or
  310. not(cs_opt_level3 in current_settings.optimizerswitches) or
  311. is_calljmp(taicpu(Next).opcode) or
  312. (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
  313. RegModifiedByInstruction(NR_PC,Next);
  314. Result:=false;
  315. end;
  316. {$ifdef DEBUG_AOPTCPU}
  317. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  318. begin
  319. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  320. end;
  321. {$else DEBUG_AOPTCPU}
  322. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  323. begin
  324. end;
  325. {$endif DEBUG_AOPTCPU}
  326. function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
  327. var
  328. alloc,
  329. dealloc : tai_regalloc;
  330. hp1 : tai;
  331. begin
  332. Result:=false;
  333. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  334. (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
  335. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  336. { don't mess with moves to pc }
  337. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  338. { don't mess with moves to lr }
  339. (taicpu(movp).oper[0]^.reg<>NR_R14) and
  340. { the destination register of the mov might not be used beween p and movp }
  341. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  342. { cb[n]z are thumb instructions which require specific registers, with no wide forms }
  343. (taicpu(p).opcode<>A_CBZ) and
  344. (taicpu(p).opcode<>A_CBNZ) and
  345. {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
  346. not (
  347. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  348. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
  349. (current_settings.cputype < cpu_armv6)
  350. ) and
  351. { Take care to only do this for instructions which REALLY load to the first register.
  352. Otherwise
  353. str reg0, [reg1]
  354. mov reg2, reg0
  355. will be optimized to
  356. str reg2, [reg1]
  357. }
  358. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  359. begin
  360. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  361. if assigned(dealloc) then
  362. begin
  363. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  364. result:=true;
  365. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  366. and remove it if possible }
  367. asml.Remove(dealloc);
  368. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  369. if assigned(alloc) then
  370. begin
  371. asml.Remove(alloc);
  372. alloc.free;
  373. dealloc.free;
  374. end
  375. else
  376. asml.InsertAfter(dealloc,p);
  377. { try to move the allocation of the target register }
  378. GetLastInstruction(movp,hp1);
  379. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  380. if assigned(alloc) then
  381. begin
  382. asml.Remove(alloc);
  383. asml.InsertBefore(alloc,p);
  384. { adjust used regs }
  385. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  386. end;
  387. { finally get rid of the mov }
  388. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  389. asml.remove(movp);
  390. movp.free;
  391. end;
  392. end;
  393. end;
  394. function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;
  395. var
  396. alloc,
  397. dealloc : tai_regalloc;
  398. hp1 : tai;
  399. begin
  400. Result:=false;
  401. if (MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) or
  402. ((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  403. ((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
  404. ) and
  405. (taicpu(movp).ops=2) and
  406. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  407. { the destination register of the mov might not be used beween p and movp }
  408. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  409. { Take care to only do this for instructions which REALLY load to the first register.
  410. Otherwise
  411. vstr reg0, [reg1]
  412. vmov reg2, reg0
  413. will be optimized to
  414. vstr reg2, [reg1]
  415. }
  416. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  417. begin
  418. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  419. if assigned(dealloc) then
  420. begin
  421. DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp);
  422. result:=true;
  423. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  424. and remove it if possible }
  425. asml.Remove(dealloc);
  426. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  427. if assigned(alloc) then
  428. begin
  429. asml.Remove(alloc);
  430. alloc.free;
  431. dealloc.free;
  432. end
  433. else
  434. asml.InsertAfter(dealloc,p);
  435. { try to move the allocation of the target register }
  436. GetLastInstruction(movp,hp1);
  437. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  438. if assigned(alloc) then
  439. begin
  440. asml.Remove(alloc);
  441. asml.InsertBefore(alloc,p);
  442. { adjust used regs }
  443. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  444. end;
  445. { finally get rid of the mov }
  446. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  447. asml.remove(movp);
  448. movp.free;
  449. end;
  450. end;
  451. end;
  452. {
  453. optimize
  454. add/sub reg1,reg1,regY/const
  455. ...
  456. ldr/str regX,[reg1]
  457. into
  458. ldr/str regX,[reg1, regY/const]!
  459. }
  460. function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
  461. var
  462. hp1: tai;
  463. begin
  464. if GenerateARMCode and
  465. (p.ops=3) and
  466. MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
  467. GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
  468. (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
  469. MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
  470. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  471. (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
  472. (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
  473. (taicpu(hp1).oper[1]^.ref^.offset=0) and
  474. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  475. (((p.oper[2]^.typ=top_reg) and
  476. (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
  477. ((p.oper[2]^.typ=top_const) and
  478. ((abs(p.oper[2]^.val) < 256) or
  479. ((abs(p.oper[2]^.val) < 4096) and
  480. (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
  481. begin
  482. taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
  483. if p.oper[2]^.typ=top_reg then
  484. begin
  485. taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
  486. if p.opcode=A_ADD then
  487. taicpu(hp1).oper[1]^.ref^.signindex:=1
  488. else
  489. taicpu(hp1).oper[1]^.ref^.signindex:=-1;
  490. end
  491. else
  492. begin
  493. if p.opcode=A_ADD then
  494. taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
  495. else
  496. taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
  497. end;
  498. result:=true;
  499. end
  500. else
  501. result:=false;
  502. end;
  503. {
  504. optimize
  505. ldr/str regX,[reg1]
  506. ...
  507. add/sub reg1,reg1,regY/const
  508. into
  509. ldr/str regX,[reg1], regY/const
  510. }
  511. function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
  512. var
  513. hp1 : tai;
  514. begin
  515. Result:=false;
  516. if (p.oper[1]^.typ = top_ref) and
  517. (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
  518. (p.oper[1]^.ref^.index=NR_NO) and
  519. (p.oper[1]^.ref^.offset=0) and
  520. GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
  521. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  522. MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
  523. (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
  524. (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
  525. (
  526. (taicpu(hp1).oper[2]^.typ=top_reg) or
  527. { valid offset? }
  528. ((taicpu(hp1).oper[2]^.typ=top_const) and
  529. ((abs(taicpu(hp1).oper[2]^.val)<256) or
  530. ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
  531. )
  532. )
  533. ) and
  534. { don't apply the optimization if the base register is loaded }
  535. (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
  536. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  537. { don't apply the optimization if the (new) index register is loaded }
  538. (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
  539. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
  540. GenerateARMCode then
  541. begin
  542. DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
  543. p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  544. if taicpu(hp1).oper[2]^.typ=top_const then
  545. begin
  546. if taicpu(hp1).opcode=A_ADD then
  547. p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  548. else
  549. p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  550. end
  551. else
  552. begin
  553. p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
  554. if taicpu(hp1).opcode=A_ADD then
  555. p.oper[1]^.ref^.signindex:=1
  556. else
  557. p.oper[1]^.ref^.signindex:=-1;
  558. end;
  559. asml.Remove(hp1);
  560. hp1.Free;
  561. Result:=true;
  562. end;
  563. end;
  564. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  565. var
  566. hp1,hp2,hp3,hp4: tai;
  567. i, i2: longint;
  568. TmpUsedRegs: TAllUsedRegs;
  569. tempop: tasmop;
  570. oldreg: tregister;
  571. dealloc: tai_regalloc;
  572. function IsPowerOf2(const value: DWord): boolean; inline;
  573. begin
  574. Result:=(value and (value - 1)) = 0;
  575. end;
  576. begin
  577. result := false;
  578. case p.typ of
  579. ait_instruction:
  580. begin
  581. {
  582. change
  583. <op> reg,x,y
  584. cmp reg,#0
  585. into
  586. <op>s reg,x,y
  587. }
  588. { this optimization can applied only to the currently enabled operations because
  589. the other operations do not update all flags and FPC does not track flag usage }
  590. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  591. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  592. GetNextInstruction(p, hp1) and
  593. { mlas is only allowed in arm mode }
  594. ((taicpu(p).opcode<>A_MLA) or
  595. (current_settings.instructionset<>is_thumb)) and
  596. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  597. (taicpu(hp1).oper[1]^.typ = top_const) and
  598. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  599. (taicpu(hp1).oper[1]^.val = 0) and
  600. GetNextInstruction(hp1, hp2) and
  601. { be careful here, following instructions could use other flags
  602. however after a jump fpc never depends on the value of flags }
  603. { All above instructions set Z and N according to the following
  604. Z := result = 0;
  605. N := result[31];
  606. EQ = Z=1; NE = Z=0;
  607. MI = N=1; PL = N=0; }
  608. (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
  609. { mov is also possible, but only if there is no shifter operand, it could be an rxx,
  610. we are too lazy to check if it is rxx or something else }
  611. (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
  612. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  613. begin
  614. DebugMsg('Peephole OpCmp2OpS done', p);
  615. taicpu(p).oppostfix:=PF_S;
  616. { move flag allocation if possible }
  617. GetLastInstruction(hp1, hp2);
  618. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  619. if assigned(hp2) then
  620. begin
  621. asml.Remove(hp2);
  622. asml.insertbefore(hp2, p);
  623. end;
  624. asml.remove(hp1);
  625. hp1.free;
  626. Result:=true;
  627. end
  628. else
  629. case taicpu(p).opcode of
  630. A_STR:
  631. begin
  632. { change
  633. str reg1,ref
  634. ldr reg2,ref
  635. into
  636. str reg1,ref
  637. mov reg2,reg1
  638. }
  639. if (taicpu(p).oper[1]^.typ = top_ref) and
  640. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  641. (taicpu(p).oppostfix=PF_None) and
  642. (taicpu(p).condition=C_None) and
  643. GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
  644. MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
  645. (taicpu(hp1).oper[1]^.typ=top_ref) and
  646. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  647. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  648. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
  649. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
  650. begin
  651. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  652. begin
  653. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  654. asml.remove(hp1);
  655. hp1.free;
  656. end
  657. else
  658. begin
  659. taicpu(hp1).opcode:=A_MOV;
  660. taicpu(hp1).oppostfix:=PF_None;
  661. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  662. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  663. end;
  664. result := true;
  665. end
  666. { change
  667. str reg1,ref
  668. str reg2,ref
  669. into
  670. strd reg1,reg2,ref
  671. }
  672. else if (GenerateARMCode or GenerateThumb2Code) and
  673. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  674. (taicpu(p).oppostfix=PF_None) and
  675. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  676. GetNextInstruction(p,hp1) and
  677. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  678. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  679. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  680. { str ensures that either base or index contain no register, else ldr wouldn't
  681. use an offset either
  682. }
  683. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  684. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  685. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  686. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  687. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  688. begin
  689. DebugMsg('Peephole StrStr2Strd done', p);
  690. taicpu(p).oppostfix:=PF_D;
  691. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  692. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  693. taicpu(p).ops:=3;
  694. asml.remove(hp1);
  695. hp1.free;
  696. result:=true;
  697. end;
  698. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  699. end;
  700. A_LDR:
  701. begin
  702. { change
  703. ldr reg1,ref
  704. ldr reg2,ref
  705. into ...
  706. }
  707. if (taicpu(p).oper[1]^.typ = top_ref) and
  708. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  709. GetNextInstruction(p,hp1) and
  710. { ldrd is not allowed here }
  711. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  712. begin
  713. {
  714. ...
  715. ldr reg1,ref
  716. mov reg2,reg1
  717. }
  718. if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
  719. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  720. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  721. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  722. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  723. begin
  724. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  725. begin
  726. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  727. asml.remove(hp1);
  728. hp1.free;
  729. end
  730. else
  731. begin
  732. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  733. taicpu(hp1).opcode:=A_MOV;
  734. taicpu(hp1).oppostfix:=PF_None;
  735. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  736. end;
  737. result := true;
  738. end
  739. {
  740. ...
  741. ldrd reg1,reg1+1,ref
  742. }
  743. else if (GenerateARMCode or GenerateThumb2Code) and
  744. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  745. { ldrd does not allow any postfixes ... }
  746. (taicpu(p).oppostfix=PF_None) and
  747. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  748. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  749. { ldr ensures that either base or index contain no register, else ldr wouldn't
  750. use an offset either
  751. }
  752. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  753. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  754. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  755. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  756. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  757. begin
  758. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  759. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  760. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  761. taicpu(p).ops:=3;
  762. taicpu(p).oppostfix:=PF_D;
  763. asml.remove(hp1);
  764. hp1.free;
  765. result:=true;
  766. end;
  767. end;
  768. {
  769. Change
  770. ldrb dst1, [REF]
  771. and dst2, dst1, #255
  772. into
  773. ldrb dst2, [ref]
  774. }
  775. if not(GenerateThumbCode) and
  776. (taicpu(p).oppostfix=PF_B) and
  777. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  778. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
  779. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  780. (taicpu(hp1).oper[2]^.typ = top_const) and
  781. (taicpu(hp1).oper[2]^.val = $FF) and
  782. not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  783. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  784. begin
  785. DebugMsg('Peephole LdrbAnd2Ldrb done', p);
  786. taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
  787. asml.remove(hp1);
  788. hp1.free;
  789. result:=true;
  790. end;
  791. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  792. { Remove superfluous mov after ldr
  793. changes
  794. ldr reg1, ref
  795. mov reg2, reg1
  796. to
  797. ldr reg2, ref
  798. conditions are:
  799. * no ldrd usage
  800. * reg1 must be released after mov
  801. * mov can not contain shifterops
  802. * ldr+mov have the same conditions
  803. * mov does not set flags
  804. }
  805. if (taicpu(p).oppostfix<>PF_D) and
  806. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  807. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
  808. Result:=true;
  809. end;
  810. A_MOV:
  811. begin
  812. { fold
  813. mov reg1,reg0, shift imm1
  814. mov reg1,reg1, shift imm2
  815. }
  816. if (taicpu(p).ops=3) and
  817. (taicpu(p).oper[2]^.typ = top_shifterop) and
  818. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  819. getnextinstruction(p,hp1) and
  820. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  821. (taicpu(hp1).ops=3) and
  822. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  823. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  824. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  825. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  826. begin
  827. { fold
  828. mov reg1,reg0, lsl 16
  829. mov reg1,reg1, lsr 16
  830. strh reg1, ...
  831. dealloc reg1
  832. to
  833. strh reg1, ...
  834. dealloc reg1
  835. }
  836. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  837. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  838. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  839. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  840. getnextinstruction(hp1,hp2) and
  841. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  842. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  843. begin
  844. CopyUsedRegs(TmpUsedRegs);
  845. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  846. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  847. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  848. begin
  849. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  850. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  851. asml.remove(p);
  852. asml.remove(hp1);
  853. p.free;
  854. hp1.free;
  855. p:=hp2;
  856. Result:=true;
  857. end;
  858. ReleaseUsedRegs(TmpUsedRegs);
  859. end
  860. { fold
  861. mov reg1,reg0, shift imm1
  862. mov reg1,reg1, shift imm2
  863. to
  864. mov reg1,reg0, shift imm1+imm2
  865. }
  866. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  867. { an asr makes no sense after a lsr, the asr can be folded into the lsr }
  868. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  869. begin
  870. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  871. { avoid overflows }
  872. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  873. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  874. SM_ROR:
  875. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  876. SM_ASR:
  877. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  878. SM_LSR,
  879. SM_LSL:
  880. begin
  881. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  882. InsertLLItem(p.previous, p.next, hp2);
  883. p.free;
  884. p:=hp2;
  885. end;
  886. else
  887. internalerror(2008072803);
  888. end;
  889. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  890. asml.remove(hp1);
  891. hp1.free;
  892. result := true;
  893. end
  894. { fold
  895. mov reg1,reg0, shift imm1
  896. mov reg1,reg1, shift imm2
  897. mov reg1,reg1, shift imm3 ...
  898. mov reg2,reg1, shift imm3 ...
  899. }
  900. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  901. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  902. (taicpu(hp2).ops=3) and
  903. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  904. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  905. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  906. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  907. begin
  908. { mov reg1,reg0, lsl imm1
  909. mov reg1,reg1, lsr/asr imm2
  910. mov reg2,reg1, lsl imm3 ...
  911. to
  912. mov reg1,reg0, lsl imm1
  913. mov reg2,reg1, lsr/asr imm2-imm3
  914. if
  915. imm1>=imm2
  916. }
  917. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  918. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  919. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  920. begin
  921. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  922. begin
  923. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  924. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  925. begin
  926. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
  927. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  928. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  929. asml.remove(hp1);
  930. asml.remove(hp2);
  931. hp1.free;
  932. hp2.free;
  933. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  934. begin
  935. taicpu(p).freeop(1);
  936. taicpu(p).freeop(2);
  937. taicpu(p).loadconst(1,0);
  938. end;
  939. result := true;
  940. end;
  941. end
  942. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  943. begin
  944. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
  945. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  946. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  947. asml.remove(hp2);
  948. hp2.free;
  949. result := true;
  950. end;
  951. end
  952. { mov reg1,reg0, lsr/asr imm1
  953. mov reg1,reg1, lsl imm2
  954. mov reg1,reg1, lsr/asr imm3 ...
  955. if imm3>=imm1 and imm2>=imm1
  956. to
  957. mov reg1,reg0, lsl imm2-imm1
  958. mov reg1,reg1, lsr/asr imm3 ...
  959. }
  960. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  961. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  962. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  963. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  964. begin
  965. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  966. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  967. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  968. asml.remove(p);
  969. p.free;
  970. p:=hp2;
  971. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  972. begin
  973. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  974. asml.remove(hp1);
  975. hp1.free;
  976. p:=hp2;
  977. end;
  978. result := true;
  979. end;
  980. end;
  981. end;
  982. { Change the common
  983. mov r0, r0, lsr #xxx
  984. and r0, r0, #yyy/bic r0, r0, #xxx
  985. and remove the superfluous and/bic if possible
  986. This could be extended to handle more cases.
  987. }
  988. if (taicpu(p).ops=3) and
  989. (taicpu(p).oper[2]^.typ = top_shifterop) and
  990. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  991. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  992. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  993. (hp1.typ=ait_instruction) and
  994. (taicpu(hp1).ops>=1) and
  995. (taicpu(hp1).oper[0]^.typ=top_reg) and
  996. (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  997. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  998. begin
  999. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1000. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1001. (taicpu(hp1).ops=3) and
  1002. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  1003. (taicpu(hp1).oper[2]^.typ = top_const) and
  1004. { Check if the AND actually would only mask out bits being already zero because of the shift
  1005. }
  1006. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) =
  1007. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1008. begin
  1009. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  1010. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  1011. asml.remove(hp1);
  1012. hp1.free;
  1013. result:=true;
  1014. end
  1015. else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1016. (taicpu(hp1).ops=3) and
  1017. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  1018. (taicpu(hp1).oper[2]^.typ = top_const) and
  1019. { Check if the BIC actually would only mask out bits being already zero because of the shift }
  1020. (taicpu(hp1).oper[2]^.val<>0) and
  1021. (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1022. begin
  1023. DebugMsg('Peephole LsrBic2Lsr done', hp1);
  1024. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  1025. asml.remove(hp1);
  1026. hp1.free;
  1027. result:=true;
  1028. end;
  1029. end;
  1030. { Change
  1031. mov rx, ry, lsr/ror #xxx
  1032. uxtb/uxth rz,rx/and rz,rx,0xFF
  1033. dealloc rx
  1034. to
  1035. uxtb/uxth rz,ry,ror #xxx
  1036. }
  1037. if (taicpu(p).ops=3) and
  1038. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1039. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1040. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1041. (GenerateThumb2Code) and
  1042. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  1043. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1044. begin
  1045. if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1046. (taicpu(hp1).ops = 2) and
  1047. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1048. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1049. begin
  1050. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1051. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1052. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1053. taicpu(hp1).ops := 3;
  1054. GetNextInstruction(p,hp1);
  1055. asml.Remove(p);
  1056. p.Free;
  1057. p:=hp1;
  1058. result:=true;
  1059. exit;
  1060. end
  1061. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1062. (taicpu(hp1).ops=2) and
  1063. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1064. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1065. begin
  1066. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1067. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1068. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1069. taicpu(hp1).ops := 3;
  1070. GetNextInstruction(p,hp1);
  1071. asml.Remove(p);
  1072. p.Free;
  1073. p:=hp1;
  1074. result:=true;
  1075. exit;
  1076. end
  1077. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1078. (taicpu(hp1).ops = 3) and
  1079. (taicpu(hp1).oper[2]^.typ = top_const) and
  1080. (taicpu(hp1).oper[2]^.val = $FF) and
  1081. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1082. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1083. begin
  1084. taicpu(hp1).ops := 3;
  1085. taicpu(hp1).opcode := A_UXTB;
  1086. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1087. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1088. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1089. GetNextInstruction(p,hp1);
  1090. asml.Remove(p);
  1091. p.Free;
  1092. p:=hp1;
  1093. result:=true;
  1094. exit;
  1095. end;
  1096. end;
  1097. {
  1098. optimize
  1099. mov rX, yyyy
  1100. ....
  1101. }
  1102. if (taicpu(p).ops = 2) and
  1103. GetNextInstruction(p,hp1) and
  1104. (tai(hp1).typ = ait_instruction) then
  1105. begin
  1106. {
  1107. This changes the very common
  1108. mov r0, #0
  1109. str r0, [...]
  1110. mov r0, #0
  1111. str r0, [...]
  1112. and removes all superfluous mov instructions
  1113. }
  1114. if (taicpu(p).oper[1]^.typ = top_const) and
  1115. (taicpu(hp1).opcode=A_STR) then
  1116. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1117. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1118. GetNextInstruction(hp1, hp2) and
  1119. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1120. (taicpu(hp2).ops = 2) and
  1121. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1122. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1123. begin
  1124. DebugMsg('Peephole MovStrMov done', hp2);
  1125. GetNextInstruction(hp2,hp1);
  1126. asml.remove(hp2);
  1127. hp2.free;
  1128. result:=true;
  1129. if not assigned(hp1) then break;
  1130. end
  1131. {
  1132. This removes the first mov from
  1133. mov rX,...
  1134. mov rX,...
  1135. }
  1136. else if taicpu(hp1).opcode=A_MOV then
  1137. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1138. (taicpu(hp1).ops = 2) and
  1139. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1140. { don't remove the first mov if the second is a mov rX,rX }
  1141. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  1142. begin
  1143. DebugMsg('Peephole MovMov done', p);
  1144. asml.remove(p);
  1145. p.free;
  1146. p:=hp1;
  1147. GetNextInstruction(hp1,hp1);
  1148. result:=true;
  1149. if not assigned(hp1) then
  1150. break;
  1151. end;
  1152. end;
  1153. {
  1154. change
  1155. mov r1, r0
  1156. add r1, r1, #1
  1157. to
  1158. add r1, r0, #1
  1159. Todo: Make it work for mov+cmp too
  1160. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1161. }
  1162. if (taicpu(p).ops = 2) and
  1163. (taicpu(p).oper[1]^.typ = top_reg) and
  1164. (taicpu(p).oppostfix = PF_NONE) and
  1165. GetNextInstruction(p, hp1) and
  1166. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1167. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  1168. [taicpu(p).condition], []) and
  1169. {MOV and MVN might only have 2 ops}
  1170. (taicpu(hp1).ops >= 2) and
  1171. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  1172. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1173. (
  1174. (taicpu(hp1).ops = 2) or
  1175. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  1176. ) then
  1177. begin
  1178. { When we get here we still don't know if the registers match}
  1179. for I:=1 to 2 do
  1180. {
  1181. If the first loop was successful p will be replaced with hp1.
  1182. The checks will still be ok, because all required information
  1183. will also be in hp1 then.
  1184. }
  1185. if (taicpu(hp1).ops > I) and
  1186. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) and
  1187. { prevent certain combinations on thumb(2), this is only a safe approximation }
  1188. (not(GenerateThumbCode or GenerateThumb2Code) or
  1189. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  1190. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15))
  1191. ) then
  1192. begin
  1193. DebugMsg('Peephole RedundantMovProcess done', hp1);
  1194. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  1195. if p<>hp1 then
  1196. begin
  1197. asml.remove(p);
  1198. p.free;
  1199. p:=hp1;
  1200. Result:=true;
  1201. end;
  1202. end;
  1203. end;
  1204. { Fold the very common sequence
  1205. mov regA, regB
  1206. ldr* regA, [regA]
  1207. to
  1208. ldr* regA, [regB]
  1209. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1210. }
  1211. if (taicpu(p).opcode = A_MOV) and
  1212. (taicpu(p).ops = 2) and
  1213. (taicpu(p).oper[1]^.typ = top_reg) and
  1214. (taicpu(p).oppostfix = PF_NONE) and
  1215. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1216. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1217. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1218. { We can change the base register only when the instruction uses AM_OFFSET }
  1219. ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1220. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1221. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1222. ) and
  1223. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1224. // Make sure that Thumb code doesn't propagate a high register into a reference
  1225. ((GenerateThumbCode and
  1226. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or
  1227. (not GenerateThumbCode)) and
  1228. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1229. begin
  1230. DebugMsg('Peephole MovLdr2Ldr done', hp1);
  1231. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1232. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1233. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1234. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1235. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1236. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1237. if Assigned(dealloc) then
  1238. begin
  1239. asml.remove(dealloc);
  1240. asml.InsertAfter(dealloc,hp1);
  1241. end;
  1242. GetNextInstruction(p, hp1);
  1243. asml.remove(p);
  1244. p.free;
  1245. p:=hp1;
  1246. result:=true;
  1247. end;
  1248. { This folds shifterops into following instructions
  1249. mov r0, r1, lsl #8
  1250. add r2, r3, r0
  1251. to
  1252. add r2, r3, r1, lsl #8
  1253. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1254. }
  1255. if (taicpu(p).opcode = A_MOV) and
  1256. (taicpu(p).ops = 3) and
  1257. (taicpu(p).oper[1]^.typ = top_reg) and
  1258. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1259. (taicpu(p).oppostfix = PF_NONE) and
  1260. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1261. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1262. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1263. A_CMP, A_CMN],
  1264. [taicpu(p).condition], [PF_None]) and
  1265. (not ((GenerateThumb2Code) and
  1266. (taicpu(hp1).opcode in [A_SBC]) and
  1267. (((taicpu(hp1).ops=3) and
  1268. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
  1269. ((taicpu(hp1).ops=2) and
  1270. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
  1271. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1272. (taicpu(hp1).ops >= 2) and
  1273. {Currently we can't fold into another shifterop}
  1274. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  1275. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1276. NR_DEFAULTFLAGS for modification}
  1277. (
  1278. {Everything is fine if we don't use RRX}
  1279. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1280. (
  1281. {If it is RRX, then check if we're just accessing the next instruction}
  1282. GetNextInstruction(p, hp2) and
  1283. (hp1 = hp2)
  1284. )
  1285. ) and
  1286. { reg1 might not be modified inbetween }
  1287. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1288. { The shifterop can contain a register, might not be modified}
  1289. (
  1290. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1291. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
  1292. ) and
  1293. (
  1294. {Only ONE of the two src operands is allowed to match}
  1295. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  1296. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  1297. ) then
  1298. begin
  1299. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  1300. I2:=0
  1301. else
  1302. I2:=1;
  1303. for I:=I2 to taicpu(hp1).ops-1 do
  1304. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  1305. begin
  1306. { If the parameter matched on the second op from the RIGHT
  1307. we have to switch the parameters, this will not happen for CMP
  1308. where we're only evaluating the rightmost parameter
  1309. }
  1310. if I <> taicpu(hp1).ops-1 then
  1311. begin
  1312. {The SUB operators need to be changed when we swap parameters}
  1313. case taicpu(hp1).opcode of
  1314. A_SUB: tempop:=A_RSB;
  1315. A_SBC: tempop:=A_RSC;
  1316. A_RSB: tempop:=A_SUB;
  1317. A_RSC: tempop:=A_SBC;
  1318. else tempop:=taicpu(hp1).opcode;
  1319. end;
  1320. if taicpu(hp1).ops = 3 then
  1321. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1322. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  1323. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1324. else
  1325. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1326. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1327. taicpu(p).oper[2]^.shifterop^);
  1328. end
  1329. else
  1330. if taicpu(hp1).ops = 3 then
  1331. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  1332. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  1333. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1334. else
  1335. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  1336. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1337. taicpu(p).oper[2]^.shifterop^);
  1338. asml.insertbefore(hp2, hp1);
  1339. GetNextInstruction(p, hp2);
  1340. asml.remove(p);
  1341. asml.remove(hp1);
  1342. p.free;
  1343. hp1.free;
  1344. p:=hp2;
  1345. DebugMsg('Peephole FoldShiftProcess done', p);
  1346. Result:=true;
  1347. break;
  1348. end;
  1349. end;
  1350. {
  1351. Fold
  1352. mov r1, r1, lsl #2
  1353. ldr/ldrb r0, [r0, r1]
  1354. to
  1355. ldr/ldrb r0, [r0, r1, lsl #2]
  1356. XXX: This still needs some work, as we quite often encounter something like
  1357. mov r1, r2, lsl #2
  1358. add r2, r3, #imm
  1359. ldr r0, [r2, r1]
  1360. which can't be folded because r2 is overwritten between the shift and the ldr.
  1361. We could try to shuffle the registers around and fold it into.
  1362. add r1, r3, #imm
  1363. ldr r0, [r1, r2, lsl #2]
  1364. }
  1365. if (not(GenerateThumbCode)) and
  1366. (taicpu(p).opcode = A_MOV) and
  1367. (taicpu(p).ops = 3) and
  1368. (taicpu(p).oper[1]^.typ = top_reg) and
  1369. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1370. { RRX is tough to handle, because it requires tracking the C-Flag,
  1371. it is also extremely unlikely to be emitted this way}
  1372. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1373. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1374. { thumb2 allows only lsl #0..#3 }
  1375. (not(GenerateThumb2Code) or
  1376. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1377. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1378. )
  1379. ) and
  1380. (taicpu(p).oppostfix = PF_NONE) and
  1381. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1382. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1383. (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1384. (GenerateThumb2Code and
  1385. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1386. ) and
  1387. (
  1388. {If this is address by offset, one of the two registers can be used}
  1389. ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1390. (
  1391. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1392. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1393. )
  1394. ) or
  1395. {For post and preindexed only the index register can be used}
  1396. ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1397. (
  1398. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1399. (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1400. ) and
  1401. (not GenerateThumb2Code)
  1402. )
  1403. ) and
  1404. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1405. (taicpu(hp1).oper[1]^.ref^.index<>NR_NO) and
  1406. (taicpu(hp1).oper[1]^.ref^.base<>NR_NO) and
  1407. { Only fold if there isn't another shifterop already, and offset is zero. }
  1408. (taicpu(hp1).oper[1]^.ref^.offset = 0) and
  1409. (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
  1410. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1411. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1412. begin
  1413. { If the register we want to do the shift for resides in base, we need to swap that}
  1414. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1415. taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
  1416. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1417. taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1418. taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1419. DebugMsg('Peephole FoldShiftLdrStr done', hp1);
  1420. GetNextInstruction(p, hp1);
  1421. asml.remove(p);
  1422. p.free;
  1423. p:=hp1;
  1424. Result:=true;
  1425. end;
  1426. {
  1427. Often we see shifts and then a superfluous mov to another register
  1428. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1429. }
  1430. if (taicpu(p).opcode = A_MOV) and
  1431. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1432. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  1433. Result:=true;
  1434. end;
  1435. A_ADD,
  1436. A_ADC,
  1437. A_RSB,
  1438. A_RSC,
  1439. A_SUB,
  1440. A_SBC,
  1441. A_AND,
  1442. A_BIC,
  1443. A_EOR,
  1444. A_ORR,
  1445. A_MLA,
  1446. A_MLS,
  1447. A_MUL:
  1448. begin
  1449. {
  1450. optimize
  1451. and reg2,reg1,const1
  1452. ...
  1453. }
  1454. if (taicpu(p).opcode = A_AND) and
  1455. (taicpu(p).ops>2) and
  1456. (taicpu(p).oper[1]^.typ = top_reg) and
  1457. (taicpu(p).oper[2]^.typ = top_const) then
  1458. begin
  1459. {
  1460. change
  1461. and reg2,reg1,const1
  1462. ...
  1463. and reg3,reg2,const2
  1464. to
  1465. and reg3,reg1,(const1 and const2)
  1466. }
  1467. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1468. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1469. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1470. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1471. (taicpu(hp1).oper[2]^.typ = top_const) then
  1472. begin
  1473. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1474. begin
  1475. DebugMsg('Peephole AndAnd2And done', p);
  1476. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1477. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1478. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1479. asml.remove(hp1);
  1480. hp1.free;
  1481. Result:=true;
  1482. end
  1483. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1484. begin
  1485. DebugMsg('Peephole AndAnd2And done', hp1);
  1486. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1487. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1488. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1489. GetNextInstruction(p, hp1);
  1490. asml.remove(p);
  1491. p.free;
  1492. p:=hp1;
  1493. Result:=true;
  1494. end;
  1495. end
  1496. {
  1497. change
  1498. and reg2,reg1,$xxxxxxFF
  1499. strb reg2,[...]
  1500. dealloc reg2
  1501. to
  1502. strb reg1,[...]
  1503. }
  1504. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1505. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1506. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1507. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1508. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1509. { the reference in strb might not use reg2 }
  1510. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1511. { reg1 might not be modified inbetween }
  1512. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1513. begin
  1514. DebugMsg('Peephole AndStrb2Strb done', p);
  1515. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1516. GetNextInstruction(p, hp1);
  1517. asml.remove(p);
  1518. p.free;
  1519. p:=hp1;
  1520. result:=true;
  1521. end
  1522. {
  1523. change
  1524. and reg2,reg1,255
  1525. uxtb/uxth reg3,reg2
  1526. dealloc reg2
  1527. to
  1528. and reg3,reg1,x
  1529. }
  1530. else if (taicpu(p).oper[2]^.val = $FF) and
  1531. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1532. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1533. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1534. (taicpu(hp1).ops = 2) and
  1535. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1536. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1537. { reg1 might not be modified inbetween }
  1538. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1539. begin
  1540. DebugMsg('Peephole AndUxt2And done', p);
  1541. taicpu(hp1).opcode:=A_AND;
  1542. taicpu(hp1).ops:=3;
  1543. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1544. taicpu(hp1).loadconst(2,255);
  1545. GetNextInstruction(p,hp1);
  1546. asml.remove(p);
  1547. p.Free;
  1548. p:=hp1;
  1549. result:=true;
  1550. end
  1551. {
  1552. from
  1553. and reg1,reg0,2^n-1
  1554. mov reg2,reg1, lsl imm1
  1555. (mov reg3,reg2, lsr/asr imm1)
  1556. remove either the and or the lsl/xsr sequence if possible
  1557. }
  1558. else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1559. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1560. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1561. (taicpu(hp1).ops=3) and
  1562. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1563. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1564. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1565. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1566. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1567. begin
  1568. {
  1569. and reg1,reg0,2^n-1
  1570. mov reg2,reg1, lsl imm1
  1571. mov reg3,reg2, lsr/asr imm1
  1572. =>
  1573. and reg1,reg0,2^n-1
  1574. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1575. }
  1576. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1577. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1578. (taicpu(hp2).ops=3) and
  1579. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1580. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1581. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1582. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1583. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1584. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1585. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1586. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1587. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1588. begin
  1589. DebugMsg('Peephole AndLslXsr2And done', p);
  1590. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1591. asml.Remove(hp1);
  1592. asml.Remove(hp2);
  1593. hp1.free;
  1594. hp2.free;
  1595. result:=true;
  1596. end
  1597. {
  1598. and reg1,reg0,2^n-1
  1599. mov reg2,reg1, lsl imm1
  1600. =>
  1601. mov reg2,reg0, lsl imm1
  1602. if imm1>i
  1603. }
  1604. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1605. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1606. begin
  1607. DebugMsg('Peephole AndLsl2Lsl done', p);
  1608. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1609. GetNextInstruction(p, hp1);
  1610. asml.Remove(p);
  1611. p.free;
  1612. p:=hp1;
  1613. result:=true;
  1614. end
  1615. end;
  1616. end;
  1617. {
  1618. change
  1619. add/sub reg2,reg1,const1
  1620. str/ldr reg3,[reg2,const2]
  1621. dealloc reg2
  1622. to
  1623. str/ldr reg3,[reg1,const2+/-const1]
  1624. }
  1625. if (not GenerateThumbCode) and
  1626. (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1627. (taicpu(p).ops>2) and
  1628. (taicpu(p).oper[1]^.typ = top_reg) and
  1629. (taicpu(p).oper[2]^.typ = top_const) then
  1630. begin
  1631. hp1:=p;
  1632. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1633. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1634. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1635. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1636. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1637. { don't optimize if the register is stored/overwritten }
  1638. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1639. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1640. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1641. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1642. ldr postfix }
  1643. (((taicpu(p).opcode=A_ADD) and
  1644. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1645. ) or
  1646. ((taicpu(p).opcode=A_SUB) and
  1647. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1648. )
  1649. ) do
  1650. begin
  1651. { neither reg1 nor reg2 might be changed inbetween }
  1652. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1653. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1654. break;
  1655. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1656. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1657. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1658. begin
  1659. { remember last instruction }
  1660. hp2:=hp1;
  1661. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1662. hp1:=p;
  1663. { fix all ldr/str }
  1664. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1665. begin
  1666. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1667. if taicpu(p).opcode=A_ADD then
  1668. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1669. else
  1670. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1671. if hp1=hp2 then
  1672. break;
  1673. end;
  1674. GetNextInstruction(p,hp1);
  1675. asml.remove(p);
  1676. p.free;
  1677. p:=hp1;
  1678. result:=true;
  1679. break;
  1680. end;
  1681. end;
  1682. end;
  1683. {
  1684. change
  1685. add reg1, ...
  1686. mov reg2, reg1
  1687. to
  1688. add reg2, ...
  1689. }
  1690. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1691. (taicpu(p).ops>=3) and
  1692. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1693. Result:=true;
  1694. if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  1695. LookForPreindexedPattern(taicpu(p)) then
  1696. begin
  1697. GetNextInstruction(p,hp1);
  1698. DebugMsg('Peephole Add/Sub to Preindexed done', p);
  1699. asml.remove(p);
  1700. p.free;
  1701. p:=hp1;
  1702. Result:=true;
  1703. end;
  1704. {
  1705. Turn
  1706. mul reg0, z,w
  1707. sub/add x, y, reg0
  1708. dealloc reg0
  1709. into
  1710. mls/mla x,z,w,y
  1711. }
  1712. if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
  1713. (taicpu(p).ops=3) and
  1714. (taicpu(p).oper[0]^.typ = top_reg) and
  1715. (taicpu(p).oper[1]^.typ = top_reg) and
  1716. (taicpu(p).oper[2]^.typ = top_reg) and
  1717. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1718. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  1719. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1720. (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
  1721. (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
  1722. ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
  1723. // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
  1724. // TODO: A workaround would be to swap Rm and Rs
  1725. (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
  1726. (((taicpu(hp1).ops=3) and
  1727. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1728. ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  1729. (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
  1730. ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1731. (taicpu(hp1).opcode=A_ADD) and
  1732. (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
  1733. ((taicpu(hp1).ops=2) and
  1734. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1735. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1736. (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
  1737. begin
  1738. if taicpu(hp1).opcode=A_ADD then
  1739. begin
  1740. taicpu(hp1).opcode:=A_MLA;
  1741. if taicpu(hp1).ops=3 then
  1742. begin
  1743. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
  1744. oldreg:=taicpu(hp1).oper[2]^.reg
  1745. else
  1746. oldreg:=taicpu(hp1).oper[1]^.reg;
  1747. end
  1748. else
  1749. oldreg:=taicpu(hp1).oper[0]^.reg;
  1750. taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
  1751. taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
  1752. taicpu(hp1).loadreg(3,oldreg);
  1753. DebugMsg('MulAdd2MLA done', p);
  1754. taicpu(hp1).ops:=4;
  1755. asml.remove(p);
  1756. p.free;
  1757. p:=hp1;
  1758. end
  1759. else
  1760. begin
  1761. taicpu(hp1).opcode:=A_MLS;
  1762. taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
  1763. if taicpu(hp1).ops=2 then
  1764. taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
  1765. else
  1766. taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
  1767. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  1768. DebugMsg('MulSub2MLS done', p);
  1769. taicpu(hp1).ops:=4;
  1770. asml.remove(p);
  1771. p.free;
  1772. p:=hp1;
  1773. end;
  1774. result:=true;
  1775. end
  1776. end;
  1777. {$ifdef dummy}
  1778. A_MVN:
  1779. begin
  1780. {
  1781. change
  1782. mvn reg2,reg1
  1783. and reg3,reg4,reg2
  1784. dealloc reg2
  1785. to
  1786. bic reg3,reg4,reg1
  1787. }
  1788. if (taicpu(p).oper[1]^.typ = top_reg) and
  1789. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1790. MatchInstruction(hp1,A_AND,[],[]) and
  1791. (((taicpu(hp1).ops=3) and
  1792. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1793. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1794. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1795. ((taicpu(hp1).ops=2) and
  1796. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1797. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1798. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1799. { reg1 might not be modified inbetween }
  1800. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1801. begin
  1802. DebugMsg('Peephole MvnAnd2Bic done', p);
  1803. taicpu(hp1).opcode:=A_BIC;
  1804. if taicpu(hp1).ops=3 then
  1805. begin
  1806. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1807. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1808. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1809. end
  1810. else
  1811. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1812. GetNextInstruction(p, hp1);
  1813. asml.remove(p);
  1814. p.free;
  1815. p:=hp1;
  1816. end;
  1817. end;
  1818. {$endif dummy}
  1819. A_UXTB:
  1820. begin
  1821. {
  1822. change
  1823. uxtb reg2,reg1
  1824. strb reg2,[...]
  1825. dealloc reg2
  1826. to
  1827. strb reg1,[...]
  1828. }
  1829. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1830. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1831. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1832. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1833. { the reference in strb might not use reg2 }
  1834. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1835. { reg1 might not be modified inbetween }
  1836. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1837. begin
  1838. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1839. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1840. GetNextInstruction(p,hp2);
  1841. asml.remove(p);
  1842. p.free;
  1843. p:=hp2;
  1844. result:=true;
  1845. end
  1846. {
  1847. change
  1848. uxtb reg2,reg1
  1849. uxth reg3,reg2
  1850. dealloc reg2
  1851. to
  1852. uxtb reg3,reg1
  1853. }
  1854. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1855. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1856. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1857. (taicpu(hp1).ops = 2) and
  1858. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1859. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1860. { reg1 might not be modified inbetween }
  1861. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1862. begin
  1863. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1864. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1865. asml.remove(hp1);
  1866. hp1.free;
  1867. result:=true;
  1868. end
  1869. {
  1870. change
  1871. uxtb reg2,reg1
  1872. uxtb reg3,reg2
  1873. dealloc reg2
  1874. to
  1875. uxtb reg3,reg1
  1876. }
  1877. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1878. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1879. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1880. (taicpu(hp1).ops = 2) and
  1881. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1882. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1883. { reg1 might not be modified inbetween }
  1884. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1885. begin
  1886. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1887. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1888. asml.remove(hp1);
  1889. hp1.free;
  1890. result:=true;
  1891. end
  1892. {
  1893. change
  1894. uxtb reg2,reg1
  1895. and reg3,reg2,#0x*FF
  1896. dealloc reg2
  1897. to
  1898. uxtb reg3,reg1
  1899. }
  1900. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1901. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1902. (taicpu(p).ops=2) and
  1903. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1904. (taicpu(hp1).ops=3) and
  1905. (taicpu(hp1).oper[2]^.typ=top_const) and
  1906. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  1907. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1908. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1909. { reg1 might not be modified inbetween }
  1910. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1911. begin
  1912. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  1913. taicpu(hp1).opcode:=A_UXTB;
  1914. taicpu(hp1).ops:=2;
  1915. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1916. GetNextInstruction(p,hp2);
  1917. asml.remove(p);
  1918. p.free;
  1919. p:=hp2;
  1920. result:=true;
  1921. end
  1922. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1923. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  1924. Result:=true;
  1925. end;
  1926. A_UXTH:
  1927. begin
  1928. {
  1929. change
  1930. uxth reg2,reg1
  1931. strh reg2,[...]
  1932. dealloc reg2
  1933. to
  1934. strh reg1,[...]
  1935. }
  1936. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1937. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1938. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1939. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1940. { the reference in strb might not use reg2 }
  1941. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1942. { reg1 might not be modified inbetween }
  1943. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1944. begin
  1945. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1946. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1947. GetNextInstruction(p, hp1);
  1948. asml.remove(p);
  1949. p.free;
  1950. p:=hp1;
  1951. result:=true;
  1952. end
  1953. {
  1954. change
  1955. uxth reg2,reg1
  1956. uxth reg3,reg2
  1957. dealloc reg2
  1958. to
  1959. uxth reg3,reg1
  1960. }
  1961. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1962. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1963. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1964. (taicpu(hp1).ops=2) and
  1965. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1966. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1967. { reg1 might not be modified inbetween }
  1968. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1969. begin
  1970. DebugMsg('Peephole UxthUxth2Uxth done', p);
  1971. taicpu(hp1).opcode:=A_UXTH;
  1972. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1973. GetNextInstruction(p, hp1);
  1974. asml.remove(p);
  1975. p.free;
  1976. p:=hp1;
  1977. result:=true;
  1978. end
  1979. {
  1980. change
  1981. uxth reg2,reg1
  1982. and reg3,reg2,#65535
  1983. dealloc reg2
  1984. to
  1985. uxth reg3,reg1
  1986. }
  1987. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1988. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1989. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1990. (taicpu(hp1).ops=3) and
  1991. (taicpu(hp1).oper[2]^.typ=top_const) and
  1992. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  1993. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1994. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1995. { reg1 might not be modified inbetween }
  1996. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1997. begin
  1998. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  1999. taicpu(hp1).opcode:=A_UXTH;
  2000. taicpu(hp1).ops:=2;
  2001. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  2002. GetNextInstruction(p, hp1);
  2003. asml.remove(p);
  2004. p.free;
  2005. p:=hp1;
  2006. result:=true;
  2007. end
  2008. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  2009. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  2010. Result:=true;
  2011. end;
  2012. A_CMP:
  2013. begin
  2014. {
  2015. change
  2016. cmp reg,const1
  2017. moveq reg,const1
  2018. movne reg,const2
  2019. to
  2020. cmp reg,const1
  2021. movne reg,const2
  2022. }
  2023. if (taicpu(p).oper[1]^.typ = top_const) and
  2024. GetNextInstruction(p, hp1) and
  2025. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  2026. (taicpu(hp1).oper[1]^.typ = top_const) and
  2027. GetNextInstruction(hp1, hp2) and
  2028. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  2029. (taicpu(hp1).oper[1]^.typ = top_const) then
  2030. begin
  2031. Result:=RemoveRedundantMove(p, hp1, asml) or Result;
  2032. Result:=RemoveRedundantMove(p, hp2, asml) or Result;
  2033. end;
  2034. end;
  2035. A_STM:
  2036. begin
  2037. {
  2038. change
  2039. stmfd r13!,[r14]
  2040. sub r13,r13,#4
  2041. bl abc
  2042. add r13,r13,#4
  2043. ldmfd r13!,[r15]
  2044. into
  2045. b abc
  2046. }
  2047. if not(ts_thumb_interworking in current_settings.targetswitches) and
  2048. MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
  2049. GetNextInstruction(p, hp1) and
  2050. GetNextInstruction(hp1, hp2) and
  2051. SkipEntryExitMarker(hp2, hp2) and
  2052. GetNextInstruction(hp2, hp3) and
  2053. SkipEntryExitMarker(hp3, hp3) and
  2054. GetNextInstruction(hp3, hp4) and
  2055. (taicpu(p).oper[0]^.typ = top_ref) and
  2056. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2057. (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  2058. (taicpu(p).oper[0]^.ref^.offset=0) and
  2059. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2060. (taicpu(p).oper[1]^.typ = top_regset) and
  2061. (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
  2062. MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
  2063. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2064. (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
  2065. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
  2066. (taicpu(hp1).oper[2]^.typ = top_const) and
  2067. MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
  2068. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
  2069. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
  2070. MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
  2071. MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
  2072. (taicpu(hp2).oper[0]^.typ = top_ref) and
  2073. MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
  2074. MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
  2075. (taicpu(hp4).oper[1]^.typ = top_regset) and
  2076. (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
  2077. begin
  2078. asml.Remove(p);
  2079. asml.Remove(hp1);
  2080. asml.Remove(hp3);
  2081. asml.Remove(hp4);
  2082. taicpu(hp2).opcode:=A_B;
  2083. p.free;
  2084. hp1.free;
  2085. hp3.free;
  2086. hp4.free;
  2087. p:=hp2;
  2088. DebugMsg('Peephole Bl2B done', p);
  2089. end;
  2090. end;
  2091. A_VADD,
  2092. A_VMUL,
  2093. A_VDIV,
  2094. A_VSUB,
  2095. A_VSQRT,
  2096. A_VNEG,
  2097. A_VCVT,
  2098. A_VABS:
  2099. begin
  2100. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  2101. RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
  2102. Result:=true;
  2103. end
  2104. end;
  2105. end;
  2106. end;
  2107. end;
  { Instructions modifying the CPSR can only be the last instruction of a
    sequence that is made conditional.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is a call, compare/test or SWI opcode from the set below,
      - it carries the PF_S postfix,
      - its first operand is the register NR_PC.
    Anything that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      { A_MSR is deliberately not part of the set, as in the original list }
      if ins.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE] then
        Result:=true
      else if ins.oppostfix=PF_S then
        Result:=true
      else
        Result:=(ins.ops>=1) and
          (ins.oper[0]^.typ=top_reg) and
          (ins.oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass over the current block (BlockStart up to BlockEnd).

    Only conditional A_B instructions are handled, and only when no Thumb code
    is generated. Two shapes are rewritten into conditionally executed code:

      1. Bcc xxx; <1..4 instructions>; xxx:
         the skipped instructions receive the inverse condition and the
         branch is removed.
      2. Bcc xxx; <block 1, at most 3 instructions>; B yyy; xxx: <block 2>; yyy:
         block 1 receives the inverse condition, block 2 the original
         condition, and both branches are removed.

    The procedure takes no parameters and returns nothing; it edits asml in
    place and decrements the reference count of the labels whose jumps it
    removes. }
  2116. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  2117. var
  2118. p,hp1,hp2: tai;
  2119. l : longint;
  2120. condition : tasmcond;
  2121. hp3: tai;
  2122. WasLast: boolean;
  2123. { UsedRegs, TmpUsedRegs: TRegSet; }
  2124. begin
  2125. p := BlockStart;
  2126. { UsedRegs := []; }
  2127. while (p <> BlockEnd) Do
  2128. begin
  2129. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  2130. case p.Typ Of
  2131. Ait_Instruction:
  2132. begin
  2133. case taicpu(p).opcode Of
  2134. A_B:
  2135. if (taicpu(p).condition<>C_None) and
  2136. not(GenerateThumbCode) then
  2137. begin
  2138. { check for
  2139. Bxx xxx
  2140. <several instructions>
  2141. xxx:
  2142. }
  { l counts the entries after the branch that CanBeCond accepts;
    WasLast records that the scan stopped at a MustBeLast instruction }
  2143. l:=0;
  2144. WasLast:=False;
  2145. GetNextInstruction(p, hp1);
  2146. while assigned(hp1) and
  2147. (l<=4) and
  2148. CanBeCond(hp1) and
  2149. { stop on labels }
  2150. not(hp1.typ=ait_label) do
  2151. begin
  2152. inc(l);
  { a MustBeLast instruction is still counted, but it ends the run }
  2153. if MustBeLast(hp1) then
  2154. begin
  2155. WasLast:=True;
  2156. GetNextInstruction(hp1,hp1);
  2157. break;
  2158. end
  2159. else
  2160. GetNextInstruction(hp1,hp1);
  2161. end;
  2162. if assigned(hp1) then
  2163. begin
  { shape 1: the entry the scan stopped at is the branch target itself }
  2164. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2165. begin
  2166. if (l<=4) and (l>0) then
  2167. begin
  2168. condition:=inverse_cond(taicpu(p).condition);
  { hp2 keeps the branch for later removal; p moves on to the first
    instruction that becomes conditional }
  2169. hp2:=p;
  2170. GetNextInstruction(p,hp1);
  2171. p:=hp1;
  2172. repeat
  2173. if hp1.typ=ait_instruction then
  2174. taicpu(hp1).condition:=condition;
  2175. if MustBeLast(hp1) then
  2176. begin
  2177. GetNextInstruction(hp1,hp1);
  2178. break;
  2179. end
  2180. else
  2181. GetNextInstruction(hp1,hp1);
  2182. until not(assigned(hp1)) or
  2183. not(CanBeCond(hp1)) or
  2184. (hp1.typ=ait_label);
  2185. { wait with removing else GetNextInstruction could
  2186. ignore the label if it was the only usage in the
  2187. jump moved away }
  2188. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2189. asml.remove(hp2);
  2190. hp2.free;
  { p was already advanced above, so skip the p.next step at the loop end }
  2191. continue;
  2192. end;
  2193. end
  2194. else
  2195. { do not perform further optimizations if there is an instruction
  2196. in block #1 which can not be optimized.
  2197. }
  2198. if not WasLast then
  2199. begin
  2200. { check further for
  2201. Bcc xxx
  2202. <several instructions 1>
  2203. B yyy
  2204. xxx:
  2205. <several instructions 2>
  2206. yyy:
  2207. }
  2208. { hp2 points to jmp yyy }
  2209. hp2:=hp1;
  2210. { skip hp1 to xxx }
  2211. GetNextInstruction(hp1, hp1);
  2212. if assigned(hp2) and
  2213. assigned(hp1) and
  2214. (l<=3) and
  2215. (hp2.typ=ait_instruction) and
  2216. (taicpu(hp2).is_jmp) and
  2217. (taicpu(hp2).condition=C_None) and
  2218. { real label and jump, no further references to the
  2219. label are allowed }
  2220. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  2221. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2222. begin
  2223. l:=0;
  2224. { skip hp1 to <several moves 2> }
  2225. GetNextInstruction(hp1, hp1);
  { NOTE(review): l is counted for block 2 but not tested afterwards, so the
    length of block 2 is not limited here - confirm this is intended }
  2226. while assigned(hp1) and
  2227. CanBeCond(hp1) do
  2228. begin
  2229. inc(l);
  2230. GetNextInstruction(hp1, hp1);
  2231. end;
  2232. { hp1 points to yyy: }
  2233. if assigned(hp1) and
  2234. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2235. begin
  { block 1 is executed when the branch is NOT taken: inverse condition }
  2236. condition:=inverse_cond(taicpu(p).condition);
  2237. GetNextInstruction(p,hp1);
  { hp3 keeps the Bcc for later removal; p moves on to the start of block 1 }
  2238. hp3:=p;
  2239. p:=hp1;
  2240. repeat
  2241. if hp1.typ=ait_instruction then
  2242. taicpu(hp1).condition:=condition;
  2243. GetNextInstruction(hp1,hp1);
  2244. until not(assigned(hp1)) or
  2245. not(CanBeCond(hp1));
  2246. { hp2 is still at jmp yyy }
  2247. GetNextInstruction(hp2,hp1);
  2248. { hp1 (not hp2) now refers to xxx: }
  { inverting again gives back the original condition of the Bcc, used for block 2 }
  2249. condition:=inverse_cond(condition);
  2250. GetNextInstruction(hp1,hp1);
  2251. { hp1 is now at <several movs 2> }
  { NOTE(review): unlike the loop above, this one does not test hp1.typ before
    the taicpu cast; presumably CanBeCond only accepts instructions - confirm }
  2252. repeat
  2253. taicpu(hp1).condition:=condition;
  2254. GetNextInstruction(hp1,hp1);
  2255. until not(assigned(hp1)) or
  2256. not(CanBeCond(hp1)) or
  2257. (hp1.typ=ait_label);
  2258. {
  2259. asml.remove(hp1.next)
  2260. hp1.next.free;
  2261. asml.remove(hp1);
  2262. hp1.free;
  2263. }
  2264. { remove Bcc }
  2265. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2266. asml.remove(hp3);
  2267. hp3.free;
  2268. { remove jmp }
  2269. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2270. asml.remove(hp2);
  2271. hp2.free;
  { p already points to the start of block 1, so skip the p.next step }
  2272. continue;
  2273. end;
  2274. end;
  2275. end;
  2276. end;
  2277. end;
  2278. end;
  2279. end;
  2280. end;
  2281. p := tai(p.next)
  2282. end;
  2283. end;
  { Tells whether the instruction p1 makes use of register Reg.

    Two ARM specific cases are answered with true before the inherited
    implementation is consulted:
      - an A_BL instruction is reported as using every register;
      - an A_LDR/A_STR carrying the PF_D postfix is reported as using the
        register whose super register number is one above that of its first
        operand, in addition to whatever the inherited check finds.

    Reg: the register to look for.
    p1:  the list entry to inspect.
    Returns true if Reg is considered used by p1. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=true;
      { calls: any register counts as used }
      if (p1.typ=ait_instruction) and (taicpu(p1).opcode=A_BL) then
        exit;
      { PF_D loads/stores: the register following the first operand is used too }
      if MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
         (getsupreg(reg)=getsupreg(taicpu(p1).oper[0]^.reg)+1) then
        exit;
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  2294. const
  2295. { set of opcodes which might write or do write to memory;
    SchedulerPass1Cpu only moves a load in front of an instruction with one of
    these opcodes when one of its extra checks on that instruction passes.
    Branch and call opcodes are listed as well - presumably because the code
    they transfer to may store to memory (confirm). }
  2296. { TODO : extend armins.dat to contain r/w info }
  2297. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  2298. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  { Adjusts the register live information (cg.rg[...].live_start/live_end)
    for swapping the two instructions p and hp1.

    Precondition: p is directly followed by hp1; after the swap hp1 comes
    first.

    p:   the instruction that currently comes first.
    hp1: the instruction that currently comes second.

    Two adjustments are made:
      - a register used by p whose live range starts at p and which is also
        used by hp1 now starts living at hp1;
      - a register used by hp1 whose live range ends at hp1 and which is also
        used by p now stops living at p. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { Moves the live end of reg from hp1 to p if p uses reg as well.
      NR_NO is ignored. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { Moves the live start of reg from p to hp1 if hp1 uses reg as well.
      NR_NO is ignored. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop adjusts live ends, so the shift register has to go
              through CheckLiveEnd like every other operand kind handled here
              (it used to call CheckLiveStart) }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass over the current block
    (BlockStart up to BlockEnd).

    Looks for three consecutive instructions p, hp1, hp2 where hp1 is a load
    (LDR/LDRB/LDRH/LDRSB/LDRSH) whose destination register is used by hp2 but
    not by p. If the long list of safety checks below passes, p is moved
    behind hp1 together with the register allocation entries and address
    labels that belong to it, so that the load is issued one instruction
    earlier - presumably to give it time to complete before hp2 needs the
    value.

    p: var parameter; it is overwritten with BlockStart on entry and used as
       the running cursor, so the caller's value is not read.
    Returns: always true. }
  2370. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  2371. { TODO : schedule also forward }
  2372. { TODO : schedule distance > 1 }
  2373. { returns true if p might be a load of a pc relative tls offset }
  2374. function PossibleTLSLoad(const p: tai) : boolean;
  2375. begin
  { an LDR with a reference that combines NR_PC with another register,
    either as base+index or as index+base }
  2376. Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
  2377. (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
  2378. (taicpu(p).oper[1]^.ref^.index=NR_PC)));
  2379. end;
  2380. var
  2381. hp1,hp2,hp3,hp4,hp5,insertpos : tai;
  2382. list : TAsmList;
  2383. begin
  2384. result:=true;
  { list temporarily collects p and the entries that have to travel with it }
  2385. list:=TAsmList.create;
  2386. p:=BlockStart;
  2387. while p<>BlockEnd Do
  2388. begin
  { NOTE(review): oper[1] of hp1 is dereferenced as a reference below without
    a typ check; presumably the load opcodes matched here always carry a
    top_ref as second operand - confirm }
  2389. if (p.typ=ait_instruction) and
  2390. GetNextInstruction(p,hp1) and
  2391. (hp1.typ=ait_instruction) and
  2392. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  2393. (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
  2394. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  2395. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  2396. not(RegModifiedByInstruction(NR_PC,p))
  2397. ) or
  2398. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  2399. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  2400. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  2401. (taicpu(hp1).oper[1]^.ref^.offset=0)
  2402. )
  2403. ) or
  2404. { try to prove that the memory accesses don't overlap }
  2405. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  2406. (taicpu(p).oper[1]^.typ = top_ref) and
  2407. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  2408. (taicpu(p).oppostfix=PF_None) and
  2409. (taicpu(hp1).oppostfix=PF_None) and
  2410. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  2411. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  2412. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  2413. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  2414. )
  2415. )
  2416. ) and
  2417. GetNextInstruction(hp1,hp2) and
  2418. (hp2.typ=ait_instruction) and
  2419. { loaded register used by next instruction? }
  2420. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  2421. { loaded register not used by previous instruction? }
  2422. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  2423. { same condition? }
  2424. (taicpu(p).condition=taicpu(hp1).condition) and
  2425. { first instruction might not change the register used as base }
  2426. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  2427. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  2428. ) and
  2429. { first instruction might not change the register used as index }
  2430. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  2431. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  2432. ) and
  2433. { if we modify the basereg AND the first instruction used that reg, we can not schedule }
  2434. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
  2435. not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
  2436. not(PossibleTLSLoad(p)) and
  2437. not(PossibleTLSLoad(hp1)) then
  2438. begin
  { remember the neighbours of p before it is unlinked: hp3 is used to walk
    backwards, hp5 to walk forwards }
  2439. hp3:=tai(p.Previous);
  2440. hp5:=tai(p.next);
  2441. asml.Remove(p);
  2442. { if there is a reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
  2443. associated with p, move it together with p }
  2444. { before the instruction? }
  2445. { find reg allocs,deallocs and PIC labels }
  2446. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  2447. begin
  2448. if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
  2449. RegInInstruction(tai_regalloc(hp3).reg,p) )
  2450. or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
  2451. then
  2452. begin
  2453. hp4:=hp3;
  2454. hp3:=tai(hp3.Previous);
  2455. asml.Remove(hp4);
  { walking backwards and inserting at the front keeps the original order }
  2456. list.Insert(hp4);
  2457. end
  2458. else
  2459. hp3:=tai(hp3.Previous);
  2460. end;
  2461. list.Concat(p);
  2462. SwapRegLive(taicpu(p),taicpu(hp1));
  2463. { after the instruction? }
  2464. { find reg deallocs and reg syncs }
  2465. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  2466. begin
  2467. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
  2468. RegInInstruction(tai_regalloc(hp5).reg,p) then
  2469. begin
  2470. hp4:=hp5;
  2471. hp5:=tai(hp5.next);
  2472. asml.Remove(hp4);
  2473. list.Concat(hp4);
  2474. end
  2475. else
  2476. hp5:=tai(hp5.Next);
  2477. end;
  2478. asml.Remove(hp1);
  2479. { if there are address labels associated with hp2, those must
  2480. stay with hp2 (e.g. for GOT-less PIC) }
  2481. insertpos:=hp2;
  2482. while assigned(hp2.previous) and
  2483. (tai(hp2.previous).typ<>ait_instruction) do
  2484. begin
  2485. hp2:=tai(hp2.previous);
  2486. if (hp2.typ=ait_label) and
  2487. (tai_label(hp2).labsym.typ=AT_ADDR) then
  2488. insertpos:=hp2;
  2489. end;
  2490. {$ifdef DEBUG_PREREGSCHEDULER}
  2491. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
  2492. {$endif DEBUG_PREREGSCHEDULER}
  { resulting order: hp1, then the contents of list (p and its companions),
    then insertpos }
  2493. asml.InsertBefore(hp1,insertpos);
  2494. asml.InsertListBefore(insertpos,list);
  { p now sits behind hp1, so this continues with what follows p at its new place }
  2495. p:=tai(p.next);
  2496. end
  { NOTE(review): hp1 is whatever GetNextInstruction left in it; if that call
    failed, hp1 may not be a valid successor - confirm this cannot happen
    before BlockEnd is reached }
  2497. else if p.typ=ait_instruction then
  2498. p:=hp1
  2499. else
  2500. p:=tai(p.next);
  2501. end;
  2502. list.Free;
  2503. end;
  { Shortens the IT block that precedes p by one slot when p occupies the
    last slot of that block.

    Walks backwards from p.Previous over at most four instructions (entries
    that are not instructions are skipped and not counted). l is the 1-based
    distance, counted in instructions, between p and the entry being
    inspected. On the first instruction whose opcode lies in the range
    A_IT..A_ITTTT the search stops:
      - a plain A_IT at distance 1 is removed from the list and freed;
      - a longer ITxyz whose length equals the distance is replaced by the
        opcode without its last then/else letter;
      - anything else is left untouched.

    list: the assembler list that contains p.
    p:    the entry whose preceding IT block is to be adjusted. }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
            (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              { the range test relies on the IT opcodes being declared
                consecutively from A_IT to A_ITTTT }
              if (taicpu(hp).opcode >= A_IT) and
                 (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                     (l=1) then
                    begin
                      { the IT only covered p: drop it. Removing merely
                        unlinks the entry, so free it as well instead of
                        leaking it; hp is not touched again because the
                        loop is left right below }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;
                  { only the nearest IT instruction is considered }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific first-pass peephole optimizations for the entry at p.

    The inherited optimizations are tried first; if they report a change
    nothing else is attempted. Otherwise the following rewrites are tried in
    order:

      Stm2Push   : STM (FD/DB postfix, pre-indexed, index register = stack
                   pointer) whose register set contains none of 8..13 or 15
                   is replaced by PUSH with the same register set
      Ldm2Pop    : LDM (FD/IA postfix, pre-indexed, index register = stack
                   pointer) whose register set contains none of 8..14 is
                   replaced by POP with the same register set
      AndR2Uxt   : two operand AND with constant 255/65535 becomes
                   UXTB/UXTH reg,reg
      AndRR2Uxt  : three operand AND with constant 255/65535 becomes
                   UXTB/UXTH using the first two operands

    p      : current list entry; when an instruction is replaced, p is
             updated to the replacement and the old instruction is freed
    result : true if the inherited pass or one of the rewrites above
             changed something }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      //hp1,hp2 : tai;
    begin
      result:=false;
      if inherited PeepHoleOptPass1Cpu(p) then
        result:=true
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          DebugMsg('Peephole Stm2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced STM, exactly as the Ldm2Pop case below does;
            Remove only unlinks it from the list }
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          DebugMsg('Peephole Str2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          DebugMsg('Peephole Ldm2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          DebugMsg('Peephole Ldr2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          DebugMsg('Peephole AndR2Uxt done', p);
          { pick the opcode while oper[1] still holds the mask constant }
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { source operand becomes the destination register itself }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          DebugMsg('Peephole AndRR2Uxt done', p);
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { drop the mask operand, destination and source stay as they are }
          taicpu(p).ops:=2;
          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
    end;
  { Second peephole pass for Thumb-2: turns short forward conditional
    branches into IT blocks.

    Looks for

        Bxx  lab
        <1..4 instructions that can be made conditional>
      lab:

    and, when found, removes the branch, gives the skipped instructions the
    inverse condition of the branch and puts an IT/ITT/ITTT/ITTTT with that
    inverse condition in front of them. An IT block the branch itself was
    the last member of is shortened via DecrementPreceedingIT. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    const
      { IT opcode for a block of 1..4 instructions sharing one condition }
      AllThenIT : array[1..4] of tasmop = (A_IT,A_ITT,A_ITTT,A_ITTTT);
    var
      p,scan,branch : tai;
      count : longint;
      newcond : tasmcond;
      mustend : boolean;
    begin
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_B) and
             (taicpu(p).condition<>C_None) then
            begin
              { count the conditionalizable instructions following the
                branch; stop at a label or after an instruction that has to
                be the last one }
              count:=0;
              GetNextInstruction(p,scan);
              while assigned(scan) and
                    (count<=4) and
                    CanBeCond(scan) and
                    (scan.typ<>ait_label) do
                begin
                  inc(count);
                  mustend:=MustBeLast(scan);
                  GetNextInstruction(scan,scan);
                  if mustend then
                    break;
                end;

              { scan now has to be at the branch target and the skipped
                range has to fit into a single IT block }
              if assigned(scan) and
                 FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),scan) and
                 (count>0) and
                 (count<=4) then
                begin
                  newcond:=inverse_cond(taicpu(p).condition);
                  branch:=p;
                  GetNextInstruction(p,scan);
                  { continue the outer loop at the first skipped instruction }
                  p:=scan;
                  repeat
                    if scan.typ=ait_instruction then
                      taicpu(scan).condition:=newcond;
                    mustend:=MustBeLast(scan);
                    GetNextInstruction(scan,scan);
                  until mustend or
                        not assigned(scan) or
                        not CanBeCond(scan) or
                        (scan.typ=ait_label);

                  { the branch is removed only now, otherwise
                    GetNextInstruction could ignore the label if the removed
                    jump was its only user }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')),branch);
                  DecrementPreceedingIT(asml,branch);
                  asml.InsertAfter(taicpu.op_cond(AllThenIT[count],newcond),branch);
                  tasmlabel(taicpu(branch).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(branch);
                  branch.free;
                  continue;
                end;
            end;
          p:=tai(p.next);
        end;
    end;
  { Final Thumb-2 peephole step for the entry at p: rewrites unconditional
    instructions into two operand and/or flag setting (S postfix) forms.

    Rewrites that add the S postfix are only done while the flags register
    is not marked as used; they allocate the flags register directly before
    and release it directly after p.

    p      : current list entry, only modified in place
    result : true if p was changed }
  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    var
      insn : taicpu;

    { true while the flags register is not recorded in UsedRegs }
    function FlagsAreFree: boolean;
      begin
        FlagsAreFree:=not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
      end;

    { wraps p into an alloc/dealloc pair for the flags register, records
      the flags as used and gives p the S postfix }
    procedure SwitchToFlagSetting;
      begin
        asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
        asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
        IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
        taicpu(p).oppostfix:=PF_S;
      end;

    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;

      { p is never reassigned below, so the typed alias stays valid }
      insn:=taicpu(p);

      { assume a match; the final else resets the result }
      result:=true;

      { mov rX,#0..255 -> movs }
      if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
         (insn.oper[1]^.typ=top_const) and
         (insn.oper[1]^.val >= 0) and
         (insn.oper[1]^.val < 256) and
         FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Movs done', p);
          SwitchToFlagSetting;
        end
      { mvn rX,rY -> mvns }
      else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
              (insn.oper[1]^.typ=top_reg) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole Mvn2Mvns done', p);
          SwitchToFlagSetting;
        end
      { rsb rX,rY,#0 -> rsbs }
      else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
              (insn.ops = 3) and
              (insn.oper[2]^.typ=top_const) and
              (insn.oper[2]^.val=0) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole Rsb2Rsbs done', p);
          SwitchToFlagSetting;
        end
      { add/sub rX,rX,#0..255 (rX not the stack pointer) -> adds/subs rX,#imm }
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
              (insn.ops = 3) and
              MatchOperand(insn.oper[0]^, insn.oper[1]^) and
              (not MatchOperand(insn.oper[0]^, NR_STACK_POINTER_REG)) and
              (insn.oper[2]^.typ=top_const) and
              (insn.oper[2]^.val >= 0) and
              (insn.oper[2]^.val < 256) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          SwitchToFlagSetting;
          insn.loadconst(1,insn.oper[2]^.val);
          insn.ops:=2;
        end
      { add/sub rX,rY (neither being the stack pointer) -> adds/subs }
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
              (insn.ops = 2) and
              (insn.oper[1]^.typ=top_reg) and
              (not MatchOperand(insn.oper[0]^, NR_STACK_POINTER_REG)) and
              (not MatchOperand(insn.oper[1]^, NR_STACK_POINTER_REG)) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          SwitchToFlagSetting;
        end
      { add rX,rX,rY -> add rX,rY; flags are left alone here }
      else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
              (insn.ops = 3) and
              MatchOperand(insn.oper[0]^, insn.oper[1]^) and
              (insn.oper[2]^.typ=top_reg) then
        begin
          DebugMsg('Peephole AddRRR2AddRR done', p);
          insn.ops:=2;
          insn.loadreg(1,insn.oper[2]^.reg);
        end
      { op rX,rX,rY -> ops rX,rY }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
              (insn.ops = 3) and
              MatchOperand(insn.oper[0]^, insn.oper[1]^) and
              (insn.oper[2]^.typ=top_reg) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole opXXY2opsXY done', p);
          SwitchToFlagSetting;
          insn.ops:=2;
          insn.loadreg(1,insn.oper[2]^.reg);
        end
      { ops rX,rX,rY/#imm -> ops rX,rY/#imm; the postfix is already S }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
              (insn.ops = 3) and
              MatchOperand(insn.oper[0]^, insn.oper[1]^) and
              (insn.oper[2]^.typ in [top_reg,top_const]) then
        begin
          DebugMsg('Peephole opXXY2opXY done', p);
          insn.ops:=2;
          if insn.oper[2]^.typ=top_reg then
            insn.loadreg(1,insn.oper[2]^.reg)
          else
            insn.loadconst(1,insn.oper[2]^.val);
        end
      { op rX,rY,rX -> ops rX,rY: the third operand is simply dropped }
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
              (insn.ops = 3) and
              MatchOperand(insn.oper[0]^, insn.oper[2]^) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole opXYX2opsXY done', p);
          SwitchToFlagSetting;
          insn.ops:=2;
        end
      { mov rX,rY,<shift> -> lsls/lsrs/asrs/rors rX,rY,rZ/#imm }
      else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
              (insn.ops=3) and
              (insn.oper[2]^.typ=top_shifterop) and
              (insn.oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
              FlagsAreFree then
        begin
          DebugMsg('Peephole Mov2Shift done', p);
          SwitchToFlagSetting;
          case insn.oper[2]^.shifterop^.shiftmode of
            SM_LSL: insn.opcode:=A_LSL;
            SM_LSR: insn.opcode:=A_LSR;
            SM_ASR: insn.opcode:=A_ASR;
            SM_ROR: insn.opcode:=A_ROR;
          end;
          { replace the shifter operand by its shift register, or by the
            immediate shift amount if there is no shift register }
          if insn.oper[2]^.shifterop^.rs<>NR_NO then
            insn.loadreg(2, insn.oper[2]^.shifterop^.rs)
          else
            insn.loadconst(2, insn.oper[2]^.shifterop^.shiftimm);
        end
      else
        result:=false;
    end;
  2921. begin
  2922. casmoptimizer:=TCpuAsmOptimizer;
  2923. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  2924. End.