aoptcpu.pas 45 KB


  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
  type
    { CPU specific peephole optimizer; the opcodes and cpu capabilities it
      handles (LDI, SBIS, MOVW, CPUAVR_xxx, ...) are those of the AVR target }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { register tracking helpers }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
      function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { inverts a skip instruction that only jumps over a single instruction }
      function InvertSkipInstruction(var p: tai): boolean;

      { uses the same constructor as TAopObj }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    private
      { one handler per opcode, pass 1 }
      function OptPass1ADD(var p: tai): boolean;
      function OptPass1ANDI(var p: tai): boolean;
      function OptPass1CALL(var p: tai): boolean;
      function OptPass1CLR(var p: tai): boolean;
      function OptPass1IN(var p: tai): boolean;
      function OptPass1LDI(var p: tai): boolean;
      function OptPass1LDS(var p: tai): boolean;
      function OptPass1MOV(var p: tai): boolean;
      function OptPass1PUSH(var p: tai): boolean;
      function OptPass1RCALL(var p: tai): boolean;
      function OptPass1SBI(var p: tai): boolean;
      function OptPass1SBR(var p: tai): boolean;
      function OptPass1STS(var p: tai): boolean;
      function OptPass1SUB(var p: tai): boolean;

      { pass 2 }
      function OptPass2MOV(var p: tai): boolean;
    end;
  52. Implementation
  53. uses
  54. cutils,
  55. verbose,
  56. cpuinfo,
  57. aasmbase,aasmcpu,aasmdata,
  58. aoptutils,
  59. globals,globtype,
  60. cgutils;
  type
    { set of opcodes, accepted by the MatchInstruction overloads below }
    TAsmOpSet = set of TAsmOp;


  { true if p is an instruction whose condition field is still C_None }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { compares two references field by field; a reference with a non-empty
    volatility set never compares equal to anything }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      Result:=
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.offset = r2.offset) and
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr) and
        (r1.addressmode = r2.addressmode);
    end;
  { true if both operands have the same type and the same content; only
    constants, registers and references can match }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          else
            Result:=false;
        end;
    end;


  { true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg = reg;
    end;
  { true if instr is an instruction with opcode op }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode = op;
    end;


  { true if instr is an instruction whose opcode is a member of ops }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode in ops;
    end;


  { true if instr is an instruction whose opcode is a member of ops and
    which has exactly opcount operands }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=
          (taicpu(instr).opcode in ops) and
          (taicpu(instr).ops = opcount);
    end;
  { With DEBUG_AOPTCPU defined, s is inserted as a comment in front of p in
    the assembler list; otherwise this is an empty, inlined procedure. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Adds the registers that some instructions touch implicitly to the generic
    check: r0/r1 for the multiply instructions listed below, and the register
    following each operand for MOVW and ADIW. Everything else is delegated to
    the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      insn: taicpu;
    begin
      if p1.typ=ait_instruction then
        begin
          insn:=taicpu(p1);
          case insn.opcode of
            A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU:
              if (getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1) then
                begin
                  Result:=true;
                  exit;
                end;
            A_MOVW:
              if (TRegister(ord(insn.oper[0]^.reg)+1)=reg) or
                 (TRegister(ord(insn.oper[1]^.reg)+1)=reg) or
                 (insn.oper[0]^.reg=reg) or
                 (insn.oper[1]^.reg=reg) then
                begin
                  Result:=true;
                  exit;
                end;
            A_ADIW:
              if (TRegister(ord(insn.oper[0]^.reg)+1)=reg) or
                 (insn.oper[0]^.reg=reg) then
                begin
                  Result:=true;
                  exit;
                end;
            else
              ;
          end;
        end;
      { no implicit use found: ask the generic code }
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Walks forward from Current. Without cs_opt_level3 only the directly
    following instruction is returned. With it, the walk continues until
    GetNextInstruction fails, a non-instruction is hit, an instruction uses
    reg, or a call/jump is reached. The result is that of the last
    GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { these three stop conditions must be tested before Next is
          inspected as an instruction }
        if not(cs_opt_level3 in current_settings.optimizerswitches) or
           not(Result) or
           (Next.typ<>ait_instruction) then
          break;
      until RegInInstruction(reg,Next) or
            is_calljmp(taicpu(Next).opcode);
    end;
  { Returns true if hp is an instruction that overwrites reg without reading
    its previous contents:
      - LDI/MOV/LDS with reg as destination (and, for a register source, a
        source different from reg),
      - LD/LDD/LPM with reg as destination and reg not part of the reference,
      - MOVW with reg in the destination pair and not in the source pair,
      - POP into reg.
    Returns false for nil or for anything that is not an instruction. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      Result :=
        ((p.opcode in [A_LDI,A_MOV,A_LDS]) and
         (reg=p.oper[0]^.reg) and
         ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg))) or
        ((p.opcode in [A_LD,A_LDD,A_LPM]) and
         (reg=p.oper[0]^.reg) and
         not(RegInRef(reg,p.oper[1]^.ref^))) or
        { MOVW operates on register pairs: it writes oper[0] and the register
          following it, and reads oper[1] and the register following it.
          So reg is part of a pair if it equals the operand or the operand+1
          (same convention as RegInInstruction and InstructionLoadsFromReg). }
        ((p.opcode in [A_MOVW]) and
         ((reg=p.oper[0]^.reg) or (TRegister(ord(p.oper[0]^.reg)+1)=reg)) and
         not(reg=p.oper[1]^.reg) and
         not(TRegister(ord(p.oper[1]^.reg)+1)=reg)) or
        ((p.opcode in [A_POP]) and (reg=p.oper[0]^.reg));
    end;
  { Returns true if instruction hp reads reg, either as a register operand,
    as base/index of a reference operand, or implicitly as the upper half of
    the MOVW source pair or the ADIW operand pair. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i, first: longint;
    begin
      Result := false;
      if not assigned(hp) or (hp.typ <> ait_instruction) then
        exit;
      p:=taicpu(hp);

      { we do not care about the stack pointer }
      if p.opcode in [A_POP] then
        exit;

      { for these opcodes the first operand is only written, so start
        looking at the second one }
      if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        first:=1
      else
        first:=0;

      for i:=first to p.ops-1 do
        begin
          case p.oper[i]^.typ of
            top_reg:
              Result :=
                (p.oper[i]^.reg = reg) or
                { MOVW: the register following the source is read as well }
                ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[i]^.reg)+1=getsupreg(reg))) or
                { ADIW: the register following the operand is read as well }
                ((i=0) and (p.opcode=A_ADIW) and (getsupreg(p.oper[i]^.reg)+1=getsupreg(reg)));
            top_ref:
              Result :=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
          end;
          { stop at the first hit }
          if Result then
            exit;
        end;
    end;
  {
    Rewrites

      sbis ?
      jmp .Lx
      op
    .Lx:

    as

      sbic ?
      op

    The same is done for sbic, sbrs and sbrc (each is replaced by its
    opposite). Returns true if the sequence was found and changed.
  }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { steps forward with GetNextInstruction until an entry of type
      ait_instruction is found; true if next then is such an entry }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      var
        found: boolean;
      begin
        repeat
          found:=GetNextInstruction(p,next);
          p:=next;
        until
          (not found) or
          (not assigned(next)) or
          (next.typ=ait_instruction);
        result:=assigned(next) and (next.typ=ait_instruction);
      end;

    var
      jump, skipped, after: tai;
    begin
      result:=false;

      { p must be followed by an unconditional jump to a plain label }
      if not(GetNextInstruction(taicpu(p),jump) and
             (jump.typ=ait_instruction) and
             (taicpu(jump).opcode in [A_RJMP,A_JMP]) and
             (taicpu(jump).ops=1) and
             (taicpu(jump).oper[0]^.typ=top_ref) and
             (taicpu(jump).oper[0]^.ref^.offset=0) and
             (taicpu(jump).oper[0]^.ref^.symbol is TAsmLabel)) then
        exit;

      { the jump must hop over exactly one non-jump instruction and land on
        the label right behind it }
      if not(GetNextInstructionWithoutLabel(jump,skipped) and
             (skipped.typ=ait_instruction) and
             (not taicpu(skipped).is_jmp) and
             GetNextInstruction(skipped,after) and
             FindLabel(TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol),after)) then
        exit;

      DebugMsg('SkipJump2InvertedSkip', p);

      case taicpu(p).opcode of
        A_SBIS: taicpu(p).opcode:=A_SBIC;
        A_SBIC: taicpu(p).opcode:=A_SBIS;
        A_SBRS: taicpu(p).opcode:=A_SBRC;
        A_SBRC: taicpu(p).opcode:=A_SBRS;
      end;

      { the jump disappears, so the label loses one reference }
      TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol).decrefs;
      asml.remove(jump);
      jump.free;
    end;
  { turn
      ldi reg0, imm
      <op> reg1, reg0
      dealloc reg0
    into
      <op>i reg1, imm
    for <op> = cp, mov, and, sub; reg1 must be one of r16..r31 and reg0 must
    not be used after <op> }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      hp1 : tai;
      ldreg : TRegister;
      alloc, dealloc : tai_regalloc;
    begin
      Result:=false;

      if not MatchOpType(taicpu(p),top_reg,top_const) then
        exit;
      ldreg:=taicpu(p).oper[0]^.reg;

      if not(GetNextInstructionUsingReg(p, hp1, ldreg) and
             MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
             (not RegModifiedBetween(ldreg, p, hp1)) and
             MatchOpType(taicpu(hp1),top_reg,top_reg) and
             (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
             (taicpu(hp1).oper[1]^.reg=ldreg) and
             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^))) then
        exit;

      { the loaded register must be dead behind hp1 }
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs) then
        exit;

      { switch hp1 to the immediate form of its opcode }
      case taicpu(hp1).opcode of
        A_CP:
          taicpu(hp1).opcode:=A_CPI;
        A_MOV:
          taicpu(hp1).opcode:=A_LDI;
        A_AND:
          taicpu(hp1).opcode:=A_ANDI;
        A_SUB:
          taicpu(hp1).opcode:=A_SUBI;
        else
          internalerror(2016111901);
      end;
      taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);

      { drop the allocation info of the register if both ends are found }
      alloc:=FindRegAllocBackward(ldreg,tai(p.Previous));
      dealloc:=FindRegDeAlloc(ldreg,tai(hp1.Next));
      if assigned(alloc) and assigned(dealloc) then
        begin
          asml.Remove(alloc);
          alloc.Free;
          asml.Remove(dealloc);
          dealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);

      result:=RemoveCurrentP(p);
    end;
  { Replaces an STS to a plain constant address by OUT when the address lies
    in the range reachable by OUT: 0..63 on cpus with
    CPUAVR_NOMEMMAPPED_REGS, otherwise 32..95 (the port number is then
    address-32). }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      nomemmapped, inrange : boolean;
    begin
      Result:=false;

      { only references consisting of nothing but an offset qualify }
      if (taicpu(p).oper[0]^.ref^.symbol<>nil) or
         (taicpu(p).oper[0]^.ref^.relsymbol<>nil) or
         (getsupreg(taicpu(p).oper[0]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[0]^.ref^.index)<>RS_NO) or
         (taicpu(p).oper[0]^.ref^.addressmode<>AM_UNCHANGED) then
        exit;

      nomemmapped:=CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype];
      if nomemmapped then
        inrange:=(taicpu(p).oper[0]^.ref^.offset>=0) and
                 (taicpu(p).oper[0]^.ref^.offset<=63)
      else
        inrange:=(taicpu(p).oper[0]^.ref^.offset>=32) and
                 (taicpu(p).oper[0]^.ref^.offset<=95);
      if not inrange then
        exit;

      DebugMsg('Peephole Sts2Out performed', p);

      taicpu(p).opcode:=A_OUT;
      if nomemmapped then
        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
      else
        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
      result:=true;
    end;
  { Replaces an LDS from a plain constant address by IN when the address lies
    in the range reachable by IN: 0..63 on cpus with
    CPUAVR_NOMEMMAPPED_REGS, otherwise 32..95 (the port number is then
    address-32). }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      nomemmapped, inrange : boolean;
    begin
      Result:=false;

      { only references consisting of nothing but an offset qualify }
      if (taicpu(p).oper[1]^.ref^.symbol<>nil) or
         (taicpu(p).oper[1]^.ref^.relsymbol<>nil) or
         (getsupreg(taicpu(p).oper[1]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[1]^.ref^.index)<>RS_NO) or
         (taicpu(p).oper[1]^.ref^.addressmode<>AM_UNCHANGED) then
        exit;

      nomemmapped:=CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype];
      if nomemmapped then
        inrange:=(taicpu(p).oper[1]^.ref^.offset>=0) and
                 (taicpu(p).oper[1]^.ref^.offset<=63)
      else
        inrange:=(taicpu(p).oper[1]^.ref^.offset>=32) and
                 (taicpu(p).oper[1]^.ref^.offset<=95);
      if not inrange then
        exit;

      DebugMsg('Peephole Lds2In performed', p);

      taicpu(p).opcode:=A_IN;
      if nomemmapped then
        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
      else
        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
      result:=true;
    end;
  { Peephole optimizations starting at an IN instruction whose port number is
    at most 31. Three read-modify sequences on a single bit are recognised
    and replaced by the bit instructions SBI, CBI and SBIS/SBIC.
    Returns true if p was changed.

    The bit masks are truncated to a byte explicitly before they are passed
    to BsrByte, exactly as it is done for the PopCnt test which guards each
    transformation: the operand value is wider than a byte (and is inverted
    in the CBI case), so an implicit conversion would depend on the range
    check settings the compiler itself was built with. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) then
        begin
          {
            in rX,Y
            ori rX,n
            out Y,rX

            into
            sbi Y,lg(n)
          }
          if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ORI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_OUT) and
            MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InOriOut2Sbi performed', p);

              taicpu(p).opcode:=A_SBI;
              { operand 0 becomes the port, operand 1 the bit number }
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(byte(taicpu(hp1).oper[1]^.val)));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,not(n)
            out Y,rX

            into
            cbi Y,lg(n)
          }
          else if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_OUT) and
            MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InAndiOut2Cbi performed', p);

              taicpu(p).opcode:=A_CBI;
              { operand 0 becomes the port, operand 1 the bit number }
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(byte(not(taicpu(hp1).oper[1]^.val))));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,n
            breq/brne L1

            into
            sbis/sbic Y,lg(n)
            jmp L1
            .Ltemp:
          }
          else if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_BRxx) and
            (taicpu(hp2).condition in [C_EQ,C_NE]) then
            begin
              { breq is taken when the bit is clear, so the jump has to be
                skipped when the bit is set (sbis), and vice versa }
              if taicpu(hp2).condition=C_EQ then
                taicpu(p).opcode:=A_SBIS
              else
                taicpu(p).opcode:=A_SBIC;

              DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);

              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(byte(taicpu(hp1).oper[1]^.val)));
              asml.Remove(hp1);
              hp1.Free;

              { the conditional branch becomes an unconditional jump }
              taicpu(hp2).condition:=C_None;
              if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
                taicpu(hp2).opcode:=A_JMP
              else
                taicpu(hp2).opcode:=A_RJMP;

              { fresh label directly behind the jump (.Ltemp above) }
              current_asmdata.getjumplabel(l);
              l.increfs;
              asml.InsertAfter(tai_label.create(l), hp2);

              result:=true;
            end;
        end;
    end;
  {
    Turn
      in rx, y
      sbr* rx, z
    Into
      sbi* y, z
    (for ports 0..31, if rx is released after the skip instruction) and
    afterwards try to invert the skip, see InvertSkipInstruction.
  }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      prev : tai;
      folded : boolean;
    begin
      Result:=false;

      folded:=
        (taicpu(p).ops=2) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
        GetLastInstruction(p,prev) and
        (prev.typ=ait_instruction) and
        (taicpu(prev).opcode=A_IN) and
        (taicpu(prev).ops=2) and
        (taicpu(prev).oper[1]^.typ=top_const) and
        (taicpu(prev).oper[1]^.val in [0..31]) and
        MatchOperand(taicpu(prev).oper[0]^,taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, prev, p));

      if folded then
        begin
          { sbrs -> sbis, anything else -> sbic }
          if taicpu(p).opcode=A_SBRS then
            taicpu(p).opcode:=A_SBIS
          else
            taicpu(p).opcode:=A_SBIC;

          { test the port directly instead of the register }
          taicpu(p).loadconst(0, taicpu(prev).oper[1]^.val);

          DebugMsg('Peephole InSbrx2Sbix performed', p);

          asml.Remove(prev);
          prev.free;

          result:=true;
        end;

      if InvertSkipInstruction(p) then
        result:=true;
    end;
  { Peephole optimizations for the skip instructions sbic/sbis.
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;
      {
        Turn
          sbic/sbis X, y
          jmp .L1
          op
        .L1:

        into
          sbis/sbic X,y
          op
        .L1:
      }
      if InvertSkipInstruction(p) then
        result:=true
      {
        Turn
          sbiX X, y
          jmp .L1
          jmp .L2
        .L1:
          op
        .L2:

        into
          sbiX X,y
        .L1:
          op
        .L2:
      }
      else if GetNextInstruction(p, hp1) and
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
        (taicpu(hp1).ops>0) and
        (taicpu(hp1).oper[0]^.typ = top_ref) and
        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and

        GetNextInstruction(hp1, hp2) and
        (hp2.typ=ait_instruction) and
        (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
        (taicpu(hp2).ops>0) and
        (taicpu(hp2).oper[0]^.typ = top_ref) and
        (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and

        { hp3 must be the label the first jump goes to }
        GetNextInstruction(hp2, hp3) and
        (hp3.typ=ait_label) and
        (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and

        GetNextInstruction(hp3, hp4) and
        (hp4.typ=ait_instruction) and

        { hp5 must be the label the second jump goes to; its type has to be
          checked before it is accessed as tai_label }
        GetNextInstruction(hp4, hp5) and
        (hp5.typ=ait_label) and
        (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);

          { both jumps go away, so each label loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;

          AsmL.remove(hp1);
          taicpu(hp1).Free;

          AsmL.remove(hp2);
          taicpu(hp2).Free;

          result:=true;
        end;
    end;
  { Peephole optimizations for andi: a single bit test followed by a branch
    over one instruction becomes sbrs/sbrc, and an andi whose register and
    flags are not needed afterwards is removed.
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      branch, skipped, target : tai;
      bitno : longint;
    begin
      Result:=false;

      { both patterns need a two operand instruction }
      if taicpu(p).ops<>2 then
        exit;

      {
        Turn
          andi rx, #pow2
          brne l
          <op>
        l:
        Into
          sbrs rx, #(1 shl imm)
          <op>
        l:
      }
      if (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,bitno) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetNextInstruction(p,branch) and
         (branch.typ=ait_instruction) and
         (taicpu(branch).opcode=A_BRxx) and
         (taicpu(branch).condition in [C_EQ,C_NE]) and
         (taicpu(branch).ops>0) and
         (taicpu(branch).oper[0]^.typ = top_ref) and
         (taicpu(branch).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstruction(branch,skipped) and
         (skipped.typ=ait_instruction) and
         GetNextInstruction(skipped,target) and
         (target.typ=ait_label) and
         (taicpu(branch).oper[0]^.ref^.symbol=tai_label(target).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);

          { the mask is replaced by the number of the tested bit }
          taicpu(p).oper[1]^.val:=bitno;

          if taicpu(branch).condition=C_NE then
            taicpu(p).opcode:=A_SBRS
          else
            taicpu(p).opcode:=A_SBRC;

          asml.Remove(branch);
          branch.free;

          result:=true;
        end
      {
        Remove
          andi rx, #y
          dealloc rx
      }
      else if (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          DebugMsg('Redundant Andi removed', p);

          result:=RemoveCurrentP(p);
        end;
    end;
  { Turn
      add rX, <zero reg>
      adc rY, rZ
    into
      add rY, rZ

    Adding the zero register never produces a carry, so the adc behaves like
    a plain add. Once the first add is gone nothing clears the carry any
    more, therefore the adc has to be turned into an add (the same way
    OptPass1SUB turns the sbc into a sub).
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);

          { must not depend on the incoming carry once p is removed }
          taicpu(hp1).opcode:=A_ADD;

          RemoveCurrentP(p, hp1);
          Result := True;
        end;
    end;
  { Turn
      sub rX, <zero reg>
      sbc rY, rZ
    into
      sub rY, rZ
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;

      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p, follower) then
        exit;
      if not MatchInstruction(follower,A_SBC) then
        exit;

      DebugMsg('Peephole SubSbc2Sub performed', p);

      taicpu(follower).opcode:=A_SUB;

      RemoveCurrentP(p, follower);
      Result := True;
    end;
  { fold
      mov reg2,reg0
      mov reg3,reg1
    to
      movw reg2,reg0
    on cpus with CPUAVR_HAS_MOVW, if reg2 and reg0 have even numbers and
    reg3/reg1 are the registers directly following them }
  function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
    var
      second: tai;
    begin
      result:=false;

      if not(CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) then
        exit;

      { p: mov reg,reg }
      if (taicpu(p).ops<>2) or
         (taicpu(p).oper[0]^.typ<>top_reg) or
         (taicpu(p).oper[1]^.typ<>top_reg) then
        exit;

      { second: mov reg,reg }
      if not(getnextinstruction(p,second) and
             (second.typ = ait_instruction) and
             (taicpu(second).opcode = A_MOV) and
             (taicpu(second).ops=2) and
             (taicpu(second).oper[0]^.typ = top_reg) and
             (taicpu(second).oper[1]^.typ = top_reg)) then
        exit;

      { destinations and sources must each form an even/odd pair }
      if not((getsupreg(taicpu(second).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
             ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
             ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
             (getsupreg(taicpu(second).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1)) then
        exit;

      DebugMsg('Peephole MovMov2Movw performed', p);

      AllocRegBetween(taicpu(second).oper[0]^.reg,p,second,UsedRegs);
      AllocRegBetween(taicpu(second).oper[1]^.reg,p,second,UsedRegs);

      taicpu(p).opcode:=A_MOVW;

      asml.remove(second);
      second.free;
      result:=true;
    end;
  { Peephole optimizations for clr: a clr directly overwritten by mov/ld is
    removed, and a cleared register which only feeds adc/sbc is replaced by
    the default zero register.
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
    var
      hp1: tai;
      clrreg: TRegister;
      alloc, dealloc: tai_regalloc;
    begin
      Result:=false;

      { requirements shared by both patterns: "clr reg", followed by an
        instruction using reg, with reg untouched in between }
      if (taicpu(p).ops<>1) or
         (taicpu(p).oper[0]^.typ<>top_reg) then
        exit;
      clrreg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p, hp1, clrreg) or
         RegModifiedBetween(clrreg, p, hp1) or
         (hp1.typ<>ait_instruction) then
        exit;

      { turn the common
          clr rX
          mov/ld rX, rY
        into
          mov/ld rX, rY
      }
      if (taicpu(hp1).opcode in [A_MOV,A_LD]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (taicpu(hp1).oper[0]^.reg=clrreg) then
        begin
          DebugMsg('Peephole ClrMov2Mov performed', p);

          result:=RemoveCurrentP(p);
        end
      { turn
          clr rX
          ...
          adc rY, rX
        into
          ...
          adc rY, r1
      }
      else if (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_reg) and
         (taicpu(hp1).oper[1]^.reg=clrreg) and
         (taicpu(hp1).oper[0]^.reg<>clrreg) and
         assigned(FindRegDeAlloc(clrreg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole ClrAdc2Adc performed', p);

          taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;

          { drop the allocation info of the register if both ends are found }
          alloc:=FindRegAllocBackward(clrreg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(clrreg,tai(hp1.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          result:=RemoveCurrentP(p);
        end;
    end;
  { turn
      push reg0
      push reg1
      pop reg3
      pop reg2
    into
      movw reg2,reg0
    or
      mov reg2,reg0
      mov reg3,reg1

    The movw form is used on cpus with CPUAVR_HAS_MOVW when reg1:reg0 and
    reg3:reg2 are even/odd register pairs. The mov/mov form executes its two
    moves one after the other, so it is only used when this gives the same
    result as the push/pop sequence, see the guard below.
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
    begin
      Result:=false;
      { p = push reg0, hp1 = push reg1, hp2 = pop reg3, hp3 = pop reg2 }
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_PUSH) and

        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_POP) and

        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_POP) then
        begin
          if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
            ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
            ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
            begin
              DebugMsg('Peephole PushPushPopPop2Movw performed', p);

              taicpu(hp3).ops:=2;
              taicpu(hp3).opcode:=A_MOVW;

              taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);

              { We're removing 3 concurrent instructions.  Remove hp1
                and hp2 manually instead of calling RemoveCurrentP
                as this means we won't be calling UpdateUsedRegs 3 times }
              asml.Remove(hp1);
              hp1.Free;

              asml.Remove(hp2);
              hp2.Free;

              { By removing p last, we've guaranteed that p.Next is
                valid (storing it prior to removing the instructions
                may result in a dangling pointer if hp1 immediately
                follows p), and because hp1, hp2 and hp3 came from
                sequential calls to GetNextInstruction, it is
                guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
              RemoveCurrentP(p, hp3);
              Result := True;
            end
          { The replacement is "mov reg2,reg0" followed by "mov reg3,reg1".
            It is wrong if
              - reg2 = reg1: the first mov would overwrite the source of the
                second one (e.g. a register swap via the stack), or
              - reg3 = reg2: the pops leave the value of reg0 in the
                register, the movs would leave the value of reg1.
            Leave the sequence alone in these cases. }
          else if (taicpu(hp3).oper[0]^.reg<>taicpu(hp1).oper[0]^.reg) and
            (taicpu(hp3).oper[0]^.reg<>taicpu(hp2).oper[0]^.reg) then
            begin
              DebugMsg('Peephole PushPushPopPop2MovMov performed', p);

              taicpu(p).ops:=2;
              taicpu(p).opcode:=A_MOV;

              taicpu(hp1).ops:=2;
              taicpu(hp1).opcode:=A_MOV;

              { p: mov reg2,reg0 }
              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
              taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);

              { hp1: mov reg3,reg1 }
              taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);

              { life range of reg2 and reg3 is increased, fix register allocation entries }
              TransferUsedRegs(TmpUsedRegs);
              UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
              AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);

              TransferUsedRegs(TmpUsedRegs);
              AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);

              IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
              UpdateUsedRegs(tai(p.Next));

              asml.Remove(hp2);
              hp2.Free;
              asml.Remove(hp3);
              hp3.Free;

              result:=true;
            end;
        end;
    end;
  { With cs_opt_level4 enabled, turn
      call x
      ret
    into
      jmp x
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;

      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      if not GetNextInstruction(p,follower) then
        exit;
      if not MatchInstruction(follower,A_RET) then
        exit;

      DebugMsg('Peephole CallReg2Jmp performed', p);

      taicpu(p).opcode:=A_JMP;

      asml.Remove(follower);
      follower.Free;

      result:=true;
    end;
  { With cs_opt_level4 enabled, turn
      rcall x
      ret
    into
      rjmp x
    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;

      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      if not GetNextInstruction(p,follower) then
        exit;
      if not MatchInstruction(follower,A_RET) then
        exit;

      DebugMsg('Peephole RCallReg2RJmp performed', p);

      taicpu(p).opcode:=A_RJMP;

      asml.Remove(follower);
      follower.Free;

      result:=true;
    end;
  845. function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  846. var
  847. hp1, hp2: tai;
  848. i : Integer;
  849. alloc, dealloc: tai_regalloc;
  850. begin
  851. Result:=false;
  852. { change
  853. mov reg0, reg1
  854. dealloc reg0
  855. into
  856. dealloc reg0
  857. }
  858. if MatchOpType(taicpu(p),top_reg,top_reg) then
  859. begin
  860. TransferUsedRegs(TmpUsedRegs);
  861. UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
  862. if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
  863. { reg. allocation information before calls is not perfect, so don't do this before
  864. calls/icalls }
  865. GetNextInstruction(p,hp1) and
  866. not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
  867. begin
  868. DebugMsg('Peephole Mov2Nop performed', p);
  869. RemoveCurrentP(p, hp1);
  870. Result := True;
  871. exit;
  872. end;
  873. end;
  874. { turn
  875. mov reg0, reg1
  876. <op> reg2,reg0
  877. dealloc reg0
  878. into
  879. <op> reg2,reg1
  880. }
  881. if MatchOpType(taicpu(p),top_reg,top_reg) and
  882. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  883. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  884. (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
  885. A_OUT,A_IN]) or
  886. { the reference register of ST/STD cannot be replaced }
  887. (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
  888. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  889. {(taicpu(hp1).ops=1) and
  890. (taicpu(hp1).oper[0]^.typ = top_reg) and
  891. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
  892. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  893. begin
  894. DebugMsg('Peephole MovOp2Op 1 performed', p);
  895. for i := 0 to taicpu(hp1).ops-1 do
  896. if taicpu(hp1).oper[i]^.typ=top_reg then
  897. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  898. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  899. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  900. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  901. if assigned(alloc) and assigned(dealloc) then
  902. begin
  903. asml.Remove(alloc);
  904. alloc.Free;
  905. asml.Remove(dealloc);
  906. dealloc.Free;
  907. end;
  908. { life range of reg1 is increased }
  909. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  910. { p will be removed, update used register as we continue
  911. with the next instruction after p }
  912. result:=RemoveCurrentP(p);
  913. end
  914. { turn
  915. mov reg1, reg0
  916. <op> reg1,xxxx
  917. dealloc reg1
  918. into
  919. <op> reg1,xxx
  920. }
  921. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  922. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  923. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  924. MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
  925. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  926. begin
  927. DebugMsg('Peephole MovOp2Op 2 performed', p);
  928. for i := 0 to taicpu(hp1).ops-1 do
  929. if taicpu(hp1).oper[i]^.typ=top_reg then
  930. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  931. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  932. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  933. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  934. if assigned(alloc) and assigned(dealloc) then
  935. begin
  936. asml.Remove(alloc);
  937. alloc.Free;
  938. asml.Remove(dealloc);
  939. dealloc.Free;
  940. end;
  941. { life range of reg1 is increased }
  942. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  943. { p will be removed, update used register as we continue
  944. with the next instruction after p }
  945. result:=RemoveCurrentP(p);
  946. end
  947. { remove
  948. mov reg0,reg0
  949. }
  950. else if (taicpu(p).ops=2) and
  951. (taicpu(p).oper[0]^.typ = top_reg) and
  952. (taicpu(p).oper[1]^.typ = top_reg) and
  953. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  954. begin
  955. DebugMsg('Peephole RedundantMov performed', p);
  956. result:=RemoveCurrentP(p);
  957. end
  958. {
  959. Turn
  960. mov rx,ry
  961. op rx,rz
  962. mov ry, rx
  963. Into
  964. op ry,rz
  965. }
  966. else if (taicpu(p).ops=2) and
  967. MatchOpType(taicpu(p),top_reg,top_reg) and
  968. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  969. (hp1.typ=ait_instruction) and
  970. (taicpu(hp1).ops >= 1) and
  971. (taicpu(hp1).oper[0]^.typ = top_reg) and
  972. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  973. MatchInstruction(hp2,A_MOV) and
  974. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  975. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  976. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  977. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  978. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
  979. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
  980. A_INC,A_DEC,
  981. A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
  982. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  983. begin
  984. DebugMsg('Peephole MovOpMov2Op performed', p);
  985. if (taicpu(hp1).ops=2) and
  986. (taicpu(hp1).oper[1]^.typ=top_reg) and
  987. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  988. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  989. taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;
  990. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  991. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  992. if assigned(alloc) and assigned(dealloc) then
  993. begin
  994. asml.Remove(alloc);
  995. alloc.Free;
  996. asml.Remove(dealloc);
  997. dealloc.Free;
  998. end;
  999. asml.remove(hp2);
  1000. hp2.free;
  1001. result:=RemoveCurrentP(p);
  1002. end
  1003. {
  1004. Turn
  1005. mov rx,ry
  1006. op rx,rw
  1007. mov rw,rx
  1008. Into
  1009. op rw,ry
  1010. }
  1011. else if (taicpu(p).ops=2) and
  1012. MatchOpType(taicpu(p),top_reg,top_reg) and
  1013. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1014. (hp1.typ=ait_instruction) and
  1015. (taicpu(hp1).ops = 2) and
  1016. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1017. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  1018. (hp2.typ=ait_instruction) and
  1019. (taicpu(hp2).opcode=A_MOV) and
  1020. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1021. (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1022. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  1023. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  1024. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1025. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
  1026. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  1027. begin
  1028. DebugMsg('Peephole MovOpMov2Op2 performed', p);
  1029. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1030. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1031. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1032. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  1033. if assigned(alloc) and assigned(dealloc) then
  1034. begin
  1035. asml.Remove(alloc);
  1036. alloc.Free;
  1037. asml.Remove(dealloc);
  1038. dealloc.Free;
  1039. end;
  1040. result:=RemoveCurrentP(p);
  1041. asml.remove(hp2);
  1042. hp2.free;
  1043. end
  1044. {
  1045. This removes the first mov from
  1046. mov rX,...
  1047. mov rX,...
  1048. }
  1049. else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) and
  1050. { test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
  1051. MatchInstruction(hp1,A_MOV) and
  1052. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
  1053. while MatchInstruction(hp1,A_MOV) and
  1054. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1055. { don't remove the first mov if the second is a mov rX,rX }
  1056. not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
  1057. begin
  1058. DebugMsg('Peephole MovMov2Mov 1 performed', p);
  1059. RemoveCurrentP(p,hp1);
  1060. Result := True;
  1061. GetNextInstruction(hp1,hp1);
  1062. if not assigned(hp1) then
  1063. break;
  1064. end
  1065. {
  1066. This removes the second mov from
  1067. mov rX,rY
  1068. ...
  1069. mov rX,rY
  1070. if rX and rY are not modified in-between
  1071. }
  1072. else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
  1073. MatchInstruction(hp1,A_MOV) and
  1074. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1075. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
  1076. not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  1077. begin
  1078. DebugMsg('Peephole MovMov2Mov 2 performed', p);
  1079. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1080. RemoveInstruction(hp1);
  1081. Result := True;
  1082. end;
  1083. end;
        { First-pass CPU specific peephole hook.

          Nodes other than ait_instruction are ignored (Result stays false).
          For an instruction, the OpCp2Op pattern below is tried first; if it
          does not match, the instruction is handed to the OptPass1* helper
          selected by its opcode. Opcodes without a case label are left alone.

          p      - node under inspection; passed on as a var parameter to the
                   helpers
          Result - true if OpCp2Op was applied, otherwise the value returned by
                   the dispatched helper (false when nothing was dispatched) }
  1084. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1085. var
  1086. hp1,hp2: tai;
  1087. begin
  1088. result := false;
  1089. case p.typ of
  1090. ait_instruction:
  1091. begin
  1092. {
  1093. change
  1094. <op> reg,x,y
  1095. cp reg,r1
  1096. into
  1097. <op>s reg,x,y
  1098. }
  1099. { this optimization can be applied only to the currently enabled operations because
  1100. the other operations do not update all flags and FPC does not track flag usage }
        { Shape of the match: p = one of the listed operations, hp1 = the next
          instruction (a CP against the default zero register, or a CPI),
          hp2 = the instruction after hp1, which must be an A_BRxx. }
        { NOTE(review): A_SBI is not among the opcodes accepted for p by the
          first MatchInstruction below, so the A_SBI members of the two later
          opcode sets can never be hit - A_SBC or A_SUBI may have been meant;
          confirm. }
        { NOTE(review): the A_CPI alternative compares only the two constants;
          unlike the A_CP alternative it does not compare the register operands
          of p and hp1 - confirm this is intended. Replacing "cpi reg,mask" by
          the inverted zero test of "andi reg,mask" presumably relies on the
          mask having exactly one bit set - confirm. }
  1101. if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
  1102. A_INC,A_LSL,A_LSR,
  1103. A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
  1104. GetNextInstruction(p, hp1) and
  1105. ((MatchInstruction(hp1, A_CP) and
  1106. (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  1107. (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
  1108. ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1109. (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
  1110. (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
  1111. A_LSL,A_LSR,
  1112. A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
  1113. (MatchInstruction(hp1, A_CPI) and
  1114. (taicpu(p).opcode = A_ANDI) and
  1115. (taicpu(p).oper[1]^.typ=top_const) and
  1116. (taicpu(hp1).oper[1]^.typ=top_const) and
  1117. (taicpu(p).oper[1]^.val=taicpu(hp1).oper[1]^.val))) and
  1118. GetNextInstruction(hp1, hp2) and
  1119. { be careful here, following instructions could use other flags
  1120. however after a jump fpc never depends on the value of flags }
  1121. { All above instructions set Z and N according to the following
  1122. Z := result = 0;
  1123. N := result[31];
  1124. EQ = Z=1; NE = Z=0;
  1125. MI = N=1; PL = N=0; }
        { NOTE(review): "result[31]" reads like a 32 bit target; presumably the
          top bit of the result register is meant here - confirm }
  1126. MatchInstruction(hp2, A_BRxx) and
  1127. ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
  1128. { sub/sbc set all flags }
  1129. (taicpu(p).opcode in [A_SUB,A_SBI])){ and
  1130. no flag allocation tracking implemented yet on avr
  1131. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
  1132. begin
  1133. { move flag allocation if possible }
  1134. { no flag allocation tracking implemented yet on avr
  1135. GetLastInstruction(hp1, hp2);
  1136. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  1137. if assigned(hp2) then
  1138. begin
  1139. asml.Remove(hp2);
  1140. asml.insertbefore(hp2, p);
  1141. end;
  1142. }
  1143. // If we compare to the same value we are masking then invert the comparison
  1144. if (taicpu(hp1).opcode=A_CPI) or
  1145. { sub/sbc whose result is compared with swapped operands (zero register first)? }
  1146. ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
  1147. taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
        { bracket p..hp2 with an alloc/dealloc pair for the flags register and
          record the flags register in UsedRegs }
  1148. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1149. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
  1150. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1151. DebugMsg('Peephole OpCp2Op performed', p);
        { the compare hp1 is dropped: unlink it from the list and free it }
  1152. asml.remove(hp1);
  1153. hp1.free;
  1154. Result:=true;
  1155. end
  1156. else
        { no OpCp2Op match: dispatch on the opcode of p }
  1157. case taicpu(p).opcode of
  1158. A_LDI:
  1159. Result:=OptPass1LDI(p);
  1160. A_STS:
  1161. Result:=OptPass1STS(p);
  1162. A_LDS:
  1163. Result:=OptPass1LDS(p);
  1164. A_IN:
  1165. Result:=OptPass1IN(p);
  1166. A_SBRS,
  1167. A_SBRC:
  1168. Result:=OptPass1SBR(p);
  1169. A_ANDI:
  1170. Result:=OptPass1ANDI(p);
  1171. A_ADD:
  1172. Result:=OptPass1ADD(p);
  1173. A_SUB:
  1174. Result:=OptPass1SUB(p);
  1175. A_CLR:
  1176. Result:=OptPass1CLR(p);
  1177. A_PUSH:
  1178. Result:=OptPass1PUSH(p);
  1179. A_CALL:
  1180. Result:=OptPass1CALL(p);
  1181. A_RCALL:
  1182. Result:=OptPass1RCALL(p);
  1183. A_MOV:
  1184. Result:=OptPass1MOV(p);
  1185. A_SBIC,
  1186. A_SBIS:
  1187. Result:=OptPass1SBI(p);
  1188. end;
  1189. end;
  1190. end;
  1191. end;
  { Second-pass CPU specific peephole hook.

    Only an ait_instruction node whose opcode is A_MOV is processed; it is
    forwarded to OptPass2MOV and that call's result is returned. For every
    other node nothing is called and the function returns false.

    p      - node under inspection; handed to OptPass2MOV as a var parameter
    Result - value returned by OptPass2MOV, or false if it was not called }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      Result:=false;
      { anything that is not an instruction is of no interest in this pass }
      if p.typ<>ait_instruction then
        exit;
      { MOV is the only opcode with a second-pass handler }
      if taicpu(p).opcode=A_MOV then
        Result:=OptPass2MOV(p);
    end;
        { Unit initialization: stores the TCpuAsmOptimizer class in
          casmoptimizer. Presumably the generic optimizer code reads this
          class reference to create the optimizer - confirm against aopt. }
  1205. begin
  1206. casmoptimizer:=TCpuAsmOptimizer;
  1207. End.