aoptcpu.pas 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj,aoptcpub;
  type
    { CPU specific assembler optimizer; the peephole handlers declared here
      operate on AVR opcodes (LDI, SBI, MOVW, ...) }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { instruction list navigation and register usage queries }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
      function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { rewrites "skip; jmp .Lx; op; .Lx:" into "inverted skip; op" }
      function InvertSkipInstruction(var p: tai): boolean;

      { uses the same constructor as TAopObj }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      procedure PeepHoleOptPass2; override;
    private
      { pass 1 handlers, one per opcode the handler starts matching at }
      function OptPass1ADD(var p: tai): boolean;
      function OptPass1ANDI(var p: tai): boolean;
      function OptPass1CALL(var p: tai): boolean;
      function OptPass1CLR(var p: tai): boolean;
      function OptPass1IN(var p: tai): boolean;
      function OptPass1LDI(var p: tai): boolean;
      function OptPass1LDS(var p: tai): boolean;
      function OptPass1MOV(var p: tai): boolean;
      function OptPass1PUSH(var p: tai): boolean;
      function OptPass1RCALL(var p: tai): boolean;
      function OptPass1SBI(var p: tai): boolean;
      function OptPass1SBR(var p: tai): boolean;
      function OptPass1STS(var p: tai): boolean;
      function OptPass1SUB(var p: tai): boolean;
    end;
  51. Implementation
  52. uses
  53. cutils,
  54. verbose,
  55. cpuinfo,
  56. aasmbase,aasmcpu,aasmdata,
  57. aoptutils,
  58. globals,globtype,
  59. cgutils;
  type
    { set of opcodes, accepted by the set based MatchInstruction overloads }
    TAsmOpSet = set of TAsmOp;
  { Returns true if p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { Compares two references field by field. References carrying any
    volatility flag never compare equal, not even to themselves. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      Result:=
        (r1.offset = r2.offset) and
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.symbol = r2.symbol) and
        (r1.refaddr = r2.refaddr) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.addressmode = r2.addressmode);
    end;
  { Returns true if both operands have the same type and the same payload.
    Only constants, registers and references can match; any other operand
    type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          else
            Result:=false;
        end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg = reg;
    end;
  { Returns true if instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode = op;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode in ops;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops
    and which has exactly opcount operands. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=
          (taicpu(instr).opcode in ops) and
          (taicpu(instr).ops = opcount);
    end;
  {$ifdef DEBUG_AOPTCPU}
  { debugging enabled: inserts s as a comment in front of p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { debugging disabled: the message is dropped }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if Reg is involved in instruction p1. On top of the
    inherited check this treats R0/R1 as used by the multiply instructions
    and both halves of either MOVW operand as used by MOVW. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      insn: taicpu;
    begin
      if p1.typ = ait_instruction then
        begin
          insn:=taicpu(p1);
          { the multiply family is reported as using R0 and R1 }
          if (insn.opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
             ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
            begin
              Result:=true;
              exit;
            end;
          { MOVW touches the named registers and their successors }
          if (insn.opcode=A_MOVW) and
             ((TRegister(ord(insn.oper[0]^.reg)+1)=reg) or
              (TRegister(ord(insn.oper[1]^.reg)+1)=reg) or
              (insn.oper[0]^.reg=reg) or
              (insn.oper[1]^.reg=reg)) then
            begin
              Result:=true;
              exit;
            end;
        end;
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Advances from Current and stores the entry found in Next. Without
    cs_opt_level3 only a single step is taken; with it the search goes on
    until the list ends, a non-instruction is hit, an instruction involving
    reg is found or a call/jump is reached. Returns the result of the last
    GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    var
      deepscan: boolean;
    begin
      deepscan:=cs_opt_level3 in current_settings.optimizerswitches;
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
      until
        (not deepscan) or
        (not Result) or
        (Next.typ<>ait_instruction) or
        RegInInstruction(reg,Next) or
        is_calljmp(taicpu(Next).opcode);
    end;
  { Returns true if instruction hp overwrites reg with a value that does not
    depend on the previous contents of reg.

    Handled opcodes:
      LDI/MOV/LDS  destination is reg and the source is not reg itself
      LD/LDD/LPM   destination is reg and reg is not part of the address
      MOVW         reg is one half of the destination pair and not one half
                   of the source pair
      POP          destination is reg
    Anything else, a nil hp or a non-instruction yields false. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result:=false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p:=taicpu(hp);
      case p.opcode of
        A_LDI,A_MOV,A_LDS:
          Result:=(reg=p.oper[0]^.reg) and
            ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg));
        A_LD,A_LDD,A_LPM:
          Result:=(reg=p.oper[0]^.reg) and
            not(RegInRef(reg,p.oper[1]^.ref^));
        A_MOVW:
          { MOVW names the low register of each pair; the pair consists of
            that register and its successor (same convention as in
            RegInInstruction). The former code compared reg+1 with the
            operand and therefore tested the predecessor instead. }
          Result:=((reg=p.oper[0]^.reg) or
                   (reg=TRegister(ord(p.oper[0]^.reg)+1))) and
                  (reg<>p.oper[1]^.reg) and
                  (reg<>TRegister(ord(p.oper[1]^.reg)+1));
        A_POP:
          Result:=reg=p.oper[0]^.reg;
        else
          Result:=false;
      end;
    end;
  { Returns true if instruction hp reads reg, either as a register operand
    or as base/index of a reference operand. POP is never reported. For the
    listed load/move opcodes the first operand is treated as write-only and
    skipped. For MOVW the successor of the source register is reported as
    read as well. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      Result:=false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);
      { we do not care about the stack pointer }
      if p.opcode in [A_POP] then
        exit;
      { first operand only written? then skip it }
      if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        i:=1
      else
        i:=0;
      while i<p.ops do
        begin
          case p.oper[i]^.typ of
            top_reg:
              Result:=(p.oper[i]^.reg = reg) or
                { MOVW also reads the high half of the source pair, i.e. the
                  successor of operand 1. The former code looked at operand 0,
                  which is the destination. }
                ((i=1) and (p.opcode=A_MOVW) and
                 (getsupreg(p.oper[1]^.reg)+1=getsupreg(reg)));
            top_ref:
              Result:=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
          end;
          { Bailout if we found something }
          if Result then
            exit;
          Inc(i);
        end;
    end;
  {
    Turns
      sbis ?
      jmp .Lx
      op
      .Lx:
    Into
      sbic ?
      op
    For all types of skip instructions (SBIS/SBIC/SBRS/SBRC).

    Returns true if the sequence was rewritten. If p is none of the four
    skip opcodes nothing is touched and false is returned.
  }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { steps forward from p until an instruction is found; returns true and
      the instruction in next on success }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      begin
        repeat
          result:=GetNextInstruction(p,next);
          p:=next;
        until
          (not result) or
          (not assigned(next)) or
          (next.typ in [ait_instruction]);
        result:=assigned(next) and (next.typ in [ait_instruction]);
      end;

    var
      hp1, hp2, hp3: tai;
      inverted: TAsmOp;
    begin
      result:=false;
      { determine the inverted opcode first, so the jump is never removed
        for an instruction whose skip condition cannot be inverted }
      case taicpu(p).opcode of
        A_SBIS: inverted:=A_SBIC;
        A_SBIC: inverted:=A_SBIS;
        A_SBRS: inverted:=A_SBRC;
        A_SBRC: inverted:=A_SBRS;
        else
          exit;
      end;
      if GetNextInstruction(taicpu(p),hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
         (taicpu(hp1).ops=1) and
         (taicpu(hp1).oper[0]^.typ=top_ref) and
         (taicpu(hp1).oper[0]^.ref^.offset=0) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstructionWithoutLabel(hp1,hp2) and
         (hp2.typ=ait_instruction) and
         (not taicpu(hp2).is_jmp) and
         GetNextInstruction(hp2,hp3) and
         FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
        begin
          DebugMsg('SkipJump2InvertedSkip', p);
          taicpu(p).opcode:=inverted;
          { the jump goes away, so its target loses one reference }
          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
          asml.remove(hp1);
          hp1.free;
          { report the change; the callers test the result to decide
            whether the instruction list was modified }
          result:=true;
        end;
    end;
  { turn
      ldi reg0, imm
      <op> reg1, reg0
      dealloc reg0
    into
      <op>i reg1, imm
    for <op> in CP/MOV/AND/SUB, provided reg1 is one of r16..r31 and reg0 is
    not used after <op>. Returns true if p was removed. }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      hp1 : tai;
      alloc ,dealloc: tai_regalloc;
      immreg: TRegister;
    begin
      Result:=false;
      if not MatchOpType(taicpu(p),top_reg,top_const) then
        exit;
      immreg:=taicpu(p).oper[0]^.reg;
      if not (GetNextInstructionUsingReg(p, hp1, immreg) and
              MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
              (not RegModifiedBetween(immreg, p, hp1)) and
              MatchOpType(taicpu(hp1),top_reg,top_reg) and
              (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
              (taicpu(hp1).oper[1]^.reg=immreg) and
              not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^))) then
        exit;

      { reg0 must be dead behind <op> }
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs) then
        exit;

      { switch <op> to its immediate form }
      case taicpu(hp1).opcode of
        A_CP:
          taicpu(hp1).opcode:=A_CPI;
        A_MOV:
          taicpu(hp1).opcode:=A_LDI;
        A_AND:
          taicpu(hp1).opcode:=A_ANDI;
        A_SUB:
          taicpu(hp1).opcode:=A_SUBI;
        else
          internalerror(2016111901);
      end;
      taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);

      { drop the allocation markers of reg0, but only if both are found }
      alloc:=FindRegAllocBackward(immreg,tai(p.Previous));
      dealloc:=FindRegDeAlloc(immreg,tai(hp1.Next));
      if assigned(alloc) and assigned(dealloc) then
        begin
          asml.Remove(alloc);
          alloc.Free;
          asml.Remove(dealloc);
          dealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);
      result:=RemoveCurrentP(p);
    end;
  { Turns an STS to a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into OUT. The accepted offset window
    is 0..63 when the CPU has CPUAVR_NOMEMMAPPED_REGS and 32..95 otherwise;
    the OUT operand is the offset relative to the window start. }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      iobase: longint;
    begin
      Result:=false;
      if (taicpu(p).oper[0]^.ref^.symbol<>nil) or
         (taicpu(p).oper[0]^.ref^.relsymbol<>nil) or
         (getsupreg(taicpu(p).oper[0]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[0]^.ref^.index)<>RS_NO) or
         (taicpu(p).oper[0]^.ref^.addressmode<>AM_UNCHANGED) then
        exit;

      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        iobase:=0
      else
        iobase:=32;

      if (taicpu(p).oper[0]^.ref^.offset<iobase) or
         (taicpu(p).oper[0]^.ref^.offset>iobase+63) then
        exit;

      DebugMsg('Peephole Sts2Out performed', p);
      taicpu(p).opcode:=A_OUT;
      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-iobase);
      result:=true;
    end;
  { Turns an LDS from a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into IN. The accepted offset window
    is 0..63 when the CPU has CPUAVR_NOMEMMAPPED_REGS and 32..95 otherwise;
    the IN operand is the offset relative to the window start. }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      iobase: longint;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.ref^.symbol<>nil) or
         (taicpu(p).oper[1]^.ref^.relsymbol<>nil) or
         (getsupreg(taicpu(p).oper[1]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[1]^.ref^.index)<>RS_NO) or
         (taicpu(p).oper[1]^.ref^.addressmode<>AM_UNCHANGED) then
        exit;

      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        iobase:=0
      else
        iobase:=32;

      if (taicpu(p).oper[1]^.ref^.offset<iobase) or
         (taicpu(p).oper[1]^.ref^.offset>iobase+63) then
        exit;

      DebugMsg('Peephole Lds2In performed', p);
      taicpu(p).opcode:=A_IN;
      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-iobase);
      result:=true;
    end;
  { Peephole patterns starting at "in rX,Y". All of them require Y<=31.

      in rX,Y / ori rX,n / out Y,rX          ->  sbi Y,lg(n)
      in rX,Y / andi rX,not(n) / out Y,rX    ->  cbi Y,lg(n)
      in rX,Y / andi rX,n / breq|brne L1     ->  sbis|sbic Y,lg(n)
                                                 jmp L1
                                                 .Ltemp:
    where n has exactly one bit set. Returns true if a pattern was applied. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      if not GetNextInstruction(p,hp1) then
        exit;
      if not (taicpu(p).oper[1]^.val<=31) then
        exit;

      if MatchInstruction(hp1,A_ORI) then
        begin
          { in/ori/out -> sbi }
          if (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
             (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
             GetNextInstruction(hp1,hp2) and
             MatchInstruction(hp2,A_OUT) and
             MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InOriOut2Sbi performed', p);
              taicpu(p).opcode:=A_SBI;
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end;
        end
      else if MatchInstruction(hp1,A_ANDI) and
              (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
        begin
          { in/andi/out -> cbi }
          if (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
             GetNextInstruction(hp1,hp2) and
             MatchInstruction(hp2,A_OUT) and
             MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InAndiOut2Cbi performed', p);
              taicpu(p).opcode:=A_CBI;
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end
          { in/andi/brxx -> skip + unconditional jump }
          else if (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                  GetNextInstruction(hp1,hp2) and
                  MatchInstruction(hp2,A_BRxx) and
                  (taicpu(hp2).condition in [C_EQ,C_NE]) then
            begin
              if taicpu(hp2).condition=C_EQ then
                taicpu(p).opcode:=A_SBIS
              else
                taicpu(p).opcode:=A_SBIC;
              DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
              asml.Remove(hp1);
              hp1.Free;
              { the conditional branch becomes an unconditional jump }
              taicpu(hp2).condition:=C_None;
              if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
                taicpu(hp2).opcode:=A_JMP
              else
                taicpu(hp2).opcode:=A_RJMP;
              { fresh label placed directly behind the jump }
              current_asmdata.getjumplabel(l);
              l.increfs;
              asml.InsertAfter(tai_label.create(l), hp2);
              result:=true;
            end;
        end;
    end;
  {
    Turn
      in rx, y
      sbr* rx, z
    Into
      sbi* y, z
    if y is in 0..31 and rx is deallocated behind the sbr*. Afterwards the
    generic skip inversion is tried on the resulting instruction as well.
    Returns true if either rewrite took place.
  }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      hp1 : tai;
      testreg: TRegister;
    begin
      Result:=false;
      if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) then
        begin
          testreg:=taicpu(p).oper[0]^.reg;
          if assigned(FindRegDeAlloc(testreg,tai(p.next))) and
             GetLastInstruction(p,hp1) and
             (hp1.typ=ait_instruction) and
             (taicpu(hp1).opcode=A_IN) and
             (taicpu(hp1).ops=2) and
             (taicpu(hp1).oper[1]^.typ=top_const) and
             (taicpu(hp1).oper[1]^.val in [0..31]) and
             MatchOperand(taicpu(hp1).oper[0]^,testreg) and
             (not RegModifiedBetween(testreg, hp1, p)) then
            begin
              if taicpu(p).opcode=A_SBRS then
                taicpu(p).opcode:=A_SBIS
              else
                taicpu(p).opcode:=A_SBIC;
              { the port number of the IN replaces the register operand }
              taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
              DebugMsg('Peephole InSbrx2Sbix performed', p);
              asml.Remove(hp1);
              hp1.free;
              result:=true;
            end;
        end;
      { InvertSkipInstruction is always called, the left operand of "or"
        is evaluated first }
      result:=InvertSkipInstruction(p) or result;
    end;
  { Peephole patterns starting at a skip-on-I/O-bit instruction.

    1) Turn
         sbic/sbis X, y
         jmp .L1
         op
         .L1:
       into
         sbis/sbic X,y
         op
         .L1:
       (delegated to InvertSkipInstruction)

    2) Turn
         sbiX X, y
         jmp .L1
         jmp .L2
         .L1:
         op
         .L2:
       into
         sbiX X,y
         .L1:
         op
         .L2:

    Returns true if one of the rewrites was applied. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;
      if InvertSkipInstruction(p) then
        result:=true
      else if GetNextInstruction(p, hp1) and
              (hp1.typ=ait_instruction) and
              (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
              (taicpu(hp1).ops>0) and
              (taicpu(hp1).oper[0]^.typ = top_ref) and
              (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
              GetNextInstruction(hp1, hp2) and
              (hp2.typ=ait_instruction) and
              (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
              (taicpu(hp2).ops>0) and
              (taicpu(hp2).oper[0]^.typ = top_ref) and
              (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
              GetNextInstruction(hp2, hp3) and
              (hp3.typ=ait_label) and
              (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
              GetNextInstruction(hp3, hp4) and
              (hp4.typ=ait_instruction) and
              GetNextInstruction(hp4, hp5) and
              { hp5 is cast to tai_label below, so it is hp5 whose type has
                to be verified here (the former code re-tested hp3) }
              (hp5.typ=ait_label) and
              (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);
          { both jumps disappear, so each target loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;
          AsmL.remove(hp1);
          taicpu(hp1).Free;
          AsmL.remove(hp2);
          taicpu(hp2).Free;
          result:=true;
        end;
    end;
  { Peephole patterns starting at a two operand ANDI.

    1) Turn
         andi rx, #pow2
         breq/brne l
         <op>
         l:
       into
         sbrc/sbrs rx, #bit      (bit = index of the single set bit)
         <op>
         l:
       if rx is deallocated behind the andi.

    2) Remove
         andi rx, #y
         dealloc rx
       if the flags are deallocated behind it as well or are not in use.

    Returns true if one of the rewrites was applied. }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
      i : longint;
    begin
      Result:=false;
      if taicpu(p).ops<>2 then
        exit;

      if (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,i) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetNextInstruction(p,hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode=A_BRxx) and
         (taicpu(hp1).condition in [C_EQ,C_NE]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ = top_ref) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstruction(hp1,hp2) and
         (hp2.typ=ait_instruction) and
         GetNextInstruction(hp2,hp3) and
         (hp3.typ=ait_label) and
         (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);
          { i was set by ispowerof2 above }
          taicpu(p).oper[1]^.val:=i;
          if taicpu(hp1).condition=C_NE then
            taicpu(p).opcode:=A_SBRS
          else
            taicpu(p).opcode:=A_SBRC;
          asml.Remove(hp1);
          hp1.free;
          result:=true;
        end
      else if (taicpu(p).oper[0]^.typ=top_reg) and
              assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
              (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
               (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          DebugMsg('Redundant Andi removed', p);
          result:=RemoveCurrentP(p);
        end;
    end;
  { Turn
      add rX, <zero reg>
      adc rY, rZ
    into
      add rY, rZ

    Adding the zero register leaves rX unchanged and always clears the
    carry, so the following ADC behaves like a plain ADD. Once the ADD is
    removed the ADC has to be rewritten to ADD (as OptPass1SUB does for
    SBC), otherwise it would consume whatever carry an earlier instruction
    left behind.
    Returns true if the rewrite was applied; p then points at the former
    ADC. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);
          taicpu(hp1).opcode:=A_ADD;
          RemoveCurrentP(p, hp1);
          Result:=True;
        end;
    end;
  { Turn
      sub rX, <zero reg>
      sbc rY, rZ
    into
      sub rY, rZ
    Returns true if the rewrite was applied; p then points at the former
    SBC. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p, hp1) then
        exit;
      if not MatchInstruction(hp1,A_SBC) then
        exit;
      DebugMsg('Peephole SubSbc2Sub performed', p);
      taicpu(hp1).opcode:=A_SUB;
      RemoveCurrentP(p, hp1);
      Result:=True;
    end;
  { Peephole patterns starting at "clr rX".

    1) turn the common
         clr rX
         mov/ld rX, rY
       into
         mov/ld rX, rY

    2) turn
         clr rX
         ...
         adc/sbc rY, rX
       into
         ...
         adc/sbc rY, <zero reg>
       if rX is deallocated behind the adc/sbc.

    Returns the result of RemoveCurrentP if a pattern was applied, false
    otherwise. }
  function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
    var
      hp1: tai;
      alloc, dealloc: tai_regalloc;
      clrreg: TRegister;
    begin
      Result:=false;
      if not ((taicpu(p).ops=1) and
              (taicpu(p).oper[0]^.typ=top_reg)) then
        exit;
      clrreg:=taicpu(p).oper[0]^.reg;

      { both patterns need the next instruction using rX, with rX left
        untouched in between }
      if not (GetNextInstructionUsingReg(p, hp1, clrreg) and
              (not RegModifiedBetween(clrreg, p, hp1)) and
              (hp1.typ=ait_instruction)) then
        exit;

      if (taicpu(hp1).opcode in [A_MOV,A_LD]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (taicpu(hp1).oper[0]^.reg=clrreg) then
        begin
          DebugMsg('Peephole ClrMov2Mov performed', p);
          result:=RemoveCurrentP(p);
        end
      else if (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
              (taicpu(hp1).ops=2) and
              (taicpu(hp1).oper[1]^.typ=top_reg) and
              (taicpu(hp1).oper[1]^.reg=clrreg) and
              (taicpu(hp1).oper[0]^.reg<>clrreg) and
              assigned(FindRegDeAlloc(clrreg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole ClrAdc2Adc performed', p);
          taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
          { drop the allocation markers of rX, but only if both are found }
          alloc:=FindRegAllocBackward(clrreg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(clrreg,tai(hp1.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;
          result:=RemoveCurrentP(p);
        end;
    end;
  { turn
      push reg0
      push reg1
      pop reg3
      pop reg2
    into
      movw reg2,reg0
    or
      mov reg2,reg0
      mov reg3,reg1

    The movw form is used if the CPU has MOVW and both (reg0,reg1) and
    (reg2,reg3) are even aligned register pairs. The mov/mov form writes
    reg2 before reg3, i.e. in the opposite order of the pops, and is
    therefore only applied if reg2 differs from reg1 (otherwise the second
    mov would read an already overwritten source) and from reg3 (otherwise
    the final value of that register would change).
    Returns true if the sequence was rewritten. }
  function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) and
         MatchInstruction(hp1,A_PUSH) and
         GetNextInstruction(hp1,hp2) and
         MatchInstruction(hp2,A_POP) and
         GetNextInstruction(hp2,hp3) and
         MatchInstruction(hp3,A_POP) then
        begin
          if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
             (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
             ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
             ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
            begin
              DebugMsg('Peephole PushPushPopPop2Movw performed', p);
              taicpu(hp3).ops:=2;
              taicpu(hp3).opcode:=A_MOVW;
              taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
              { We're removing 3 concurrent instructions. Remove hp1
                and hp2 manually instead of calling RemoveCurrentP
                as this means we won't be calling UpdateUsedRegs 3 times }
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              { By removing p last, we've guaranteed that p.Next is
                valid (storing it prior to removing the instructions
                may result in a dangling pointer if hp1 immediately
                follows p), and because hp1, hp2 and hp3 came from
                sequential calls to GetNextInstruction, it is
                guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
              RemoveCurrentP(p, hp3);
              Result:=True;
            end
          { the mov/mov form changes the order of the register writes, see
            the header comment for the two cases that must be excluded }
          else if (taicpu(hp3).oper[0]^.reg<>taicpu(hp1).oper[0]^.reg) and
                  (taicpu(hp3).oper[0]^.reg<>taicpu(hp2).oper[0]^.reg) then
            begin
              DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
              taicpu(p).ops:=2;
              taicpu(p).opcode:=A_MOV;
              taicpu(hp1).ops:=2;
              taicpu(hp1).opcode:=A_MOV;
              { the pushed register becomes the source, the popped one the
                destination }
              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
              taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
              taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
              { life range of reg2 and reg3 is increased, fix register allocation entries }
              TransferUsedRegs(TmpUsedRegs);
              UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
              AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
              TransferUsedRegs(TmpUsedRegs);
              AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
              IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
              UpdateUsedRegs(tai(p.Next));
              asml.Remove(hp2);
              hp2.Free;
              asml.Remove(hp3);
              hp3.Free;
              result:=true;
            end;
        end;
    end;
  { With cs_opt_level4 enabled, turns
      call x
      ret
    into
      jmp x
    Returns true if the rewrite was applied. }
  function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if not (cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      if not GetNextInstruction(p,hp1) then
        exit;
      if not MatchInstruction(hp1,A_RET) then
        exit;
      DebugMsg('Peephole CallReg2Jmp performed', p);
      taicpu(p).opcode:=A_JMP;
      asml.Remove(hp1);
      hp1.Free;
      result:=true;
    end;
  { With cs_opt_level4 enabled, turns
      rcall x
      ret
    into
      rjmp x
    Returns true if the rewrite was applied. }
  function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if not (cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      if not GetNextInstruction(p,hp1) then
        exit;
      if not MatchInstruction(hp1,A_RET) then
        exit;
      DebugMsg('Peephole RCallReg2RJmp performed', p);
      taicpu(p).opcode:=A_RJMP;
      asml.Remove(hp1);
      hp1.Free;
      result:=true;
    end;
  { Pass-1 peephole optimisations for a MOV instruction at p.

    The following rewrites are tried, in this order (names are the ones
    used in the debug messages):
      Mov2Nop         - drop a mov whose destination is not in use afterwards
      MovOp2Op 1      - forward the mov source into the next user of the destination
      MovOp2Op 2      - same, for compare / skip instructions
      RedundantMov    - drop "mov reg0,reg0"
      MovOpMov2Op     - mov rx,ry; op rx,rz; mov ry,rx  ->  op ry,rz
      MovOpMov2Op2    - mov rx,ry; op rx,rw; mov rw,rx  ->  op rw,ry
      MovMov2Movw     - fold two byte moves of an even/odd pair into movw
      MovMov2Mov 1    - drop the first of two movs into the same destination
      MovMov2Mov 2    - drop a repeated identical mov

    p is a var parameter: branches that remove the current instruction
    advance p through RemoveCurrentP.
    Returns True when one of the rewrites was performed. }
  function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      i : Integer;
      alloc, dealloc: tai_regalloc;
    begin
      Result:=false;
      { change
          mov reg0, reg1
          dealloc reg0
        into
          dealloc reg0
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
          if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
            { reg. allocation information before calls is not perfect, so don't do this before
              calls/icalls }
            GetNextInstruction(p,hp1) and
            not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
            begin
              DebugMsg('Peephole Mov2Nop performed', p);
              RemoveCurrentP(p, hp1);
              Result := True;
              exit;
            end;
        end;

      { turn
          mov reg0, reg1
          <op> reg2,reg0
          dealloc reg0
        into
          <op> reg2,reg1
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
        (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
                               A_OUT,A_IN]) or
         { the reference register of ST/STD cannot be replaced }
         (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
        { disabled extra restrictions, kept for reference:
          (taicpu(hp1).ops=1) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 1 performed', p);

          { replace every register operand reg0 of hp1 by reg1 }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          { reg0 is no longer needed: drop its alloc/dealloc pair if both are found }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of reg1 is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { turn
          mov reg0, reg1
          <op> reg0,xxx
          dealloc reg0
        into
          <op> reg1,xxx
      }
      else if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
        { NOTE(review): per the AVR instruction set CPI only accepts r16..r31;
          nothing here checks that reg1 is in that range - confirm this
          cannot produce an invalid "cpi" }
        MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 2 performed', p);

          { replace every register operand reg0 of hp1 by reg1 }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          { reg0 is no longer needed: drop its alloc/dealloc pair if both are found }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of reg1 is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { remove
          mov reg0,reg0
      }
      else if (taicpu(p).ops=2) and
        (taicpu(p).oper[0]^.typ = top_reg) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          DebugMsg('Peephole RedundantMov performed', p);
          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op rx,rz
          mov ry, rx
        Into
          op ry,rz
      }
      else if (taicpu(p).ops=2) and
        MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).ops >= 1) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
        MatchInstruction(hp2,A_MOV) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
                                A_INC,A_DEC,
                                A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op performed', p);

          { hp1 may also read rx as its second operand (e.g. "add rx,rx").
            That source must be redirected to ry as well, because the mov
            that initialised rx is removed below.  The previous code compared
            against ry here, which made the assignment a no-op and left a
            read of the now uninitialised rx behind. }
          if (taicpu(hp1).ops=2) and
            (taicpu(hp1).oper[1]^.typ=top_reg) and
            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
            taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          { the operation now works directly on ry }
          taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;

          { rx is no longer needed: drop its alloc/dealloc pair if both are found }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { remove the trailing mov first, then the current instruction }
          asml.remove(hp2);
          hp2.free;

          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op rx,rw
          mov rw,rx
        Into
          op rw,ry
      }
      else if (taicpu(p).ops=2) and
        MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).ops = 2) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
        (hp2.typ=ait_instruction) and
        (taicpu(hp2).opcode=A_MOV) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { only operations listed here are rewritten; their operands get swapped }
        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op2 performed', p);

          taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
          taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          { rx is no longer needed: drop its alloc/dealloc pair if both are found }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { same order as in MovOpMov2Op: remove the trailing mov first,
            then the current instruction }
          asml.remove(hp2);
          hp2.free;

          result:=RemoveCurrentP(p);
        end
      { fold
          mov reg2,reg0
          mov reg3,reg1
        to
          movw reg2,reg0
      }
      else if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
        (taicpu(p).ops=2) and
        (taicpu(p).oper[0]^.typ = top_reg) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        getnextinstruction(p,hp1) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).opcode = A_MOV) and
        (taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        { both destination and source must form an even/odd register pair }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
        ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
        ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
        (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
        begin
          DebugMsg('Peephole MovMov2Movw performed', p);

          { the upper destination register is now written by p already,
            so its allocation has to be moved in front of p }
          alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
          if assigned(alloc) then
            begin
              asml.Remove(alloc);
              asml.InsertBefore(alloc,p);
              { proper book keeping of currently used registers }
              IncludeRegInUsedRegs(taicpu(hp1).oper[0]^.reg,UsedRegs);
            end;

          taicpu(p).opcode:=A_MOVW;
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        This removes the first mov from
          mov rX,...
          mov rX,...
      }
      else if GetNextInstruction(p,hp1) and
        { test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
        MatchInstruction(hp1,A_MOV) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
        while MatchInstruction(hp1,A_MOV) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
          { don't remove the first mov if the second is a mov rX,rX }
          not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
          begin
            DebugMsg('Peephole MovMov2Mov 1 performed', p);
            { removes the current mov and makes hp1 the new p }
            RemoveCurrentP(p,hp1);
            Result := True;

            GetNextInstruction(hp1,hp1);
            if not assigned(hp1) then
              break;
          end
      {
        This removes the second mov from
          mov rX,rY
          ...
          mov rX,rY
        if rX and rY are not modified in-between
      }
      else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
        MatchInstruction(hp1,A_MOV) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
        not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole MovMov2Mov 2 performed', p);
          { rX now has to stay allocated up to the place of the removed mov }
          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
          RemoveInstruction(hp1);
          Result := True;
        end;
    end;
  { Entry point of the CPU specific pass-1 peephole optimisations.

    For an instruction at p this first tries the OpCp2Op rewrite (dropping
    a compare against the zero register, or a matching CPI after ANDI,
    when the following conditional branch can use the flags of the
    operation itself) and otherwise dispatches on the opcode to the
    matching OptPass1xxx handler.
    Anything that is not an instruction is left alone.
    Returns True when an optimisation was performed. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      CmpInstr,BranchInstr: tai;
    begin
      result := false;
      if p.typ<>ait_instruction then
        exit;

      {
        change
          <op> reg,x,y
          cp reg,r1
        into
          <op>s reg,x,y
      }
      { Only the operations listed below are handled: according to the
        original note the other operations do not update all flags and
        FPC does not track flag usage. }
      if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
                              A_INC,A_LSL,A_LSR,
                              A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
        GetNextInstruction(p, CmpInstr) and
        ((MatchInstruction(CmpInstr, A_CP) and
          { either "cp reg,zero" ... }
          (((taicpu(p).oper[0]^.reg = taicpu(CmpInstr).oper[0]^.reg) and
            (taicpu(CmpInstr).oper[1]^.reg = GetDefaultZeroReg)) or
           { ... or the reversed form "cp zero,reg" for a subset of operations.
             NOTE(review): A_SBI is listed here but is not part of the outer
             opcode list above, so it can never match at this point -
             possibly another opcode was intended; confirm. }
           ((taicpu(p).oper[0]^.reg = taicpu(CmpInstr).oper[1]^.reg) and
            (taicpu(CmpInstr).oper[0]^.reg = GetDefaultZeroReg) and
            (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                  A_LSL,A_LSR,
                                  A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
         { "andi reg,K" followed by "cpi reg,K" with the same constant }
         (MatchInstruction(CmpInstr, A_CPI) and
          (taicpu(p).opcode = A_ANDI) and
          (taicpu(p).oper[1]^.typ=top_const) and
          (taicpu(CmpInstr).oper[1]^.typ=top_const) and
          (taicpu(p).oper[1]^.val=taicpu(CmpInstr).oper[1]^.val))) and
        GetNextInstruction(CmpInstr, BranchInstr) and
        { The instruction after the compare must be a conditional branch:
          following instructions could use other flags, but after a jump
          fpc never depends on the value of flags.
          The branch may only test Z or N (EQ/NE/MI/PL), which all of the
          above operations set from their result, ... }
        MatchInstruction(BranchInstr, A_BRxx) and
        ((taicpu(BranchInstr).condition in [C_EQ,C_NE,C_MI,C_PL]) or
         { ... unless the operation is a sub, which sets all flags
           (A_SBI: see the review note above) }
         (taicpu(p).opcode in [A_SUB,A_SBI])) then
        { No additional FindRegDealloc(NR_DEFAULTFLAGS) check is done and no
          existing flag allocation is moved: flag allocation tracking is not
          implemented on avr yet. }
        begin
          { If we compare to the same value we are masking, or the compare
            had its operands reversed after a sub, the branch condition has
            to be inverted }
          if (taicpu(CmpInstr).opcode=A_CPI) or
            ((taicpu(CmpInstr).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
            taicpu(BranchInstr).condition:=inverse_cond(taicpu(BranchInstr).condition);

          { mark the flags as live from the operation up to the branch }
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,BranchInstr), BranchInstr);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);

          DebugMsg('Peephole OpCp2Op performed', p);

          { the compare itself is dropped }
          asml.remove(CmpInstr);
          CmpInstr.free;
          Result:=true;
          exit;
        end;

      { no OpCp2Op: hand over to the opcode specific handler, if there is one }
      case taicpu(p).opcode of
        A_LDI:
          Result:=OptPass1LDI(p);
        A_STS:
          Result:=OptPass1STS(p);
        A_LDS:
          Result:=OptPass1LDS(p);
        A_IN:
          Result:=OptPass1IN(p);
        A_SBRS,
        A_SBRC:
          Result:=OptPass1SBR(p);
        A_ANDI:
          Result:=OptPass1ANDI(p);
        A_ADD:
          Result:=OptPass1ADD(p);
        A_SUB:
          Result:=OptPass1SUB(p);
        A_CLR:
          Result:=OptPass1CLR(p);
        A_PUSH:
          Result:=OptPass1PUSH(p);
        A_CALL:
          Result:=OptPass1CALL(p);
        A_RCALL:
          Result:=OptPass1RCALL(p);
        A_MOV:
          Result:=OptPass1MOV(p);
        A_SBIC,
        A_SBIS:
          Result:=OptPass1SBI(p);
      end;
    end;
  { Second peephole pass: the body is empty, so this pass performs no
    optimisations here. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    begin
      { intentionally left empty }
    end;
  begin
    { unit start-up code: store the TCpuAsmOptimizer class in casmoptimizer
      (presumably so the generic optimizer driver instantiates it - confirm
      against the unit that declares casmoptimizer) }
    casmoptimizer:=TCpuAsmOptimizer;
  end.