aoptcpu.pas 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
  type
    { CPU specific peephole optimizer; the Pass1/Pass2 entry points and the
      per-opcode helpers below are implemented in this unit. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { outputs a debug message into the assembler file
        (only when DEBUG_AOPTCPU is defined, otherwise it is a no-op) }
      procedure DebugMsg(const s: string; p: tai);

      { register usage queries }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
      function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { rewrites "skip; jmp .Lx; op; .Lx:" into "inverted skip; op" }
      function InvertSkipInstruction(var p: tai): boolean;

      { uses the same constructor as TAopObj }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    private
      { one handler per opcode for the first peephole pass }
      function OptPass1ADD(var p: tai): boolean;
      function OptPass1ANDI(var p: tai): boolean;
      function OptPass1CALL(var p: tai): boolean;
      function OptPass1CLR(var p: tai): boolean;
      function OptPass1IN(var p: tai): boolean;
      function OptPass1LDI(var p: tai): boolean;
      function OptPass1LDS(var p: tai): boolean;
      function OptPass1MOV(var p: tai): boolean;
      function OptPass1PUSH(var p: tai): boolean;
      function OptPass1RCALL(var p: tai): boolean;
      function OptPass1SBI(var p: tai): boolean;
      function OptPass1SBR(var p: tai): boolean;
      function OptPass1STS(var p: tai): boolean;
      function OptPass1SUB(var p: tai): boolean;

      { handler for the second peephole pass }
      function OptPass2MOV(var p: tai): boolean;
    end;
  52. Implementation
  53. uses
  54. cutils,
  55. verbose,
  56. cpuinfo,
  57. aasmbase,aasmcpu,aasmdata,
  58. aoptutils,
  59. globals,globtype,
  60. cgutils;
  61. type
  62. TAsmOpSet = set of TAsmOp; { opcode set; the set-taking MatchInstruction overloads test an instruction's opcode for membership in it }
  { Returns true when p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { Compares two references field by field. A reference with a non-empty
    volatility set never compares equal to anything. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { volatile on either side: treat as different }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      result:=
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor) and
        (r1.offset=r2.offset) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr) and
        (r1.addressmode=r2.addressmode);
    end;
  { Returns true when both operands have the same type and the same payload
    (constant value, register, or reference as judged by RefsEqual).
    Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_const:
          result:=oper1.val=oper2.val;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
      end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true when instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ=ait_instruction then
        result:=taicpu(instr).opcode=op
      else
        result:=false;
    end;
  { Returns true when instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ=ait_instruction then
        result:=taicpu(instr).opcode in ops
      else
        result:=false;
    end;
  { Returns true when instr is an instruction whose opcode is a member of ops
    and which has exactly opcount operands. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if not(taicpu(instr).opcode in ops) then
        exit;
      result:=taicpu(instr).ops=opcount;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: put s as an assembler comment in front of p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Regular build: debug messages are dropped. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true when Reg is involved in instruction p1.
    Two cases are handled here before deferring to the inherited check:
    - the multiply opcodes are treated as touching R0 and R1 regardless of
      their explicit operands;
    - MOVW touches each operand register and the register following it. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=false;
      if p1.typ=ait_instruction then
        case taicpu(p1).opcode of
          A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU:
            Result:=(getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1);
          A_MOVW:
            Result:=
              (TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or
              (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
              (taicpu(p1).oper[0]^.reg=reg) or
              (taicpu(p1).oper[1]^.reg=reg);
        end;
      { nothing target specific matched: ask the generic implementation }
      if not Result then
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Advances Next from Current to a following instruction.
    Without cs_opt_level3 only a single step is taken. With it, the walk
    continues until the end of the list, a non-instruction entry, an
    instruction involving reg, or a call/jump is reached.
    The result is the result of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(cs_opt_level3 in current_settings.optimizerswitches) then
            break;
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Returns true when instruction hp overwrites reg with a value that does
    not depend on the previous contents of reg.

    Handled cases:
    - LDI/MOV/LDS writing reg, unless the source operand is reg itself;
    - LD/LDD/LPM writing reg, unless reg is part of the address reference;
    - MOVW writing the register pair that contains reg, unless reg is also
      part of the source pair;
    - POP into reg.

    MOVW operands name the first register of a pair; the pair also covers
    the following register (same convention as RegInInstruction and
    InstructionLoadsFromReg in this unit). The pair test therefore has to
    be "operand+1 = reg". The previous "reg+1 = operand" form reported the
    upper *source* register as freshly loaded and missed the upper
    *destination* register. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result:=false;
      if not assigned(hp) or
         (hp.typ<>ait_instruction) then
        exit;
      p:=taicpu(hp);
      Result:=
        ((p.opcode in [A_LDI,A_MOV,A_LDS]) and (reg=p.oper[0]^.reg) and
         ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg))) or
        ((p.opcode in [A_LD,A_LDD,A_LPM]) and (reg=p.oper[0]^.reg) and
         not(RegInRef(reg,p.oper[1]^.ref^))) or
        ((p.opcode in [A_MOVW]) and
         { reg is one of the two destination registers ... }
         ((reg=p.oper[0]^.reg) or (TRegister(ord(p.oper[0]^.reg)+1)=reg)) and
         { ... and none of the two source registers }
         not(reg=p.oper[1]^.reg) and
         not(TRegister(ord(p.oper[1]^.reg)+1)=reg)) or
        ((p.opcode in [A_POP]) and (reg=p.oper[0]^.reg));
    end;
  { Returns true when instruction hp reads reg, either as a register operand
    or as base/index of a reference operand. For the opcodes whose first
    operand is only written, that operand is not inspected. POP is always
    reported as not reading reg. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      first, idx: longint;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      { we do not care about the stack pointer }
      if instr.opcode in [A_POP] then
        exit;
      { destination-only first operand: start with the second one }
      if instr.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        first:=1
      else
        first:=0;
      for idx:=first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_ref:
              Result:=
                (instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
            top_reg:
              Result:=
                (instr.oper[idx]^.reg=reg) or
                { MOVW also reads the register following its source operand }
                ((idx=1) and (instr.opcode=A_MOVW) and
                 (getsupreg(instr.oper[idx]^.reg)+1=getsupreg(reg)));
          end;
          { stop at the first hit }
          if Result then
            exit;
        end;
    end;
  {
    Turns
      sbis ?
      jmp .Lx
      op
    .Lx:
    Into
      sbic ?
      op
    and likewise SBIC->SBIS, SBRS->SBRC, SBRC->SBRS.

    p must be the skip instruction. Returns true when the rewrite was
    performed, false when the pattern did not match.

    Fixes relative to the previous version:
    - the function now actually returns true after rewriting (it used to
      return false unconditionally although both callers test the result);
    - the jump is only removed when p is one of the four opcodes that can be
      inverted, so the skip condition and the removed jump always stay in
      sync.
  }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { Steps forward from p until an entry of type ait_instruction is found.
      Returns true (and that entry in next) on success. }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      begin
        repeat
          result:=GetNextInstruction(p,next);
          p:=next;
        until
          (not result) or
          (not assigned(next)) or
          (next.typ in [ait_instruction]);
        result:=assigned(next) and (next.typ in [ait_instruction]);
      end;

    var
      hp1, hp2, hp3: tai;
    begin
      result:=false;
      if (taicpu(p).opcode in [A_SBIS,A_SBIC,A_SBRS,A_SBRC]) and
         { hp1: unconditional jump to a plain label }
         GetNextInstruction(taicpu(p),hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
         (taicpu(hp1).ops=1) and
         (taicpu(hp1).oper[0]^.typ=top_ref) and
         (taicpu(hp1).oper[0]^.ref^.offset=0) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp2: the single instruction that is jumped over; must not be a jump }
         GetNextInstructionWithoutLabel(hp1,hp2) and
         (hp2.typ=ait_instruction) and
         (not taicpu(hp2).is_jmp) and
         { hp3: the jump target has to follow directly after hp2 }
         GetNextInstruction(hp2,hp3) and
         FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
        begin
          DebugMsg('SkipJump2InvertedSkip', p);
          case taicpu(p).opcode of
            A_SBIS: taicpu(p).opcode:=A_SBIC;
            A_SBIC: taicpu(p).opcode:=A_SBIS;
            A_SBRS: taicpu(p).opcode:=A_SBRC;
            A_SBRC: taicpu(p).opcode:=A_SBRS;
          end;
          { the jump disappears, so its label loses one reference }
          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end;
    end;
  { turn
      ldi reg0, imm
      <op> reg1, reg0
      dealloc reg0
    into
      <op>i reg1, imm
    for <op> = cp, mov, and, sub. reg1 has to be one of r16..r31 and reg0
    must not be used after <op>. Returns true when the ldi was removed. }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      hp1 : tai;
      alloc ,dealloc: tai_regalloc;
      immreg : TRegister;
    begin
      Result:=false;
      if not MatchOpType(taicpu(p),top_reg,top_const) then
        exit;
      immreg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p, hp1, immreg) then
        exit;
      if not MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) then
        exit;
      if RegModifiedBetween(immreg, p, hp1) then
        exit;
      if not MatchOpType(taicpu(hp1),top_reg,top_reg) then
        exit;
      if not(getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) then
        exit;
      { the loaded register has to be the source of <op>, and only the source }
      if taicpu(hp1).oper[1]^.reg<>immreg then
        exit;
      if MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
      if RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs) then
        exit;

      { switch <op> to its immediate form }
      case taicpu(hp1).opcode of
        A_MOV:
          taicpu(hp1).opcode:=A_LDI;
        A_CP:
          taicpu(hp1).opcode:=A_CPI;
        A_SUB:
          taicpu(hp1).opcode:=A_SUBI;
        A_AND:
          taicpu(hp1).opcode:=A_ANDI;
        else
          internalerror(2016111901);
      end;
      taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);

      { drop the allocation markers of reg0 if both ends can be found }
      alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
      dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
      if assigned(alloc) and assigned(dealloc) then
        begin
          asml.Remove(alloc);
          alloc.Free;
          asml.Remove(dealloc);
          dealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);
      result:=RemoveCurrentP(p);
    end;
  { Turns an STS to a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into OUT when the address lies inside
    a 64 byte window: 0..63 if the CPU has CPUAVR_NOMEMMAPPED_REGS, 32..95
    otherwise. The OUT operand is the address minus the window start. }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      windowstart : longint;
    begin
      Result:=false;
      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        windowstart:=0
      else
        windowstart:=32;
      if (taicpu(p).oper[0]^.ref^.symbol=nil) and
         (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
         (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
         (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
         (taicpu(p).oper[0]^.ref^.offset>=windowstart) and
         (taicpu(p).oper[0]^.ref^.offset<=windowstart+63) then
        begin
          DebugMsg('Peephole Sts2Out performed', p);
          taicpu(p).opcode:=A_OUT;
          taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-windowstart);
          result:=true;
        end;
    end;
  { Turns an LDS from a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into IN when the address lies inside
    a 64 byte window: 0..63 if the CPU has CPUAVR_NOMEMMAPPED_REGS, 32..95
    otherwise. The IN operand is the address minus the window start. }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      windowstart : longint;
    begin
      Result:=false;
      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        windowstart:=0
      else
        windowstart:=32;
      if (taicpu(p).oper[1]^.ref^.symbol=nil) and
         (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
         (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
         (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
         (taicpu(p).oper[1]^.ref^.offset>=windowstart) and
         (taicpu(p).oper[1]^.ref^.offset<=windowstart+63) then
        begin
          DebugMsg('Peephole Lds2In performed', p);
          taicpu(p).opcode:=A_IN;
          taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-windowstart);
          result:=true;
        end;
    end;
  { Peephole patterns starting with "in rX,Y" where Y <= 31:

      in rX,Y / ori rX,n / out Y,rX          ->  sbi Y,lg(n)
      in rX,Y / andi rX,not(n) / out Y,rX    ->  cbi Y,lg(n)
      in rX,Y / andi rX,n / breq|brne L1     ->  sbis|sbic Y,lg(n) / jmp L1 / .Ltemp:

    n has to have exactly one bit set in its low byte. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      if not GetNextInstruction(p,hp1) then
        exit;
      if taicpu(p).oper[1]^.val>31 then
        exit;
      if hp1.typ<>ait_instruction then
        exit;

      case taicpu(hp1).opcode of
        A_ORI:
          { set a single bit and write the port back }
          if (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
             (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
             GetNextInstruction(hp1,hp2) and
             MatchInstruction(hp2,A_OUT) and
             MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InOriOut2Sbi performed', p);
              taicpu(p).opcode:=A_SBI;
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end;
        A_ANDI:
          if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
            begin
              { clear a single bit and write the port back }
              if (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_OUT) and
                 MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
                 MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
                begin
                  DebugMsg('Peephole InAndiOut2Cbi performed', p);
                  taicpu(p).opcode:=A_CBI;
                  taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                  taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
                  asml.Remove(hp1);
                  hp1.Free;
                  asml.Remove(hp2);
                  hp2.Free;
                  result:=true;
                end
              { test a single bit and branch on the outcome }
              else if (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_BRxx) and
                 (taicpu(hp2).condition in [C_EQ,C_NE]) then
                begin
                  case taicpu(hp2).condition of
                    C_EQ:
                      taicpu(p).opcode:=A_SBIS;
                    else
                      taicpu(p).opcode:=A_SBIC;
                  end;
                  DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);
                  taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                  taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
                  asml.Remove(hp1);
                  hp1.Free;
                  { the conditional branch becomes an unconditional jump }
                  taicpu(hp2).condition:=C_None;
                  if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
                    taicpu(hp2).opcode:=A_JMP
                  else
                    taicpu(hp2).opcode:=A_RJMP;
                  { fresh label right behind the jump }
                  current_asmdata.getjumplabel(l);
                  l.increfs;
                  asml.InsertAfter(tai_label.create(l), hp2);
                  result:=true;
                end;
            end;
      end;
    end;
  { Turn
      in rx, y
      sbr* rx, z
    Into
      sbi* y, z
    when y is in 0..31, rx is released after the skip instruction and rx is
    not modified between the two instructions. Afterwards
    InvertSkipInstruction is tried on the (possibly rewritten) instruction. }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetLastInstruction(p,hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode=A_IN) and
         (taicpu(hp1).ops=2) and
         (taicpu(hp1).oper[1]^.typ=top_const) and
         (taicpu(hp1).oper[1]^.val in [0..31]) and
         MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, p)) then
        begin
          { test the I/O bit directly instead of the register copy }
          case taicpu(p).opcode of
            A_SBRS:
              taicpu(p).opcode:=A_SBIS;
            else
              taicpu(p).opcode:=A_SBIC;
          end;
          taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
          DebugMsg('Peephole InSbrx2Sbix performed', p);
          asml.Remove(hp1);
          hp1.free;
          result:=true;
        end;
      { always evaluated first, so the inversion is attempted in any case }
      result:=InvertSkipInstruction(p) or result;
    end;
  { Peephole optimizations for sbic/sbis.

    1. Via InvertSkipInstruction:
         sbic/sbis X, y
         jmp .L1
         op
       .L1:
       into
         sbis/sbic X,y
         op
       .L1:

    2. Otherwise:
         sbiX X, y
         jmp .L1
         jmp .L2
       .L1:
         op
       .L2:
       into
         sbiX X,y
       .L1:
         op
       .L2:

    Returns true when one of the rewrites was performed.

    Fix: the entry following "op" (hp5) is now verified to be a label before
    it is cast to tai_label; previously hp3 was checked a second time
    instead, so hp5 could be cast without being a label. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;
      if InvertSkipInstruction(p) then
        result:=true
      else if
         { hp1: jmp .L1 }
         GetNextInstruction(p, hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ = top_ref) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp2: jmp .L2 }
         GetNextInstruction(hp1, hp2) and
         (hp2.typ=ait_instruction) and
         (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp2).ops>0) and
         (taicpu(hp2).oper[0]^.typ = top_ref) and
         (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp3: .L1 }
         GetNextInstruction(hp2, hp3) and
         (hp3.typ=ait_label) and
         (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
         { hp4: op }
         GetNextInstruction(hp3, hp4) and
         (hp4.typ=ait_instruction) and
         { hp5: .L2 }
         GetNextInstruction(hp4, hp5) and
         (hp5.typ=ait_label) and
         (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);
          { both jumps go away, so each label loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;
          AsmL.remove(hp1);
          taicpu(hp1).Free;
          AsmL.remove(hp2);
          taicpu(hp2).Free;
          result:=true;
        end;
    end;
  { Peephole optimizations for "andi rx, #imm" with two operands.

    1. When imm is a power of two, rx is released afterwards and the code is
         andi rx, #pow2
         breq/brne l
         <op>
       l:
       it becomes
         sbrc/sbrs rx, bit      (bit = index of the set bit)
         <op>
       l:

    2. Otherwise the andi is removed when rx is released right after it and
       the flags are either released as well or not in use. }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
      i : longint;
    begin
      Result:=false;
      if taicpu(p).ops<>2 then
        exit;

      if (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,i) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         { hp1: conditional branch on equal / not equal to a label }
         GetNextInstruction(p,hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode=A_BRxx) and
         (taicpu(hp1).condition in [C_EQ,C_NE]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ = top_ref) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp2: the single instruction that is branched over }
         GetNextInstruction(hp1,hp2) and
         (hp2.typ=ait_instruction) and
         { hp3: the branch target directly behind it }
         GetNextInstruction(hp2,hp3) and
         (hp3.typ=ait_label) and
         (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);
          taicpu(p).oper[1]^.val:=i;
          case taicpu(hp1).condition of
            C_NE:
              taicpu(p).opcode:=A_SBRS;
            else
              taicpu(p).opcode:=A_SBRC;
          end;
          asml.Remove(hp1);
          hp1.free;
          result:=true;
          exit;
        end;

      if (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          DebugMsg('Redundant Andi removed', p);
          result:=RemoveCurrentP(p);
        end;
    end;
  { Turn
      add rX, <zero register>
      adc rY, rZ
    into
      add rY, rZ

    The first add is removed and the following ADC is rewritten to ADD.

    Fix: the ADC used to be left unchanged after its ADD was removed, so it
    would have consumed whatever carry happened to be set before. It is now
    rewritten to ADD, mirroring what OptPass1SUB does for SUB/SBC and what
    the debug message ("AddAdc2Add") announces. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);
          { without the preceding add there is no defined carry to add in }
          taicpu(hp1).opcode:=A_ADD;
          RemoveCurrentP(p, hp1);
          Result := True;
        end;
    end;
  { Turn
      sub rX, <zero register>
      sbc rY, rZ
    into
      sub rY, rZ
    The first sub is removed and the following SBC is rewritten to SUB. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p, hp1) then
        exit;
      if not MatchInstruction(hp1,A_SBC) then
        exit;
      DebugMsg('Peephole SubSbc2Sub performed', p);
      taicpu(hp1).opcode:=A_SUB;
      RemoveCurrentP(p, hp1);
      Result := True;
    end;
  { fold
      mov reg2,reg0
      mov reg3,reg1
    to
      movw reg2,reg0
    Requires CPUAVR_HAS_MOVW, even-numbered reg0 and reg2, and
    reg3 = reg2+1, reg1 = reg0+1. }
  function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
    var
      hp1: tai;
    begin
      result:=false;
      if not(CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) then
        exit;
      { p has to be a register to register move }
      if (taicpu(p).ops<>2) or
         (taicpu(p).oper[0]^.typ<>top_reg) or
         (taicpu(p).oper[1]^.typ<>top_reg) then
        exit;
      { ... directly followed by another one }
      if not getnextinstruction(p,hp1) then
        exit;
      if (hp1.typ<>ait_instruction) or
         (taicpu(hp1).opcode<>A_MOV) or
         (taicpu(hp1).ops<>2) or
         (taicpu(hp1).oper[0]^.typ<>top_reg) or
         (taicpu(hp1).oper[1]^.typ<>top_reg) then
        exit;
      { both moves together have to form two aligned register pairs }
      if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
         ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
         ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
         (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
        begin
          DebugMsg('Peephole MovMov2Movw performed', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
          taicpu(p).opcode:=A_MOVW;
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end;
    end;
  { Peephole optimizations for "clr rX".

    1. turn the common
         clr rX
         mov/ld rX, rY
       into
         mov/ld rX, rY

    2. turn
         clr rX
         ...
         adc/sbc rY, rX
       into
         ...
         adc/sbc rY, <zero register>
       when rX is released after the adc/sbc and rY differs from rX. }
  function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
    var
      hp1: tai;
      alloc, dealloc: tai_regalloc;
      clrreg: TRegister;
    begin
      Result:=false;
      if (taicpu(p).ops<>1) or
         (taicpu(p).oper[0]^.typ<>top_reg) then
        exit;
      clrreg:=taicpu(p).oper[0]^.reg;
      { hp1: next instruction involving rX, with rX untouched in between }
      if not GetNextInstructionUsingReg(p, hp1, clrreg) then
        exit;
      if RegModifiedBetween(clrreg, p, hp1) then
        exit;
      if hp1.typ<>ait_instruction then
        exit;

      case taicpu(hp1).opcode of
        A_MOV,A_LD:
          { rX is overwritten anyway: the clr is dead }
          if (taicpu(hp1).ops>0) and
             (taicpu(hp1).oper[0]^.typ=top_reg) and
             (taicpu(hp1).oper[0]^.reg=clrreg) then
            begin
              DebugMsg('Peephole ClrMov2Mov performed', p);
              result:=RemoveCurrentP(p);
            end;
        A_ADC,A_SBC:
          { rX only supplies a zero: use the zero register instead }
          if (taicpu(hp1).ops=2) and
             (taicpu(hp1).oper[1]^.typ=top_reg) and
             (taicpu(hp1).oper[1]^.reg=clrreg) and
             (taicpu(hp1).oper[0]^.reg<>clrreg) and
             assigned(FindRegDeAlloc(clrreg,tai(hp1.Next))) then
            begin
              DebugMsg('Peephole ClrAdc2Adc performed', p);
              taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
              alloc:=FindRegAllocBackward(clrreg,tai(p.Previous));
              dealloc:=FindRegDeAlloc(clrreg,tai(hp1.Next));
              if assigned(alloc) and assigned(dealloc) then
                begin
                  asml.Remove(alloc);
                  alloc.Free;
                  asml.Remove(dealloc);
                  dealloc.Free;
                end;
              result:=RemoveCurrentP(p);
            end;
      end;
    end;
  { turn
      push reg0
      push reg1
      pop reg3
      pop reg2
    into
      movw reg2,reg0
    or
      mov reg3,reg1
      mov reg2,reg0
    The movw form is used when CPUAVR_HAS_MOVW is available and both
    reg1:reg0 and reg3:reg2 are aligned register pairs. }
  function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
      usemovw: boolean;
    begin
      Result:=false;
      { p: push reg0, hp1: push reg1, hp2: pop reg3, hp3: pop reg2 }
      if not(GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_PUSH)) then
        exit;
      if not(GetNextInstruction(hp1,hp2) and MatchInstruction(hp2,A_POP)) then
        exit;
      if not(GetNextInstruction(hp2,hp3) and MatchInstruction(hp3,A_POP)) then
        exit;

      usemovw:=
        (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
        ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
        ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0);

      if usemovw then
        begin
          DebugMsg('Peephole PushPushPopPop2Movw performed', p);
          { the last pop becomes the movw }
          taicpu(hp3).ops:=2;
          taicpu(hp3).opcode:=A_MOVW;
          taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
          { hp1 and hp2 are taken out by hand so that the used-register
            update below only has to run once, and p goes last so that
            p.Next is still valid at that point; the update then stops at
            hp3, which becomes the new p }
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(hp2);
          hp2.Free;
          RemoveCurrentP(p, hp3);
          Result := True;
        end
      else
        begin
          DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
          { both pushes become two-operand moves }
          taicpu(p).ops:=2;
          taicpu(p).opcode:=A_MOV;
          taicpu(hp1).ops:=2;
          taicpu(hp1).opcode:=A_MOV;
          { old push operand -> source, matching pop operand -> destination }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
          taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
          taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
          { life range of reg2 and reg3 is increased, fix register allocation entries }
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
          AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
          TransferUsedRegs(TmpUsedRegs);
          AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
          IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
          UpdateUsedRegs(tai(p.Next));
          { the pops are no longer needed }
          asml.Remove(hp2);
          hp2.Free;
          asml.Remove(hp3);
          hp3.Free;
          result:=true;
        end;
    end;
  { Peephole for CALL: when level-4 optimizations are enabled and the
    instruction returned by GetNextInstruction is a RET, the CALL is rewritten
    into a JMP and the RET is removed from the list and freed.

    p      : the CALL instruction being examined (its opcode is changed in place)
    Result : True if the rewrite was performed, False otherwise }
  function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
    var
      next_ins: tai;
    begin
      Result:=false;

      { only done at -O4 }
      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      { there must be a following instruction ... }
      if not GetNextInstruction(p,next_ins) then
        exit;
      { ... and it must be a RET }
      if not MatchInstruction(next_ins,A_RET) then
        exit;

      DebugMsg('Peephole CallReg2Jmp performed', p);

      taicpu(p).opcode:=A_JMP;

      asml.Remove(next_ins);
      next_ins.Free;

      Result:=true;
    end;
  { Peephole for RCALL: when level-4 optimizations are enabled and the
    instruction returned by GetNextInstruction is a RET, the RCALL is rewritten
    into an RJMP and the RET is removed from the list and freed.

    p      : the RCALL instruction being examined (its opcode is changed in place)
    Result : True if the rewrite was performed, False otherwise }
  function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
    var
      next_ins: tai;
    begin
      Result:=false;

      { only done at -O4 }
      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;
      { there must be a following instruction ... }
      if not GetNextInstruction(p,next_ins) then
        exit;
      { ... and it must be a RET }
      if not MatchInstruction(next_ins,A_RET) then
        exit;

      DebugMsg('Peephole RCallReg2RJmp performed', p);

      taicpu(p).opcode:=A_RJMP;

      asml.Remove(next_ins);
      next_ins.Free;

      Result:=true;
    end;
  { Pass-1 peephole optimizations for a MOV instruction.

    The following patterns are tried in this order; the first one that
    matches is applied:
      - Mov2Nop        : the destination is not in the used-register set after
                         the mov, so the mov is dropped
      - MovOp2Op 1     : mov reg0,reg1 / <op> ..reg0.. / dealloc reg0
      - MovOp2Op 2     : mov reg1,reg0 / <compare or skip> reg1,.. / dealloc reg1
      - RedundantMov   : mov reg0,reg0
      - MovOpMov2Op    : mov rx,ry / op rx,rz / mov ry,rx
      - MovOpMov2Op2   : mov rx,ry / op rx,rw / mov rw,rx
      - MovMov2Mov 1   : mov rX,.. immediately followed by mov rX,..
      - MovMov2Mov 2   : a repeated identical mov rX,rY

    p      : the MOV instruction; updated when the current instruction is removed
    Result : True if the instruction list was changed }
  function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      i : Integer;
      alloc, dealloc: tai_regalloc;
    begin
      Result:=false;

      { change
          mov reg0, reg1
          dealloc reg0
        into
          dealloc reg0
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
          if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
            { reg. allocation information before calls is not perfect, so don't do this before
              calls/icalls }
            GetNextInstruction(p,hp1) and
            not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
            begin
              DebugMsg('Peephole Mov2Nop performed', p);
              RemoveCurrentP(p, hp1);
              Result := True;
              exit;
            end;
        end;

      { turn
          mov reg0, reg1
          <op> reg2,reg0
          dealloc reg0
        into
          <op> reg2,reg1
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
         (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
                                A_OUT,A_IN]) or
         { the reference register of ST/STD cannot be replaced }
         (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
         (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
         {(taicpu(hp1).ops=1) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and  }
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 1 performed', p);

          { replace every register operand of hp1 that names reg0 by reg1 }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          { reg0 is no longer used: drop its alloc/dealloc pair if both are found }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of reg1 is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { turn
          mov reg1, reg0
          <op> reg1,xxxx
          dealloc reg1
        into
          <op> reg0,xxxx
      }
      else if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
         MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 2 performed', p);

          { replace every register operand of hp1 that names reg1 by reg0 }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of the source register of the mov is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { remove
          mov reg0,reg0
      }
      else if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ = top_reg) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          DebugMsg('Peephole RedundantMov performed', p);

          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op rx,rz
          mov ry, rx
        Into
          op ry,rz
      }
      else if (taicpu(p).ops=2) and
         MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).ops >= 1) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
         MatchInstruction(hp2,A_MOV) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
         (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
                                 A_INC,A_DEC,
                                 A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op performed', p);

          { If the operation also reads rx as its second operand (e.g.
            "add rx,rx"), that operand has to be redirected to ry as well:
            the mov that defined rx is removed below, so leaving rx in place
            would read a register that was never written. }
          if (taicpu(hp1).ops=2) and
             (taicpu(hp1).oper[1]^.typ=top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
            taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          { the operation now works directly on ry }
          taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          asml.remove(hp2);
          hp2.free;

          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op  rx,rw
          mov rw,rx
        Into
          op rw,ry
      }
      else if (taicpu(p).ops=2) and
         MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).ops = 2) and
         MatchOpType(taicpu(hp1),top_reg,top_reg) and
         GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
         (hp2.typ=ait_instruction) and
         (taicpu(hp2).opcode=A_MOV) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         { only operations from this list are swapped }
         (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op2 performed', p);

          taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
          taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          result:=RemoveCurrentP(p);

          asml.remove(hp2);
          hp2.free;
        end
      {
        This removes the first mov from
          mov rX,...
          mov rX,...
      }
      else if GetNextInstruction(p,hp1) and
        { test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
        MatchInstruction(hp1,A_MOV) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
        while MatchInstruction(hp1,A_MOV) and
              MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
              { don't remove the first mov if the second is a mov rX,rX }
              not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
          begin
            DebugMsg('Peephole MovMov2Mov 1 performed', p);

            { p becomes hp1 here; keep going while further movs to rX follow }
            RemoveCurrentP(p,hp1);
            Result := True;

            GetNextInstruction(hp1,hp1);
            if not assigned(hp1) then
              break;
          end
      {
        This removes the second mov from
          mov rX,rY

          ...

          mov rX,rY

        if rX and rY are not modified in-between
      }
      else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
        MatchInstruction(hp1,A_MOV) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
        not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole MovMov2Mov 2 performed', p);
          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
          RemoveInstruction(hp1);
          Result := True;
        end;
    end;
  { Pass-1 peephole entry point for one list entry.

    Entries that are not instructions are left untouched. For an instruction,
    the "OpCp2Op" pattern is tried first:

        <op> reg,...
        cp   reg,<zero reg>    (or cp <zero reg>,reg, or a cpi with the same
                                constant as a preceding andi)
        br<cond>

    in which case the compare is removed and the branch uses the flags set by
    <op>. If the pattern does not match, the instruction is handed to the
    opcode-specific OptPass1xxx routine.

    p      : current list entry; may be changed by the called routines
    Result : True if anything was changed }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      cmp_ins, branch_ins: tai;
      fold_compare: boolean;
    begin
      Result:=false;

      if p.typ<>ait_instruction then
        exit;

      { this optimization can applied only to the currently enabled operations because
        the other operations do not update all flags and FPC does not track flag usage }
      fold_compare:=
        MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
                             A_INC,A_LSL,A_LSR,
                             A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
        GetNextInstruction(p, cmp_ins) and
        ((MatchInstruction(cmp_ins, A_CP) and
          { cp reg,zero }
          (((taicpu(p).oper[0]^.reg = taicpu(cmp_ins).oper[0]^.reg) and
            (taicpu(cmp_ins).oper[1]^.reg = GetDefaultZeroReg)) or
          { cp zero,reg }
           ((taicpu(p).oper[0]^.reg = taicpu(cmp_ins).oper[1]^.reg) and
            (taicpu(cmp_ins).oper[0]^.reg = GetDefaultZeroReg) and
            (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                  A_LSL,A_LSR,
                                  A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
         { andi ..,const followed by cpi ..,const with the same constant }
         (MatchInstruction(cmp_ins, A_CPI) and
          (taicpu(p).opcode = A_ANDI) and
          (taicpu(p).oper[1]^.typ=top_const) and
          (taicpu(cmp_ins).oper[1]^.typ=top_const) and
          (taicpu(p).oper[1]^.val=taicpu(cmp_ins).oper[1]^.val))) and
        GetNextInstruction(cmp_ins, branch_ins) and
        { be careful here, following instructions could use other flags
          however after a jump fpc never depends on the value of flags.
          Only branches on EQ/NE/MI/PL are accepted, unless the operation
          is in the sub set below, which is noted to set all flags. }
        MatchInstruction(branch_ins, A_BRxx) and
        ((taicpu(branch_ins).condition in [C_EQ,C_NE,C_MI,C_PL]) or
         (taicpu(p).opcode in [A_SUB,A_SBI]));
        { no flag allocation tracking is implemented yet on avr, so there is
          no check for a dealloc of NR_DEFAULTFLAGS after the branch }

      if fold_compare then
        begin
          { no flag allocation tracking implemented yet on avr, so an existing
            flag allocation is not moved; a fresh alloc/dealloc pair is
            inserted below instead }

          { If we compare to the same value we are masking then invert the comparison;
            likewise for a sub whose compare has the zero register first }
          if (taicpu(cmp_ins).opcode=A_CPI) or
             ((taicpu(cmp_ins).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
            taicpu(branch_ins).condition:=inverse_cond(taicpu(branch_ins).condition);

          { mark the flags as live from the operation up to and including the branch }
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,branch_ins), branch_ins);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);

          DebugMsg('Peephole OpCp2Op performed', p);

          { the compare itself is no longer needed }
          asml.remove(cmp_ins);
          cmp_ins.free;

          Result:=true;
        end
      else
        { dispatch to the opcode specific optimizations }
        case taicpu(p).opcode of
          A_LDI:
            Result:=OptPass1LDI(p);
          A_STS:
            Result:=OptPass1STS(p);
          A_LDS:
            Result:=OptPass1LDS(p);
          A_IN:
            Result:=OptPass1IN(p);
          A_SBRS,
          A_SBRC:
            Result:=OptPass1SBR(p);
          A_ANDI:
            Result:=OptPass1ANDI(p);
          A_ADD:
            Result:=OptPass1ADD(p);
          A_SUB:
            Result:=OptPass1SUB(p);
          A_CLR:
            Result:=OptPass1CLR(p);
          A_PUSH:
            Result:=OptPass1PUSH(p);
          A_CALL:
            Result:=OptPass1CALL(p);
          A_RCALL:
            Result:=OptPass1RCALL(p);
          A_MOV:
            Result:=OptPass1MOV(p);
          A_SBIC,
          A_SBIS:
            Result:=OptPass1SBI(p);
        end;
    end;
  { Pass-2 peephole entry point for one list entry.

    Only MOV instructions are handled in this pass: they are forwarded to
    OptPass2MOV. Every other entry yields False.

    p      : current list entry
    Result : value returned by OptPass2MOV for a MOV, False otherwise }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      Result:=false;

      { the opcode is only inspected once p is known to be an instruction }
      if (p.typ=ait_instruction) and
         (taicpu(p).opcode=A_MOV) then
        Result:=OptPass2MOV(p);
    end;
  { unit initialization: make TCpuAsmOptimizer the class referenced by casmoptimizer }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.