aoptcpu.pas 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
  Type
    { CPU specific peephole optimizer; extends the generic TAsmOptimizer
      with the instruction patterns handled in this unit. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { searches forward from Current for the next instruction that is of
        interest for reg; the result is returned in Next }
      Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;

      { register usage queries, overriding the generic implementations }
      function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
      function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;

      { inverts a skip instruction which only guards a jump over a single
        instruction }
      function InvertSkipInstruction(var p: tai): boolean;

      { uses the same constructor as TAopObj }

      { entry points of the peephole passes }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    private
      { pass 1 handlers, one per opcode (family) }
      function OptPass1ADD(var p : tai) : boolean;
      function OptPass1ANDI(var p : tai) : boolean;
      function OptPass1CALL(var p : tai) : boolean;
      function OptPass1CLR(var p : tai) : boolean;
      function OptPass1IN(var p : tai) : boolean;
      function OptPass1LDI(var p : tai) : boolean;
      function OptPass1LDS(var p : tai) : boolean;
      function OptPass1MOV(var p : tai) : boolean;
      function OptPass1PUSH(var p : tai) : boolean;
      function OptPass1RCALL(var p : tai) : boolean;
      function OptPass1SBI(var p : tai) : boolean;
      function OptPass1SBR(var p : tai) : boolean;
      function OptPass1STS(var p : tai) : boolean;
      function OptPass1SUB(var p : tai) : boolean;

      { pass 2 handlers }
      function OptPass2MOV(var p : tai) : boolean;
    End;
  52. Implementation
  53. uses
  54. cutils,
  55. verbose,
  56. cpuinfo,
  57. aasmbase,aasmcpu,aasmdata,
  58. aoptutils,
  59. globals,globtype,
  60. cgutils;
  type
    { set of opcodes, accepted by the set based MatchInstruction overloads }
    TAsmOpSet = set of TAsmOp;
  { Returns true if p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { Compares two references field by field. References carrying any
    volatility flag are never reported as equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual:=false;
      { a volatile reference on either side never matches }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { register part and addressing }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) or
         (r1.addressmode<>r2.addressmode) then
        exit;
      RefsEqual:=r1.offset=r2.offset;
    end;
  { Returns true if both operands have the same type and the same content.
    Only constants, registers and references can match; every other
    operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          Result:=oper1.val = oper2.val;
        top_reg:
          Result:=oper1.reg = oper2.reg;
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
        else
          Result:=false;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true if instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode=op;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode in ops;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops
    and which has exactly opcount operands. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if not(taicpu(instr).opcode in ops) then
        exit;
      result:=taicpu(instr).ops=opcount;
    end;
  {$ifndef DEBUG_AOPTCPU}
  { debugging disabled: the message is dropped }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { debugging enabled: insert s as assembler comment in front of p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if Reg is involved in instruction p1. Besides the generic
    check of the inherited method, this handles:
      - the multiply instructions, which are treated as using R0 and R1
      - MOVW, where the register following each operand counts as well
      - ADIW, where the register following the first operand counts as well }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      instr: taicpu;
    begin
      Result:=false;
      if p1.typ=ait_instruction then
        begin
          instr:=taicpu(p1);
          case instr.opcode of
            A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU:
              Result:=(getsupreg(Reg)=RS_R0) or (getsupreg(Reg)=RS_R1);
            A_MOVW:
              Result:=(TRegister(ord(instr.oper[0]^.reg)+1)=Reg) or
                      (TRegister(ord(instr.oper[1]^.reg)+1)=Reg) or
                      (instr.oper[0]^.reg=Reg) or
                      (instr.oper[1]^.reg=Reg);
            A_ADIW:
              Result:=(TRegister(ord(instr.oper[0]^.reg)+1)=Reg) or
                      (instr.oper[0]^.reg=Reg);
            else
              ;
          end;
        end;
      { nothing special found: let the generic implementation decide }
      if not Result then
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Advances from Current and stores the instruction found in Next.
    Without cs_opt_level3 only a single step is made. With cs_opt_level3
    the search continues until there is no further instruction, a
    non-instruction is hit, reg appears in the instruction or the
    instruction is a call/jump. The result is the result of the last
    GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          { conditions which end the search regardless of reg }
          if not(cs_opt_level3 in current_settings.optimizerswitches) or
             not(Result) or
             (Next.typ<>ait_instruction) then
            break;
          { Next is an instruction here, so the cast below is safe }
          if RegInInstruction(reg,Next) or
             is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Returns true if instruction hp overwrites reg with a value that does
    not depend on the previous contents of reg. Anything which is not an
    instruction (or nil) yields false. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      case instr.opcode of
        { plain loads/moves: destination is reg and the source is not reg itself }
        A_LDI,A_MOV,A_LDS:
          Result:=(reg=instr.oper[0]^.reg) and
                  ((instr.oper[1]^.typ<>top_reg) or (reg<>instr.oper[1]^.reg));
        { loads through a reference: reg must not be part of the address }
        A_LD,A_LDD,A_LPM:
          Result:=(reg=instr.oper[0]^.reg) and
                  not(RegInRef(reg,instr.oper[1]^.ref^));
        { register pair move }
        A_MOVW:
          Result:=((reg=instr.oper[0]^.reg) or (TRegister(ord(reg)+1)=instr.oper[0]^.reg)) and
                  not(reg=instr.oper[1]^.reg) and
                  not(TRegister(ord(reg)+1)=instr.oper[1]^.reg);
        A_POP:
          Result:=reg=instr.oper[0]^.reg;
        else
          Result:=false;
      end;
    end;
  { Returns true if instruction hp reads reg, either as register operand
    or as base/index of a reference operand. POP is never reported; for
    the load/move style opcodes the first operand is only written and
    therefore not inspected. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      first, idx: longint;
    begin
      Result:=false;
      if not assigned(hp) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { we do not care about the stack pointer }
      if instr.opcode in [A_POP] then
        exit;
      { first operand only written? then start at the second one }
      if instr.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        first:=1
      else
        first:=0;
      for idx:=first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              Result:=(instr.oper[idx]^.reg=reg) or
                { MOVW: the source is a register pair }
                ((idx=1) and (instr.opcode=A_MOVW) and (getsupreg(instr.oper[idx]^.reg)+1=getsupreg(reg))) or
                { ADIW: the operand is a register pair }
                ((idx=0) and (instr.opcode=A_ADIW) and (getsupreg(instr.oper[idx]^.reg)+1=getsupreg(reg)));
            top_ref:
              Result:=(instr.oper[idx]^.ref^.base=reg) or
                      (instr.oper[idx]^.ref^.index=reg);
            else
              ;
          end;
          { stop at the first hit }
          if Result then
            exit;
        end;
    end;
  {
    Replaces a skip instruction which only guards a jump over a single
    instruction by the opposite skip instruction, i.e.

        sbis ?
        jmp .Lx
        op
      .Lx:

    becomes

        sbic ?
        op

    The opcode pairs SBIS/SBIC and SBRS/SBRC are exchanged; the jump is
    removed and the reference count of its label is decreased. The result
    stays false, even if the transformation has been carried out.
  }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { Steps forward from p until an instruction is found; next receives
      the last entry visited. True only if next is an instruction. }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      var
        found: boolean;
      begin
        repeat
          found:=GetNextInstruction(p,next);
          p:=next;
        until
          not(found) or
          not(assigned(next)) or
          (next.typ in [ait_instruction]);
        result:=assigned(next) and (next.typ in [ait_instruction]);
      end;

    var
      jump, guarded, follower: tai;
    begin
      result:=false;
      { the skip instruction must be followed by an unconditional jump to a label }
      if not GetNextInstruction(taicpu(p),jump) then
        exit;
      if (jump.typ<>ait_instruction) or
         not(taicpu(jump).opcode in [A_RJMP,A_JMP]) or
         (taicpu(jump).ops<>1) or
         (taicpu(jump).oper[0]^.typ<>top_ref) or
         (taicpu(jump).oper[0]^.ref^.offset<>0) or
         not(taicpu(jump).oper[0]^.ref^.symbol is TAsmLabel) then
        exit;
      { exactly one non-jump instruction between the jump and its target }
      if not GetNextInstructionWithoutLabel(jump,guarded) then
        exit;
      if (guarded.typ<>ait_instruction) or
         taicpu(guarded).is_jmp then
        exit;
      if not GetNextInstruction(guarded,follower) then
        exit;
      if not FindLabel(TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol),follower) then
        exit;

      DebugMsg('SkipJump2InvertedSkip', p);

      case taicpu(p).opcode of
        A_SBIS: taicpu(p).opcode:=A_SBIC;
        A_SBIC: taicpu(p).opcode:=A_SBIS;
        A_SBRS: taicpu(p).opcode:=A_SBRC;
        A_SBRC: taicpu(p).opcode:=A_SBRS;
      end;

      { the jump is gone, so its label has one user less }
      TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol).decrefs;
      asml.remove(jump);
      jump.free;
    end;
  { Folds the constant of an LDI into the instruction consuming it:

        ldi   reg0, imm
        <op>  reg1, reg0
        dealloc reg0
      becomes
        <op>i reg1, imm

    <op> is one of CP, MOV, AND, SUB (turned into CPI, LDI, ANDI, SUBI).
    reg1 must be one of r16..r31 and different from reg0, and reg0 must
    not be used after <op>. }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      consumer : tai;
      constreg : TRegister;
      alloc ,dealloc: tai_regalloc;
    begin
      Result:=false;
      if not MatchOpType(taicpu(p),top_reg,top_const) then
        exit;
      constreg:=taicpu(p).oper[0]^.reg;

      if not GetNextInstructionUsingReg(p, consumer, constreg) then
        exit;
      if not MatchInstruction(consumer,[A_CP,A_MOV,A_AND,A_SUB],2) or
         RegModifiedBetween(constreg, p, consumer) or
         not MatchOpType(taicpu(consumer),top_reg,top_reg) or
         not(getsupreg(taicpu(consumer).oper[0]^.reg) in [16..31]) or
         (taicpu(consumer).oper[1]^.reg<>constreg) or
         MatchOperand(taicpu(consumer).oper[0]^,taicpu(consumer).oper[1]^) then
        exit;

      { the constant register must be dead after the consumer }
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(consumer.next));
      if RegUsedAfterInstruction(taicpu(consumer).oper[1]^.reg, consumer, TmpUsedRegs) then
        exit;

      { switch to the immediate form of the opcode }
      case taicpu(consumer).opcode of
        A_CP:
          taicpu(consumer).opcode:=A_CPI;
        A_MOV:
          taicpu(consumer).opcode:=A_LDI;
        A_AND:
          taicpu(consumer).opcode:=A_ANDI;
        A_SUB:
          taicpu(consumer).opcode:=A_SUBI;
        else
          internalerror(2016111901);
      end;
      taicpu(consumer).loadconst(1, taicpu(p).oper[1]^.val);

      { drop the allocation info of the constant register, but only if
        both markers are found }
      alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
      dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(consumer.Next));
      if assigned(alloc) and assigned(dealloc) then
        begin
          asml.Remove(alloc);
          alloc.Free;
          asml.Remove(dealloc);
          dealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);

      result:=RemoveCurrentP(p);
    end;
  { Turns an STS to a plain absolute address into an OUT if the address
    lies in the window reachable by OUT: offsets 0..63 on CPUs with
    CPUAVR_NOMEMMAPPED_REGS, offsets 32..95 (converted to port 0..63)
    otherwise. }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      nomemmapped: boolean;
      iobase: longint;
    begin
      Result:=false;
      { lowest address which maps to an I/O port }
      nomemmapped:=CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype];
      if nomemmapped then
        iobase:=0
      else
        iobase:=32;

      { only pure constant addresses qualify }
      if (taicpu(p).oper[0]^.ref^.symbol<>nil) or
         (taicpu(p).oper[0]^.ref^.relsymbol<>nil) or
         (getsupreg(taicpu(p).oper[0]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[0]^.ref^.index)<>RS_NO) or
         (taicpu(p).oper[0]^.ref^.addressmode<>AM_UNCHANGED) then
        exit;
      if (taicpu(p).oper[0]^.ref^.offset<iobase) or
         (taicpu(p).oper[0]^.ref^.offset>iobase+63) then
        exit;

      DebugMsg('Peephole Sts2Out performed', p);

      taicpu(p).opcode:=A_OUT;
      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-iobase);
      result:=true;
    end;
  { Peephole optimizations starting at an LDS:
      1. LDS from an address in the I/O window becomes IN
      2. without cs_opt_level3: lds/lds/mov/mov becomes lds/lds
      3. with cs_opt_level3: lds ... mov becomes a single lds
    Only the first applicable one is carried out. }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      second, mov1, mov2, alloc, dealloc: tai;
      nomemmapped: boolean;
      iobase: longint;
    begin
      Result:=false;

      { lowest address which maps to an I/O port }
      nomemmapped:=CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype];
      if nomemmapped then
        iobase:=0
      else
        iobase:=32;

      { 1. plain constant address within the I/O window: use IN }
      if (taicpu(p).oper[1]^.ref^.symbol=nil) and
         (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
         (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
         (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
         (taicpu(p).oper[1]^.ref^.offset>=iobase) and
         (taicpu(p).oper[1]^.ref^.offset<=iobase+63) then
        begin
          DebugMsg('Peephole Lds2In performed', p);

          taicpu(p).opcode:=A_IN;
          taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-iobase);
          result:=true;
          exit;
        end;

      if not(cs_opt_level3 in current_settings.optimizerswitches) then
        begin
          { 2. turn
                 alloc reg0
                 alloc reg1
                 lds reg0, label
                 lds reg1, label
                 mov reg2, reg0
                 mov reg3, reg1
                 dealloc reg0
                 dealloc reg1
               into
                 lds reg2, label
                 lds reg3, label
          }
          if (taicpu(p).oper[0]^.typ=top_reg) and
             assigned(FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous))) and
             GetNextInstruction(p,second) and
             MatchInstruction(second,A_LDS) and
             (taicpu(second).oper[0]^.typ=top_reg) and
             assigned(FindRegAllocBackward(taicpu(second).oper[0]^.reg,tai(second.Previous))) and
             GetNextInstruction(second,mov1) and
             MatchInstruction(mov1,A_MOV) and
             (taicpu(mov1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
             assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(mov1.Next))) and
             GetNextInstruction(mov1,mov2) and
             MatchInstruction(mov2,A_MOV) and
             (taicpu(mov2).oper[1]^.reg=taicpu(second).oper[0]^.reg) and
             assigned(FindRegDeAlloc(taicpu(second).oper[0]^.reg,tai(mov2.Next))) then
            begin
              DebugMsg('Peephole LdsLdsMovMov2LdsLds performed', p);

              { first lds/mov pair }
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
              dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(mov1.Next));
              if assigned(alloc) and assigned(dealloc) then
                begin
                  asml.Remove(alloc);
                  alloc.Free;
                  asml.Remove(dealloc);
                  dealloc.Free;
                end;
              taicpu(p).oper[0]^.reg:=taicpu(mov1).oper[0]^.reg;
              RemoveInstruction(mov1);

              { second lds/mov pair }
              alloc:=FindRegAllocBackward(taicpu(second).oper[0]^.reg,tai(second.Previous));
              dealloc:=FindRegDeAlloc(taicpu(second).oper[0]^.reg,tai(mov2.Next));
              if assigned(alloc) and assigned(dealloc) then
                begin
                  asml.Remove(alloc);
                  alloc.Free;
                  asml.Remove(dealloc);
                  dealloc.Free;
                end;
              taicpu(second).oper[0]^.reg:=taicpu(mov2).oper[0]^.reg;
              RemoveInstruction(mov2);

              Result:=true;
            end;
          exit;
        end;

      { 3. turn
             alloc reg0
             lds reg0, label
             ...
             mov reg1, reg0
             dealloc reg0
           into
             lds reg1, label
      }
      if (taicpu(p).oper[0]^.typ=top_reg) and
         GetNextInstructionUsingReg(p,second,taicpu(p).oper[0]^.reg) and
         MatchInstruction(second,A_MOV) and
         (taicpu(second).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
         not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, second)) and
         not(RegUsedBetween(taicpu(second).oper[0]^.reg, p, second)) then
        begin
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(second.Next));
          { nothing is changed unless both markers have been found }
          if assigned(alloc) and assigned(dealloc) then
            begin
              DebugMsg('Peephole LdsMov2Lds performed', p);

              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;

              taicpu(p).oper[0]^.reg:=taicpu(second).oper[0]^.reg;
              RemoveInstruction(second);

              result:=true;
            end;
        end;
    end;
  { Peephole optimizations starting at "in rX,Y" with Y<=31:
      in/ori/out            becomes sbi
      in/andi/out           becomes cbi
      in/andi/breq or brne  becomes sbis or sbic followed by a jump
    In each case the ori/andi constant must select exactly one bit. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      bitop, third: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      if not GetNextInstruction(p,bitop) then
        exit;
      { all patterns below need a port number of at most 31 }
      if not(taicpu(p).oper[1]^.val<=31) then
        exit;

      {
            in rX,Y
            ori rX,n
            out Y,rX
        into
            sbi Y,lg(n)
      }
      if MatchInstruction(bitop,A_ORI) and
         (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
         (PopCnt(byte(taicpu(bitop).oper[1]^.val))=1) and
         GetNextInstruction(bitop,third) and
         MatchInstruction(third,A_OUT) and
         MatchOperand(taicpu(third).oper[1]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(third).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole InOriOut2Sbi performed', p);

          taicpu(p).opcode:=A_SBI;
          { operand 0: port, operand 1: bit number }
          taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
          taicpu(p).loadconst(1,BsrByte(taicpu(bitop).oper[1]^.val));

          asml.Remove(bitop);
          bitop.Free;
          asml.Remove(third);
          third.Free;

          result:=true;
        end
      {
            in rX,Y
            andi rX,not(n)
            out Y,rX
        into
            cbi Y,lg(n)
      }
      else if MatchInstruction(bitop,A_ANDI) and
         (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
         (PopCnt(byte(not(taicpu(bitop).oper[1]^.val)))=1) and
         GetNextInstruction(bitop,third) and
         MatchInstruction(third,A_OUT) and
         MatchOperand(taicpu(third).oper[1]^,taicpu(p).oper[0]^) and
         MatchOperand(taicpu(third).oper[0]^,taicpu(p).oper[1]^) then
        begin
          DebugMsg('Peephole InAndiOut2Cbi performed', p);

          taicpu(p).opcode:=A_CBI;
          { operand 0: port, operand 1: number of the cleared bit }
          taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
          taicpu(p).loadconst(1,BsrByte(not(taicpu(bitop).oper[1]^.val)));

          asml.Remove(bitop);
          bitop.Free;
          asml.Remove(third);
          third.Free;

          result:=true;
        end
      {
            in rX,Y
            andi rX,n
            breq/brne L1
        into
            sbis/sbic Y,lg(n)
            jmp L1
          .Ltemp:
      }
      else if MatchInstruction(bitop,A_ANDI) and
         (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
         (PopCnt(byte(taicpu(bitop).oper[1]^.val))=1) and
         GetNextInstruction(bitop,third) and
         MatchInstruction(third,A_BRxx) and
         (taicpu(third).condition in [C_EQ,C_NE]) then
        begin
          { breq is taken when the bit is clear, so the jump replacing it
            must be skipped when the bit is set (and vice versa for brne) }
          if taicpu(third).condition=C_EQ then
            taicpu(p).opcode:=A_SBIS
          else
            taicpu(p).opcode:=A_SBIC;

          DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);

          taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
          taicpu(p).loadconst(1,BsrByte(taicpu(bitop).oper[1]^.val));

          asml.Remove(bitop);
          bitop.Free;

          { the conditional branch becomes an unconditional jump }
          taicpu(third).condition:=C_None;
          if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
            taicpu(third).opcode:=A_JMP
          else
            taicpu(third).opcode:=A_RJMP;

          { fresh label right behind the jump }
          current_asmdata.getjumplabel(l);
          l.increfs;
          asml.InsertAfter(tai_label.create(l), third);

          result:=true;
        end;
    end;
  { Turns
        in rx, y
        sbr* rx, z
    into
        sbi* y, z
    if y is in 0..31 and rx is deallocated after the skip instruction.
    Afterwards InvertSkipInstruction is tried on p in any case. }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      loader : tai;
      testreg : TRegister;
    begin
      Result:=false;
      if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) then
        begin
          testreg:=taicpu(p).oper[0]^.reg;
          if assigned(FindRegDeAlloc(testreg,tai(p.next))) and
             GetLastInstruction(p,loader) and
             (loader.typ=ait_instruction) and
             (taicpu(loader).opcode=A_IN) and
             (taicpu(loader).ops=2) and
             (taicpu(loader).oper[1]^.typ=top_const) and
             (taicpu(loader).oper[1]^.val in [0..31]) and
             MatchOperand(taicpu(loader).oper[0]^,testreg) and
             not(RegModifiedBetween(testreg, loader, p)) then
            begin
              { test the port bit directly instead of the register bit }
              if taicpu(p).opcode=A_SBRS then
                taicpu(p).opcode:=A_SBIS
              else
                taicpu(p).opcode:=A_SBIC;

              taicpu(p).loadconst(0, taicpu(loader).oper[1]^.val);

              DebugMsg('Peephole InSbrx2Sbix performed', p);

              asml.Remove(loader);
              loader.free;

              result:=true;
            end;
        end;

      if InvertSkipInstruction(p) then
        result:=true;
    end;
  { Peephole optimizations for a skip instruction followed by jumps.
    Returns true if InvertSkipInstruction reports a change or if the
    jmp/jmp pattern described below has been simplified. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;
      {
        Turn
            sbic/sbis X, y
            jmp .L1
            op
          .L1:
        into
            sbis/sbic X,y
            op
          .L1:
      }
      if InvertSkipInstruction(p) then
        result:=true
      {
        Turn
            sbiX X, y
            jmp .L1
            jmp .L2
          .L1:
            op
          .L2:
        into
            sbiX X,y
          .L1:
            op
          .L2:
      }
      else if GetNextInstruction(p, hp1) and
         { hp1: jump to .L1 }
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ = top_ref) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp2: jump to .L2 }
         GetNextInstruction(hp1, hp2) and
         (hp2.typ=ait_instruction) and
         (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp2).ops>0) and
         (taicpu(hp2).oper[0]^.typ = top_ref) and
         (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp3: label .L1, the target of the first jump }
         GetNextInstruction(hp2, hp3) and
         (hp3.typ=ait_label) and
         (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
         { hp4: the single guarded instruction }
         GetNextInstruction(hp3, hp4) and
         (hp4.typ=ait_instruction) and
         { hp5: label .L2, the target of the second jump; its type must be
           verified before it is accessed as tai_label }
         GetNextInstruction(hp4, hp5) and
         (hp5.typ=ait_label) and
         (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);

          { both jumps disappear, so each label loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;

          AsmL.remove(hp1);
          taicpu(hp1).Free;

          AsmL.remove(hp2);
          taicpu(hp2).Free;

          result:=true;
        end;
    end;
  { Peephole optimizations starting at a two operand ANDI:
      - a single bit test followed by a branch over one instruction
        becomes a skip instruction (sbrs/sbrc)
      - an ANDI whose register and flags are not needed afterwards is
        removed }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      branch, guarded, target: tai;
      bitno : longint;
    begin
      Result:=false;
      { both patterns need exactly two operands }
      if taicpu(p).ops<>2 then
        exit;

      {
        Turn
            andi rx, #pow2
            brne l
            <op>
          l:
        Into
            sbrs rx, #(bit number of pow2)
            <op>
          l:
        (breq is handled likewise and yields sbrc)
      }
      if (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,bitno) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetNextInstruction(p,branch) and
         (branch.typ=ait_instruction) and
         (taicpu(branch).opcode=A_BRxx) and
         (taicpu(branch).condition in [C_EQ,C_NE]) and
         (taicpu(branch).ops>0) and
         (taicpu(branch).oper[0]^.typ = top_ref) and
         (taicpu(branch).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstruction(branch,guarded) and
         (guarded.typ=ait_instruction) and
         GetNextInstruction(guarded,target) and
         (target.typ=ait_label) and
         (taicpu(branch).oper[0]^.ref^.symbol=tai_label(target).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);

          { the mask is replaced by the number of the tested bit }
          taicpu(p).oper[1]^.val:=bitno;

          if taicpu(branch).condition=C_NE then
            taicpu(p).opcode:=A_SBRS
          else
            taicpu(p).opcode:=A_SBRC;

          asml.Remove(branch);
          branch.free;

          result:=true;
          exit;
        end;

      {
        Remove
            andi rx, #y
            dealloc rx
      }
      if (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
          not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          DebugMsg('Redundant Andi removed', p);

          result:=RemoveCurrentP(p);
        end;
    end;
  { Turns
        add rX, <zero register>
        adc rY, rZ
    into
        add rY, rZ

    The ADD is removed, so the following ADC must no longer consume the
    carry flag and is therefore converted into a plain ADD (this mirrors
    the SBC -> SUB conversion done by OptPass1SUB).
    On success p is advanced to the former ADC and true is returned. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);

          { without the preceding add the carry input would be stale }
          taicpu(hp1).opcode:=A_ADD;
          RemoveCurrentP(p, hp1);
          Result := True;
        end;
    end;
  { Turns
        sub rX, <zero register>
        sbc rY, rZ
    into
        sub rY, rZ
    On success p is advanced to the former SBC and true is returned. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;
      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p, follower) then
        exit;
      if not MatchInstruction(follower,A_SBC) then
        exit;

      DebugMsg('Peephole SubSbc2Sub performed', p);

      { the sbc takes over the role of the removed sub }
      taicpu(follower).opcode:=A_SUB;
      RemoveCurrentP(p, follower);
      Result := True;
    end;
  { Folds
        mov reg2,reg0
        mov reg3,reg1
    to
        movw reg2,reg0
    if the CPU has MOVW, reg0 and reg2 have even register numbers and
    reg1/reg3 are the registers directly following reg0/reg2. }
  function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
    var
      highmov: tai;
    begin
      result:=false;
      if not(CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) then
        exit;

      { first mov: register to register }
      if (taicpu(p).ops<>2) or
         (taicpu(p).oper[0]^.typ<>top_reg) or
         (taicpu(p).oper[1]^.typ<>top_reg) then
        exit;

      { second mov: register to register as well }
      if not getnextinstruction(p,highmov) then
        exit;
      if (highmov.typ<>ait_instruction) or
         (taicpu(highmov).opcode<>A_MOV) or
         (taicpu(highmov).ops<>2) or
         (taicpu(highmov).oper[0]^.typ<>top_reg) or
         (taicpu(highmov).oper[1]^.typ<>top_reg) then
        exit;

      { both moves together must form an aligned register pair move }
      if (getsupreg(taicpu(highmov).oper[0]^.reg)<>getsupreg(taicpu(p).oper[0]^.reg)+1) or
         ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)<>0) or
         ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)<>0) or
         (getsupreg(taicpu(highmov).oper[1]^.reg)<>getsupreg(taicpu(p).oper[1]^.reg)+1) then
        exit;

      DebugMsg('Peephole MovMov2Movw performed', p);

      AllocRegBetween(taicpu(highmov).oper[0]^.reg,p,highmov,UsedRegs);
      AllocRegBetween(taicpu(highmov).oper[1]^.reg,p,highmov,UsedRegs);

      taicpu(p).opcode:=A_MOVW;
      asml.remove(highmov);
      highmov.free;
      result:=true;
    end;
  { Peephole optimizations starting at "clr rX":
      - a clr whose register is overwritten by the next mov/ld using it
        is removed
      - a clr feeding only the second operand of an adc/sbc is removed,
        the adc/sbc takes the default zero register instead }
  function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
    var
      user: tai;
      clrreg: TRegister;
      alloc, dealloc: tai_regalloc;
    begin
      Result:=false;

      { conditions shared by both patterns }
      if (taicpu(p).ops<>1) or
         (taicpu(p).oper[0]^.typ<>top_reg) then
        exit;
      clrreg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p, user, clrreg) or
         RegModifiedBetween(clrreg, p, user) or
         (user.typ<>ait_instruction) then
        exit;

      { turn the common
          clr rX
          mov/ld rX, rY
        into
          mov/ld rX, rY
      }
      if (taicpu(user).opcode in [A_MOV,A_LD]) and
         (taicpu(user).ops>0) and
         (taicpu(user).oper[0]^.typ=top_reg) and
         (taicpu(user).oper[0]^.reg=clrreg) then
        begin
          DebugMsg('Peephole ClrMov2Mov performed', p);

          result:=RemoveCurrentP(p);
        end
      { turn
          clr rX
          ...
          adc rY, rX
        into
          ...
          adc rY, r1
      }
      else if (taicpu(user).opcode in [A_ADC,A_SBC]) and
         (taicpu(user).ops=2) and
         (taicpu(user).oper[1]^.typ=top_reg) and
         (taicpu(user).oper[1]^.reg=clrreg) and
         (taicpu(user).oper[0]^.reg<>clrreg) and
         assigned(FindRegDeAlloc(clrreg,tai(user.Next))) then
        begin
          DebugMsg('Peephole ClrAdc2Adc performed', p);

          taicpu(user).oper[1]^.reg:=GetDefaultZeroReg;

          { the cleared register is not needed at all anymore }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(user.Next));
          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          result:=RemoveCurrentP(p);
        end;
    end;
  821. function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
  822. var
  823. hp1, hp2, hp3: tai;
  824. begin
  825. Result:=false;
  826. { turn
  827. push reg0
  828. push reg1
  829. pop reg3
  830. pop reg2
  831. into
  832. movw reg2,reg0
  833. or
  834. mov reg3,reg1
  835. mov reg2,reg0
  836. }
  837. if GetNextInstruction(p,hp1) and
  838. MatchInstruction(hp1,A_PUSH) and
  839. GetNextInstruction(hp1,hp2) and
  840. MatchInstruction(hp2,A_POP) and
  841. GetNextInstruction(hp2,hp3) and
  842. MatchInstruction(hp3,A_POP) then
  843. begin
  844. if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
  845. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
  846. ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
  847. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
  848. ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
  849. begin
  850. DebugMsg('Peephole PushPushPopPop2Movw performed', p);
  851. taicpu(hp3).ops:=2;
  852. taicpu(hp3).opcode:=A_MOVW;
  853. taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
  854. { We're removing 3 concurrent instructions. Remove hp1
  855. and hp2 manually instead of calling RemoveCurrentP
  856. as this means we won't be calling UpdateUsedRegs 3 times }
  857. asml.Remove(hp1);
  858. hp1.Free;
  859. asml.Remove(hp2);
  860. hp2.Free;
  861. { By removing p last, we've guaranteed that p.Next is
  862. valid (storing it prior to removing the instructions
  863. may result in a dangling pointer if hp1 immediately
  864. follows p), and because hp1, hp2 and hp3 came from
  865. sequential calls to GetNextInstruction, it is
  866. guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
  867. RemoveCurrentP(p, hp3);
  868. Result := True;
  869. end
  870. else
  871. begin
  872. DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
  873. taicpu(p).ops:=2;
  874. taicpu(p).opcode:=A_MOV;
  875. taicpu(hp1).ops:=2;
  876. taicpu(hp1).opcode:=A_MOV;
  877. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  878. taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
  879. taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
  880. taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
  881. { life range of reg2 and reg3 is increased, fix register allocation entries }
  882. TransferUsedRegs(TmpUsedRegs);
  883. UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
  884. AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
  885. TransferUsedRegs(TmpUsedRegs);
  886. AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
  887. IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
  888. UpdateUsedRegs(tai(p.Next));
  889. asml.Remove(hp2);
  890. hp2.Free;
  891. asml.Remove(hp3);
  892. hp3.Free;
  893. result:=true;
  894. end
  895. end;
  896. end;
  897. function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
  898. var
  899. hp1: tai;
  900. begin
  901. Result:=false;
  902. if (cs_opt_level4 in current_settings.optimizerswitches) and
  903. GetNextInstruction(p,hp1) and
  904. MatchInstruction(hp1,A_RET) then
  905. begin
  906. DebugMsg('Peephole CallReg2Jmp performed', p);
  907. taicpu(p).opcode:=A_JMP;
  908. asml.Remove(hp1);
  909. hp1.Free;
  910. result:=true;
  911. end;
  912. end;
  913. function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
  914. var
  915. hp1: tai;
  916. begin
  917. Result:=false;
  918. if (cs_opt_level4 in current_settings.optimizerswitches) and
  919. GetNextInstruction(p,hp1) and
  920. MatchInstruction(hp1,A_RET) then
  921. begin
  922. DebugMsg('Peephole RCallReg2RJmp performed', p);
  923. taicpu(p).opcode:=A_RJMP;
  924. asml.Remove(hp1);
  925. hp1.Free;
  926. result:=true;
  927. end;
  928. end;
  929. function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  930. var
  931. hp1, hp2: tai;
  932. i : Integer;
  933. alloc, dealloc: tai_regalloc;
  934. begin
  935. Result:=false;
  936. { change
  937. mov reg0, reg1
  938. dealloc reg0
  939. into
  940. dealloc reg0
  941. }
  942. if MatchOpType(taicpu(p),top_reg,top_reg) then
  943. begin
  944. TransferUsedRegs(TmpUsedRegs);
  945. UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
  946. if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
  947. { reg. allocation information before calls is not perfect, so don't do this before
  948. calls/icalls }
  949. GetNextInstruction(p,hp1) and
  950. not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
  951. begin
  952. DebugMsg('Peephole Mov2Nop performed', p);
  953. RemoveCurrentP(p, hp1);
  954. Result := True;
  955. exit;
  956. end;
  957. end;
  958. { turn
  959. mov reg0, reg1
  960. <op> reg2,reg0
  961. dealloc reg0
  962. into
  963. <op> reg2,reg1
  964. }
  965. if MatchOpType(taicpu(p),top_reg,top_reg) and
  966. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  967. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  968. (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
  969. A_OUT,A_IN]) or
  970. { the reference register of ST/STD cannot be replaced }
  971. (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
  972. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  973. {(taicpu(hp1).ops=1) and
  974. (taicpu(hp1).oper[0]^.typ = top_reg) and
  975. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
  976. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  977. begin
  978. DebugMsg('Peephole MovOp2Op 1 performed', p);
  979. for i := 0 to taicpu(hp1).ops-1 do
  980. if taicpu(hp1).oper[i]^.typ=top_reg then
  981. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  982. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  983. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  984. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  985. if assigned(alloc) and assigned(dealloc) then
  986. begin
  987. asml.Remove(alloc);
  988. alloc.Free;
  989. asml.Remove(dealloc);
  990. dealloc.Free;
  991. end;
  992. { life range of reg1 is increased }
  993. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  994. { p will be removed, update used register as we continue
  995. with the next instruction after p }
  996. result:=RemoveCurrentP(p);
  997. end
  998. { turn
  999. mov reg1, reg0
  1000. <op> reg1,xxxx
  1001. dealloc reg1
  1002. into
  1003. <op> reg1,xxx
  1004. }
  1005. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  1006. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1007. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1008. MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
  1009. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1010. begin
  1011. DebugMsg('Peephole MovOp2Op 2 performed', p);
  1012. for i := 0 to taicpu(hp1).ops-1 do
  1013. if taicpu(hp1).oper[i]^.typ=top_reg then
  1014. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  1015. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  1016. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1017. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  1018. if assigned(alloc) and assigned(dealloc) then
  1019. begin
  1020. asml.Remove(alloc);
  1021. alloc.Free;
  1022. asml.Remove(dealloc);
  1023. dealloc.Free;
  1024. end;
  1025. { life range of reg1 is increased }
  1026. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  1027. { p will be removed, update used register as we continue
  1028. with the next instruction after p }
  1029. result:=RemoveCurrentP(p);
  1030. end
  1031. { remove
  1032. mov reg0,reg0
  1033. }
  1034. else if (taicpu(p).ops=2) and
  1035. (taicpu(p).oper[0]^.typ = top_reg) and
  1036. (taicpu(p).oper[1]^.typ = top_reg) and
  1037. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1038. begin
  1039. DebugMsg('Peephole RedundantMov performed', p);
  1040. result:=RemoveCurrentP(p);
  1041. end
  1042. {
  1043. Turn
  1044. mov rx,ry
  1045. op rx,rz
  1046. mov ry, rx
  1047. Into
  1048. op ry,rz
  1049. }
  1050. else if (taicpu(p).ops=2) and
  1051. MatchOpType(taicpu(p),top_reg,top_reg) and
  1052. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1053. (hp1.typ=ait_instruction) and
  1054. (taicpu(hp1).ops >= 1) and
  1055. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1056. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  1057. MatchInstruction(hp2,A_MOV) and
  1058. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1059. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1060. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  1061. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  1062. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
  1063. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
  1064. A_INC,A_DEC,
  1065. A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
  1066. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  1067. begin
  1068. DebugMsg('Peephole MovOpMov2Op performed', p);
  1069. if (taicpu(hp1).ops=2) and
  1070. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1071. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1072. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1073. taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;
  1074. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1075. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  1076. if assigned(alloc) and assigned(dealloc) then
  1077. begin
  1078. asml.Remove(alloc);
  1079. alloc.Free;
  1080. asml.Remove(dealloc);
  1081. dealloc.Free;
  1082. end;
  1083. asml.remove(hp2);
  1084. hp2.free;
  1085. result:=RemoveCurrentP(p);
  1086. end
  1087. {
  1088. Turn
  1089. mov rx,ry
  1090. op rx,rw
  1091. mov rw,rx
  1092. Into
  1093. op rw,ry
  1094. }
  1095. else if (taicpu(p).ops=2) and
  1096. MatchOpType(taicpu(p),top_reg,top_reg) and
  1097. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1098. (hp1.typ=ait_instruction) and
  1099. (taicpu(hp1).ops = 2) and
  1100. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1101. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  1102. (hp2.typ=ait_instruction) and
  1103. (taicpu(hp2).opcode=A_MOV) and
  1104. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1105. (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1106. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  1107. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  1108. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1109. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
  1110. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  1111. begin
  1112. DebugMsg('Peephole MovOpMov2Op2 performed', p);
  1113. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1114. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1115. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1116. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  1117. if assigned(alloc) and assigned(dealloc) then
  1118. begin
  1119. asml.Remove(alloc);
  1120. alloc.Free;
  1121. asml.Remove(dealloc);
  1122. dealloc.Free;
  1123. end;
  1124. result:=RemoveCurrentP(p);
  1125. asml.remove(hp2);
  1126. hp2.free;
  1127. end
  1128. {
  1129. This removes the first mov from
  1130. mov rX,...
  1131. mov rX,...
  1132. }
  1133. else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) and
  1134. { test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
  1135. MatchInstruction(hp1,A_MOV) and
  1136. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
  1137. while MatchInstruction(hp1,A_MOV) and
  1138. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1139. { don't remove the first mov if the second is a mov rX,rX }
  1140. not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
  1141. begin
  1142. DebugMsg('Peephole MovMov2Mov 1 performed', p);
  1143. RemoveCurrentP(p,hp1);
  1144. Result := True;
  1145. GetNextInstruction(hp1,hp1);
  1146. if not assigned(hp1) then
  1147. break;
  1148. end
  1149. {
  1150. This removes the second mov from
  1151. mov rX,rY
  1152. ...
  1153. mov rX,rY
  1154. if rX and rY are not modified in-between
  1155. }
  1156. else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
  1157. MatchInstruction(hp1,A_MOV) and
  1158. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1159. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
  1160. not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  1161. begin
  1162. DebugMsg('Peephole MovMov2Mov 2 performed', p);
  1163. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1164. RemoveInstruction(hp1);
  1165. Result := True;
  1166. end;
  1167. end;
  1168. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1169. var
  1170. hp1,hp2: tai;
  1171. begin
  1172. result := false;
  1173. case p.typ of
  1174. ait_instruction:
  1175. begin
  1176. {
  1177. change
  1178. <op> reg,x
  1179. cp reg,r1
  1180. into
  1181. <op>s reg,x
  1182. }
  1183. { this optimization can applied only to the currently enabled operations because
  1184. the other operations do not update all flags and FPC does not track flag usage }
  1185. if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
  1186. A_INC,A_LSL,A_LSR,
  1187. A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
  1188. GetNextInstruction(p, hp1) and
  1189. ((MatchInstruction(hp1, A_CP) and
  1190. (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  1191. (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
  1192. ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1193. (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
  1194. (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
  1195. A_LSL,A_LSR,
  1196. A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
  1197. (MatchInstruction(hp1, A_CPI) and
  1198. (taicpu(p).opcode = A_ANDI) and
  1199. (taicpu(p).oper[1]^.typ=top_const) and
  1200. (taicpu(hp1).oper[1]^.typ=top_const) and
  1201. (taicpu(hp1).oper[1]^.val=0))) and
  1202. GetNextInstruction(hp1, hp2) and
  1203. { be careful here, following instructions could use other flags
  1204. however after a jump fpc never depends on the value of flags }
  1205. { All above instructions set Z and N according to the following
  1206. Z := result = 0;
  1207. N := result[7];
  1208. EQ = Z=1; NE = Z=0;
  1209. MI = N=1; PL = N=0; }
  1210. MatchInstruction(hp2, A_BRxx) and
  1211. ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
  1212. { sub/sbc set all flags }
  1213. (taicpu(p).opcode in [A_SUB,A_SBI])){ and
  1214. no flag allocation tracking implemented yet on avr
  1215. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
  1216. begin
  1217. { move flag allocation if possible }
  1218. { no flag allocation tracking implemented yet on avr
  1219. GetLastInstruction(hp1, hp2);
  1220. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  1221. if assigned(hp2) then
  1222. begin
  1223. asml.Remove(hp2);
  1224. asml.insertbefore(hp2, p);
  1225. end;
  1226. }
  1227. // If we compare to the same value we are masking then invert the comparison
  1228. if (taicpu(hp1).opcode=A_CPI) or
  1229. { sub/sbc with reverted? }
  1230. ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
  1231. taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
  1232. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1233. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
  1234. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1235. DebugMsg('Peephole OpCp2Op performed', p);
  1236. asml.remove(hp1);
  1237. hp1.free;
  1238. Result:=true;
  1239. end
  1240. else
  1241. case taicpu(p).opcode of
  1242. A_LDI:
  1243. Result:=OptPass1LDI(p);
  1244. A_STS:
  1245. Result:=OptPass1STS(p);
  1246. A_LDS:
  1247. Result:=OptPass1LDS(p);
  1248. A_IN:
  1249. Result:=OptPass1IN(p);
  1250. A_SBRS,
  1251. A_SBRC:
  1252. Result:=OptPass1SBR(p);
  1253. A_ANDI:
  1254. Result:=OptPass1ANDI(p);
  1255. A_ADD:
  1256. Result:=OptPass1ADD(p);
  1257. A_SUB:
  1258. Result:=OptPass1SUB(p);
  1259. A_CLR:
  1260. Result:=OptPass1CLR(p);
  1261. A_PUSH:
  1262. Result:=OptPass1PUSH(p);
  1263. A_CALL:
  1264. Result:=OptPass1CALL(p);
  1265. A_RCALL:
  1266. Result:=OptPass1RCALL(p);
  1267. A_MOV:
  1268. Result:=OptPass1MOV(p);
  1269. A_SBIC,
  1270. A_SBIS:
  1271. Result:=OptPass1SBI(p);
  1272. end;
  1273. end;
  1274. end;
  1275. end;
  1276. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  1277. begin
  1278. result := false;
  1279. case p.typ of
  1280. ait_instruction:
  1281. begin
  1282. case taicpu(p).opcode of
  1283. A_MOV:
  1284. Result:=OptPass2MOV(p);
  1285. end;
  1286. end;
  1287. end;
  1288. end;
  1289. begin
  1290. casmoptimizer:=TCpuAsmOptimizer;
  1291. End.