aoptcpu.pas 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
  type
    { CPU-specific assembler optimizer: extends TAsmOptimizer with register
      usage queries and the peephole rewrites implemented in this unit. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);

      { advances to the next instruction after Current that uses reg }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
      { register usage queries overridden for this CPU }
      function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
      function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
      function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

      { rewrites "skip; jmp .Lx; op; .Lx:" into "inverted skip; op" }
      function InvertSkipInstruction(var p: tai): boolean;

      { uses the same constructor as TAopObj }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    private
      { pass 1 handlers, one per opcode (family) }
      function OptPass1ADD(var p: tai): boolean;
      function OptPass1ANDI(var p: tai): boolean;
      function OptPass1CALL(var p: tai): boolean;
      function OptPass1CLR(var p: tai): boolean;
      function OptPass1IN(var p: tai): boolean;
      function OptPass1LDI(var p: tai): boolean;
      function OptPass1LDS(var p: tai): boolean;
      function OptPass1MOV(var p: tai): boolean;
      function OptPass1PUSH(var p: tai): boolean;
      function OptPass1RCALL(var p: tai): boolean;
      function OptPass1SBI(var p: tai): boolean;
      function OptPass1SBR(var p: tai): boolean;
      function OptPass1STS(var p: tai): boolean;
      function OptPass1SUB(var p: tai): boolean;

      { pass 2 handlers }
      function OptPass2MOV(var p: tai): boolean;
    end;
  52. Implementation
  53. uses
  54. cutils,
  55. verbose,
  56. cpuinfo,
  57. aasmbase,aasmcpu,aasmdata,
  58. aoptutils,
  59. globals,globtype,
  60. cgutils;
  type
    { set of opcodes, used by the MatchInstruction overloads that accept
      several alternative opcodes at once }
    TAsmOpSet = set of TAsmOp;
  { Returns true when p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(p).condition=C_None;
    end;
  { Compares two references field by field.  References that carry any
    volatility flag are never reported as equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { a volatile reference on either side rules out equality }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      Result:=
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.offset = r2.offset) and
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr) and
        (r1.addressmode = r2.addressmode);
    end;
  { Returns true when both operands have the same type and the same payload
    (constant value, register, or reference as judged by RefsEqual).
    Operand types other than const/reg/ref never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ=oper2.typ then
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          { every other operand type keeps the default: no match }
        end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg = reg;
    end;
  { Returns true when instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode = op;
    end;
  { Returns true when instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode in ops;
    end;
  { Returns true when instr is an instruction whose opcode is a member of ops
    and which has exactly opcount operands. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=(taicpu(instr).opcode in ops) and
          (taicpu(instr).ops=opcount);
    end;
  { With DEBUG_AOPTCPU defined, inserts s as an assembler comment in front of
    p; without it the procedure is an empty, inlined no-op. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Reports whether Reg takes part in instruction p1.  Handles the cases
    where a register is involved without being a plain operand:
      - the listed multiply opcodes are treated as using R0 and R1,
      - MOVW touches both operands and the register following each of them,
      - ADIW touches its first operand and the register following it.
    Everything else is delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if p1.typ = ait_instruction then
        case taicpu(p1).opcode of
          A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU:
            if (getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1) then
              begin
                Result:=true;
                exit;
              end;
          A_MOVW:
            if (TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or
               (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
               (taicpu(p1).oper[0]^.reg=reg) or
               (taicpu(p1).oper[1]^.reg=reg) then
              begin
                Result:=true;
                exit;
              end;
          A_ADIW:
            if (TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or
               (taicpu(p1).oper[0]^.reg=reg) then
              begin
                Result:=true;
                exit;
              end;
        end;
      { no special case applied: ask the generic implementation }
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Starting after Current, moves Next forward.  Without cs_opt_level3 it
    stops at the very next instruction; with it, it keeps going until
    GetNextInstruction fails, a non-instruction is reached, an instruction
    using reg is found, or a call/jump is hit.  Returns the result of the
    last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    var
      searchfurther: boolean;
    begin
      searchfurther:=cs_opt_level3 in current_settings.optimizerswitches;
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not searchfurther then
            break;
          if not Result then
            break;
          { Next is only treated as taicpu once it is known to be an instruction }
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Returns true when instruction hp overwrites reg with a value that does
    not depend on the previous contents of reg:
      - LDI/MOV/LDS with reg as destination and not also as source register,
      - LD/LDD/LPM with reg as destination and reg not part of the address,
      - MOVW when reg belongs to the destination pair and not to the source
        pair,
      - POP into reg.
    Returns false for nil, non-instructions and every other opcode.

    MOVW operates on register pairs: the operand names the first register and
    the pair consists of that register and the one following it.  This is the
    same convention RegInInstruction and InstructionLoadsFromReg use
    (operand+1 = reg).  The previous code tested reg+1 = operand instead,
    which reported the register *below* the destination pair as overwritten
    and missed the upper half of the pair. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_LDI,A_MOV,A_LDS:
          Result := (reg=p.oper[0]^.reg) and
            ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg));
        A_LD,A_LDD,A_LPM:
          Result := (reg=p.oper[0]^.reg) and
            not(RegInRef(reg,p.oper[1]^.ref^));
        A_MOVW:
          Result :=
            { reg is one of the two destination registers ... }
            ((reg=p.oper[0]^.reg) or
             (reg=TRegister(ord(p.oper[0]^.reg)+1))) and
            { ... and none of the two source registers }
            not(reg=p.oper[1]^.reg) and
            not(reg=TRegister(ord(p.oper[1]^.reg)+1));
        A_POP:
          Result := reg=p.oper[0]^.reg;
      else
        Result := false;
      end;
    end;
  { Returns true when instruction hp reads reg, either as a register operand
    or as base/index of a memory reference.  POP is ignored on purpose, and
    for opcodes that only write their first operand that operand is skipped.
    MOVW (source) and ADIW (operand 0) also read the register following the
    named one. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      ins: taicpu;
      first, idx: longint;
    begin
      Result := false;
      if not assigned(hp) then
        exit;
      if hp.typ <> ait_instruction then
        exit;
      ins:=taicpu(hp);
      { we do not care about the stack pointer }
      if ins.opcode in [A_POP] then
        exit;
      { first operand only written? then skip it }
      if ins.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        first:=1
      else
        first:=0;
      for idx:=first to ins.ops-1 do
        begin
          case ins.oper[idx]^.typ of
            top_reg:
              Result :=
                (ins.oper[idx]^.reg = reg) or
                { MOVW: second register of the source pair }
                ((idx=1) and (ins.opcode=A_MOVW) and (getsupreg(ins.oper[idx]^.reg)+1=getsupreg(reg))) or
                { ADIW: second register of the operand pair }
                ((idx=0) and (ins.opcode=A_ADIW) and (getsupreg(ins.oper[idx]^.reg)+1=getsupreg(reg)));
            top_ref:
              Result :=
                (ins.oper[idx]^.ref^.base = reg) or
                (ins.oper[idx]^.ref^.index = reg);
          end;
          { Bailout if we found something }
          if Result then
            exit;
        end;
    end;
  203. {
  204. Turns
  205. sbis ?
  206. jmp .Lx
  207. op
  208. .Lx:
  209. Into
  210. sbic ?
  211. op
  212. For all types of skip instructions
  213. }
  { p is expected to be a skip instruction (SBIS/SBIC/SBRS/SBRC).  When it is
    followed by an unconditional jump to a label, exactly one non-jump
    instruction, and then that label, the skip condition is inverted and the
    jump is removed.  Returns false in every case, including after a
    successful rewrite: result is never set to true here. }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { Walks forward from p until an instruction is found; returns true and
      leaves it in next on success. }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      begin
        repeat
          result:=GetNextInstruction(p,next);
          p:=next;
        until
          (not result) or
          (not assigned(next)) or
          (next.typ=ait_instruction);
        result:=assigned(next) and (next.typ=ait_instruction);
      end;

    var
      jump, guarded, follower: tai;
    begin
      result:=false;
      if GetNextInstruction(taicpu(p),jump) and
         (jump.typ=ait_instruction) and
         (taicpu(jump).opcode in [A_RJMP,A_JMP]) and
         (taicpu(jump).ops=1) and
         (taicpu(jump).oper[0]^.typ=top_ref) and
         (taicpu(jump).oper[0]^.ref^.offset=0) and
         (taicpu(jump).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstructionWithoutLabel(jump,guarded) and
         (guarded.typ=ait_instruction) and
         (not taicpu(guarded).is_jmp) and
         GetNextInstruction(guarded,follower) and
         FindLabel(TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol),follower) then
        begin
          DebugMsg('SkipJump2InvertedSkip', p);
          { flip the skip condition; other opcodes are left untouched }
          case taicpu(p).opcode of
            A_SBIS: taicpu(p).opcode:=A_SBIC;
            A_SBIC: taicpu(p).opcode:=A_SBIS;
            A_SBRS: taicpu(p).opcode:=A_SBRC;
            A_SBRC: taicpu(p).opcode:=A_SBRS;
          end;
          { the jump disappears, so its target loses one reference }
          TAsmLabel(taicpu(jump).oper[0]^.ref^.symbol).decrefs;
          asml.remove(jump);
          jump.free;
        end;
    end;
  { Folds a constant load into the instruction consuming it:

        ldi  reg0, imm
        <op> reg1, reg0
        dealloc reg0
      into
        <op>i reg1, imm

    for <op> in CP/MOV/AND/SUB, provided reg1 is one of r16..r31, differs
    from reg0, and reg0 is not used after <op>. }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      user : tai;
      regalloc, regdealloc : tai_regalloc;
    begin
      Result:=false;
      if not(MatchOpType(taicpu(p),top_reg,top_const) and
             GetNextInstructionUsingReg(p, user, taicpu(p).oper[0]^.reg) and
             MatchInstruction(user,[A_CP,A_MOV,A_AND,A_SUB],2) and
             (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, user)) and
             MatchOpType(taicpu(user),top_reg,top_reg) and
             (getsupreg(taicpu(user).oper[0]^.reg) in [16..31]) and
             (taicpu(user).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
             not(MatchOperand(taicpu(user).oper[0]^,taicpu(user).oper[1]^))) then
        exit;

      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(user.next));
      { the loaded register must be dead after its single use }
      if RegUsedAfterInstruction(taicpu(user).oper[1]^.reg, user, TmpUsedRegs) then
        exit;

      { switch the consumer to its immediate form }
      case taicpu(user).opcode of
        A_CP:
          taicpu(user).opcode:=A_CPI;
        A_MOV:
          taicpu(user).opcode:=A_LDI;
        A_AND:
          taicpu(user).opcode:=A_ANDI;
        A_SUB:
          taicpu(user).opcode:=A_SUBI;
        else
          internalerror(2016111901);
      end;
      taicpu(user).loadconst(1, taicpu(p).oper[1]^.val);

      { drop the allocation markers of reg0, but only if both are found }
      regalloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
      regdealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(user.Next));
      if assigned(regalloc) and assigned(regdealloc) then
        begin
          asml.Remove(regalloc);
          regalloc.Free;
          asml.Remove(regdealloc);
          regdealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);
      result:=RemoveCurrentP(p);
    end;
  { Turns an STS to a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into OUT when the address falls into
    the 64-byte window accepted below: 0..63 on CPUs with
    CPUAVR_NOMEMMAPPED_REGS, 32..95 otherwise.  The OUT operand is the
    address relative to the start of that window. }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      iobase : longint;
    begin
      Result:=false;
      { first data address that maps onto I/O port 0 }
      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        iobase:=0
      else
        iobase:=32;
      if (taicpu(p).oper[0]^.ref^.symbol=nil) and
         (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
         (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
         (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
         (taicpu(p).oper[0]^.ref^.offset>=iobase) and
         (taicpu(p).oper[0]^.ref^.offset<=iobase+63) then
        begin
          DebugMsg('Peephole Sts2Out performed', p);
          taicpu(p).opcode:=A_OUT;
          taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-iobase);
          result:=true;
        end;
    end;
  { Peephole rewrites starting at an LDS:
      1. LDS from a plain absolute address inside the I/O window becomes IN
         (window 0..63 with CPUAVR_NOMEMMAPPED_REGS, 32..95 otherwise).
      2. Without cs_opt_level3: lds/lds/mov/mov collapses to lds/lds that load
         directly into the mov destinations.
      3. With cs_opt_level3: lds ... mov collapses to a single lds into the
         mov destination. }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      second, mov1, mov2 : tai;
      iobase : longint;

    { Removes the alloc and dealloc markers of reg, but only when both exist:
      the alloc is searched backwards from allocfrom, the dealloc forwards
      from deallocfrom. }
    procedure DropRegAllocPair(reg : TRegister; allocfrom, deallocfrom : tai);
      var
        a, d : tai;
      begin
        a:=FindRegAllocBackward(reg,allocfrom);
        d:=FindRegDeAlloc(reg,deallocfrom);
        if assigned(a) and assigned(d) then
          begin
            asml.Remove(a);
            a.Free;
            asml.Remove(d);
            d.Free;
          end;
      end;

    begin
      Result:=false;
      { first data address that maps onto I/O port 0 }
      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        iobase:=0
      else
        iobase:=32;

      if (taicpu(p).oper[1]^.ref^.symbol=nil) and
         (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
         (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
         (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
         (taicpu(p).oper[1]^.ref^.offset>=iobase) and
         (taicpu(p).oper[1]^.ref^.offset<=iobase+63) then
        begin
          DebugMsg('Peephole Lds2In performed', p);
          taicpu(p).opcode:=A_IN;
          taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-iobase);
          result:=true;
        end
      { turn
          alloc reg0
          alloc reg1
          lds reg0, label
          lds reg1, label
          mov reg2, reg0
          mov reg3, reg1
          dealloc reg0
          dealloc reg1
        into
          lds reg2, label
          lds reg3, label
      }
      else if not(cs_opt_level3 in current_settings.optimizerswitches) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         (GetNextInstruction(p,second)) and MatchInstruction(second,A_LDS) and
         (taicpu(second).oper[0]^.typ=top_reg) and
         (GetNextInstruction(second, mov1)) and MatchInstruction(mov1,A_MOV) and
         (taicpu(mov1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
         (GetNextInstruction(mov1, mov2)) and MatchInstruction(mov2,A_MOV) and
         (taicpu(mov2).oper[1]^.reg=taicpu(second).oper[0]^.reg) then
        begin
          DebugMsg('Peephole LdsLdsMovMov2LdsLds performed', p);

          { first lds now loads straight into the destination of the first mov }
          DropRegAllocPair(taicpu(p).oper[0]^.reg,tai(p.Previous),tai(mov1.Next));
          taicpu(p).oper[0]^.reg:=taicpu(mov1).oper[0]^.reg;
          RemoveInstruction(mov1);

          { same for the second lds/mov pair }
          DropRegAllocPair(taicpu(second).oper[0]^.reg,tai(second.Previous),tai(mov2.Next));
          taicpu(second).oper[0]^.reg:=taicpu(mov2).oper[0]^.reg;
          RemoveInstruction(mov2);

          Result:=true;
        end
      { turn
          alloc reg0
          lds reg0, label
          ...
          mov reg1, reg0
          dealloc reg0
        into
          lds reg1, label
      }
      else if (cs_opt_level3 in current_settings.optimizerswitches) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         (GetNextInstructionUsingReg(p,mov1,taicpu(p).oper[0]^.reg)) and
         MatchInstruction(mov1,A_MOV) and
         (taicpu(mov1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, mov1)) and
         (not RegUsedBetween(taicpu(mov1).oper[0]^.reg, p, mov1)) then
        begin
          DebugMsg('Peephole LdsMov2Lds performed', p);
          DropRegAllocPair(taicpu(p).oper[0]^.reg,tai(p.Previous),tai(mov1.Next));
          taicpu(p).oper[0]^.reg:=taicpu(mov1).oper[0]^.reg;
          RemoveInstruction(mov1);
          result:=true;
        end;
    end;
  { Peephole rewrites starting at "in rX,Y" with Y <= 31:
      - in/ori/out  with a single-bit mask          -> sbi Y,bit
      - in/andi/out with an all-but-one-bit mask    -> cbi Y,bit
      - in/andi/breq|brne with a single-bit mask    -> sbis|sbic Y,bit + jump
    The mask is examined as a byte. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      bitop, tail: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      { every pattern below needs a following instruction and an I/O address
        of at most 31 }
      if GetNextInstruction(p,bitop) and
         (taicpu(p).oper[1]^.val<=31) then
        begin
          {
            in rX,Y
            ori rX,n
            out Y,rX
            into
            sbi Y,lg(n)
          }
          if MatchInstruction(bitop,A_ORI) and
             (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
             (PopCnt(byte(taicpu(bitop).oper[1]^.val))=1) and
             GetNextInstruction(bitop,tail) and
             MatchInstruction(tail,A_OUT) and
             MatchOperand(taicpu(tail).oper[1]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(tail).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InOriOut2Sbi performed', p);
              taicpu(p).opcode:=A_SBI;
              { operand 0 becomes the I/O address, operand 1 the bit number }
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(bitop).oper[1]^.val));
              asml.Remove(bitop);
              bitop.Free;
              asml.Remove(tail);
              tail.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,not(n)
            out Y,rX
            into
            cbi Y,lg(n)
          }
          else if MatchInstruction(bitop,A_ANDI) and
             (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
             (PopCnt(byte(not(taicpu(bitop).oper[1]^.val)))=1) and
             GetNextInstruction(bitop,tail) and
             MatchInstruction(tail,A_OUT) and
             MatchOperand(taicpu(tail).oper[1]^,taicpu(p).oper[0]^) and
             MatchOperand(taicpu(tail).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InAndiOut2Cbi performed', p);
              taicpu(p).opcode:=A_CBI;
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(not(taicpu(bitop).oper[1]^.val)));
              asml.Remove(bitop);
              bitop.Free;
              asml.Remove(tail);
              tail.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,n
            breq/brne L1
            into
            sbis/sbic Y,lg(n)
            jmp L1
            .Ltemp:
          }
          else if MatchInstruction(bitop,A_ANDI) and
             (taicpu(bitop).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
             (PopCnt(byte(taicpu(bitop).oper[1]^.val))=1) and
             GetNextInstruction(bitop,tail) and
             MatchInstruction(tail,A_BRxx) and
             (taicpu(tail).condition in [C_EQ,C_NE]) then
            begin
              { breq jumps when the bit is clear, so the jump must be skipped
                when the bit is set (sbis); brne is the mirror case }
              if taicpu(tail).condition<>C_EQ then
                taicpu(p).opcode:=A_SBIC
              else
                taicpu(p).opcode:=A_SBIS;
              DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(bitop).oper[1]^.val));
              asml.Remove(bitop);
              bitop.Free;
              { the conditional branch becomes an unconditional jump }
              taicpu(tail).condition:=C_None;
              if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
                taicpu(tail).opcode:=A_JMP
              else
                taicpu(tail).opcode:=A_RJMP;
              { fresh label right behind the jump (.Ltemp in the pattern above) }
              current_asmdata.getjumplabel(l);
              l.increfs;
              asml.InsertAfter(tai_label.create(l), tail);
              result:=true;
            end;
        end;
    end;
  { Handles SBRS/SBRC:
        in   rx, y
        sbr* rx, z
      into
        sbi* y, z
    when rx is deallocated after the skip, y is in 0..31 and rx is not
    modified in between.  Afterwards InvertSkipInstruction is given a chance
    on the (possibly rewritten) instruction. }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      loader : tai;
    begin
      Result:=false;
      if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetLastInstruction(p,loader) and
         (loader.typ=ait_instruction) and
         (taicpu(loader).opcode=A_IN) and
         (taicpu(loader).ops=2) and
         (taicpu(loader).oper[1]^.typ=top_const) and
         (taicpu(loader).oper[1]^.val in [0..31]) and
         MatchOperand(taicpu(loader).oper[0]^,taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[0]^.reg, loader, p)) then
        begin
          { sbrs -> sbis, anything else -> sbic }
          if taicpu(p).opcode<>A_SBRS then
            taicpu(p).opcode:=A_SBIC
          else
            taicpu(p).opcode:=A_SBIS;
          { test the I/O location directly instead of the loaded register }
          taicpu(p).loadconst(0, taicpu(loader).oper[1]^.val);
          DebugMsg('Peephole InSbrx2Sbix performed', p);
          asml.Remove(loader);
          loader.free;
          result:=true;
        end;

      if InvertSkipInstruction(p) then
        result:=true;
    end;
  { Peephole rewrites for the skip instructions SBIC/SBIS.

    1. Via InvertSkipInstruction:
           sbic/sbis X, y
           jmp .L1
           op
         .L1:
       into
           sbis/sbic X, y
           op
         .L1:

    2. Otherwise:
           sbiX X, y
           jmp .L1
           jmp .L2
         .L1:
           op
         .L2:
       into
           sbiX X, y
         .L1:
           op
         .L2:

    Fix: the item following "op" (hp5) is now verified to be a label before
    it is cast to tai_label.  The previous code re-tested hp3 at that point,
    so hp5 was accessed as a label without any type check. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;
      if InvertSkipInstruction(p) then
        result:=true
      else if GetNextInstruction(p, hp1) and
         { hp1: jmp .L1 }
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp1).ops>0) and
         (taicpu(hp1).oper[0]^.typ = top_ref) and
         (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp2: jmp .L2 }
         GetNextInstruction(hp1, hp2) and
         (hp2.typ=ait_instruction) and
         (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
         (taicpu(hp2).ops>0) and
         (taicpu(hp2).oper[0]^.typ = top_ref) and
         (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
         { hp3: .L1, the target of the first jump }
         GetNextInstruction(hp2, hp3) and
         (hp3.typ=ait_label) and
         (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
         { hp4: op }
         GetNextInstruction(hp3, hp4) and
         (hp4.typ=ait_instruction) and
         { hp5: .L2, the target of the second jump }
         GetNextInstruction(hp4, hp5) and
         (hp5.typ=ait_label) and
         (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);
          { both jumps go away, so each target loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;
          AsmL.remove(hp1);
          taicpu(hp1).Free;
          AsmL.remove(hp2);
          taicpu(hp2).Free;
          result:=true;
        end;
    end;
  { Peephole rewrites starting at ANDI:
      1.   andi rx, #pow2
           breq/brne l
           <op>
         l:
         into
           sbrc/sbrs rx, #bit
           <op>
         l:
         when rx is deallocated after the andi.
      2. An andi whose register is deallocated afterwards and whose flags are
         not needed is removed altogether. }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      branch, guarded, target: tai;
      bitno : longint;
    begin
      Result:=false;
      if (taicpu(p).ops=2) and
         (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,bitno) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetNextInstruction(p,branch) and
         (branch.typ=ait_instruction) and
         (taicpu(branch).opcode=A_BRxx) and
         (taicpu(branch).condition in [C_EQ,C_NE]) and
         (taicpu(branch).ops>0) and
         (taicpu(branch).oper[0]^.typ = top_ref) and
         (taicpu(branch).oper[0]^.ref^.symbol is TAsmLabel) and
         GetNextInstruction(branch,guarded) and
         (guarded.typ=ait_instruction) and
         GetNextInstruction(guarded,target) and
         (target.typ=ait_label) and
         (taicpu(branch).oper[0]^.ref^.symbol=tai_label(target).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);
          { the mask is replaced by the number of the tested bit }
          taicpu(p).oper[1]^.val:=bitno;
          { brne skipped <op> when the bit was set -> sbrs; breq -> sbrc }
          if taicpu(branch).condition<>C_NE then
            taicpu(p).opcode:=A_SBRC
          else
            taicpu(p).opcode:=A_SBRS;
          asml.Remove(branch);
          branch.free;
          result:=true;
        end
      {
        Remove
          andi rx, #y
          dealloc rx
      }
      else if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          DebugMsg('Redundant Andi removed', p);
          result:=RemoveCurrentP(p);
        end;
    end;
  { Turns
        add rX, <zero register>
        adc rY, rZ
      into
        add rY, rZ

    Adding the zero register leaves rX unchanged and yields a cleared carry,
    so the following ADC behaves exactly like ADD.  Once the ADD is removed
    the carry is no longer known to be clear, therefore the ADC has to be
    rewritten to ADD as well.  This mirrors OptPass1SUB, which turns the
    following SBC into SUB; previously the ADC was left in place here and
    would have consumed whatever carry was left over from earlier code. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);
          { no carry-in may be consumed once the add is gone }
          taicpu(hp1).opcode:=A_ADD;
          RemoveCurrentP(p, hp1);
          Result := True;
        end;
    end;
  { Turns
        sub rX, <zero register>
        sbc rY, rZ
      into
        sub rY, rZ
    i.e. the SUB is dropped and the following SBC is rewritten to SUB. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;
      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p, follower) then
        exit;
      if not MatchInstruction(follower,A_SBC) then
        exit;

      DebugMsg('Peephole SubSbc2Sub performed', p);
      taicpu(follower).opcode:=A_SUB;
      RemoveCurrentP(p, follower);
      Result := True;
    end;
  { Folds
        mov reg2,reg0
        mov reg3,reg1
      to
        movw reg2,reg0
    on CPUs with CPUAVR_HAS_MOVW, when reg0 and reg2 have even register
    numbers and reg1/reg3 are their direct successors. }
  function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
    var
      second: tai;
    begin
      result:=false;
      if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
         { first mov: register to register }
         (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ = top_reg) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         { second mov: register to register as well }
         getnextinstruction(p,second) and
         (second.typ = ait_instruction) and
         (taicpu(second).opcode = A_MOV) and
         (taicpu(second).ops=2) and
         (taicpu(second).oper[0]^.typ = top_reg) and
         (taicpu(second).oper[1]^.typ = top_reg) and
         { both pairs start at an even register ... }
         ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
         ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
         { ... and the second mov covers the respective following register }
         (getsupreg(taicpu(second).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
         (getsupreg(taicpu(second).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
        begin
          DebugMsg('Peephole MovMov2Movw performed', p);
          { the registers of the second mov are now used by p already }
          AllocRegBetween(taicpu(second).oper[0]^.reg,p,second,UsedRegs);
          AllocRegBetween(taicpu(second).oper[1]^.reg,p,second,UsedRegs);
          taicpu(p).opcode:=A_MOVW;
          asml.remove(second);
          second.free;
          result:=true;
        end;
    end;
  { Peephole rewrites starting at "clr rX":
      1.   clr rX
           mov/ld rX, rY
         into
           mov/ld rX, rY
      2.   clr rX
           ...
           adc/sbc rY, rX
           dealloc rX
         into
           ...
           adc/sbc rY, <zero register>
    Both need the next instruction using rX, with rX unmodified in between. }
  function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
    var
      user: tai;
      regalloc, regdealloc: tai_regalloc;
    begin
      Result:=false;
      { guard shared by both patterns; NOTE(review): assumes
        GetNextInstructionUsingReg and RegModifiedBetween are side-effect
        free, so evaluating them once is equivalent to once per pattern }
      if (taicpu(p).ops=1) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         GetNextInstructionUsingReg(p, user, taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, user)) and
         (user.typ=ait_instruction) then
        begin
          { rX is overwritten right away: the clr is redundant }
          if (taicpu(user).opcode in [A_MOV,A_LD]) and
             (taicpu(user).ops>0) and
             (taicpu(user).oper[0]^.typ=top_reg) and
             (taicpu(user).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
            begin
              DebugMsg('Peephole ClrMov2Mov performed', p);
              result:=RemoveCurrentP(p);
            end
          { rX only serves as a zero source operand and dies afterwards }
          else if (taicpu(user).opcode in [A_ADC,A_SBC]) and
             (taicpu(user).ops=2) and
             (taicpu(user).oper[1]^.typ=top_reg) and
             (taicpu(user).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
             (taicpu(user).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
             assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(user.Next))) then
            begin
              DebugMsg('Peephole ClrAdc2Adc performed', p);
              taicpu(user).oper[1]^.reg:=GetDefaultZeroReg;
              { rX is no longer needed: drop its allocation markers if both
                are found }
              regalloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
              regdealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(user.Next));
              if assigned(regalloc) and assigned(regdealloc) then
                begin
                  asml.Remove(regalloc);
                  regalloc.Free;
                  asml.Remove(regdealloc);
                  regdealloc.Free;
                end;
              result:=RemoveCurrentP(p);
            end;
        end;
    end;
  { Pass 1 peephole for PUSH: replaces a register pair that is copied through
    the stack by direct register moves.

      push reg0
      push reg1
      pop  reg3
      pop  reg2
    becomes
      movw reg2,reg0
    when the CPU has MOVW and both pairs are even-aligned neighbours, or
      mov reg2,reg0
      mov reg3,reg1
    otherwise.

    Returns true when the sequence was rewritten. }
  function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
    var
      hp1, hp2, hp3: tai;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) and
         MatchInstruction(hp1,A_PUSH) and
         GetNextInstruction(hp1,hp2) and
         MatchInstruction(hp2,A_POP) and
         GetNextInstruction(hp2,hp3) and
         MatchInstruction(hp3,A_POP) then
        begin
          if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
             (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
             ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
             ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
            begin
              DebugMsg('Peephole PushPushPopPop2Movw performed', p);

              { the last pop becomes "movw reg2,reg0" }
              taicpu(hp3).ops:=2;
              taicpu(hp3).opcode:=A_MOVW;

              taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);

              { We're removing 3 concurrent instructions.  Remove hp1
                and hp2 manually instead of calling RemoveCurrentP
                as this means we won't be calling UpdateUsedRegs 3 times }
              asml.Remove(hp1);
              hp1.Free;

              asml.Remove(hp2);
              hp2.Free;

              { By removing p last, we've guaranteed that p.Next is
                valid (storing it prior to removing the instructions
                may result in a dangling pointer if hp1 immediately
                follows p), and because hp1, hp2 and hp3 came from
                sequential calls to GetNextInstruction, it is
                guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
              RemoveCurrentP(p, hp3);
              Result := True;
            end
          { The two MOVs below are executed one after the other
            (mov reg2,reg0 first, then mov reg3,reg1), unlike the stack
            round trip which reads both sources before writing anything.
            The rewrite is therefore only valid if
              - reg2 is not reg1, otherwise the first MOV destroys the
                source of the second one (e.g. a push/pop register swap)
              - reg2 is not reg3, otherwise the value of the wrong pop
                would survive.
            In those cases the sequence is left untouched. }
          else if (getsupreg(taicpu(hp3).oper[0]^.reg)<>getsupreg(taicpu(hp1).oper[0]^.reg)) and
                  (getsupreg(taicpu(hp3).oper[0]^.reg)<>getsupreg(taicpu(hp2).oper[0]^.reg)) then
            begin
              DebugMsg('Peephole PushPushPopPop2MovMov performed', p);

              { both pushes are turned into moves in place }
              taicpu(p).ops:=2;
              taicpu(p).opcode:=A_MOV;

              taicpu(hp1).ops:=2;
              taicpu(hp1).opcode:=A_MOV;

              { p:   mov reg2,reg0 }
              taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
              taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);

              { hp1: mov reg3,reg1 }
              taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);

              { life range of reg2 and reg3 is increased, fix register allocation entries }
              TransferUsedRegs(TmpUsedRegs);
              UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
              AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);

              TransferUsedRegs(TmpUsedRegs);
              AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
              IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
              UpdateUsedRegs(tai(p.Next));

              { the pops are not needed anymore }
              asml.Remove(hp2);
              hp2.Free;
              asml.Remove(hp3);
              hp3.Free;

              result:=true;
            end;
        end;
    end;
  { Pass 1 peephole for CALL: at optimization level 4 a CALL whose next
    instruction is a RET is turned into a JMP and the RET is removed.
    Returns true when the rewrite happened. }
  function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
    var
      retInstr: tai;
    begin
      Result:=false;

      { only done when level 4 optimizations are enabled }
      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;

      { the call has to be followed by a ret }
      if not GetNextInstruction(p,retInstr) then
        exit;
      if not MatchInstruction(retInstr,A_RET) then
        exit;

      DebugMsg('Peephole CallReg2Jmp performed', p);

      taicpu(p).opcode:=A_JMP;

      asml.Remove(retInstr);
      retInstr.Free;

      Result:=true;
    end;
  { Pass 1 peephole for RCALL: at optimization level 4 an RCALL whose next
    instruction is a RET is turned into an RJMP and the RET is removed.
    Returns true when the rewrite happened. }
  function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
    var
      retInstr: tai;
    begin
      Result:=false;

      { only done when level 4 optimizations are enabled }
      if not(cs_opt_level4 in current_settings.optimizerswitches) then
        exit;

      { the relative call has to be followed by a ret }
      if not GetNextInstruction(p,retInstr) then
        exit;
      if not MatchInstruction(retInstr,A_RET) then
        exit;

      DebugMsg('Peephole RCallReg2RJmp performed', p);

      taicpu(p).opcode:=A_RJMP;

      asml.Remove(retInstr);
      retInstr.Free;

      Result:=true;
    end;
  { Pass 1 peephole optimizations for MOV. The patterns are tried in the
    order they appear below; the first one that matches is applied:
      Mov2Nop         - destination is not used after the move
      MovOp2Op 1/2    - the copy is only read by one following instruction
      RedundantMov    - mov reg0,reg0
      MovOpMov2Op     - copy, operate on the copy, copy back
      MovOpMov2Op2    - copy, combine with rw, store into rw
      MovMov2Mov 1/2  - back-to-back or repeated moves
    Returns true when the instruction list was changed. }
  function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      i : Integer;
      alloc, dealloc: tai_regalloc;
    begin
      Result:=false;
      { change
          mov reg0, reg1
          dealloc reg0
        into
          dealloc reg0
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
          if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
            { reg. allocation information before calls is not perfect, so don't do this
              directly before call/rcall }
            GetNextInstruction(p,hp1) and
            not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
            begin
              DebugMsg('Peephole Mov2Nop performed', p);
              RemoveCurrentP(p, hp1);
              Result := True;
              exit;
            end;
        end;

      { turn
          mov reg0, reg1
          <op> reg2,reg0
          dealloc reg0
        into
          <op> reg2,reg1
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
         (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
                                A_OUT,A_IN]) or
         { the reference register of ST/STD cannot be replaced }
         (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
         (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
         {(taicpu(hp1).ops=1) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and  }
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 1 performed', p);

          { let hp1 read the original register instead of the copy }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          { the copy is gone, so are its allocation markers (only if both were found) }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of reg1 is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { turn
          mov reg1, reg0
          <op> reg1,xxxx
          dealloc reg1
        into
          <op> reg0,xxxx
      }
      else if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
        MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
        begin
          DebugMsg('Peephole MovOp2Op 2 performed', p);

          { let hp1 test the original register instead of the copy }
          for i := 0 to taicpu(hp1).ops-1 do
            if taicpu(hp1).oper[i]^.typ=top_reg then
              if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { life range of reg0 (the source of the mov) is increased }
          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);

          { p will be removed, update used register as we continue
            with the next instruction after p }
          result:=RemoveCurrentP(p);
        end
      { remove
          mov reg0,reg0
      }
      else if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ = top_reg) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          DebugMsg('Peephole RedundantMov performed', p);
          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op rx,rz
          mov ry, rx
        Into
          op ry,rz
      }
      else if (taicpu(p).ops=2) and
         MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).ops >= 1) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
         MatchInstruction(hp2,A_MOV) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
         (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
         (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
                                 A_INC,A_DEC,
                                 A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op performed', p);

          { If the operation also reads the copy as its second operand
            (op rx,rx), that operand has to follow the rename as well,
            otherwise it would read rx after its defining mov is gone. }
          if (taicpu(hp1).ops=2) and
             (taicpu(hp1).oper[1]^.typ=top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
            taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          { operate directly on ry }
          taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          { the copy back is not needed anymore }
          asml.remove(hp2);
          hp2.free;

          result:=RemoveCurrentP(p);
        end
      {
        Turn
          mov rx,ry
          op  rx,rw
          mov rw,rx
        Into
          op rw,ry
      }
      else if (taicpu(p).ops=2) and
        MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).ops = 2) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
        (hp2.typ=ait_instruction) and
        (taicpu(hp2).opcode=A_MOV) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
        begin
          DebugMsg('Peephole MovOpMov2Op2 performed', p);

          { swap the roles: rw becomes the destination, ry the second operand }
          taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
          taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;

          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          result:=RemoveCurrentP(p);

          asml.remove(hp2);
          hp2.free;
        end
      {
        This removes the first mov from
          mov rX,...
          mov rX,...
      }
      else if GetNextInstruction(p,hp1) and
         { test condition here already instead in the while loop only, else MovMov2Mov 2 might be overlooked }
         MatchInstruction(hp1,A_MOV) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
        while MatchInstruction(hp1,A_MOV) and
              MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
              { don't remove the first mov if the second is a mov rX,rX }
              not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
          begin
            DebugMsg('Peephole MovMov2Mov 1 performed', p);

            { p now points to the second mov; look at the one following it }
            RemoveCurrentP(p,hp1);
            Result := True;

            GetNextInstruction(hp1,hp1);
            if not assigned(hp1) then
              break;
          end
      {
        This removes the second mov from
          mov rX,rY

          ...

          mov rX,rY

        if rX and rY are not modified in-between
      }
      else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
        MatchInstruction(hp1,A_MOV) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
        not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole MovMov2Mov 2 performed', p);
          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
          RemoveInstruction(hp1);
          Result := True;
        end;
    end;
  { Entry point of the CPU specific pass 1 peephole optimizer.
    For instructions it first tries to drop a compare against zero that
    directly follows an operation which already set the flags (OpCp2Op);
    if that does not apply, the instruction is dispatched to the matching
    OptPass1XXX routine. Returns true when the instruction list was changed. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1,hp2: tai;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            {
              change
              <op> reg,x
              cp reg,r1
              into
              <op>s reg,x
            }
            { this optimization can applied only to the currently enabled operations because
              the other operations do not update all flags and FPC does not track flag usage }
            { NOTE(review): A_SBI in the opcode sets below can never match because p is
              restricted to the list in the first MatchInstruction; it looks like
              A_SBC or A_SUBI was meant - confirm before changing }
            if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
                                    A_INC,A_LSL,A_LSR,
                                    A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
              GetNextInstruction(p, hp1) and
              ((MatchInstruction(hp1, A_CP) and
                (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
                  (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
                 ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                  (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                        A_LSL,A_LSR,
                                        A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
               { andi followed by a compare of the constant 0 }
               (MatchInstruction(hp1, A_CPI) and
                (taicpu(p).opcode = A_ANDI) and
                (taicpu(p).oper[1]^.typ=top_const) and
                (taicpu(hp1).oper[1]^.typ=top_const) and
                (taicpu(hp1).oper[1]^.val=0))) and
              GetNextInstruction(hp1, hp2) and
              { be careful here, following instructions could use other flags
                however after a jump fpc never depends on the value of flags }
              { All above instructions set Z and N according to the following
                Z := result = 0;
                N := result[7];
                EQ = Z=1; NE = Z=0;
                MI = N=1; PL = N=0; }
              MatchInstruction(hp2, A_BRxx) and
              ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
              { sub/sbc set all flags }
               (taicpu(p).opcode in [A_SUB,A_SBI]))
              { no flag allocation tracking implemented yet on avr, otherwise this
                would also need:
                assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) }
              then
              begin
                { move flag allocation if possible }
                { no flag allocation tracking implemented yet on avr
                GetLastInstruction(hp1, hp2);
                hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
                if assigned(hp2) then
                  begin
                    asml.Remove(hp2);
                    asml.insertbefore(hp2, p);
                  end;
                }

                { Only a cp with swapped operands (zero register first) after a sub
                  reverses the comparison.
                  A cpi against 0 must NOT invert the branch: andi leaves Z and N
                  exactly as "cpi reg,0" would, so the branch keeps its condition
                  once the cpi is removed. }
                if MatchInstruction(hp1, A_CP) and
                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                  (taicpu(p).opcode in [A_SUB,A_SBI]) then
                  taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);

                { the flags are now live from p up to and including the branch }
                asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
                asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
                IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);

                DebugMsg('Peephole OpCp2Op performed', p);

                { the compare is redundant }
                asml.remove(hp1);
                hp1.free;
                Result:=true;
              end
            else
              { opcode specific optimizations }
              case taicpu(p).opcode of
                A_LDI:
                  Result:=OptPass1LDI(p);
                A_STS:
                  Result:=OptPass1STS(p);
                A_LDS:
                  Result:=OptPass1LDS(p);
                A_IN:
                  Result:=OptPass1IN(p);
                A_SBRS,
                A_SBRC:
                  Result:=OptPass1SBR(p);
                A_ANDI:
                  Result:=OptPass1ANDI(p);
                A_ADD:
                  Result:=OptPass1ADD(p);
                A_SUB:
                  Result:=OptPass1SUB(p);
                A_CLR:
                  Result:=OptPass1CLR(p);
                A_PUSH:
                  Result:=OptPass1PUSH(p);
                A_CALL:
                  Result:=OptPass1CALL(p);
                A_RCALL:
                  Result:=OptPass1RCALL(p);
                A_MOV:
                  Result:=OptPass1MOV(p);
                A_SBIC,
                A_SBIS:
                  Result:=OptPass1SBI(p);
              end;
          end;
      end;
    end;
  { Entry point of the CPU specific pass 2 peephole optimizer.
    Only MOV instructions are handled here, they are passed on to
    OptPass2MOV; everything else yields false. }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      Result:=false;
      if (p.typ=ait_instruction) and
         (taicpu(p).opcode=A_MOV) then
        Result:=OptPass2MOV(p);
    end;
  { unit initialization: assign this unit's optimizer class to casmoptimizer }
initialization
  casmoptimizer:=TCpuAsmOptimizer;
end.