aoptcpu.pas 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses cpubase,cgbase,aasmtai,aopt,AoptObj, cclasses,aoptcpub;
  23. Type
  { CPU specific assembler optimizer, derived from TAsmOptimizer. It adds
    register usage queries and one peephole routine per handled opcode. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
    { steps forward from Current and returns the instruction found in Next;
      with cs_opt_level3 it keeps stepping until an instruction using reg,
      a non-instruction entry or a call/jump is reached }
    Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
    { true if p1 uses Reg, also counting implicitly used registers
      (multiply results, second register of MOVW/ADIW pairs) }
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
    { true if hp overwrites reg without reading it }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { true if hp reads reg, either directly or as part of a reference }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { folds "skip; jmp .Lx; op; .Lx:" into the inverted skip followed by op }
    function InvertSkipInstruction(var p: tai): boolean;
    { no constructor is declared here, the inherited one is used }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
  private
    { pass 1 peephole routines, one per opcode (or opcode group) }
    function OptPass1ADD(var p : tai) : boolean;
    function OptPass1ANDI(var p : tai) : boolean;
    function OptPass1CALL(var p : tai) : boolean;
    function OptPass1CLR(var p : tai) : boolean;
    function OptPass1IN(var p : tai) : boolean;
    function OptPass1LDI(var p : tai) : boolean;
    function OptPass1LDS(var p : tai) : boolean;
    function OptPass1LDD(var p: tai): boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1PUSH(var p : tai) : boolean;
    function OptPass1RCALL(var p : tai) : boolean;
    function OptPass1SBI(var p : tai) : boolean;
    function OptPass1SBR(var p : tai) : boolean;
    function OptPass1STS(var p : tai) : boolean;
    function OptPass1SUB(var p : tai) : boolean;
    { pass 2 peephole routines }
    function OptPass2MOV(var p : tai) : boolean;
  End;
  53. Implementation
  54. uses
  55. cutils,
  56. verbose,
  57. cpuinfo,
  58. aasmbase,aasmcpu,aasmdata,
  59. aoptutils,
  60. globals,globtype,
  61. cgutils;
  type
    { set of opcodes, accepted by the MatchInstruction overloads that test
      an instruction against several opcodes at once }
    TAsmOpSet = set of TAsmOp;
  { Returns true when p is an instruction whose condition field is C_None. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ=ait_instruction then
        result:=taicpu(p).condition=C_None
      else
        result:=false;
    end;
  { Compares two references field by field. References carrying any
    volatility flag never compare equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { a volatile reference must not be treated as equal to anything }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { address registers and scaling }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { displacement and symbols }
      if (r1.offset<>r2.offset) or
         (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      Result:=(r1.refaddr=r2.refaddr) and
              (r1.addressmode=r2.addressmode);
    end;
  { Returns true if both operands have the same type and the same payload
    (constant value, register or reference). Any other operand type yields
    false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=oper1.reg=oper2.reg;
          top_const:
            Result:=oper1.val=oper2.val;
          top_ref:
            Result:=RefsEqual(oper1.ref^,oper2.ref^);
          else
            Result:=false;
        end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true if instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        result:=taicpu(instr).opcode=op;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        result:=taicpu(instr).opcode in ops;
    end;
  { Returns true if instr is an instruction whose opcode is a member of ops
    and which has exactly opcount operands. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if not(taicpu(instr).opcode in ops) then
        exit;
      result:=taicpu(instr).ops=opcount;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: inserts the message s as an assembler comment in front
    of p in the current assembler list. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: empty stub, so the calls in the peephole routines
    emit nothing. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if instruction p1 involves register Reg.

    Besides the operands checked by the inherited implementation this also
    accounts for registers that are used implicitly:
      - all multiply instructions place their result in R1:R0
      - MOVW touches the register following each of its two operands
      - ADIW touches the register following its first operand

    MULSU is part of the multiply group: like MUL/MULS/FMUL* it writes its
    product to R1:R0. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=false;
      if p1.typ=ait_instruction then
        case taicpu(p1).opcode of
          A_MUL,A_MULS,A_MULSU,A_FMUL,A_FMULS,A_FMULSU:
            Result:=(getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1);
          A_MOVW:
            Result:=(TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or
                    (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
                    (taicpu(p1).oper[0]^.reg=reg) or
                    (taicpu(p1).oper[1]^.reg=reg);
          A_ADIW:
            Result:=(TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or
                    (taicpu(p1).oper[0]^.reg=reg);
          else
            Result:=false;
        end;
      { no implicit use found: fall back to the generic operand scan }
      if not Result then
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Steps forward from Current and returns the entry found in Next.

    Without cs_opt_level3 only a single step is taken. With it, stepping
    continues until there is no further instruction, a non-instruction
    entry is reached, the instruction uses reg, or it is a call/jump.
    The result is the result of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not(cs_opt_level3 in current_settings.optimizerswitches) then
          break;
        if not Result then
          break;
        if Next.typ<>ait_instruction then
          break;
        if RegInInstruction(reg,Next) then
          break;
      until is_calljmp(taicpu(Next).opcode);
    end;
  { Returns true if instruction hp overwrites reg with a value that does not
    depend on the previous contents of reg.

    Handled cases:
      - LDI/MOV/LDS writing reg, unless the source operand is reg itself
      - LD/LDD/LPM writing reg, unless reg is part of the address
      - MOVW writing the register pair that contains reg, unless reg is
        also part of the source pair
      - POP writing reg

    MOVW d,s writes d and d+1 and reads s and s+1, so the second register of
    a pair is found by incrementing the operand register. This is the same
    test RegInInstruction and InstructionLoadsFromReg use. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result:=false;
      if not assigned(hp) or
         (hp.typ<>ait_instruction) then
        exit;
      p:=taicpu(hp);
      case p.opcode of
        A_LDI,A_MOV,A_LDS:
          Result:=(reg=p.oper[0]^.reg) and
                  ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[1]^.reg));
        A_LD,A_LDD,A_LPM:
          Result:=(reg=p.oper[0]^.reg) and
                  not(RegInRef(reg,p.oper[1]^.ref^));
        A_MOVW:
          Result:=
            { reg is the low or the high half of the destination pair }
            ((reg=p.oper[0]^.reg) or (TRegister(ord(p.oper[0]^.reg)+1)=reg)) and
            { and neither half of the source pair }
            not(reg=p.oper[1]^.reg) and
            not(TRegister(ord(p.oper[1]^.reg)+1)=reg);
        A_POP:
          Result:=reg=p.oper[0]^.reg;
        else
          Result:=false;
      end;
    end;
  { Returns true if instruction hp reads reg, either as a register operand
    or as base/index of a reference operand. For MOVW (source operand) and
    ADIW (first operand) the register following the operand counts as well.
    POP is ignored, and for pure load/move opcodes the first operand is
    skipped because it is only written. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      opidx, firstop: longint;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      { we do not care about the stack pointer }
      if instr.opcode in [A_POP] then
        exit;
      { the destination of these opcodes is write-only, start at operand 1 }
      if instr.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
        firstop:=1
      else
        firstop:=0;
      for opidx:=firstop to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              Result:=(instr.oper[opidx]^.reg=reg) or
                { MOVW: high half of the source pair }
                ((opidx=1) and (instr.opcode=A_MOVW) and
                 (getsupreg(instr.oper[opidx]^.reg)+1=getsupreg(reg))) or
                { ADIW: high half of the operand pair }
                ((opidx=0) and (instr.opcode=A_ADIW) and
                 (getsupreg(instr.oper[opidx]^.reg)+1=getsupreg(reg)));
            top_ref:
              Result:=(instr.oper[opidx]^.ref^.base=reg) or
                      (instr.oper[opidx]^.ref^.index=reg);
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
        end;
    end;
  {
    Turns
      sbis ?
      jmp .Lx
      op
      .Lx:
    Into
      sbic ?
      op
      .Lx:
    For all types of skip instructions (SBIS/SBIC/SBRS/SBRC).

    p is the skip instruction. Returns true if the jump was removed and the
    skip condition inverted, so that callers can report the change.
  }
  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;

    { Steps forward from p until an instruction is found, passing over any
      other entries GetNextInstruction returns (e.g. labels). Returns true
      only if next ends up pointing to an instruction. }
    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
      begin
        repeat
          result:=GetNextInstruction(p,next);
          p:=next;
        until
          (not result) or
          (not assigned(next)) or
          (next.typ in [ait_instruction]);
        result:=assigned(next) and (next.typ in [ait_instruction]);
      end;

    var
      hp1, hp2, hp3: tai;
    begin
      result:=false;
      if GetNextInstruction(taicpu(p),hp1) and
        { hp1: unconditional jump to a plain label }
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
        (taicpu(hp1).ops=1) and
        (taicpu(hp1).oper[0]^.typ=top_ref) and
        (taicpu(hp1).oper[0]^.ref^.offset=0) and
        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
        { hp2: the single instruction that is jumped over }
        GetNextInstructionWithoutLabel(hp1,hp2) and
        (hp2.typ=ait_instruction) and
        (not taicpu(hp2).is_jmp) and
        { the jump target must directly follow hp2 }
        GetNextInstruction(hp2,hp3) and
        FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
        begin
          DebugMsg('SkipJump2InvertedSkip', p);

          case taicpu(p).opcode of
            A_SBIS: taicpu(p).opcode:=A_SBIC;
            A_SBIC: taicpu(p).opcode:=A_SBIS;
            A_SBRS: taicpu(p).opcode:=A_SBRC;
            A_SBRC: taicpu(p).opcode:=A_SBRS;
          end;

          { the jump goes away, so its label loses one reference }
          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;

          asml.remove(hp1);
          hp1.free;

          { tell the caller that the instruction stream was changed }
          result:=true;
        end;
    end;
  { turn
      ldi reg0, imm
      <op> reg1, reg0
      dealloc reg0
    into
      <op>i reg1, imm

    <op> is one of CP/MOV/AND/SUB and becomes CPI/LDI/ANDI/SUBI. The fold is
    only done when reg1 is one of r16..r31, reg1 differs from reg0 and reg0
    is not used after <op>. }
  function TCpuAsmOptimizer.OptPass1LDI(var p : tai) : boolean;
    var
      user : tai;
      regalloc, regdealloc : tai_regalloc;
    begin
      Result:=false;

      { p must be "ldi reg0, imm" }
      if not MatchOpType(taicpu(p),top_reg,top_const) then
        exit;
      { the next user of reg0 must be a two operand CP/MOV/AND/SUB ... }
      if not GetNextInstructionUsingReg(p,user,taicpu(p).oper[0]^.reg) then
        exit;
      if not MatchInstruction(user,[A_CP,A_MOV,A_AND,A_SUB],2) then
        exit;
      if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,user) then
        exit;
      { ... of the form "<op> reg1, reg0" with reg1 in r16..r31 and reg1<>reg0 }
      if not MatchOpType(taicpu(user),top_reg,top_reg) then
        exit;
      if not(getsupreg(taicpu(user).oper[0]^.reg) in [16..31]) then
        exit;
      if taicpu(user).oper[1]^.reg<>taicpu(p).oper[0]^.reg then
        exit;
      if MatchOperand(taicpu(user).oper[0]^,taicpu(user).oper[1]^) then
        exit;

      { reg0 must be dead after the user }
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(p.next));
      UpdateUsedRegs(TmpUsedRegs,tai(user.next));
      if RegUsedAfterInstruction(taicpu(user).oper[1]^.reg,user,TmpUsedRegs) then
        exit;

      { switch the user to its immediate form }
      case taicpu(user).opcode of
        A_CP:
          taicpu(user).opcode:=A_CPI;
        A_MOV:
          taicpu(user).opcode:=A_LDI;
        A_AND:
          taicpu(user).opcode:=A_ANDI;
        A_SUB:
          taicpu(user).opcode:=A_SUBI;
        else
          internalerror(2016111901);
      end;
      taicpu(user).loadconst(1,taicpu(p).oper[1]^.val);

      { drop the allocation info of reg0, but only if both ends are found }
      regalloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
      regdealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(user.Next));
      if assigned(regalloc) and assigned(regdealloc) then
        begin
          asml.Remove(regalloc);
          regalloc.Free;
          asml.Remove(regdealloc);
          regdealloc.Free;
        end;

      DebugMsg('Peephole LdiOp2Opi performed', p);

      result:=RemoveCurrentP(p);
    end;
  { Turns an STS to a plain absolute address (no symbol, no base/index
    register, unchanged address mode) into an OUT when the address lies in
    the 64 byte window reachable by OUT: 0..63 on CPUs with
    CPUAVR_NOMEMMAPPED_REGS, 32..95 otherwise. The OUT operand is the
    address relative to the start of that window. }
  function TCpuAsmOptimizer.OptPass1STS(var p : tai) : boolean;
    var
      windowstart : longint;
    begin
      Result:=false;

      { first data space address that maps to I/O port 0 }
      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
        windowstart:=0
      else
        windowstart:=32;

      { only plain absolute addresses qualify }
      if (taicpu(p).oper[0]^.ref^.symbol<>nil) or
         (taicpu(p).oper[0]^.ref^.relsymbol<>nil) then
        exit;
      if (getsupreg(taicpu(p).oper[0]^.ref^.base)<>RS_NO) or
         (getsupreg(taicpu(p).oper[0]^.ref^.index)<>RS_NO) then
        exit;
      if taicpu(p).oper[0]^.ref^.addressmode<>AM_UNCHANGED then
        exit;
      { and the address has to fall into the I/O window }
      if (taicpu(p).oper[0]^.ref^.offset<windowstart) or
         (taicpu(p).oper[0]^.ref^.offset>windowstart+63) then
        exit;

      DebugMsg('Peephole Sts2Out performed', p);

      taicpu(p).opcode:=A_OUT;
      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-windowstart);
      result:=true;
    end;
  { Peephole optimizations for LDS. Exactly one of three rewrites is tried,
    in this order:
      1. LDS from a plain absolute address inside the I/O window -> IN
      2. (without cs_opt_level3) lds/lds/mov/mov -> lds/lds into the final
         registers
      3. (with cs_opt_level3) lds ... mov -> lds into the final register
    Returns true if p was changed. }
  function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
    var
      hp1, hp2, hp3, alloc, dealloc: tai;
    begin
      Result:=false;
      { plain absolute address (no symbol, no base/index register) that lies
        in the 64 byte window reachable by IN: 0..63 with
        CPUAVR_NOMEMMAPPED_REGS, 32..95 otherwise }
      if (taicpu(p).oper[1]^.ref^.symbol=nil) and
        (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
        (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
        (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
        (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
        (taicpu(p).oper[1]^.ref^.offset>=0) and
        (taicpu(p).oper[1]^.ref^.offset<=63)) or
        (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
        (taicpu(p).oper[1]^.ref^.offset>=32) and
        (taicpu(p).oper[1]^.ref^.offset<=95))) then
        begin
          DebugMsg('Peephole Lds2In performed', p);

          taicpu(p).opcode:=A_IN;
          { the IN operand is the address relative to the window start }
          if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
            taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
          else
            taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);

          result:=true;
        end
      { turn
          alloc reg0
          alloc reg1
          lds reg0, label
          lds reg1, label
          mov reg2, reg0
          mov reg3, reg1
          dealloc reg0
          dealloc reg1
        into
          lds reg2, label
          lds reg3, label
      }
      else if not(cs_opt_level3 in current_settings.optimizerswitches) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        assigned(FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous))) and
        (GetNextInstruction(p,hp1)) and MatchInstruction(hp1,A_LDS) and
        (taicpu(hp1).oper[0]^.typ=top_reg) and
        assigned(FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous))) and
        (GetNextInstruction(hp1, hp2)) and MatchInstruction(hp2,A_MOV) and
        (taicpu(hp2).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next))) and
        (GetNextInstruction(hp2, hp3)) and MatchInstruction(hp3,A_MOV) and
        (taicpu(hp3).oper[1]^.reg=taicpu(hp1).oper[0]^.reg) and
        assigned(FindRegDeAlloc(taicpu(hp1).oper[0]^.reg,tai(hp3.Next))) then
        begin
          DebugMsg('Peephole LdsLdsMovMov2LdsLds performed', p);

          { first pair: remove alloc/dealloc of reg0, load directly into reg2 }
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
          RemoveInstruction(hp2);

          { second pair: remove alloc/dealloc of reg1, load directly into reg3 }
          alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
          dealloc:=FindRegDeAlloc(taicpu(hp1).oper[0]^.reg,tai(hp3.Next));

          if assigned(alloc) and assigned(dealloc) then
            begin
              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;
            end;

          taicpu(hp1).oper[0]^.reg:=taicpu(hp3).oper[0]^.reg;
          RemoveInstruction(hp3);

          Result:=true;
        end
      { turn
          alloc reg0
          lds reg0, label
          ...
          mov reg1, reg0
          dealloc reg0
        into
          lds reg1, label
      }
      else if (cs_opt_level3 in current_settings.optimizerswitches) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        (GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg)) and
        MatchInstruction(hp1,A_MOV) and
        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
        (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) then
        begin
          alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));

          { only rewrite if both the alloc and the dealloc of reg0 are found }
          if assigned(alloc) and assigned(dealloc) then
            begin
              DebugMsg('Peephole LdsMov2Lds performed', p);

              asml.Remove(alloc);
              alloc.Free;
              asml.Remove(dealloc);
              dealloc.Free;

              taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
              RemoveInstruction(hp1);

              result:=true;
            end;
        end;
    end;
  { turn
      alloc reg0
      ldd reg0, <mem>
      ...
      mov reg1, reg0
      dealloc reg0
    into
      ldd reg1, <mem>

    The rewrite is only done if reg0 is not modified and reg1 is not used
    between the two instructions, and if both the alloc and the dealloc of
    reg0 can be located. }
  function TCpuAsmOptimizer.OptPass1LDD(var p : tai) : boolean;
    var
      movinstr, regalloc, regdealloc: tai;
    begin
      Result:=false;

      { next user of reg0 has to be "mov reg1, reg0" }
      if not GetNextInstructionUsingReg(p,movinstr,taicpu(p).oper[0]^.reg) then
        exit;
      if not MatchInstruction(movinstr,A_MOV) then
        exit;
      if taicpu(movinstr).oper[1]^.reg<>taicpu(p).oper[0]^.reg then
        exit;
      if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,movinstr) then
        exit;
      if RegUsedBetween(taicpu(movinstr).oper[0]^.reg,p,movinstr) then
        exit;

      regalloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
      regdealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movinstr.Next));
      if not(assigned(regalloc) and assigned(regdealloc)) then
        exit;

      DebugMsg('Peephole LddMov2Ldd performed', p);

      asml.Remove(regalloc);
      regalloc.Free;
      asml.Remove(regdealloc);
      regdealloc.Free;

      { load straight into the final register and drop the mov }
      taicpu(p).oper[0]^.reg:=taicpu(movinstr).oper[0]^.reg;
      RemoveInstruction(movinstr);

      result:=true;
    end;
  { Peephole optimizations for "in rX,Y" with a port number Y<=31. Depending
    on the two following instructions the sequence is turned into a single
    bit instruction on the port (SBI/CBI) or into a skip-on-port-bit
    (SBIS/SBIC) followed by an unconditional jump.
    Returns true if p was changed. }
  function TCpuAsmOptimizer.OptPass1IN(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      l : TAsmLabel;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) then
        begin
          {
            in rX,Y
            ori rX,n
            out Y,rX

            into
            sbi Y,lg(n)

            n must have exactly one bit set
          }
          if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ORI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_OUT) and
            MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InOriOut2Sbi performed', p);

              taicpu(p).opcode:=A_SBI;
              { operand 0 becomes the port, operand 1 the bit number }
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,not(n)
            out Y,rX

            into
            cbi Y,lg(n)

            the inverted mask must have exactly one bit set
          }
          else if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_OUT) and
            MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
            begin
              DebugMsg('Peephole InAndiOut2Cbi performed', p);

              taicpu(p).opcode:=A_CBI;
              { operand 0 becomes the port, operand 1 the bit number }
              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
              asml.Remove(hp1);
              hp1.Free;
              asml.Remove(hp2);
              hp2.Free;
              result:=true;
            end
          {
            in rX,Y
            andi rX,n
            breq/brne L1

            into
            sbis/sbic Y,lg(n)
            jmp L1
            .Ltemp:
          }
          else if (taicpu(p).oper[1]^.val<=31) and
            MatchInstruction(hp1,A_ANDI) and
            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
            (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_BRxx) and
            (taicpu(hp2).condition in [C_EQ,C_NE]) then
            begin
              { breq jumps when the bit is clear, so the jump has to be
                skipped when the bit is set (sbis), and vice versa }
              if taicpu(hp2).condition=C_EQ then
                taicpu(p).opcode:=A_SBIS
              else
                taicpu(p).opcode:=A_SBIC;

              DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);

              taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
              taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
              asml.Remove(hp1);
              hp1.Free;

              { the conditional branch becomes an unconditional jump }
              taicpu(hp2).condition:=C_None;
              if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
                taicpu(hp2).opcode:=A_JMP
              else
                taicpu(hp2).opcode:=A_RJMP;

              { a fresh label (.Ltemp above) is created, its reference count
                is increased and it is inserted right after the jump.
                NOTE(review): nothing in this routine jumps to it; presumably
                it only marks the instruction boundary after the skipped
                jump - confirm }
              current_asmdata.getjumplabel(l);
              l.increfs;
              asml.InsertAfter(tai_label.create(l), hp2);

              result:=true;
            end;
        end;
    end;
  {
    Turn
      in rx, y
      sbr* rx, z
    Into
      sbi* y, z

    This needs rx to be deallocated after p, the port y to be in 0..31 and
    rx to be unmodified between the two instructions. Afterwards the
    generic skip/jump inversion is tried on p as well.
  }
  function TCpuAsmOptimizer.OptPass1SBR(var p : tai) : boolean;
    var
      portread : tai;
    begin
      Result:=false;

      if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         GetLastInstruction(p,portread) and
         { previous instruction: "in rx, y" with a constant port 0..31 }
         MatchInstruction(portread,[A_IN],2) and
         (taicpu(portread).oper[1]^.typ=top_const) and
         (taicpu(portread).oper[1]^.val in [0..31]) and
         MatchOperand(taicpu(portread).oper[0]^,taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[0]^.reg,portread,p)) then
        begin
          { test the port bit directly instead of the register bit }
          if taicpu(p).opcode<>A_SBRS then
            taicpu(p).opcode:=A_SBIC
          else
            taicpu(p).opcode:=A_SBIS;

          taicpu(p).loadconst(0,taicpu(portread).oper[1]^.val);

          DebugMsg('Peephole InSbrx2Sbix performed', p);

          asml.Remove(portread);
          portread.free;

          result:=true;
        end;

      if InvertSkipInstruction(p) then
        result:=true;
    end;
  { Peephole optimizations for the skip instructions SBIS/SBIC.

    First the generic skip/jump inversion is tried:
      sbic/sbis X, y
      jmp .L1
      op
      .L1:
    into
      sbis/sbic X,y
      op
      .L1:

    If that does not apply, a pair of jumps around a single instruction is
    removed:
      sbiX X, y
      jmp .L1
      jmp .L2
      .L1:
      op
      .L2:
    into
      sbiX X,y
      .L1:
      op
      .L2:

    Returns true if the instruction stream was changed. }
  function TCpuAsmOptimizer.OptPass1SBI(var p : tai) : boolean;
    var
      hp1, hp2, hp3, hp4, hp5: tai;
    begin
      Result:=false;

      if InvertSkipInstruction(p) then
        result:=true
      else if GetNextInstruction(p, hp1) and
        { hp1: jmp .L1 }
        (hp1.typ=ait_instruction) and
        (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
        (taicpu(hp1).ops>0) and
        (taicpu(hp1).oper[0]^.typ = top_ref) and
        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and

        { hp2: jmp .L2 }
        GetNextInstruction(hp1, hp2) and
        (hp2.typ=ait_instruction) and
        (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
        (taicpu(hp2).ops>0) and
        (taicpu(hp2).oper[0]^.typ = top_ref) and
        (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and

        { hp3: .L1, the target of the first jump }
        GetNextInstruction(hp2, hp3) and
        (hp3.typ=ait_label) and
        (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and

        { hp4: op }
        GetNextInstruction(hp3, hp4) and
        (hp4.typ=ait_instruction) and

        { hp5: .L2, the target of the second jump; it must really be a
          label before it may be accessed as tai_label }
        GetNextInstruction(hp4, hp5) and
        (hp5.typ=ait_label) and
        (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
        begin
          DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);

          { both jumps go away, so each label loses one reference }
          tai_label(hp3).labsym.decrefs;
          tai_label(hp5).labsym.decrefs;

          AsmL.remove(hp1);
          taicpu(hp1).Free;

          AsmL.remove(hp2);
          taicpu(hp2).Free;

          result:=true;
        end;
    end;
  { Peephole optimizations for ANDI.

    1. Turn
         andi rx, #pow2
         brne l
         <op>
         l:
       Into
         sbrs rx, #(bit number of pow2)
         <op>
         l:
       (breq gives sbrc instead); rx has to be deallocated after p.

    2. Otherwise remove
         andi rx, #y
         dealloc rx
       if the flags are deallocated after p or not in use at all. }
  function TCpuAsmOptimizer.OptPass1ANDI(var p : tai) : boolean;
    var
      branch, skipped, target: tai;
      bitno : longint;
    begin
      Result:=false;

      if (taicpu(p).ops=2) and
         (taicpu(p).oper[1]^.typ=top_const) and
         ispowerof2(taicpu(p).oper[1]^.val,bitno) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         { conditional branch on the zero flag to a label ... }
         GetNextInstruction(p,branch) and
         MatchInstruction(branch,A_BRxx) and
         (taicpu(branch).condition in [C_EQ,C_NE]) and
         (taicpu(branch).ops>0) and
         (taicpu(branch).oper[0]^.typ=top_ref) and
         (taicpu(branch).oper[0]^.ref^.symbol is TAsmLabel) and
         { ... which jumps over exactly one instruction }
         GetNextInstruction(branch,skipped) and
         (skipped.typ=ait_instruction) and
         GetNextInstruction(skipped,target) and
         (target.typ=ait_label) and
         (taicpu(branch).oper[0]^.ref^.symbol=tai_label(target).labsym) then
        begin
          DebugMsg('Peephole AndiBr2Sbr performed', p);

          { the mask is replaced by the number of the tested bit }
          taicpu(p).oper[1]^.val:=bitno;

          if taicpu(branch).condition<>C_NE then
            taicpu(p).opcode:=A_SBRC
          else
            taicpu(p).opcode:=A_SBRS;

          asml.Remove(branch);
          branch.free;

          result:=true;
        end
      else if (taicpu(p).ops=2) and
         (taicpu(p).oper[0]^.typ=top_reg) and
         assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
         (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
        begin
          { neither the register nor the flags are needed afterwards }
          DebugMsg('Redundant Andi removed', p);

          result:=RemoveCurrentP(p);
        end;
    end;
  { turn
      add rX, <zero reg>
      adc rY, rZ
    into
      add rY, rZ

    Adding the zero register cannot produce a carry, so the following ADC
    behaves like a plain ADD. Once the leading ADD is removed the ADC has to
    be rewritten to ADD, otherwise it would consume whatever carry was left
    by earlier code. This mirrors what OptPass1SUB does for SUB/SBC. }
  function TCpuAsmOptimizer.OptPass1ADD(var p : tai) : boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_ADC) then
        begin
          DebugMsg('Peephole AddAdc2Add performed', p);

          { the carry input is known to be zero: plain ADD is sufficient }
          taicpu(hp1).opcode:=A_ADD;

          RemoveCurrentP(p, hp1);
          Result := True;
        end;
    end;
  { turn
      sub rX, <zero reg>
      sbc rY, rZ
    into
      sub rY, rZ

    The SBC directly following p is rewritten to SUB and p is removed. }
  function TCpuAsmOptimizer.OptPass1SUB(var p : tai) : boolean;
    var
      follower: tai;
    begin
      Result:=false;

      if taicpu(p).oper[1]^.reg<>GetDefaultZeroReg then
        exit;
      if not GetNextInstruction(p,follower) then
        exit;
      if not MatchInstruction(follower,A_SBC) then
        exit;

      DebugMsg('Peephole SubSbc2Sub performed', p);

      taicpu(follower).opcode:=A_SUB;

      RemoveCurrentP(p,follower);
      Result:=True;
    end;
  { fold
      mov reg2,reg0
      mov reg3,reg1
    to
      movw reg2,reg0

    Requires CPUAVR_HAS_MOVW, even numbered reg2 and reg0, and reg3/reg1
    being the registers directly following reg2/reg0. }
  function TCpuAsmOptimizer.OptPass2MOV(var p: tai): boolean;
    var
      highmov: tai;
    begin
      result:=false;

      if not(CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) then
        exit;

      { p: mov reg2,reg0 }
      if (taicpu(p).ops<>2) or
         (taicpu(p).oper[0]^.typ<>top_reg) or
         (taicpu(p).oper[1]^.typ<>top_reg) then
        exit;

      { highmov: mov reg3,reg1 }
      if not getnextinstruction(p,highmov) then
        exit;
      if not MatchInstruction(highmov,[A_MOV],2) then
        exit;
      if (taicpu(highmov).oper[0]^.typ<>top_reg) or
         (taicpu(highmov).oper[1]^.typ<>top_reg) then
        exit;

      { both moves together have to copy an aligned register pair }
      if getsupreg(taicpu(highmov).oper[0]^.reg)<>getsupreg(taicpu(p).oper[0]^.reg)+1 then
        exit;
      if (getsupreg(taicpu(p).oper[0]^.reg) mod 2)<>0 then
        exit;
      if (getsupreg(taicpu(p).oper[1]^.reg) mod 2)<>0 then
        exit;
      if getsupreg(taicpu(highmov).oper[1]^.reg)<>getsupreg(taicpu(p).oper[1]^.reg)+1 then
        exit;

      DebugMsg('Peephole MovMov2Movw performed', p);

      { the high registers are now used by p already }
      AllocRegBetween(taicpu(highmov).oper[0]^.reg,p,highmov,UsedRegs);
      AllocRegBetween(taicpu(highmov).oper[1]^.reg,p,highmov,UsedRegs);

      taicpu(p).opcode:=A_MOVW;
      asml.remove(highmov);
      highmov.free;
      result:=true;
    end;
  798. function TCpuAsmOptimizer.OptPass1CLR(var p : tai) : boolean;
  799. var
  800. hp1: tai;
  801. alloc, dealloc: tai_regalloc;
  802. begin
  803. Result:=false;
  804. { turn the common
  805. clr rX
  806. mov/ld rX, rY
  807. into
  808. mov/ld rX, rY
  809. }
  810. if (taicpu(p).ops=1) and
  811. (taicpu(p).oper[0]^.typ=top_reg) and
  812. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  813. (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  814. (hp1.typ=ait_instruction) and
  815. (taicpu(hp1).opcode in [A_MOV,A_LD]) and
  816. (taicpu(hp1).ops>0) and
  817. (taicpu(hp1).oper[0]^.typ=top_reg) and
  818. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
  819. begin
  820. DebugMsg('Peephole ClrMov2Mov performed', p);
  821. result:=RemoveCurrentP(p);
  822. end
  823. { turn
  824. clr rX
  825. ...
  826. adc rY, rX
  827. into
  828. ...
  829. adc rY, r1
  830. }
  831. else if (taicpu(p).ops=1) and
  832. (taicpu(p).oper[0]^.typ=top_reg) and
  833. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  834. (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  835. (hp1.typ=ait_instruction) and
  836. (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
  837. (taicpu(hp1).ops=2) and
  838. (taicpu(hp1).oper[1]^.typ=top_reg) and
  839. (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
  840. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
  841. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  842. begin
  843. DebugMsg('Peephole ClrAdc2Adc performed', p);
  844. taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
  845. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  846. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  847. if assigned(alloc) and assigned(dealloc) then
  848. begin
  849. asml.Remove(alloc);
  850. alloc.Free;
  851. asml.Remove(dealloc);
  852. dealloc.Free;
  853. end;
  854. result:=RemoveCurrentP(p);
  855. end;
  856. end;
  857. function TCpuAsmOptimizer.OptPass1PUSH(var p : tai) : boolean;
  858. var
  859. hp1, hp2, hp3: tai;
  860. begin
  861. Result:=false;
  862. { turn
  863. push reg0
  864. push reg1
  865. pop reg3
  866. pop reg2
  867. into
  868. movw reg2,reg0
  869. or
  870. mov reg3,reg1
  871. mov reg2,reg0
  872. }
  873. if GetNextInstruction(p,hp1) and
  874. MatchInstruction(hp1,A_PUSH) and
  875. GetNextInstruction(hp1,hp2) and
  876. MatchInstruction(hp2,A_POP) and
  877. GetNextInstruction(hp2,hp3) and
  878. MatchInstruction(hp3,A_POP) then
  879. begin
  880. if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
  881. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
  882. ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
  883. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
  884. ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
  885. begin
  886. DebugMsg('Peephole PushPushPopPop2Movw performed', p);
  887. taicpu(hp3).ops:=2;
  888. taicpu(hp3).opcode:=A_MOVW;
  889. taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
  890. { We're removing 3 concurrent instructions. Remove hp1
  891. and hp2 manually instead of calling RemoveCurrentP
  892. as this means we won't be calling UpdateUsedRegs 3 times }
  893. asml.Remove(hp1);
  894. hp1.Free;
  895. asml.Remove(hp2);
  896. hp2.Free;
  897. { By removing p last, we've guaranteed that p.Next is
  898. valid (storing it prior to removing the instructions
  899. may result in a dangling pointer if hp1 immediately
  900. follows p), and because hp1, hp2 and hp3 came from
  901. sequential calls to GetNextInstruction, it is
  902. guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
  903. RemoveCurrentP(p, hp3);
  904. Result := True;
  905. end
  906. else
  907. begin
  908. DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
  909. taicpu(p).ops:=2;
  910. taicpu(p).opcode:=A_MOV;
  911. taicpu(hp1).ops:=2;
  912. taicpu(hp1).opcode:=A_MOV;
  913. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  914. taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
  915. taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
  916. taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
  917. { life range of reg2 and reg3 is increased, fix register allocation entries }
  918. TransferUsedRegs(TmpUsedRegs);
  919. UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
  920. AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
  921. TransferUsedRegs(TmpUsedRegs);
  922. AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
  923. IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
  924. UpdateUsedRegs(tai(p.Next));
  925. asml.Remove(hp2);
  926. hp2.Free;
  927. asml.Remove(hp3);
  928. hp3.Free;
  929. result:=true;
  930. end
  931. end;
  932. end;
  933. function TCpuAsmOptimizer.OptPass1CALL(var p : tai) : boolean;
  934. var
  935. hp1: tai;
  936. begin
  937. Result:=false;
  938. if (cs_opt_level4 in current_settings.optimizerswitches) and
  939. GetNextInstruction(p,hp1) and
  940. MatchInstruction(hp1,A_RET) then
  941. begin
  942. DebugMsg('Peephole CallReg2Jmp performed', p);
  943. taicpu(p).opcode:=A_JMP;
  944. asml.Remove(hp1);
  945. hp1.Free;
  946. result:=true;
  947. end;
  948. end;
  949. function TCpuAsmOptimizer.OptPass1RCALL(var p : tai) : boolean;
  950. var
  951. hp1: tai;
  952. begin
  953. Result:=false;
  954. if (cs_opt_level4 in current_settings.optimizerswitches) and
  955. GetNextInstruction(p,hp1) and
  956. MatchInstruction(hp1,A_RET) then
  957. begin
  958. DebugMsg('Peephole RCallReg2RJmp performed', p);
  959. taicpu(p).opcode:=A_RJMP;
  960. asml.Remove(hp1);
  961. hp1.Free;
  962. result:=true;
  963. end;
  964. end;
  965. function TCpuAsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  966. var
  967. hp1, hp2: tai;
  968. i : Integer;
  969. alloc, dealloc: tai_regalloc;
  970. begin
  971. Result:=false;
  972. { change
  973. mov reg0, reg1
  974. dealloc reg0
  975. into
  976. dealloc reg0
  977. }
  978. if MatchOpType(taicpu(p),top_reg,top_reg) then
  979. begin
  980. TransferUsedRegs(TmpUsedRegs);
  981. UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
  982. if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
  983. { reg. allocation information before calls is not perfect, so don't do this before
  984. calls/icalls }
  985. GetNextInstruction(p,hp1) and
  986. not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
  987. begin
  988. DebugMsg('Peephole Mov2Nop performed', p);
  989. RemoveCurrentP(p, hp1);
  990. Result := True;
  991. exit;
  992. end;
  993. end;
  994. { turn
  995. mov reg0, reg1
  996. <op> reg2,reg0
  997. dealloc reg0
  998. into
  999. <op> reg2,reg1
  1000. }
  1001. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1002. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1003. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1004. (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
  1005. A_OUT,A_IN]) or
  1006. { the reference register of ST/STD cannot be replaced }
  1007. (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
  1008. (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
  1009. {(taicpu(hp1).ops=1) and
  1010. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1011. (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
  1012. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1013. begin
  1014. DebugMsg('Peephole MovOp2Op 1 performed', p);
  1015. for i := 0 to taicpu(hp1).ops-1 do
  1016. if taicpu(hp1).oper[i]^.typ=top_reg then
  1017. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  1018. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  1019. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1020. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  1021. if assigned(alloc) and assigned(dealloc) then
  1022. begin
  1023. asml.Remove(alloc);
  1024. alloc.Free;
  1025. asml.Remove(dealloc);
  1026. dealloc.Free;
  1027. end;
  1028. { life range of reg1 is increased }
  1029. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  1030. { p will be removed, update used register as we continue
  1031. with the next instruction after p }
  1032. result:=RemoveCurrentP(p);
  1033. end
  1034. { turn
  1035. mov reg1, reg0
  1036. <op> reg1,xxxx
  1037. dealloc reg1
  1038. into
  1039. <op> reg1,xxx
  1040. }
  1041. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  1042. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1043. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1044. MatchInstruction(hp1,[A_CP,A_CPC,A_CPI,A_SBRS,A_SBRC]) and
  1045. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1046. begin
  1047. DebugMsg('Peephole MovOp2Op 2 performed', p);
  1048. for i := 0 to taicpu(hp1).ops-1 do
  1049. if taicpu(hp1).oper[i]^.typ=top_reg then
  1050. if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
  1051. taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
  1052. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1053. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  1054. if assigned(alloc) and assigned(dealloc) then
  1055. begin
  1056. asml.Remove(alloc);
  1057. alloc.Free;
  1058. asml.Remove(dealloc);
  1059. dealloc.Free;
  1060. end;
  1061. { life range of reg1 is increased }
  1062. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  1063. { p will be removed, update used register as we continue
  1064. with the next instruction after p }
  1065. result:=RemoveCurrentP(p);
  1066. end
  1067. { remove
  1068. mov reg0,reg0
  1069. }
  1070. else if (taicpu(p).ops=2) and
  1071. (taicpu(p).oper[0]^.typ = top_reg) and
  1072. (taicpu(p).oper[1]^.typ = top_reg) and
  1073. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1074. begin
  1075. DebugMsg('Peephole RedundantMov performed', p);
  1076. result:=RemoveCurrentP(p);
  1077. end
  1078. {
  1079. Turn
  1080. mov rx,ry
  1081. op rx,rz
  1082. mov ry, rx
  1083. Into
  1084. op ry,rz
  1085. }
  1086. else if (taicpu(p).ops=2) and
  1087. MatchOpType(taicpu(p),top_reg,top_reg) and
  1088. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1089. (hp1.typ=ait_instruction) and
  1090. (taicpu(hp1).ops >= 1) and
  1091. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1092. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  1093. MatchInstruction(hp2,A_MOV) and
  1094. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1095. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1096. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  1097. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  1098. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
  1099. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
  1100. A_INC,A_DEC,
  1101. A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
  1102. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  1103. begin
  1104. DebugMsg('Peephole MovOpMov2Op performed', p);
  1105. if (taicpu(hp1).ops=2) and
  1106. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1107. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1108. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1109. taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;
  1110. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1111. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  1112. if assigned(alloc) and assigned(dealloc) then
  1113. begin
  1114. asml.Remove(alloc);
  1115. alloc.Free;
  1116. asml.Remove(dealloc);
  1117. dealloc.Free;
  1118. end;
  1119. asml.remove(hp2);
  1120. hp2.free;
  1121. result:=RemoveCurrentP(p);
  1122. end
  1123. {
  1124. Turn
  1125. mov rx,ry
  1126. op rx,rw
  1127. mov rw,rx
  1128. Into
  1129. op rw,ry
  1130. }
  1131. else if (taicpu(p).ops=2) and
  1132. MatchOpType(taicpu(p),top_reg,top_reg) and
  1133. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1134. (hp1.typ=ait_instruction) and
  1135. (taicpu(hp1).ops = 2) and
  1136. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1137. GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
  1138. (hp2.typ=ait_instruction) and
  1139. (taicpu(hp2).opcode=A_MOV) and
  1140. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1141. (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1142. (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
  1143. (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  1144. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1145. (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
  1146. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
  1147. begin
  1148. DebugMsg('Peephole MovOpMov2Op2 performed', p);
  1149. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1150. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1151. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
  1152. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
  1153. if assigned(alloc) and assigned(dealloc) then
  1154. begin
  1155. asml.Remove(alloc);
  1156. alloc.Free;
  1157. asml.Remove(dealloc);
  1158. dealloc.Free;
  1159. end;
  1160. result:=RemoveCurrentP(p);
  1161. asml.remove(hp2);
  1162. hp2.free;
  1163. end
  1164. {
  1165. This removes the first mov from
  1166. mov rX,...
  1167. mov rX,...
  1168. }
  1169. else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) and
  1170. { test condition here already instead in the while loop only, else MovMov2Mov 2 might be oversight }
  1171. MatchInstruction(hp1,A_MOV) and
  1172. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
  1173. while MatchInstruction(hp1,A_MOV) and
  1174. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1175. { don't remove the first mov if the second is a mov rX,rX }
  1176. not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
  1177. begin
  1178. DebugMsg('Peephole MovMov2Mov 1 performed', p);
  1179. RemoveCurrentP(p,hp1);
  1180. Result := True;
  1181. GetNextInstruction(hp1,hp1);
  1182. if not assigned(hp1) then
  1183. break;
  1184. end
  1185. {
  1186. This removes the second mov from
  1187. mov rX,rY
  1188. ...
  1189. mov rX,rY
  1190. if rX and rY are not modified in-between
  1191. }
  1192. else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
  1193. MatchInstruction(hp1,A_MOV) and
  1194. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1195. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
  1196. not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
  1197. begin
  1198. DebugMsg('Peephole MovMov2Mov 2 performed', p);
  1199. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1200. RemoveInstruction(hp1);
  1201. Result := True;
  1202. end;
  1203. end;
  1204. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1205. var
  1206. hp1,hp2: tai;
  1207. begin
  1208. result := false;
  1209. case p.typ of
  1210. ait_instruction:
  1211. begin
  1212. {
  1213. change
  1214. <op> reg,x
  1215. cp reg,r1
  1216. into
  1217. <op>s reg,x
  1218. }
  1219. { this optimization can applied only to the currently enabled operations because
  1220. the other operations do not update all flags and FPC does not track flag usage }
  1221. if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
  1222. A_INC,A_LSL,A_LSR,
  1223. A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
  1224. GetNextInstruction(p, hp1) and
  1225. ((MatchInstruction(hp1, A_CP) and
  1226. (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  1227. (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
  1228. ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  1229. (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
  1230. (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
  1231. A_LSL,A_LSR,
  1232. A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
  1233. (MatchInstruction(hp1, A_CPI) and
  1234. (taicpu(p).opcode = A_ANDI) and
  1235. (taicpu(p).oper[1]^.typ=top_const) and
  1236. (taicpu(hp1).oper[1]^.typ=top_const) and
  1237. (taicpu(hp1).oper[1]^.val=0))) and
  1238. GetNextInstruction(hp1, hp2) and
  1239. { be careful here, following instructions could use other flags
  1240. however after a jump fpc never depends on the value of flags }
  1241. { All above instructions set Z and N according to the following
  1242. Z := result = 0;
  1243. N := result[7];
  1244. EQ = Z=1; NE = Z=0;
  1245. MI = N=1; PL = N=0; }
  1246. MatchInstruction(hp2, A_BRxx) and
  1247. ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
  1248. { sub/sbc set all flags }
  1249. (taicpu(p).opcode in [A_SUB,A_SBI])){ and
  1250. no flag allocation tracking implemented yet on avr
  1251. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
  1252. begin
  1253. { move flag allocation if possible }
  1254. { no flag allocation tracking implemented yet on avr
  1255. GetLastInstruction(hp1, hp2);
  1256. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  1257. if assigned(hp2) then
  1258. begin
  1259. asml.Remove(hp2);
  1260. asml.insertbefore(hp2, p);
  1261. end;
  1262. }
  1263. // If we compare to the same value we are masking then invert the comparison
  1264. if (taicpu(hp1).opcode=A_CPI) or
  1265. { sub/sbc with reverted? }
  1266. ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
  1267. taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
  1268. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  1269. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
  1270. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  1271. DebugMsg('Peephole OpCp2Op performed', p);
  1272. asml.remove(hp1);
  1273. hp1.free;
  1274. Result:=true;
  1275. end
  1276. else
  1277. case taicpu(p).opcode of
  1278. A_LDI:
  1279. Result:=OptPass1LDI(p);
  1280. A_STS:
  1281. Result:=OptPass1STS(p);
  1282. A_LDS:
  1283. Result:=OptPass1LDS(p);
  1284. A_LDD:
  1285. Result:=OptPass1LDD(p);
  1286. A_IN:
  1287. Result:=OptPass1IN(p);
  1288. A_SBRS,
  1289. A_SBRC:
  1290. Result:=OptPass1SBR(p);
  1291. A_ANDI:
  1292. Result:=OptPass1ANDI(p);
  1293. A_ADD:
  1294. Result:=OptPass1ADD(p);
  1295. A_SUB:
  1296. Result:=OptPass1SUB(p);
  1297. A_CLR:
  1298. Result:=OptPass1CLR(p);
  1299. A_PUSH:
  1300. Result:=OptPass1PUSH(p);
  1301. A_CALL:
  1302. Result:=OptPass1CALL(p);
  1303. A_RCALL:
  1304. Result:=OptPass1RCALL(p);
  1305. A_MOV:
  1306. Result:=OptPass1MOV(p);
  1307. A_SBIC,
  1308. A_SBIS:
  1309. Result:=OptPass1SBI(p);
  1310. end;
  1311. end;
  1312. end;
  1313. end;
  1314. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  1315. begin
  1316. result := false;
  1317. case p.typ of
  1318. ait_instruction:
  1319. begin
  1320. case taicpu(p).opcode of
  1321. A_MOV:
  1322. Result:=OptPass2MOV(p);
  1323. end;
  1324. end;
  1325. end;
  1326. end;
  1327. begin
  1328. casmoptimizer:=TCpuAsmOptimizer;
  1329. End.