aoptx86.pas 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer class for x86 targets, derived from TAsmOptimizer.
    It bundles register-dependency queries, register allocation bookkeeping
    and the individual optimization passes implemented in this unit. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { returns true if instruction hp loads reg with a value that does not
      depend on reg's previous contents }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;

    { checks whether reading the value in reg1 depends on the value of reg2.
      This is very similar to SuperRegisterEquals, except it takes into account
      that R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { optimization passes working on the instruction p }
    procedure PostPeepholeOptMov(const p : tai);
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;

    { inserts s as an assembler comment before p when DEBUG_AOPTCPU is
      defined, does nothing otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;

    { register allocation bookkeeping and instruction classification helpers }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { MatchInstruction: true if instr is an instruction whose opcode is one of
    the given opcodes and whose operand size is in opsize; an empty opsize
    set accepts every operand size }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if the operand is the given register, the given
    constant, or equal to a second operand respectively }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { returns true if both references have identical fields }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true, if ref is a plain reference without offset, segment or
    symbols that uses only the registers passed as base and index;
    NR_INVALID for base or index accepts any register in that position }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

  { returns true if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand size
    is a member of opsize. An empty opsize set accepts any operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if (instr.typ <> ait_instruction) or
         (taicpu(instr).opcode <> op) then
        result := false
      else
        result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is a member of opsize. An empty opsize set accepts
    any operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      { neither of the two accepted opcodes? }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is a member of opsize. An empty opsize set
    accepts any operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      { none of the three accepted opcodes? }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is a member of opsize. An empty opsize set accepts
    any operand size; an empty ops array never matches.

    The instruction-kind and operand-size tests do not depend on the opcode
    being compared, so they are evaluated once up front instead of once per
    array element. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for op in ops do
        if taicpu(instr).opcode = op then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := (oper.reg = reg)
      else
        result := false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := (oper.val = a)
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same contents.
    Constants are compared by value, registers by register and references
    with RefsEqual. Two operands of the same but any other type raise
    internal error 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := false;
      { operands of different types never match }
      if oper1.typ <> oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result := (oper1.val = oper2.val);
        top_reg:
          result := (oper1.reg = oper2.reg);
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index,
    scale factor, symbol, address type and relative symbol. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := false;
      if (r1.offset <> r2.offset) or
         (r1.segment <> r2.segment) then
        exit;
      if (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        exit;
      if (r1.symbol <> r2.symbol) or
         (r1.refaddr <> r2.refaddr) then
        exit;
      Result := (r1.relsymbol = r2.relsymbol);
    end;
  { Returns true if ref has no offset, a scale factor of 0 or 1, no segment
    and no symbols, and uses the given base and index registers.
    Passing NR_INVALID as base or index accepts any register in that
    position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if ref.offset<>0 then
        exit;
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) then
        exit;
      if (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and
         (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Same test as MatchReference, except that the offset of ref is not
    looked at: the scale factor must be 0 or 1, there must be no segment and
    no symbols, and the base and index registers must match.
    Passing NR_INVALID as base or index accepts any register in that
    position. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) then
        exit;
      if (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and
         (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands, the first of type ot0
    and the second of type ot1. instr is cast to taicpu without checking
    its kind, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      if taicpu(instr).ops<>2 then
        Result:=false
      else
        Result:=(taicpu(instr).oper[0]^.typ=ot0) and
                (taicpu(instr).oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; compiles to an empty routine otherwise. The method is declared
    inline in the class declaration. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Returns true if writing a new value to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers sharing a super register always do. For integer
    registers the answer depends on the sub register written. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { the super registers are equal, which is sufficient for everything
        but integer registers }
      Result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          { A write to R_SUBL leaves R_SUBH alone and, if reg2 is R_SUBW or
            larger, keeps its upper bits, i.e. it behaves like
              reg2 := (reg2 and $ffffff00) or byte(reg1); }
          Result:=(getsubreg(reg2)=R_SUBL);
        R_SUBH:
          { A write to R_SUBH leaves R_SUBL alone and, if reg2 is R_SUBW or
            larger, behaves like
              reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
          Result:=(getsubreg(reg2)=R_SUBH);
        R_SUBW:
          { A write to R_SUBW keeps the upper 16 bits of anything that is
            R_SUBD or larger:
              reg2 := (reg2 and $ffff0000) or word(reg1); }
          Result:=(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
        R_SUBD,
        R_SUBQ:
          { a write to R_SUBD always overwrites every other subregister,
            because it clears the high 32 bits of R_SUBQ on x86_64 }
          Result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the contents of
    reg2. Registers with different super registers are independent;
    non-integer registers sharing a super register always depend on each
    other. Among integer sub registers only the R_SUBL/R_SUBH pair is
    independent. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { the super registers are equal, which is sufficient for everything
        but integer registers }
      Result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          { the low byte does not overlap with the high byte }
          Result:=(getsubreg(reg2)<>R_SUBH);
        R_SUBH:
          { the high byte does not overlap with the low byte }
          Result:=(getsubreg(reg2)<>R_SUBL);
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Allocates register reg between (and including) the instructions p1 and
    p2. The type of p1 and p2 must not be in SkipInstr.

    reg             register to keep allocated; it is widened to its whole
                    super register for integer and MM registers
    p1, p2          first and last instruction of the range
    initialusedregs register set in effect at p1; reg is added to it when an
                    allocation marker has to be inserted in front of p1

    Nothing is done for NR_ESP, the current frame pointer, or when p1 is nil.
    All allocation/deallocation markers for reg inside the range are removed;
    afterwards an allocation is re-inserted in front of the range if the
    first removed marker was an allocation, and a deallocation behind the
    range if the last removed marker was a deallocation.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is); an
    assigned optinfo currently raises internal error 2014022301. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
  {$ifdef EXTDEBUG}
  {    if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
  {$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
          not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
  {$ifdef allocregdebug}
      { Debug markers around the range. They name the register that is
        actually allocated, use the same insertllitem form as the rest of
        this method, and skip the end marker when there is no instruction
        after the range (p2 is nil then). }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
  {$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything that is neither an instruction nor a register
              allocation marker }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    { widen the marker so that it is removed below }
                    tai_regalloc(p1).reg:=reg;
                  end;

                if tai_regalloc(p1).reg=reg then
                  begin
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          { restore the allocation state at the borders of the range }
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true if instruction hp loads (the super register of) reg with a
    value that does not depend on the previous contents of reg.

    reg  register to check
    hp   list entry to inspect; nil or a non-instruction yields false

    Recognised are:
      - move-like instructions (mov, movzx, movsx, lea and the listed
        SSE/AVX moves) whose destination is reg and whose source is a
        constant, another register, or a reference not using reg
      - pop into reg
      - three operand imul with reg as destination and a source that does
        not involve reg

    The register field of an operand is only read after the operand has
    been checked to be a register operand: pop and imul also accept memory
    operands, for which the register field does not hold a register. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          if (p.oper[1]^.typ = top_reg) and
             SuperRegistersEqual(p.oper[1]^.reg,reg) then
            case p.oper[0]^.typ of
              top_const:
                Result := true;
              top_reg:
                Result := not(SuperRegistersEqual(p.oper[0]^.reg,reg));
              top_ref:
                Result := not RegInRef(reg,p.oper[0]^.ref^);
            end;
        A_POP:
          { pop may also target memory }
          Result :=
            (p.oper[0]^.typ = top_reg) and
            SuperRegistersEqual(p.oper[0]^.reg,reg);
        A_IMUL:
          if (p.ops=3) and
             (p.oper[2]^.typ = top_reg) and
             SuperRegistersEqual(p.oper[2]^.reg,reg) then
            { the multiplicand is either a register or a memory location }
            case p.oper[1]^.typ of
              top_reg:
                Result := not(SuperRegistersEqual(p.oper[1]^.reg,reg));
              top_ref:
                Result := not RegInRef(reg,p.oper[1]^.ref^);
            end;
      end;
    end;
  { Returns true if p starts a procedure exit sequence, i.e. p is one of
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret
    Anything that is not an instruction yields false. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          { leave must be directly followed by ret }
          result:=GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          begin
            { mov %ebp,%esp }
            if (taicpu(p).oper[0]^.typ<>top_reg) or
               (taicpu(p).oper[0]^.reg<>NR_EBP) or
               (taicpu(p).oper[1]^.typ<>top_reg) or
               (taicpu(p).oper[1]^.reg<>NR_ESP) then
              exit;
            { pop %ebp }
            if not(GetNextInstruction(p,second)) or
               (second.typ<>ait_instruction) or
               (taicpu(second).opcode<>A_POP) or
               (taicpu(second).oper[0]^.typ<>top_reg) or
               (taicpu(second).oper[0]^.reg<>NR_EBP) then
              exit;
            { ret }
            result:=GetNextInstruction(second,third) and
              (third.typ=ait_instruction) and
              (taicpu(third).opcode=A_RET);
          end;
      end;
    end;
  { Returns true if hp1 is an arithmetic/logical instruction that modifies
    reg in place without reading it through its other operand:
      - add/sub/or/xor/and/shl/shr/sar with reg as second operand and a
        constant or a register different from reg as first operand
      - inc/dec/neg/not with reg as the only operand checked
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the destination has to be reg itself ... }
          if (hp1.oper[1]^.typ = top_reg) and
             (hp1.oper[1]^.reg = reg) then
            { ... and the source must not be reg }
            Result :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            Result := (hp1.oper[0]^.reg = reg);
      end;
    end;
  { Removes the last deallocation marker before p of the register(s) used
    for the function result of the current procedure, selected by the type
    of the result:
      - EAX for the listed non-ordinal result types
      - EAX for ordinal results of non-zero size, additionally EDX when the
        size is 8 (int64/qword)
    Other result types are left untouched. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first deallocation marker of
      the integer super register supreg. The walk ends at the start of the
      list or at the first entry for which regInInstruction reports a use of
      the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                break;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { int64/qword results use a second register }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  { First pass peephole optimizations for a register to register
    vmovaps/vmovapd at p.

    p        instruction to optimize; updated to the instruction that
             takes its place when p itself is removed
    returns  true if the instruction list was changed

    Handled patterns:
      vmova* reg1,reg1                  => removed
      vmova* reg1,reg2; vmova* reg2,reg3 (reg2 unused afterwards)
                                        => vmova* reg1,reg3
      vmova* reg1,reg2; vmova* reg2,reg1
                                        => vmova* reg1,reg2
      vmova* reg1,reg2; vfma* x,y,reg2; vmova* reg2,reg1 (reg2 unused
      afterwards)                       => vfma* x,y,reg1

    Each copy of the used register state taken with CopyUsedRegs is handed
    back with ReleaseUsedRegs, in the same way as OptPass1MOV does. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                { the trailing move has to copy the result from reg2 back to
                  reg1, otherwise it is an unrelated load of reg1 that must
                  not be removed }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  x,y,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                      { the list and p changed, report it like the other
                        transformations do }
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { First pass peephole optimization for a three operand vector operation
    at p whose result is immediately copied to another register:

      vop    x,y,reg1
      vmova* reg1,reg2
      dealloc reg1
      =>
      vop    x,y,reg2

    p        the vector operation; its third operand is taken as the
             destination
    returns  true if the following move was folded into p

    The copy of the used register state taken with CopyUsedRegs is handed
    back with ReleaseUsedRegs, in the same way as OptPass1MOV does. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        { the register field of the move's source is read below, so make
          sure it is a register operand }
        (taicpu(hp1).oper[0]^.typ=top_reg) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  564. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  565. var
  566. hp1, hp2: tai;
  567. TmpUsedRegs : TAllUsedRegs;
  568. GetNextIntruction_p : Boolean;
  569. begin
  570. Result:=false;
  571. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  572. if GetNextIntruction_p and
  573. MatchInstruction(hp1,A_AND,[]) and
  574. (taicpu(p).oper[1]^.typ = top_reg) and
  575. MatchOpType(taicpu(hp1),top_const,top_reg) and
  576. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  577. case taicpu(p).opsize Of
  578. S_L:
  579. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  580. begin
  581. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  582. asml.remove(hp1);
  583. hp1.free;
  584. Result:=true;
  585. exit;
  586. end;
  587. end
  588. else if GetNextIntruction_p and
  589. MatchInstruction(hp1,A_MOV,[]) and
  590. (taicpu(p).oper[1]^.typ = top_reg) and
  591. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  592. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  593. begin
  594. CopyUsedRegs(TmpUsedRegs);
  595. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  596. { we have
  597. mov x, %treg
  598. mov %treg, y
  599. }
  600. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  601. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  602. { we've got
  603. mov x, %treg
  604. mov %treg, y
  605. with %treg is not used after }
  606. case taicpu(p).oper[0]^.typ Of
  607. top_reg:
  608. begin
  609. { change
  610. mov %reg, %treg
  611. mov %treg, y
  612. to
  613. mov %reg, y
  614. }
  615. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  616. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  617. asml.remove(hp1);
  618. hp1.free;
  619. ReleaseUsedRegs(TmpUsedRegs);
  620. Exit;
  621. end;
  622. top_ref:
  623. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  624. begin
  625. { change
  626. mov mem, %treg
  627. mov %treg, %reg
  628. to
  629. mov mem, %reg"
  630. }
  631. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  632. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  633. asml.remove(hp1);
  634. hp1.free;
  635. ReleaseUsedRegs(TmpUsedRegs);
  636. Exit;
  637. end;
  638. end;
  639. ReleaseUsedRegs(TmpUsedRegs);
  640. end
  641. else
  642. { Change
  643. mov %reg1, %reg2
  644. xxx %reg2, ???
  645. to
  646. mov %reg1, %reg2
  647. xxx %reg1, ???
  648. to avoid a write/read penalty
  649. }
  650. if MatchOpType(taicpu(p),top_reg,top_reg) and
  651. GetNextInstruction(p,hp1) and
  652. (tai(hp1).typ = ait_instruction) and
  653. (taicpu(hp1).ops >= 1) and
  654. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  655. { we have
  656. mov %reg1, %reg2
  657. XXX %reg2, ???
  658. }
  659. begin
  660. if ((taicpu(hp1).opcode = A_OR) or
  661. (taicpu(hp1).opcode = A_TEST)) and
  662. (taicpu(hp1).oper[1]^.typ = top_reg) and
  663. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  664. { we have
  665. mov %reg1, %reg2
  666. test/or %reg2, %reg2
  667. }
  668. begin
  669. CopyUsedRegs(TmpUsedRegs);
  670. { reg1 will be used after the first instruction,
  671. so update the allocation info }
  672. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  673. if GetNextInstruction(hp1, hp2) and
  674. (hp2.typ = ait_instruction) and
  675. taicpu(hp2).is_jmp and
  676. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  677. { change
  678. mov %reg1, %reg2
  679. test/or %reg2, %reg2
  680. jxx
  681. to
  682. test %reg1, %reg1
  683. jxx
  684. }
  685. begin
  686. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  687. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  688. asml.remove(p);
  689. p.free;
  690. p := hp1;
  691. ReleaseUsedRegs(TmpUsedRegs);
  692. Exit;
  693. end
  694. else
  695. { change
  696. mov %reg1, %reg2
  697. test/or %reg2, %reg2
  698. to
  699. mov %reg1, %reg2
  700. test/or %reg1, %reg1
  701. }
  702. begin
  703. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  704. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  705. end;
  706. ReleaseUsedRegs(TmpUsedRegs);
  707. end
  708. end
  709. else
  710. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  711. x >= RetOffset) as it doesn't do anything (it writes either to a
  712. parameter or to the temporary storage room for the function
  713. result)
  714. }
  715. if GetNextIntruction_p and
  716. (tai(hp1).typ = ait_instruction) then
  717. begin
  718. if IsExitCode(hp1) and
  719. MatchOpType(p,top_reg,top_ref) and
  720. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  721. not(assigned(current_procinfo.procdef.funcretsym) and
  722. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  723. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  724. begin
  725. asml.remove(p);
  726. p.free;
  727. p:=hp1;
  728. DebugMsg('Peephole removed deadstore before leave/ret',p);
  729. RemoveLastDeallocForFuncRes(p);
  730. exit;
  731. end
  732. { change
  733. mov reg1, mem1
  734. cmp x, mem1
  735. to
  736. mov reg1, mem1
  737. cmp x, reg1
  738. }
  739. else if MatchOpType(p,top_reg,top_ref) and
  740. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  741. (taicpu(hp1).oper[1]^.typ = top_ref) and
  742. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  743. begin
  744. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  745. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  746. end;
  747. end;
  748. { Next instruction is also a MOV ? }
  749. if GetNextIntruction_p and
  750. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  751. begin
  752. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  753. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  754. { mov reg1, mem1 or mov mem1, reg1
  755. mov mem2, reg2 mov reg2, mem2}
  756. begin
  757. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  758. { mov reg1, mem1 or mov mem1, reg1
  759. mov mem2, reg1 mov reg2, mem1}
  760. begin
  761. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  762. { Removes the second statement from
  763. mov reg1, mem1/reg2
  764. mov mem1/reg2, reg1 }
  765. begin
  766. if taicpu(p).oper[0]^.typ=top_reg then
  767. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  768. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  769. asml.remove(hp1);
  770. hp1.free;
  771. Result:=true;
  772. exit;
  773. end
  774. else
  775. begin
  776. CopyUsedRegs(TmpUsedRegs);
  777. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  778. if (taicpu(p).oper[1]^.typ = top_ref) and
  779. { mov reg1, mem1
  780. mov mem2, reg1 }
  781. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  782. GetNextInstruction(hp1, hp2) and
  783. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  784. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  785. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  786. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  787. { change to
  788. mov reg1, mem1 mov reg1, mem1
  789. mov mem2, reg1 cmp reg1, mem2
  790. cmp mem1, reg1
  791. }
  792. begin
  793. asml.remove(hp2);
  794. hp2.free;
  795. taicpu(hp1).opcode := A_CMP;
  796. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  797. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  798. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  799. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  800. end;
  801. ReleaseUsedRegs(TmpUsedRegs);
  802. end;
  803. end
  804. else if (taicpu(p).oper[1]^.typ=top_ref) and
  805. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  806. begin
  807. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  808. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  809. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  810. end
  811. else
  812. begin
  813. CopyUsedRegs(TmpUsedRegs);
  814. if GetNextInstruction(hp1, hp2) and
  815. MatchOpType(taicpu(p),top_ref,top_reg) and
  816. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  817. (taicpu(hp1).oper[1]^.typ = top_ref) and
  818. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  819. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  820. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  821. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  822. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  823. { mov mem1, %reg1
  824. mov %reg1, mem2
  825. mov mem2, reg2
  826. to:
  827. mov mem1, reg2
  828. mov reg2, mem2}
  829. begin
  830. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  831. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  832. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  833. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  834. asml.remove(hp2);
  835. hp2.free;
  836. end
  837. {$ifdef i386}
  838. { this is enabled for i386 only, as the rules to create the reg sets below
  839. are too complicated for x86-64, so this makes this code too error prone
  840. on x86-64
  841. }
  842. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  843. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  844. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  845. { mov mem1, reg1 mov mem1, reg1
  846. mov reg1, mem2 mov reg1, mem2
  847. mov mem2, reg2 mov mem2, reg1
  848. to: to:
  849. mov mem1, reg1 mov mem1, reg1
  850. mov mem1, reg2 mov reg1, mem2
  851. mov reg1, mem2
  852. or (if mem1 depends on reg1
  853. and/or if mem2 depends on reg2)
  854. to:
  855. mov mem1, reg1
  856. mov reg1, mem2
  857. mov reg1, reg2
  858. }
  859. begin
  860. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  861. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  862. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  863. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  864. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  865. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  866. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  867. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  868. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  869. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  870. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  871. end
  872. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  873. begin
  874. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  875. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  876. end
  877. else
  878. begin
  879. asml.remove(hp2);
  880. hp2.free;
  881. end
  882. {$endif i386}
  883. ;
  884. ReleaseUsedRegs(TmpUsedRegs);
  885. end;
  886. end
  887. (* { movl [mem1],reg1
  888. movl [mem1],reg2
  889. to
  890. movl [mem1],reg1
  891. movl reg1,reg2
  892. }
  893. else if (taicpu(p).oper[0]^.typ = top_ref) and
  894. (taicpu(p).oper[1]^.typ = top_reg) and
  895. (taicpu(hp1).oper[0]^.typ = top_ref) and
  896. (taicpu(hp1).oper[1]^.typ = top_reg) and
  897. (taicpu(p).opsize = taicpu(hp1).opsize) and
  898. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  899. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  900. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  901. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  902. else*)
  903. { movl const1,[mem1]
  904. movl [mem1],reg1
  905. to
  906. movl const1,reg1
  907. movl reg1,[mem1]
  908. }
  909. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  910. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  911. (taicpu(p).opsize = taicpu(hp1).opsize) and
  912. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  913. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  914. begin
  915. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  916. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  917. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  918. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  919. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  920. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  921. end
  922. end
  923. else if (taicpu(p).oper[1]^.typ = top_reg) and
  924. GetNextIntruction_p and
  925. (hp1.typ = ait_instruction) and
  926. GetNextInstruction(hp1, hp2) and
  927. MatchInstruction(hp2,A_MOV,[]) and
  928. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  929. (taicpu(hp2).oper[0]^.typ=top_reg) and
  930. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  931. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  932. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  933. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  934. ) then
  935. { change movsX/movzX reg/ref, reg2
  936. add/sub/or/... reg3/$const, reg2
  937. mov reg2 reg/ref
  938. to add/sub/or/... reg3/$const, reg/ref }
  939. begin
  940. CopyUsedRegs(TmpUsedRegs);
  941. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  942. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  943. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  944. begin
  945. { by example:
  946. movswl %si,%eax movswl %si,%eax p
  947. decl %eax addl %edx,%eax hp1
  948. movw %ax,%si movw %ax,%si hp2
  949. ->
  950. movswl %si,%eax movswl %si,%eax p
  951. decw %eax addw %edx,%eax hp1
  952. movw %ax,%si movw %ax,%si hp2
  953. }
  954. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  955. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  956. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  957. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  958. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  959. {
  960. ->
  961. movswl %si,%eax movswl %si,%eax p
  962. decw %si addw %dx,%si hp1
  963. movw %ax,%si movw %ax,%si hp2
  964. }
  965. case taicpu(hp1).ops of
  966. 1:
  967. begin
  968. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  969. if taicpu(hp1).oper[0]^.typ=top_reg then
  970. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  971. end;
  972. 2:
  973. begin
  974. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  975. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  976. (taicpu(hp1).opcode<>A_SHL) and
  977. (taicpu(hp1).opcode<>A_SHR) and
  978. (taicpu(hp1).opcode<>A_SAR) then
  979. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  980. end;
  981. else
  982. internalerror(2008042701);
  983. end;
  984. {
  985. ->
  986. decw %si addw %dx,%si p
  987. }
  988. asml.remove(p);
  989. asml.remove(hp2);
  990. p.Free;
  991. hp2.Free;
  992. p := hp1;
  993. end;
  994. ReleaseUsedRegs(TmpUsedRegs);
  995. end
  996. else if GetNextIntruction_p and
  997. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  998. GetNextInstruction(hp1, hp2) and
  999. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1000. MatchOperand(Taicpu(p).oper[0]^,0) and
  1001. (Taicpu(p).oper[1]^.typ = top_reg) and
  1002. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1003. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1004. { mov reg1,0
  1005. bts reg1,operand1 --> mov reg1,operand2
  1006. or reg1,operand2 bts reg1,operand1}
  1007. begin
  1008. Taicpu(hp2).opcode:=A_MOV;
  1009. asml.remove(hp1);
  1010. insertllitem(hp2,hp2.next,hp1);
  1011. asml.remove(p);
  1012. p.free;
  1013. p:=hp1;
  1014. end
  1015. else if GetNextIntruction_p and
  1016. MatchInstruction(hp1,A_LEA,[S_L]) and
  1017. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1018. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1019. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1020. ) or
  1021. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1022. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1023. )
  1024. ) then
  1025. { mov reg1,ref
  1026. lea reg2,[reg1,reg2]
  1027. to
  1028. add reg2,ref}
  1029. begin
  1030. CopyUsedRegs(TmpUsedRegs);
  1031. { reg1 may not be used afterwards }
  1032. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1033. begin
  1034. Taicpu(hp1).opcode:=A_ADD;
  1035. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1036. DebugMsg('Peephole MovLea2Add done',hp1);
  1037. asml.remove(p);
  1038. p.free;
  1039. p:=hp1;
  1040. end;
  1041. ReleaseUsedRegs(TmpUsedRegs);
  1042. end;
  1043. end;
  { Second-pass peephole handling for the MOV instruction p.

    Two rewrites are attempted:

      1. mov reg1, reg2
         mov/movzx/movsx (reg2, ..), reg2
           -->
         mov/movzx/movsx (reg1, ..), reg2

      2. mov (ref), reg
         add/sub/or/.../lea ..., reg
         mov reg, (ref)
           -->
         add/sub/or/... ..., (ref)
         only when reg is not used after the final mov.

    p is updated to point at the surviving instruction whenever p itself
    is removed.  Result is set to true only by rewrite 1. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      RegsScratch : TAllUsedRegs;
      hpNext, hpAfter : tai;
    begin
      Result:=false;

      { ---- rewrite 1: forward the copied register into the address ---- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, hpNext) and
         MatchInstruction(hpNext,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(hpNext),top_ref,top_reg) and
         ((taicpu(hpNext).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
          or
          (taicpu(hpNext).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
         ) and
         (getsupreg(taicpu(hpNext).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { replace every use of reg2 inside the reference by reg1 }
          if (taicpu(hpNext).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hpNext).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hpNext).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hpNext).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          { the register copy is no longer needed }
          asml.remove(p);
          p.free;
          p := hpNext;
          Result:=true;
          exit;
        end;

      { ---- rewrite 2: fold load / operate / store into one memory op ---- }
      if (taicpu(p).oper[0]^.typ = top_ref) and
         GetNextInstruction(p,hpNext) and
         (hpNext.typ = ait_instruction) and
         { the GetNextInstruction(hpNext,hpAfter) call is deliberately repeated
           in both alternatives so that the cheap, rarely true checks are
           evaluated first }
         ((IsFoldableArithOp(taicpu(hpNext),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(hpNext,hpAfter) and
           MatchInstruction(hpAfter,A_MOV,[])
          ) or
          ((taicpu(hpNext).opcode=A_LEA) and
           GetNextInstruction(hpNext,hpAfter) and
           MatchInstruction(hpAfter,A_MOV,[]) and
           ((MatchReference(taicpu(hpNext).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
             (taicpu(hpNext).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
            ) or
            (MatchReference(taicpu(hpNext).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
             (taicpu(hpNext).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
            (MatchReferenceWithOffset(taicpu(hpNext).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
            (MatchReferenceWithOffset(taicpu(hpNext).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
           ) and
           ((MatchOperand(taicpu(p).oper[1]^,taicpu(hpAfter).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hpNext,UsedRegs)))
          )
         ) and
         MatchOperand(taicpu(hpNext).oper[taicpu(hpNext).ops-1]^,taicpu(hpAfter).oper[0]^) and
         (taicpu(hpAfter).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(RegsScratch);
          UpdateUsedRegs(RegsScratch,tai(hpNext.next));
          { the store must write back to the location that was loaded, and
            the register must not be used after the store }
          if (RefsEqual(taicpu(hpAfter).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hpAfter).oper[0]^.reg,hpAfter, RegsScratch))) then
            begin
              case taicpu(hpNext).opcode of
                A_LEA :
                  begin
                    { lea becomes an add whose source is the other register
                      of the address, or its offset if there is none }
                    taicpu(hpNext).opcode:=A_ADD;
                    if (taicpu(hpNext).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hpNext).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(hpNext).loadreg(0,taicpu(hpNext).oper[0]^.ref^.index)
                    else if (taicpu(hpNext).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hpNext).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(hpNext).loadreg(0,taicpu(hpNext).oper[0]^.ref^.base)
                    else
                      taicpu(hpNext).loadconst(0,taicpu(hpNext).oper[0]^.ref^.offset);
                    taicpu(hpNext).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hpNext);
                  end;
                A_INC,A_DEC,A_NOT,A_NEG :
                  { single operand instructions: the memory location is operand 0 }
                  taicpu(hpNext).loadRef(0,taicpu(p).oper[0]^.ref^);
                else
                  { two operand instructions: the memory location is the destination }
                  taicpu(hpNext).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              { drop the load ... }
              asml.remove(p);
              p.free;
              { ... and the store }
              asml.remove(hpAfter);
              hpAfter.free;
              p := hpNext
            end;
          ReleaseUsedRegs(RegsScratch);
        end;
    end;
  { Second-pass peephole handling for the IMUL instruction p.

    Changes
        mov  reg1,reg2
        imul y,reg2
    into
        imul y,reg1,reg2
    where y is a constant or a reference with refaddr=addr_full, and removes
    the mov.  Result is true when the rewrite was performed. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      RegsScratch : TAllUsedRegs;
      hpPrev : tai;
    begin
      Result:=false;

      { at least a source and a destination operand are needed }
      if taicpu(p).ops < 2 then
        exit;

      { the multiplier must be a constant or a full address reference }
      if not((taicpu(p).oper[0]^.typ = top_const) or
             ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) then
        exit;

      if taicpu(p).oper[1]^.typ <> top_reg then
        exit;

      { a form with more than two operands is only accepted when operand 2
        is the same register as operand 1 }
      if (taicpu(p).ops <> 2) and
         not((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg)) then
        exit;

      { the preceding instruction has to be a register to register mov ... }
      if not(GetLastInstruction(p,hpPrev) and
             MatchInstruction(hpPrev,A_MOV,[]) and
             MatchOpType(hpPrev,top_reg,top_reg)) then
        exit;

      { ... writing the register the imul works on (or its 32 bit part when
        the imul itself is 64 bit) }
      if not((taicpu(hpPrev).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
             ((taicpu(hpPrev).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hpPrev).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
        exit;

      CopyUsedRegs(RegsScratch);
      if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,RegsScratch)) then
        begin
          { build the three operand form: imul y,reg1,reg2 }
          taicpu(p).ops := 3;
          taicpu(p).loadreg(1,taicpu(hpPrev).oper[0]^.reg);
          taicpu(p).loadreg(2,taicpu(hpPrev).oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(hpPrev);
          hpPrev.free;
          result:=true;
        end;
      ReleaseUsedRegs(RegsScratch);
    end;
  { First-pass peephole handling for the AND instruction p.

    Looks at p together with the instruction following it and tries, in
    this order:

      AndAnd2And     and const1,reg / and const2,reg
                       --> and (const1 and const2),reg
      AndMovzToAnd   and const,reg / movzx sub(reg),reg
                       --> and const,reg      (const fits the source width)
      AndMovsxToAnd  and const,reg / movsx sub(reg),reg
                       --> and const,reg      (const fits below the sign bit)
      and x,reg / jxx --> test x,reg / jxx    (reg not in UsedRegs,
                                               conditional jumps only)

    Result is true only when p itself was replaced (AndAnd2And); in that
    case p points at the surviving instruction.

    Changes compared with the previous revision:
      - The MOVZX case compared the sub-register ids of the MOVZX source and
        destination.  Only widening opsizes are accepted, so that test looks
        unsatisfiable and the case could not fire.  It now compares the
        super registers, exactly like the MOVSX case.
      - Both extension cases reject a high-byte source register (R_SUBH):
        after an AND of the whole register, an extension of the high byte is
        not redundant and must not be removed. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      { every pattern below needs a following instruction }
      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_AND,[]) and
         MatchOpType(hp1,top_const,top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { the second register must contain the first one, so compare their subreg types }
         (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
         { only merge when the combined mask stays below $80000000 }
         (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVZX,[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         { source and destination of the movzx must be parts of the same
           register; compare super registers, the sub registers differ by
           construction for a widening move }
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { a high-byte source is not the low part of the masked register }
         (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          { the zero extension is redundant when the mask already clears
            every bit above the source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { a high-byte source is not the low part of the masked register }
         (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          { the sign extension is redundant when the mask also clears the
            sign bit of the source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode<>A_JMP) and
         not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole handling for the MOV instruction p: rewrites
    "mov $0, %reg" as "xor %reg, %reg".  The rewrite is skipped while
    NR_DEFAULTFLAGS is recorded in UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source must be the constant 0 }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { destination must be a register }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { do not touch the instruction while the flags register is in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1297. end.